HW8:
1.1:
def one_resampled_percentage(tbl):
    """Return the percentage of 'Imm Thai' votes in one bootstrap resample of tbl."""
    resample = tbl.sample(with_replacement=True)
    n_imm = resample.where("Vote", "Imm Thai").num_rows
    return 100 * n_imm / resample.num_rows
one_resampled_percentage(votes)
1.2:
def percentages_in_resamples():
    """Return an array of 2023 bootstrapped 'Imm Thai' vote percentages."""
    results = make_array()
    for _ in np.arange(2023):
        results = np.append(results, one_resampled_percentage(votes))
    return results
1.3:
# 95% bootstrap CI: the middle 95% of the resampled percentages.
imm_lower_bound = np.percentile(resampled_percentages, 2.5)
imm_upper_bound = np.percentile(resampled_percentages, 97.5)
print(f"Bootstrapped 95% confidence interval for the percentage of Imm Thai "
      f"voters in the population: [{imm_lower_bound:.2f}, {imm_upper_bound:.2f}]")
1.4:
def one_resampled_difference(tbl):
    """Return Imm Thai's lead (percentage points) over all others in one resample."""
    resample = tbl.sample(with_replacement=True)
    pct_imm = 100 * resample.where("Vote", "Imm Thai").num_rows / resample.num_rows
    # Lead = Imm Thai's share minus everyone else's combined share.
    return pct_imm - (100 - pct_imm)
1.6:
# Middle 95% of the bootstrapped leads.
diff_lower_bound = np.percentile(sampled_leads, 2.5)
diff_upper_bound = np.percentile(sampled_leads, 97.5)
print("Bootstrapped 95% confidence interval for Imm Thai's true lead over "
      "Lucky House, Thai Temple, and Thai Basil combined: [{:f}%, {:f}%]"
      .format(diff_lower_bound, diff_upper_bound))
2.2:
true_percentage_intervals = 6000 * 0.95
HW9:
# Question 2.1
# Numeric free-response answer, checked by the autograder call below.
smallest = 1112
smallest
# Question 2.2
grader.check("q2_1")
# Question 2.3
# Numeric free-response answer for q2.3.
smallest_num = 767
smallest_num
# Question 2.4
grader.check("q2_3")
# Question 2.5
# Multiple-choice answer for q2.5 (option number).
option = 1
option
# Question 2.6
grader.check("q2_5")
# Question 3.1
# Build the 400-voter sample table, then attach each vote's share of the total.
sample = Table().with_columns(
    "Vote", make_array("Yes", "No"),
    "Count", make_array(210, 190))
sample_size = sum(sample.column("Count"))
sample_with_proportions = sample.with_column(
    "Proportion", sample.column("Count") / sample_size)
sample_with_proportions
# Question 3.2
# Simulate 10,000 resamples of 400 voters from a 52.5% "Yes" population,
# recording the "Yes" proportion of each resample.
resample_yes_proportions = make_array()
for _ in np.arange(10000):
    one_resample = sample_proportions(400, [0.525, 0.475])
    resample_yes_proportions = np.append(resample_yes_proportions,
                                         one_resample.item(0))
Table().with_column(
    "Resample Yes proportion", resample_yes_proportions
).hist(bins=np.arange(.2, .8, .01))
resample_yes_proportions
# Question 3.3
grader.check("q3_1")
# Question 3.4
# SD of a 0/1 population with p = 0.525, then the SD of the sample
# proportion for samples of size 400 (population SD / sqrt(sample size)).
approx_pop_sd = (0.525 * 0.475) ** 0.5
approximate_sd = approx_pop_sd / (400 ** 0.5)
approximate_sd
# Question 3.5
grader.check("q3_3")
# Question 3.6
# Empirical SD of the simulated sample proportions; should be close to
# the formula-based approximate_sd computed above.
exact_sd = np.std(resample_yes_proportions)
exact_sd
# Question 3.7
grader.check("q3_4")
# Question 3.8
# Normal-approximation 95% confidence interval: mean +/- 1.96 SDs.
center = np.mean(resample_yes_proportions)
lower_limit = center - 1.96 * approximate_sd
upper_limit = center + 1.96 * approximate_sd
print('lower:', lower_limit, 'upper:', upper_limit)
# Question 3.9
grader.check("q3_5")
# Question 3.10
# Plug Ella's sample size into the SD-of-the-sample-mean formula:
# population SD / sqrt(sample size).
estimated_population_sd = (0.525 * 0.475) ** 0.5
ella_sample_size = 9975
ella_sample_mean_sd = estimated_population_sd / ella_sample_size ** 0.5
print("With Ella's sample size, you would predict a sample mean SD of %f." % ella_sample_mean_sd)
# Question 3.11
grader.check("q3_6")
# Question 3.12
# Same formula with a smaller sample: the sample-mean SD grows.
smaller_sample_size = 4000
smaller_sample_mean_sd = estimated_population_sd / np.sqrt(smaller_sample_size)
print("With this smaller sample size, you would predict a sample mean SD of %f" % smaller_sample_mean_sd)
# Question 3.13
grader.check("q3_7")
# Question 3.14
# Same formula with a larger sample: the sample-mean SD shrinks.
larger_sample_size = 11000
larger_sample_mean_sd = estimated_population_sd / np.sqrt(larger_sample_size)
print("With this larger sample size, you would predict a sample mean SD of %f" % larger_sample_mean_sd)
# Question 3.15
grader.check("q3_8")
# Question 3.16
# True/False answer: whether the minimum sample size is sufficient
# (checked by the autograder below).
min_sufficient = False
min_sufficient
# Question 3.17
grader.check("q3_9")
# Final Submission
# Save your notebook first, then run this cell to export your submission.
grader.export(pdf=False, run_tests=True)
HW10:
1.1:
def standard_units(data):
    """Convert data to standard units: (value - mean) / SD."""
    mu = np.mean(data)
    sd = np.std(data)
    return (data - mu) / sd
1.2:
standard_array = make_array(2,3,4,5)
1.3:
def correlation(x, y):
    """Computes the correlation between two arrays.

    Pearson r: the mean of the products of the two arrays in standard units.
    """
    x_su = (x - np.mean(x)) / np.std(x)
    y_su = (y - np.mean(y)) / np.std(y)
    return np.mean(x_su * y_su)
1.4:
r_array = make_array(1, 2, 3, 4)
1.5:
def slope(x, y):
    """Slope of the least-squares regression line of y on x, in original units."""
    # r = mean of products of standard units; slope = r * SD(y) / SD(x).
    x_su = (x - np.mean(x)) / np.std(x)
    y_su = (y - np.mean(y)) / np.std(y)
    r = np.mean(x_su * y_su)
    return r * np.std(y) / np.std(x)
1.6:
slope_array = make_array(2, 4, 5)
1.7:
def intercept(x, y):
    """Computes the intercept of the regression line."""
    # The least-squares line passes through the point of averages.
    y_bar = np.mean(y)
    x_bar = np.mean(x)
    return y_bar - x_bar * slope(x, y)
1.8:
intercept_array = make_array(1, 4)
1.9:
def predict(tbl, col1, col2):
    """Fitted values of the regression of col2 on col1, one per row of tbl."""
    xs = tbl.column(col1)
    ys = tbl.column(col2)
    m = slope(xs, ys)
    b = intercept(xs, ys)
    return m * xs + b
2.2:
r_guess = -0.75
2.7:
def rmse(slope, intercept):
    """Root-mean-squared error of the line slope*x + intercept.

    NOTE(review): reads the module-level arrays `ages` (x) and `values` (y)
    defined elsewhere in the notebook.
    """
    residuals = (slope * ages + intercept) - values
    return np.sqrt(np.mean(residuals ** 2))
2.10:
# Answer for 2.10 — presumably the indices of the chosen statements;
# verify against the notebook prompt.
error_array = make_array(2, 4)
2.11:
# Answer for 2.11.
scoring_array = make_array(3)
HW11:
# Question 0.3
# Free-response answer for q0.3, checked by the autograder below.
secret_word = 'abc'
# Question 0.4
grader.check("q0_1")
# Question 1.1
# Load the snowy plover data set from CSV and display it.
birds = Table.read_table('snowy_plover.csv')
birds
# Question 1.2
# Just run this cell and examine the scatter plot.
birds.scatter('Egg Weight', "Bird Weight", fit_line=True)
# Question 1.3
def standard_units(arr):
    """Convert arr to standard units (mean 0, SD 1)."""
    mu, sd = np.mean(arr), np.std(arr)
    return (arr - mu) / sd
def correlation(tbl, x_col, y_col):
    """Pearson correlation between two columns of tbl."""
    x_su = standard_units(tbl.column(x_col))
    y_su = standard_units(tbl.column(y_col))
    return np.mean(x_su * y_su)
# Question 1.4
def fit_line(tbl, x_col, y_col):
    """Return make_array(slope, intercept) of the regression of y_col on x_col."""
    r = correlation(tbl, x_col, y_col)
    m = r * np.std(tbl.column(y_col)) / np.std(tbl.column(x_col))
    b = np.mean(tbl.column(y_col)) - m * np.mean(tbl.column(x_col))
    return make_array(m, b)
fit_line(birds, "Egg Weight", "Bird Weight")
# Question 1.5
# Bootstrap the slope: resample the birds table 1000 times and record the
# regression slope of each resample.
resampled_slopes = make_array()
for i in np.arange(1000):
    birds_bootstrap = birds.sample(with_replacement=True)
    # BUG FIX: fit the line to the bootstrap resample, not the original
    # `birds` table — the original code ignored `birds_bootstrap`, so every
    # iteration produced the identical slope and the bootstrap distribution
    # had zero spread (making the confidence interval a single point).
    bootstrap_line = fit_line(birds_bootstrap, 'Egg Weight', 'Bird Weight')
    bootstrap_slope = bootstrap_line.item(0)
    resampled_slopes = np.append(resampled_slopes, bootstrap_slope)
# Question 1.6
# Middle 95% of the bootstrapped slopes.
lower_end = np.percentile(resampled_slopes, 2.5)
upper_end = np.percentile(resampled_slopes, 97.5)
print("95% confidence interval for slope: [{:g}, {:g}]".format(lower_end, upper_end))
# Question 2.1
def fitted_value(table, x_col, y_col, given_x):
    """Predict y at given_x using the regression line fitted on table."""
    m, b = fit_line(table, x_col, y_col)
    return m * given_x + b
egg_weight_eight = fitted_value(birds, "Egg Weight", "Bird Weight", 8)
egg_weight_eight
grader.check("q2_1")
# Question 2.2
# Prediction at an egg weight of 9 grams, checked by the autograder below.
experts_egg = fitted_value(birds, 'Egg Weight', 'Bird Weight', 9)
experts_egg
grader.check("q2_2")
# Question 2.3
def compute_resampled_line(tbl, x_col, y_col):
    """Fit a regression line to one bootstrap resample of tbl."""
    one_resample = tbl.sample(with_replacement=True)
    return fit_line(one_resample, x_col, y_col)
grader.check("q2_3")
# Question 2.4
# One prediction at x = 8 from each bootstrapped regression line.
predictions_for_eight = (regression_lines.column('Slope') * 8
                         + regression_lines.column('Intercept'))
# This will make a histogram of your predictions:
table_of_predictions = Table().with_column(
    'Predictions at Egg Weight=8', predictions_for_eight)
table_of_predictions.hist('Predictions at Egg Weight=8', bins=20)
grader.check("q2_4")
# Question 2.5
# Middle 95% of the bootstrapped predictions at x = 8.
lower_bound = np.percentile(predictions_for_eight, 2.5)
upper_bound = np.percentile(predictions_for_eight, 97.5)
print('95% Confidence interval for predictions for x=8: (', lower_bound, ",", upper_bound, ')')
grader.check("q2_5")
# Question 2.6
# Answer for q2.6 — presumably the indices of the true statements;
# verify against the notebook prompt.
plover_statements = make_array(1)
grader.check("q2_6")
HW12:
1.1.1:
def distance(arr1, arr2):
    """Euclidean distance between two equal-length numeric arrays."""
    squared_diffs = (arr1 - arr2) ** 2
    return np.sum(squared_diffs) ** 0.5
1.2:
# Shuffle the table, then split it 75/25 into training and test sets.
shuffled_table = coordinates.sample(with_replacement=False)
train = shuffled_table.take(np.arange(0, 75))
test = shuffled_table.take(np.arange(75, 100))
print("Training set:\t", train.num_rows, "examples")
print("Test set:\t", test.num_rows, "examples")
train.show(5), test.show(5);
1.3:
# Column names used as the k-NN feature set.
features = make_array("longitude", "latitude")
features
1.4:
def row_to_array(row, features):
    """Collect the given feature values of a table row into a float array."""
    return np.array([row.item(f) for f in features], dtype=float)
def classify(test_row, k, train):
    """Predict the 'school' label of test_row by majority vote of its
    k nearest training rows.

    NOTE(review): reads the module-level `features` array to know which
    columns to compare.
    """
    target = row_to_array(test_row, features)
    dists = make_array()
    for train_row in train.rows:
        candidate = row_to_array(train_row, features)
        dists = np.append(dists, distance(target, candidate))
    ranked = train.with_column("distance", dists).sort("distance")
    nearest_neighbors = ranked.take(np.arange(k))
    label_counts = nearest_neighbors.group('school').sort('count', descending=True)
    return label_counts.column('school').item(0)
# Don't modify/delete the code below
first_test = classify(test.row(0), 5, train)
first_test
1.5:
def three_classify(row):
    """Classify one row using its 3 nearest neighbors in the training set."""
    return classify(row, 3, train)
# Predict every test row, then measure the fraction predicted correctly.
test_with_prediction = test.with_column(
    "prediction", test.apply(three_classify))
labels_correct = np.count_nonzero(
    test_with_prediction.column("school")
    == test_with_prediction.column("prediction"))
accuracy = labels_correct / test.num_rows
accuracy
1.9.1:
# P(test) via the law of total probability over the two schools,
# then P(Stanford | test) via Bayes' rule.
prob_test_given_stanford = 0.5
prob_stanford = 23 / 100
prob_test_given_berkeley = 0.2
prob_berkeley = 77 / 100
prob_test = (prob_stanford * prob_test_given_stanford
             + prob_berkeley * prob_test_given_berkeley)
prob_furd = prob_stanford * prob_test_given_stanford / prob_test
1.9.2:
prob_test = 0.5