
HW8:

1.1:

def one_resampled_percentage(tbl):
    bootstrap_sample = tbl.sample(with_replacement=True)
    imm_votes = bootstrap_sample.where("Vote", "Imm Thai").num_rows
    total_votes = bootstrap_sample.num_rows
    imm_percentage = (imm_votes / total_votes) * 100
    return imm_percentage

one_resampled_percentage(votes)

1.2:

def percentages_in_resamples():
    percentage_imm = make_array()
    for i in range(2023):
        percentage_imm = np.append(percentage_imm, one_resampled_percentage(votes))
    return percentage_imm

1.3:
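The code below assumes resampled_percentages holds the output of the function from 1.2; a minimal setup sketch (the variable name is taken from the code below):

resampled_percentages = percentages_in_resamples()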

imm_lower_bound = np.percentile(resampled_percentages, 2.5)
imm_upper_bound = np.percentile(resampled_percentages, 97.5)

print(f"Bootstrapped 95% confidence interval for the percentage of Imm Thai voters in the population: [{imm_lower_bound:.2f}, {imm_upper_bound:.2f}]")

1.4:

def one_resampled_difference(tbl):
    bootstrap = tbl.sample(with_replacement=True)
    imm_percentage = bootstrap.where("Vote", "Imm Thai").num_rows / bootstrap.num_rows * 100
    lead = imm_percentage - (100 - imm_percentage)
    return lead
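The excerpt skips 1.5, but 1.6 below uses a sampled_leads array. By analogy with 1.2, it was presumably built along these lines (a sketch, not the original code; the repetition count is a guess):

sampled_leads = make_array()
for i in np.arange(5000):
    sampled_leads = np.append(sampled_leads, one_resampled_difference(votes))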

1.6:

diff_lower_bound = np.percentile(sampled_leads, 2.5)
diff_upper_bound = np.percentile(sampled_leads, 97.5)

print("Bootstrapped 95% confidence interval for Imm Thai's true lead over Lucky House, Thai Temple, and Thai Basil combined: [{:f}%, {:f}%]".format(diff_lower_bound, diff_upper_bound))

2.2:

# Each interval is a 95% CI, so about 95% of the 6,000 intervals
# are expected to contain the true percentage: 6000 * 0.95 = 5700.
true_percentage_intervals = 6000 * 0.95

HW9:

# Question 2.1

smallest = 1112

smallest

# Question 2.2

grader.check("q2_1")

# Question 2.3

smallest_num = 767

smallest_num

# Question 2.4

grader.check("q2_3")

# Question 2.5

option = 1

option

# Question 2.6

grader.check("q2_5")

# Question 3.1

sample = Table().with_columns(
    "Vote", make_array("Yes", "No"),
    "Count", make_array(210, 190))

sample_size = sum(sample.column("Count"))

sample_with_proportions = sample.with_column("Proportion", sample.column("Count") / sample_size)
sample_with_proportions

# Question 3.2

resample_yes_proportions = make_array()

for i in np.arange(10000):
    resample = sample_proportions(400, [0.525, 0.475])
    resample_yes_proportions = np.append(resample_yes_proportions, resample.item(0))

Table().with_column("Resample Yes proportion", resample_yes_proportions).hist(bins=np.arange(.2, .8, .01))
resample_yes_proportions
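Note: sample_proportions(400, [0.525, 0.475]) comes from the datascience library; it simulates one sample of 400 draws from the given category distribution and returns the sample proportions in the same order, so resample.item(0) is the simulated "Yes" proportion.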

# Question 3.3

grader.check("q3_1")

# Question 3.4
# For a population of 0/1 values, SD = sqrt(p * (1 - p)).
approx_pop_sd = np.sqrt(0.525 * 0.475)

# SD of the sample mean = population SD / sqrt(sample size).
approximate_sd = approx_pop_sd / np.sqrt(400)

approximate_sd
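As a sanity check: 0.525 * 0.475 ≈ 0.2494, so the approximate population SD is √0.2494 ≈ 0.4994, and dividing by √400 = 20 gives a sample mean SD of about 0.0250.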

# Question 3.5

grader.check("q3_3")

# Question 3.6

# Empirical SD of the 10,000 simulated sample proportions.
exact_sd = np.std(resample_yes_proportions)

exact_sd

# Question 3.7

grader.check("q3_4")

# Question 3.8

lower_limit = np.mean(resample_yes_proportions) - 1.96 * approximate_sd

upper_limit = np.mean(resample_yes_proportions) + 1.96 * approximate_sd

print('lower:', lower_limit, 'upper:', upper_limit)
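The factor 1.96 comes from the normal curve: about 95% of a normal distribution lies within 1.96 SDs of its mean, so mean ± 1.96 × SD is an approximate 95% confidence interval.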

# Question 3.9

grader.check("q3_5")

# Question 3.10

estimated_population_sd = np.sqrt(0.525 * 0.475)

ella_sample_size = 9975

ella_sample_mean_sd = estimated_population_sd / np.sqrt(ella_sample_size)


print("With Ella's sample size, you would predict a sample mean SD of %f." %
ella_sample_mean_sd)

# Question 3.11

grader.check("q3_6")

# Question 3.12

smaller_sample_size = 4000

smaller_sample_mean_sd = estimated_population_sd / np.sqrt(smaller_sample_size)

print("With this smaller sample size, you would predict a sample mean SD of %f" % smaller_sample_mean_sd)

# Question 3.13

grader.check("q3_7")

# Question 3.14

larger_sample_size = 11000

larger_sample_mean_sd = estimated_population_sd / np.sqrt(larger_sample_size)

print("With this larger sample size, you would predict a sample mean SD of %f" % larger_sample_mean_sd)

# Question 3.15

grader.check("q3_8")

# Question 3.16

min_sufficient = False
min_sufficient

# Question 3.17

grader.check("q3_9")

# Final Submission

# Save your notebook first, then run this cell to export your submission.

grader.export(pdf=False, run_tests=True)

HW10:

1.1:

def standard_units(data):
    """Converts data to standard units."""
    return (data - np.mean(data)) / np.std(data)
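A quick illustrative check (the input array is hypothetical; output rounded):

standard_units(make_array(1, 2, 3))  # array([-1.2247, 0., 1.2247])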

1.2:

standard_array = make_array(2, 3, 4, 5)

1.3:

def correlation(x, y):
    """Computes the correlation between two arrays."""
    return np.mean(standard_units(x) * standard_units(y))

1.4:

r_array = make_array(1, 2, 3, 4)

1.5:

def slope(x, y):
    r = correlation(x, y)
    return r * np.std(y) / np.std(x)

1.6:

slope_array = make_array(2, 4, 5)

1.7:
def intercept(x, y):
    """Computes the intercept of the regression line."""
    return np.mean(y) - slope(x, y) * np.mean(x)

1.8:

intercept_array = make_array(1, 4)

1.9:

def predict(tbl, col1, col2):
    x = tbl.column(col1)
    y = tbl.column(col2)
    return slope(x, y) * x + intercept(x, y)
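A hedged usage sketch; the table and column names here are hypothetical, not from the assignment:

# Fitted value of 'y' for every row's 'x' in some table tbl.
predictions = predict(tbl, 'x', 'y')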

2.2:

r_guess = -0.75

2.7:

def rmse(slope, intercept):
    # 'ages' and 'values' are global arrays defined earlier in the assignment.
    predictions = slope * ages + intercept
    errors = predictions - values
    squared_errors = errors ** 2
    mse = np.mean(squared_errors)
    return np.sqrt(mse)
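In the original assignment, a function like this is typically passed to the datascience library's minimize to find the least-squares line; a sketch, assuming minimize is in scope via from datascience import *:

best = minimize(rmse)  # array of the minimizing arguments
best_slope, best_intercept = best.item(0), best.item(1)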

2.10:

error_array = make_array(2, 4)

2.11:

scoring_array = make_array(3)

HW11:

# Question 0.3

secret_word = 'abc'

# Question 0.4

grader.check("q0_1")

# Question 1.1

birds = Table.read_table('snowy_plover.csv')
birds

# Question 1.2

# Just run this cell and examine the scatter plot.

birds.scatter('Egg Weight', "Bird Weight", fit_line=True)

# Question 1.3

def standard_units(arr):
    return (arr - np.mean(arr)) / np.std(arr)

def correlation(tbl, x_col, y_col):
    return np.mean(standard_units(tbl.column(x_col)) * standard_units(tbl.column(y_col)))

# Question 1.4

def fit_line(tbl, x_col, y_col):
    slope = correlation(tbl, x_col, y_col) * np.std(tbl.column(y_col)) / np.std(tbl.column(x_col))
    intercept = np.mean(tbl.column(y_col)) - slope * np.mean(tbl.column(x_col))
    return make_array(slope, intercept)

fit_line(birds, "Egg Weight", "Bird Weight")

# Question 1.5

resampled_slopes = make_array()

for i in np.arange(1000):
    birds_bootstrap = birds.sample(with_replacement=True)
    # Fit the line to the bootstrap resample, not the original table.
    bootstrap_line = fit_line(birds_bootstrap, 'Egg Weight', 'Bird Weight')
    bootstrap_slope = bootstrap_line.item(0)
    resampled_slopes = np.append(resampled_slopes, bootstrap_slope)

# Question 1.6

lower_end = np.percentile(resampled_slopes, 2.5)
upper_end = np.percentile(resampled_slopes, 97.5)

print("95% confidence interval for slope: [{:g}, {:g}]".format(lower_end, upper_end))

# Question 2.1

def fitted_value(table, x_col, y_col, given_x):
    line = fit_line(table, x_col, y_col)
    slope = line.item(0)
    intercept = line.item(1)
    return slope * given_x + intercept

egg_weight_eight = fitted_value(birds, "Egg Weight", "Bird Weight", 8)
egg_weight_eight

grader.check("q2_1")

# Question 2.2

experts_egg = fitted_value(birds, 'Egg Weight', 'Bird Weight', 9)

experts_egg

grader.check("q2_2")

# Question 2.3

def compute_resampled_line(tbl, x_col, y_col):
    resample = tbl.sample(with_replacement=True)
    resampled_line = fit_line(resample, x_col, y_col)
    return resampled_line

grader.check("q2_3")

# Question 2.4

predictions_for_eight = regression_lines.column('Slope') * 8 + regression_lines.column('Intercept')

# This will make a histogram of your predictions:
table_of_predictions = Table().with_column('Predictions at Egg Weight=8', predictions_for_eight)
table_of_predictions.hist('Predictions at Egg Weight=8', bins=20)

grader.check("q2_4")

# Question 2.5

lower_bound = np.percentile(predictions_for_eight, 2.5)

upper_bound = np.percentile(predictions_for_eight, 97.5)

print('95% Confidence interval for predictions for x=8: (', lower_bound, ",", upper_bound, ')')

grader.check("q2_5")

# Question 2.6

plover_statements = make_array(1)

grader.check("q2_6")

HW12:

1.1.1:
def distance(arr1, arr2):
    return np.sqrt(np.sum((arr1 - arr2) ** 2))
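A quick check using a 3-4-5 right triangle:

distance(make_array(0, 0), make_array(3, 4))  # 5.0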

1.2:

shuffled_table = coordinates.sample(with_replacement=False)  # Shuffle the table

train = shuffled_table.take(np.arange(0, 75))
test = shuffled_table.take(np.arange(75, 100))

print("Training set:\t", train.num_rows, "examples")
print("Test set:\t", test.num_rows, "examples")
train.show(5), test.show(5);

1.3:

features = make_array("longitude", "latitude")

features

1.4:

def row_to_array(row, features):
    arr = make_array()
    for feature in features:
        arr = np.append(arr, row.item(feature))
    return arr

def classify(test_row, k, train):
    test_row_features_array = row_to_array(test_row, features)
    distances = make_array()
    for train_row in train.rows:
        train_row_features_array = row_to_array(train_row, features)
        row_distance = distance(test_row_features_array, train_row_features_array)
        distances = np.append(distances, row_distance)

    train_with_distances = train.with_column("distance", distances)
    nearest_neighbors = train_with_distances.sort("distance").take(np.arange(k))
    most_common_label = nearest_neighbors.group('school').sort('count', descending=True).column('school').item(0)
    return most_common_label

# Don't modify/delete the code below
first_test = classify(test.row(0), 5, train)
first_test

1.5:

def three_classify(row):
    return classify(row, 3, train)

test_with_prediction = test.with_column("prediction", test.apply(three_classify))

labels_correct = np.count_nonzero(test_with_prediction.column("school") == test_with_prediction.column("prediction"))

accuracy = labels_correct / test.num_rows
accuracy

1.9.1:

prob_test_given_stanford = 0.5
prob_stanford = 23 / 100
prob_test_given_berkeley = 0.2
prob_berkeley = 77 / 100

prob_test = (prob_test_given_stanford * prob_stanford) + (prob_test_given_berkeley * prob_berkeley)

prob_furd = (prob_test_given_stanford * prob_stanford) / prob_test
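This is Bayes' rule. Plugging in the numbers as a check: prob_test = 0.5 * 0.23 + 0.2 * 0.77 = 0.115 + 0.154 = 0.269, so prob_furd = 0.115 / 0.269 ≈ 0.4275.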

1.9.2:

prob_test = 0.5
