CP 3

The document discusses various techniques for modeling relationships between variables using regression and classification trees, including decision trees, regression splines, smoothing splines, local regression, and generalized additive models (GAM). It covers the basic structure and optimization goals of decision trees, as well as pros and cons compared to linear models. Regression tree techniques are also discussed for classification problems using classification error rate, Gini index, and cross-entropy for evaluating splits.


library(ISLR)
attach(Wage)

# Polynomial Regression I
fit = lm(wage ~ poly(age, 4), data=Wage)                   # orthogonal polynomials
coef(summary(fit))                                         # print coefficient table
fit2  = lm(wage ~ poly(age, 4, raw=T), data=Wage)          # raw (non-orthogonal) polynomials
fit2a = lm(wage ~ age + I(age^2) + I(age^3) + I(age^4), data=Wage)
fit2b = lm(wage ~ cbind(age, age^2, age^3, age^4), data=Wage)   # same fitted values, different coefficients
agelims  = range(age)
age.grid = seq(from=agelims[1], to=agelims[2])             # integer grid over the observed age range
preds = predict(fit, newdata=list(age=age.grid), se=TRUE)  # make prediction
se.bands = cbind(preds$fit + 2*preds$se.fit, preds$fit - 2*preds$se.fit)   # standard error band at 2 se
par(mfrow=c(1,2), mar=c(4.5,4.5,1,1), oma=c(0,0,4,0))      # 1x2 grid; mar: (bottom,left,top,right); oma: outer margin
plot(age, wage, xlim=agelims, cex=.5, col='darkgrey')
title('D-4 Poly', outer=T)
lines(age.grid, preds$fit, lwd=2, col='blue')              # add fit curve
matlines(age.grid, se.bands, lwd=1, col='blue', lty=3)     # add standard error band
fit.1 = lm(wage ~ age, data=Wage)
fit.2 = lm(wage ~ poly(age, 2), data=Wage)
fit.3 = lm(wage ~ poly(age, 3), data=Wage)
fit.4 = lm(wage ~ poly(age, 4), data=Wage)
fit.5 = lm(wage ~ poly(age, 5), data=Wage)
anova(fit.1, fit.2, fit.3, fit.4, fit.5)                   # choose the degree: stop where the added term's p-value becomes insignificant

# Polynomial Regression II (logistic)
fit = glm(I(wage > 250) ~ poly(age, 4), data=Wage, family=binomial)   # create fit
preds = predict(fit, newdata=list(age=age.grid), se=T)     # make prediction (on the logit scale)
# alternative: preds = predict(fit, newdata=list(age=age.grid), type='response', se=T)
pfit = exp(preds$fit) / (1 + exp(preds$fit))               # convert logit to probability estimate
se.bands.logit = cbind(preds$fit + 2*preds$se.fit, preds$fit - 2*preds$se.fit)
se.bands = exp(se.bands.logit) / (1 + exp(se.bands.logit)) # standard error band at 2 se, on the probability scale
plot(age, I(wage > 250), xlim=agelims, type='n', ylim=c(0, .2))
points(jitter(age), I((wage > 250)/5), cex=.5, pch='|', col='darkgrey')   # jitter(): 'rug plot' so values do not overlap
lines(age.grid, pfit, lwd=2, col='blue')
matlines(age.grid, se.bands, lwd=1, col='blue', lty=3)     # plot i) fit, ii) 2-se bands

# Step Functions
table(cut(age, 4))                                         # counts in 4 'age buckets'
fit = lm(wage ~ cut(age, 4), data=Wage)                    # piecewise-constant (partitioned) fit
coef(summary(fit))

# Splines I (regression splines)
library(splines)
fit = lm(wage ~ bs(age, knots=c(25, 40, 60)), data=Wage)   # bs(): matrix of basis functions for the specified knots
pred = predict(fit, newdata=list(age=age.grid), se=T)      # make prediction
plot(age, wage, col='gray')
lines(age.grid, pred$fit, lwd=2)
lines(age.grid, pred$fit + 2*pred$se, lty='dashed')
lines(age.grid, pred$fit - 2*pred$se, lty='dashed')
dim(bs(age, knots=c(25, 40, 60)))                          # two ways to check df
attr(bs(age, df=6), 'knots')                               # knots placed at quantiles of age

# Splines II (natural splines)
fit2 = lm(wage ~ ns(age, df=4), data=Wage)
pred2 = predict(fit2, newdata=list(age=age.grid), se=T)
lines(age.grid, pred2$fit, col='red', lwd=2)

# Splines III (smoothing splines)
fit  = smooth.spline(age, wage, df=16)
fit2 = smooth.spline(age, wage, cv=TRUE)                   # leave-one-out CV selects fit2$df of about 6.8
plot(age, wage, xlim=agelims, cex=.5, col='darkgrey')
lines(fit,  col='red',  lwd=2)
lines(fit2, col='blue', lwd=2)

# Local Regression
fit  = loess(wage ~ age, span=.2, data=Wage)               # span=.2: neighborhood consists of 20% of the observations
fit2 = loess(wage ~ age, span=.5, data=Wage)
plot(age, wage, xlim=agelims, cex=.5, col='darkgrey')
lines(age.grid, predict(fit,  data.frame(age=age.grid)), col='red',  lwd=2)
lines(age.grid, predict(fit2, data.frame(age=age.grid)), col='blue', lwd=2)

# GAM
gam1 = lm(wage ~ ns(year, 4) + ns(age, 5) + education, data=Wage)   # ns() for year & age; education enters as a regular qualitative predictor
library(gam)
gam.m3 = gam(wage ~ s(year, 4) + s(age, 5) + education, data=Wage)
par(mfrow=c(1,3))
plot(gam.m3, se=T, col='blue')                             # one panel per predictor, each showing that predictor's fitted contribution to the response
gam.m1 = gam(wage ~ s(age, 5) + education, data=Wage)
gam.m2 = gam(wage ~ year + s(age, 5) + education, data=Wage)
anova(gam.m1, gam.m2, gam.m3, test='F')                    # model comparison
gam.lo   = gam(wage ~ s(year, df=4) + lo(age, span=.7) + education, data=Wage)   # lo(): local regression term
gam.lo.i = gam(wage ~ lo(year, age, span=.5) + education, data=Wage)             # local-regression interaction of year and age
gam.lr = gam(I(wage > 250) ~ year + s(age, df=5) + education, family=binomial, data=Wage)   # logistic GAM
par(mfrow=c(1,3))
plot(gam.lr, se=T, col='green')
7 Tree-Based Models

7.1 Decision Trees

7.1.1 Model of DT

In a typical decision tree (DT) task, we have n observations x_1, ..., x_n with p predictors, and we would like to compute an estimate \hat{y}_i for each response y_i. Graphically, the standard example illustrates how the predictors years and hits are used to predict a baseball player's salary. In that example, each of the two predictors is split in two at a dividing point chosen to minimize the RSS (defined below). The tree can equivalently be represented as a partition of the predictor space into decision regions, as in Fig. 7.2.

Having the basic setup of a decision tree task in mind, we now formulate the prediction rule and the optimization goal of a decision tree.

• Prediction
– Given the set of possible values of the predictors X_1, ..., X_p, partition this predictor space into J distinct and non-overlapping regions R_1, ..., R_J.
– For every observation x_i falling in region R_j, the prediction \hat{y}_i is the mean of the training responses y_i in R_j, denoted \hat{y}_{R_j}.

• Optimization Goal

\sum_{j=1}^{J} \sum_{i \in R_j} (y_i - \hat{y}_{R_j})^2    (7.1)

Essentially, in constructing a decision tree, we make two decisions:

• The cutting points s_1, ..., s_k, one per predictor used; the cutting point s_j splits predictor X_j into two decision regions:

R_1(j, s_j) = \{ X \mid X_j < s_j \}  and  R_2(j, s_j) = \{ X \mid X_j \geq s_j \}    (7.2)

• The sequence of predictors X_1, ..., X_k, where k ≤ p, by which the partitioning of the decision space is carried out. The sequence should minimize the combined RSS over all decision regions:

\sum_{j=1}^{J} \sum_{i: x_i \in R_j(j, s_j)} (y_i - \hat{y}_{R_j})^2    (7.3)

In practice it is clearly inefficient to scan through all possible sequences (i.e. all possible tree structures). Further, for the sake of a simple model we would like to involve as few predictors (and thus decision regions) as possible, at a reasonable cost in RSS; the more predictors we use, the lower the RSS will be on the training set, but this risks overfitting the model. Therefore, the optimization goal in Eq. 7.1 is modified with a penalty term so that the number of terminal nodes of the tree is also kept small (Eq. 7.4, where |T| is the number of terminal nodes and m indexes the decision regions):

\sum_{m=1}^{|T|} \sum_{i: x_i \in R_m} (y_i - \hat{y}_{R_m})^2 + \alpha |T|    (7.4)
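To make the split rule of Eq. 7.2 and Eq. 7.3 concrete, here is a minimal R sketch (not part of the original notes; the function name best.split and its brute-force search are illustrative only) of how a single greedy split is chosen: for every predictor j and every candidate cut point s, compute the two-region RSS and keep the pair that minimizes it.

# illustrative helper: exhaustive search for the single best split (j, s) under Eq. 7.3
best.split = function(X, y) {                      # X: data frame of predictors, y: numeric response
  best = list(rss = Inf)
  for (j in seq_along(X)) {
    for (s in sort(unique(X[[j]]))) {
      left  = y[X[[j]] <  s]                       # region R1(j, s)
      right = y[X[[j]] >= s]                       # region R2(j, s)
      if (length(left) == 0 || length(right) == 0) next
      rss = sum((left - mean(left))^2) + sum((right - mean(right))^2)
      if (rss < best$rss) best = list(j = names(X)[j], s = s, rss = rss)
    }
  }
  best                                             # predictor, cut point, and resulting RSS
}
# recursive binary splitting applies this same search again within each newly created region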

The sequence selection can be carried out with some variation of the forward/backward/hybrid selection procedures (cf. Ch 5.1), which are not elaborated here. To guard against overfitting, each candidate tree is also subjected to cross-validation, in which the MSE is computed to evaluate that particular tree's performance.
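In practice the greedy growing, the cost-complexity penalty of Eq. 7.4, and the cross-validation step are handled by the tree package. A minimal sketch on the Hitters data from ISLR (the baseball salary example mentioned above; object names such as tree.fit, cv.fit, and pruned are arbitrary):

library(tree)
hit = na.omit(Hitters)                             # Salary has missing values
tree.fit = tree(log(Salary) ~ Years + Hits, data=hit)
plot(tree.fit); text(tree.fit)                     # tree diagram with the split rule at each node
cv.fit = cv.tree(tree.fit)                         # CV over the cost-complexity (alpha) sequence of Eq. 7.4
plot(cv.fit$size, cv.fit$dev, type='b')            # deviance (RSS) against the number of terminal nodes |T|
pruned = prune.tree(tree.fit, best=3)              # keep the subtree with 3 terminal nodes
plot(pruned); text(pruned)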
A regression tree used in a classification task (a classification tree) differs in both the way predictions are made and the optimization goal.

• Prediction: each observation is assigned to the most commonly occurring class among the training observations in its decision region.

• Optimization Goals, where \hat{p}_{mk} is the proportion of training observations in the mth region that are from the kth class:

– Classification Error Rate: E = 1 - \max_k (\hat{p}_{mk})    (7.5)

– Gini Index: G = \sum_{k=1}^{K} \hat{p}_{mk} (1 - \hat{p}_{mk})    (7.6)

– Cross-Entropy: D = - \sum_{k=1}^{K} \hat{p}_{mk} \log \hat{p}_{mk}    (7.7)

The Gini index is a measure of node purity: a small value indicates that a node contains predominantly observations from a single class. Cross-entropy likewise measures node purity. In building a classification tree, the Gini index or cross-entropy is used to evaluate the quality of a particular split when growing the tree, since both are sensitive to node purity; the classification error rate is preferable when pruning if the objective is the prediction accuracy of the final tree. Finally, note that node purity is important because it reduces the uncertainty of a decision when information is incomplete.
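As a sketch of these criteria in practice, the example below grows a classification tree on the Carseats data from the ISLR package (a dataset not used elsewhere in these notes, chosen only for illustration): tree() splits by deviance (cross-entropy) by default and accepts split='gini' for the Gini index, while prune.misclass() prunes using the classification error rate.

library(tree)
cs = Carseats
cs$High = factor(ifelse(cs$Sales > 8, 'Yes', 'No'))   # binarize the response
tree.cs = tree(High ~ . - Sales, data=cs)             # splits chosen by deviance (cross-entropy); split='gini' would use the Gini index
summary(tree.cs)                                      # reports the training misclassification error rate
cv.cs = cv.tree(tree.cs, FUN=prune.misclass)          # prune guided by the classification error rate
best.size = cv.cs$size[which.min(cv.cs$dev)]          # |T| with the lowest CV error
pruned.cs = prune.misclass(tree.cs, best=best.size)
plot(pruned.cs); text(pruned.cs, pretty=0)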
7.1.2 DT: Pros & Cons

Many tasks can be approached with either a DT or a linear model, so we need to decide which one is better suited to a particular data set and task. A general rule of thumb is as follows: a linear model works better if the relationship between the predictors and the response is close to linear; if this relationship is highly non-linear and complex, a DT is the better bet. More generally, the pros and cons of DT are listed as follows:
