# Adaptado de http://www.statmethods.net/advstats/cart.html
 
# Regression tree
# Regression trees are used to predict continuous response variables
 
 
library(rpart)
 
# Fit a regression tree (method = "anova") for Mileage on the cu.summary
# data, using Price, Country, Reliability and Type as predictors.
fit <- rpart(
  formula = Mileage ~ Price + Country + Reliability + Type,
  data = cu.summary,
  method = "anova"
)

# Inspect the fitted tree.
printcp(fit)  # print the complexity-parameter (CP) table
plotcp(fit)   # plot the cross-validation results
summary(fit)  # node-by-node details of the splits
 
# Diagnostic plots: rsq.rpart() draws two panels, so lay them out side by
# side on one page. par() returns the previous settings; restore them
# afterwards so the tree plots that follow are not squeezed into half a
# page each.
old_par <- par(mfrow = c(1, 2))
rsq.rpart(fit)
par(old_par)

# Plot the tree with uniform vertical spacing and label every node
# (all = TRUE), including the observation counts (use.n = TRUE).
plot(fit, uniform = TRUE,
     main = "Regression Tree for Mileage ")
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)

# Write a PostScript rendering of the tree to tree2.ps.
post(fit, file = "tree2.ps",
     title = "Regression Tree for Mileage ")
 
# Prune the tree at the complexity parameter (CP) with the lowest
# cross-validated error. The value is read from the fitted model's cptable
# instead of being a hand-copied, rounded constant, which goes stale when
# the data or fit change and may not match the exact value stored in the
# tree.
# NOTE: xerror comes from random cross-validation, so the selected row can
# vary between runs unless set.seed() is called before rpart().
best_row <- which.min(fit$cptable[, "xerror"])
best_cp <- fit$cptable[best_row, "CP"]
pfit <- prune(fit, cp = best_cp)

# Plot the pruned tree, labelling every node with its observation count.
plot(pfit, uniform = TRUE,
     main = "Pruned Regression Tree for Mileage")
text(pfit, use.n = TRUE, all = TRUE, cex = 0.8)

# Write a PostScript rendering of the pruned tree to ptree2.ps.
post(pfit, file = "ptree2.ps",
     title = "Pruned Regression Tree for Mileage")
 
Probar este programa
Regression tree:
rpart(formula = Mileage ~ Price + Country + Reliability + Type,
    data = cu.summary, method = "anova")
 
Variables actually used in tree construction:
[1] Price Type
 
Root node error: 1354.6/60 = 22.576
 
n=60 (57 observations deleted due to missingness)
 
        CP nsplit rel error  xerror     xstd
1 0.622885      0   1.00000 1.01543 0.174384
2 0.132061      1   0.37711 0.52498 0.101224
3 0.025441      2   0.24505 0.37255 0.079313
4 0.011604      3   0.21961 0.35420 0.080563
5 0.010000      4   0.20801 0.38274 0.082100
Call:
rpart(formula = Mileage ~ Price + Country + Reliability + Type,
    data = cu.summary, method = "anova")
  n=60 (57 observations deleted due to missingness)
 
          CP nsplit rel error    xerror       xstd
1 0.62288527      0 1.0000000 1.0154262 0.17438352
2 0.13206061      1 0.3771147 0.5249834 0.10122444
3 0.02544094      2 0.2450541 0.3725467 0.07931334
4 0.01160389      3 0.2196132 0.3542012 0.08056341
5 0.01000000      4 0.2080093 0.3827437 0.08209999
 
Variable importance
  Price    Type Country
     48      42      10
 
Node number 1: 60 observations,    complexity param=0.6228853
  mean=24.58333, MSE=22.57639
  left son=2 (48 obs) right son=3 (12 obs)
  Primary splits:
      Price       < 9446.5  to the right, improve=0.6228853, (0 missing)
      Type        splits as  LLLRLL,      improve=0.5044405, (0 missing)
      Reliability splits as  LLLRR,       improve=0.1263005, (11 missing)
      Country     splits as  --LRLRRRLL,  improve=0.1243525, (0 missing)
  Surrogate splits:
      Type    splits as  LLLRLL,     agree=0.950, adj=0.750, (0 split)
      Country splits as  --LLLLRRLL, agree=0.833, adj=0.167, (0 split)
 
Node number 2: 48 observations,    complexity param=0.1320606
  mean=22.70833, MSE=8.498264
  left son=4 (23 obs) right son=5 (25 obs)
  Primary splits:
      Type        splits as  RLLRRL,      improve=0.43853830, (0 missing)
      Price       < 12154.5 to the right, improve=0.25748500, (0 missing)
      Country     splits as  --RRLRL-LL,  improve=0.13345700, (0 missing)
      Reliability splits as  LLLRR,       improve=0.01637086, (10 missing)
  Surrogate splits:
      Price   < 12215.5 to the right, agree=0.812, adj=0.609, (0 split)
      Country splits as  --RRLRL-RL,  agree=0.646, adj=0.261, (0 split)
 
Node number 3: 12 observations
  mean=32.08333, MSE=8.576389
 
Node number 4: 23 observations,    complexity param=0.02544094
  mean=20.69565, MSE=2.907372
  left son=8 (10 obs) right son=9 (13 obs)
  Primary splits:
      Type    splits as  -LR--L,      improve=0.515359600, (0 missing)
      Price   < 14962   to the left,  improve=0.131259400, (0 missing)
      Country splits as  ----L-R--R,  improve=0.007022107, (0 missing)
  Surrogate splits:
      Price < 13572   to the right, agree=0.609, adj=0.1, (0 split)
 
Node number 5: 25 observations,    complexity param=0.01160389
  mean=24.56, MSE=6.4864
  left son=10 (14 obs) right son=11 (11 obs)
  Primary splits:
      Price       < 11484.5 to the right, improve=0.09693168, (0 missing)
      Reliability splits as  LLRRR,       improve=0.07767167, (4 missing)
      Type        splits as  L--RR-,      improve=0.04209834, (0 missing)
      Country     splits as  --LRRR--LL,  improve=0.02201687, (0 missing)
  Surrogate splits:
      Country splits as  --LLLL--LR, agree=0.80, adj=0.545, (0 split)
      Type    splits as  L--RL-,     agree=0.64, adj=0.182, (0 split)
 
Node number 8: 10 observations
  mean=19.3, MSE=2.21
 
Node number 9: 13 observations
  mean=21.76923, MSE=0.7928994
 
Node number 10: 14 observations
  mean=23.85714, MSE=7.693878
 
Node number 11: 11 observations
  mean=25.45455, MSE=3.520661
 
 
Regression tree:
rpart(formula = Mileage ~ Price + Country + Reliability + Type,
    data = cu.summary, method = "anova")
 
Variables actually used in tree construction:
[1] Price Type
 
Root node error: 1354.6/60 = 22.576
 
n=60 (57 observations deleted due to missingness)
 
        CP nsplit rel error  xerror     xstd
1 0.622885      0   1.00000 1.01543 0.174384
2 0.132061      1   0.37711 0.52498 0.101224
3 0.025441      2   0.24505 0.37255 0.079313
4 0.011604      3   0.21961 0.35420 0.080563
5 0.010000      4   0.20801 0.38274 0.082100
ptree.png