File: tilt.boot.Rd

package info (click to toggle)
boot 1.3-20-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,188 kB
  • sloc: makefile: 2
file content (209 lines) | stat: -rw-r--r-- 8,243 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
\name{tilt.boot}
\alias{tilt.boot}
\title{
  Non-parametric Tilted Bootstrap
}
\description{
  This function will run an initial bootstrap with equal resampling 
  probabilities (if required) and will use the output of the initial run to 
  find resampling probabilities which put the
  value of the statistic at required values.  It then runs an importance
  resampling bootstrap using the calculated probabilities as the resampling
  distribution.
}
\usage{
tilt.boot(data, statistic, R, sim = "ordinary", stype = "i", 
          strata = rep(1, n), L = NULL, theta = NULL, 
          alpha = c(0.025, 0.975), tilt = TRUE, width = 0.5, 
          index = 1, \dots)
}
\arguments{
  \item{data}{
    The data as a vector, matrix or data frame.  If it is a matrix or
    data frame then each row is considered as one (multivariate)
    observation.
  }
  \item{statistic}{
    A function which when applied to data returns a vector containing the
    statistic(s) of interest.  It must take at least two arguments.  The first
    argument will always be \code{data} and the second should be a
    vector of indices, weights or frequencies describing the bootstrap
    sample.  Any other arguments must be supplied to \code{tilt.boot}
    and will be passed unchanged to \code{statistic} each time it is called.
  }
  \item{R}{
    The number of bootstrap replicates required.  This will generally be
    a vector, the first value stating how many uniform bootstrap
    simulations are to be performed at the initial stage.  The remaining
    values of \code{R} are the number of simulations to be performed
    resampling from each reweighted distribution. The first value of
    \code{R} must always be present, a value of 0 implying that no
    uniform resampling is to be carried out.  Thus \code{length(R)}
    should always equal \code{1+length(theta)}.
  }
  \item{sim}{
    This is a character string indicating the type of bootstrap
    simulation required.  There are only two possible values that this
    can take: \code{"ordinary"} and \code{"balanced"}.  If other
    simulation types are required for the initial un-weighted bootstrap
    then it will be necessary to run \code{boot}, calculate the weights
    appropriately, and run \code{boot} again using the calculated
    weights.
  }
  \item{stype}{
    A character string indicating the type of second argument expected
    by \code{statistic}.  The possible values that \code{stype} can take
    are \code{"i"} (indices), \code{"w"} (weights) and \code{"f"}
    (frequencies).
  }
  \item{strata}{
    An integer vector or factor representing the strata for multi-sample
    problems.
  }
  \item{L}{
    The empirical influence values for the statistic of interest.  They
    are used only for exponential tilting when \code{tilt} is
    \code{TRUE}.  If \code{tilt} is \code{TRUE} and they are not
    supplied then \code{tilt.boot} uses \code{empinf} to calculate
    them.
  }
  \item{theta}{
    The required parameter value(s) for the tilted distribution(s).
    There should be one value of \code{theta} for each of the
    non-uniform distributions.  If \code{R[1]} is 0 then \code{theta} is a
    required argument.  Otherwise \code{theta} values can be estimated
    from the initial uniform bootstrap and the values in \code{alpha}.
  }
  \item{alpha}{
    The alpha level to which tilting is required.  This parameter is
    ignored if \code{R[1]} is 0 or if \code{theta} is supplied,
    otherwise it is used to find the values of \code{theta} as quantiles
    of the initial uniform bootstrap.  In this case \code{R[1]} should
    be large enough that \code{min(c(alpha, 1-alpha))*R[1] > 5}; if this
    is not the case then a warning is generated to the effect that the
    \code{theta} are extreme values and so the tilted output may be
    unreliable. 
  }
  \item{tilt}{
    A logical variable which if \code{TRUE} (the default) indicates that
    exponential tilting should be used, otherwise local frequency
    smoothing (\code{smooth.f}) is used.  If \code{tilt} is \code{FALSE}
    then \code{R[1]} must be positive.  In fact in this case the value
    of \code{R[1]} should be fairly large (in the region of 500 or
    more).
  }
  \item{width}{
    This argument is used only if \code{tilt} is \code{FALSE}, in which
    case it is passed unchanged to \code{smooth.f} as the standardized
    bandwidth for the smoothing operation.  The value should generally
    be in the range (0.2, 1). See \code{smooth.f} for more details.
  }
  \item{index}{
    The index of the statistic of interest in the output from
    \code{statistic}.  By default the first element of the output of
    \code{statistic} is used.
  }
  \item{\dots}{
    Any additional arguments required by \code{statistic}.  These are
    passed unchanged to \code{statistic} each time it is called.
  }
}
\value{
An object of class \code{"boot"} with the following components:

\item{t0}{
  The observed value of the statistic on the original data.
}
\item{t}{
  The values of the bootstrap replicates of the statistic.  There will
  be \code{sum(R)} of these, the first \code{R[1]} corresponding to the
  uniform bootstrap and the remainder to the tilted bootstrap(s).
}
\item{R}{
  The input vector of the number of bootstrap replicates.
}
\item{data}{
  The original data as supplied.
}
\item{statistic}{
  The \code{statistic} function as supplied.
}
\item{sim}{
  The simulation type used in the bootstrap(s); it can be either
  \code{"ordinary"} or \code{"balanced"}.
}
\item{stype}{
  The type of statistic supplied; it is the same as the input value
  \code{stype}.
}
\item{call}{
  A copy of the original call to \code{tilt.boot}.
}
\item{strata}{
  The strata as supplied.
}
\item{weights}{
  The matrix of weights used.  If \code{R[1]} is greater than 0 then the
  first row will be the uniform weights and each subsequent row the
  tilted weights. If \code{R[1]} equals 0 then the uniform weights are
  omitted and only the tilted weights are output.
}
\item{theta}{
  The values of \code{theta} used for the tilted distributions.  These
  are either the input values or the values derived from the uniform
  bootstrap and \code{alpha}.
}
}
\references{
Booth, J.G., Hall, P. and Wood, A.T.A. (1993) Balanced importance resampling 
for the bootstrap. \emph{Annals of Statistics}, \bold{21}, 286--298.

Davison, A.C. and Hinkley, D.V. (1997) 
\emph{Bootstrap Methods and Their Application}. Cambridge University Press.

Hinkley, D.V. and Shi, S. (1989) Importance sampling and the nested bootstrap. 
\emph{Biometrika}, \bold{76}, 435--446.
}
\seealso{
\code{\link{boot}}, \code{\link{exp.tilt}}, \code{\link{Imp.Estimates}}, \code{\link{imp.weights}}, \code{\link{smooth.f}}
}
\examples{
# Note that these examples can take a while to run.

# Example 9.9 of Davison and Hinkley (1997).
# Keep only the rows of gravity whose second column, converted to numeric,
# is at least 7; that column is reused below as the strata.
grav1 <- gravity[as.numeric(gravity[,2]) >= 7, ]
# Weighted two-sample statistic for tilt.boot with stype = "w".
#   dat  : data with the response in column 1 and the group label in column 2
#   w    : one resampling weight per row of dat
#   orig : value subtracted from the difference before studentizing
# Returns c(difference of group means, variance estimate, studentized value).
grav.fun <- function(dat, w, orig) {
     # tapply without a function returns the group index of each row.
     grp <- tapply(dat[, 2], as.numeric(dat[, 2]))
     y <- dat[, 1]
     n.grp <- tabulate(grp)
     # Rescale the weights so that they sum to one within each group.
     w.tot <- tapply(w, grp, sum)
     p <- w/w.tot[grp]
     m1 <- as.vector(tapply(y * p, grp, sum)) # drop names
     m2 <- tapply(y * y * p, grp, sum)
     v.hat <- sum((m2 - m1^2)/n.grp)
     delta <- m1[2] - m1[1]
     c(delta, v.hat, (delta - orig)/sqrt(v.hat))
}
# Observed statistics on the original data: one equal weight per row of
# grav1 (sized from the data rather than hard-coded) and orig = 0.
grav.z0 <- grav.fun(grav1, rep(1, nrow(grav1)), 0)
# 249 uniform resamples followed by 375 from each of two tilted
# distributions; theta is not supplied, so it is taken from the uniform
# run at the default alpha levels.  Tilting is on the third element of
# the statistic, centred at the observed difference grav.z0[1].
tilt.boot(grav1, grav.fun, R = c(249, 375, 375), stype = "w", 
          strata = grav1[,2], tilt = TRUE, index = 3, orig = grav.z0[1]) 


#  Example 9.10 of Davison and Hinkley (1997) requires a balanced 
#  importance resampling bootstrap to be run.  In this example we 
#  show how this might be run.  
# Regression-slope statistic for tilt.boot with stype = "i".
#   data : data frame with columns acme and market
#   i    : row indices defining the bootstrap sample
#   bhat : value subtracted from the slope before studentizing
# Returns c(slope, sr, (slope - bhat)/sr), where sr is the scale estimate
# built from the glm.diag residuals below.
acme.fun <- function(data, i, bhat) {
     # The sample must stay named d: the slope inherits the coefficient
     # name "d$market" from the formula, and that name is returned.
     d <- data[i, ]
     fit <- glm(d$acme ~ d$market)
     slope <- coef(fit)[2]
     dg <- boot::glm.diag(fit)
     x.dev <- d$market - mean(d$market)
     ss.x <- (nrow(d) - 1) * var(d$market)
     sr <- sqrt(sum((x.dev * dg$res * dg$sd)^2)) / ss.x
     c(slope, sr, (slope - bhat) / sr)
}
# Observed statistics on the full data set, with bhat = 0.
acme.b <- acme.fun(acme, seq_len(nrow(acme)), 0)
# Balanced resampling: 499 uniform replicates, then 250 from each of two
# tilted distributions located from the 0.05 and 0.95 levels.
acme.boot1 <- tilt.boot(
     acme, acme.fun,
     R = c(499, 250, 250),
     stype = "i", sim = "balanced",
     alpha = c(0.05, 0.95),
     tilt = TRUE, index = 3, bhat = acme.b[1])
}
\keyword{nonparametric}