/* Announcement */
/* The bootstrap macro is an improved (by me) version of the one at
http://www.utexas.edu/cc/docs/stat56.html */
/* The outfile libname specifies the location where the results file
is to be saved. It need not be saved; in that case, replace the
outfile libref with work in the proc datasets, proc append and
proc freq steps below. */
/* Listing layout: 80-character lines and a very large page size so
   that the output is not broken into short pages. */
options linesize=80 pagesize=32767;
/* Libref for the folder that receives the permanent results dataset. */
libname outfile 'c:\';
/* Use the following data step to generate a set of random
normal variables with correlation 'rho' between any two
of them.*/
data analysis;
  /* Common-factor construction: every variable is its own standard
     normal draw plus one shared term d = c*z, with c chosen so that
     any two variables have correlation rho. */
  rho = .5;
  c = sqrt(rho / (1 - rho));

  /* Declare y ahead of the predictors so the kept variables are
     stored in the order y, x1, ..., x10. */
  length y x1-x10 8;
  array xs{*} x1-x10;

  do i = 1 to 100;
    /* Shared component for this observation. */
    d = c * rannor(1810);
    y = rannor(0) + d;
    do k = 1 to dim(xs);
      xs{k} = rannor(0) + d;
    end;
    output;
  end;

  /* Only the response and the ten predictors are written out. */
  keep y x1-x10;
run;
/*-------------------------------------------------------------------
The results will be stored in a permanent dataset called 'results'.
The output from each model is appended to the end of the file.
The 'proc datasets' commands erase the file if it already exists.
Without them, the results from a run of the program will be
appended to whatever results had been collected previously. Be
sure to move an existing results file if you want to keep it.
-----------------------------------------------------------------*/;
/* Start from a clean slate: drop any results dataset left in the
   outfile library by an earlier run. */
proc datasets library=outfile;
  delete results;
quit;
/*-------------------------------------------------------------------
The following commands apply the procedure to the original data
set.
-----------------------------------------------------------------*/;
/* Forward selection on the original sample, using 0.05 as the
   significance level for entry. */
proc reg data=analysis;
  model y = x1-x10 / selection=forward slentry=0.05;
run;
/* Cp-based subset selection on the original sample. */
proc reg data=analysis;
  model y = x1-x10 / selection=cp;
run;
quit;
*Bootstrap models; %MACRO boot(nboot=100, seed=23456);
/*-------------------------------------------------------------------
Draws bootstrap samples from the temporary dataset "analysis", runs
forward selection on each sample, and appends each selection summary
to outfile.results.

Parameters (both optional, so a plain %boot call still works):
  nboot  number of bootstrap samples to generate (default 100).
  seed   base seed for the random number generator (default 23456).
         Sample number i is drawn with seed + i, so every sample has
         its own stream, and the same positive seed reproduces the
         same samples on every run.

The dataset "analysis" and the libref "outfile" must exist before
the macro is called.
-----------------------------------------------------------------*/
%local i;   /* keep the loop counter from overwriting a global i */
%DO i=1 %to &nboot;
/*-------------------------------------------------------------------
The following step generates the temporary dataset "bootstrp" by
sampling with replacement from the temporary dataset "analysis".
'n' is the number of cases in the analysis file. Its value is set
by the NOBS option of the SET statement when the step is compiled,
which makes 'n' available before the DO loop executes, both as the
loop bound and in the 'choice =' statement.
'choice' is a random case number between 1 and n; the POINT option
reads exactly that case. POINT variables are not written to the
output dataset, so 'ch' keeps a copy of the case number and 'j'
records the draw number.
The STOP statement is required because reading with POINT never
reaches end of file.
-----------------------------------------------------------------*/
data bootstrp;
  do j = 1 to n;
    choice = int(ranuni(&seed + &i) * n) + 1;
    set analysis point=choice nobs=n;
    ch = choice;
    output;
  end;
  stop;
run;
/*-------------------------------------------------------------------
Remove any selection summary left over from the previous sample, so
that it cannot be appended a second time if proc reg produces no
SelectionSummary table for this sample (for example, when no
variable meets the entry criterion).
-----------------------------------------------------------------*/
proc datasets lib=work nolist nowarn;
  delete model;
quit;
/*-------------------------------------------------------------------
The following code applies the procedure to each bootstrap data set.
Notice that the name of the dataset being analyzed is the temporary
dataset "bootstrp". The QUIT statement ends proc reg so that the
ODS output dataset is complete before it is checked and appended.
-----------------------------------------------------------------*/
proc reg data=bootstrp;
  model y = x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 / details=summary
        selection=forward sle=0.05;
  ods output SelectionSummary=work.model;
run;
quit;
%if %sysfunc(exist(work.model)) %then %do;
  proc append base=outfile.results data=work.model;
  run;
%end;
%else %do;
  %put NOTE: Bootstrap sample &i produced no selection summary - nothing appended.;
%end;
%end;
%MEND boot;
/* Run the bootstrap, then count how often each variable appears as
   an entered variable across the accumulated selection summaries. */
%boot
proc freq data=outfile.results;
  tables VarEntered;
run;