/* 

The following program represents the second in a series of primers on basic commands in GAUSS.
Written by James Morley, Feb. 1st, 2008

In this case, we will conduct a simple Monte Carlo experiment to determine the size and power of t-tests

*/


new;    @this command clears the workspace memory and should be at the start of any program@

library pgraph;     @"library" tells GAUSS that it is going to read in an extra module, in this case "pgraph", which allows us to plot graphs directly in GAUSS@


/* The first substantive part of this program will set up globals for the Monte Carlo experiment*/


nsim=1000;  @This will determine the number of times a given experiment is conducted@
t=50;      @This sets the sample size in a given experiment@
nominal_size=0.05;  @This sets the nominal size (i.e., fraction of times we should draw wrong conclusion from test) for experiment when null is true@
diff_stdev=0.01;     @This determines the increments of how different the null is from truth in terms of standard deviations of data@

@Parameters for Data Generating Process (DGP)@
mu_true=1;       @This sets the true mean of the DGP@
sig_true=1;    @This sets the true std. dev. of the DGP@

diff=diff_stdev*sig_true;   @This sets the difference in the null from the true mu_true based on diff_stdev set above@

seed1=347878;   @sets the initial seed for the pseudo random number generator so that draws are the same each time you run program@


/* The second substantive part of this program represents a loop around the experiment*/

t_mat={};  @Creates space for storing t-stat for each experiment@
size=0; @We will add to this and then divide across experiments to calculate the size (% times conclusion is wrong (i.e., reject when true)) of the t-test@
        @Nominal size of test is 5%. We will check if this holds in small samples@
power=zeros(100,1); @As with size, we will add to this and divide across experiments to determine the power (% times conclusion is right (i.e., reject when false)) of the t-test@
                    @We will consider power for 100 different nulls that get progressively further from the truth@


j=1;
do until j>nsim;    @This starts the do loop for the experiments@


@Step 1: Draw Sample from true DGP@

y=mu_true+sig_true*rndns(t,1,seed1); @DGP is serially uncorrelated draws with mean mu_true and variance sig_true^2@


@Step 2: Construct Sample Statistics@


@Case 1: H0 is true. I.e., mu=mu_true@

X=ones(rows(y),1);  @regressor is just a constant@

b= inv(X'X)*X'y;

mu_hat=b;    @Least squares estimate of mu@
s2=(1/(t-1))*sumc((y-mu_hat)^2);  @Least squares estimate of sig^2@

var_b = inv(X'X)*s2;    @General formula for LS estimate of Variance of LS estimator of beta@
se_b = sqrt(diag(var_b));      @General formula for finding Standard Errors. In this case, there is only one parameter@

t_1 = (mu_hat - mu_true)/se_b;

t_mat=t_mat|t_1;     @store realized value of t-stat for this sample@

if abs(t_1)>cdftci(nominal_size/2,t-1); @This logic expression will be true if test stat is greater than critical value for a two-tailed test at nominal size level@
                              @E.g., if nominal size =5%, dividing by 2 means that the abs is in the upper 97.5% percentile of t-dist. This conforms to a two tailed-test at 5% because we took absolute value of t-stat@
                              @t-1 is the degrees of freedom for the t-distribution@

    size=size+1;    @This is a trick if you don't have enough memory to store each draw, but you only need an average. You sum for now and then divide by number of experiments at end to get average@

endif;


i=1;
do until i>100; @loop for power experiment for 100 different alternatives@


@Case 2: H0 is false. I.e., mu=mu_true+diff@

t_2 = (mu_hat - (mu_true+i*diff))/se_b;     @by adding i*diff, we get different alternatives that are progressively further from truth as i goes from 1 to 100@

if abs(t_2)>cdftci(nominal_size/2,t-1); @This logic expression will be true if test stat is greater than critical value for a two-tailed test at nominal size level@
                              @E.g., if nominal size =5%, dividing by 2 means that the abs is in the upper 97.5% percentile of t-dist. This conforms to a two tailed-test at 5% because we took absolute value of t-stat@
                              @t-1 is the degrees of freedom for the t-distribution@

    power[i]=power[i]+1;    @Again, this is a trick if you don't have enough memory to store each draw, but you only need an average. You sum for now and then divide by number of experiments at end to get average@

endif;

i=i+1;
endo;   @end of loop over alternatives for power experiment@


j=j+1;
endo; @end of loop for each simulated sample@


power=power/nsim; @expresses power of test as a fraction rather than a frequency@


@The following code shows how to plot multiple panel graphs@

begwind;
window(2,1,0);
setwind(1);
    title("Monte Carlo dist for t-stat");
    hist(t_mat,50);   @histogram with 50 equal sized bins@
nextwind;
    title("Power Function (x-axis is multiples of diff_stdev)");
    xy(rows(power),power);
endwind;


size=size/nsim; @expresses size as a fraction rather than a frequency in terms of experiments@

"Nominal Size=";;0.05;
"Actual Size=";;size;


end;