options nocenter nodate ls=80;


/* 1. Here we read data from an existing ASCII file and then                 */
/*    sort and display the first 10 lines of the data set                    */
data RATS;
  infile "c:/WORKSHOP/rats.dat";
  input id start stop status enum trt;
run;

proc sort data=RATS; by id enum; run;
proc print data=RATS(obs=10); run;


/* 2. Here we fit a Semi-Parametric (Andersen-Gill) Poisson Model             */
/*    - '(start, stop)' indicates the subject is at risk during (start, stop] */
/*    - 'status(0)' indicates when status = 0 the time "stop" is censored     */
/*    - 'ties = breslow' (default method) specifies the method for handling   */
/*      ties in the failure times.  Other tie-handling methods are discrete,  */
/*      efron and exact.                                                      */
proc phreg data=RATS;
  model (start,stop)*status(0)= trt / ties=breslow;
run;

/* The above data are in the 'counting process' format.  The same estimates   */
/* can be obtained if we specify left truncation times.                       */
proc phreg data=RATS;
  model stop*status(0) = trt / entry=start ties=breslow;
run;


/* 2(A). Test of the Proportional Hazards Assumption                          */

/* (i) Similar to cox.zph(, tranform="log") in R or Splus                     */
/*     - 'trtt = trt*log(stop)' is a 'time-dependent' variable which is a     */
/*       function of the failure times in log scale                           */
proc phreg data=RATS;
  model (start,stop)*status(0)= trt trtt / ties=breslow;
  trtt = trt*log(stop);
run;

/* (ii) Similar to cox.zph(, transform="identity") in R or Splus              */
/*      - Here, no transformation is applied to the failure times             */
proc phreg data=RATS;
  model (start,stop)*status(0)= trt trtt / ties=breslow;
  trtt = trt*stop;
run;


/* 2(B). The residuals of a model are used to examine the lack of fit        */
/*       Keywords for residuals :                                            */
/*         resdev   = deviance residuals                                     */
/*         resmart  = martingale residuals                                   */
/*         ressch   = Schoenfeld residuals                                   */
/*         ressco   = score residuals                                        */
/*         wtressch = Weighted Schoenfeld residuals                          */
/*       - The output statement is included to obtain the specified          */
/*         residuals.                                                        */
proc phreg data=RATS;
  model (start,stop)*status(0)= trt / ties=breslow;
  output out=RESIDUALS resmart=martingale_res ressch=schoenfeld_res;
run;

proc print data=RESIDUALS; run;


/* 3. Here we fit a Semi-Parametric (Andersen-Gill) Model but obtain robust */
/*    variance estimates                                                    */
/*    - 'covs(aggregate)' or 'covsandwich(aggregate)' requests the robust   */
/*      covariance estimate (Lin & Wei, 1989; Lawless and Nadean, 1995).    */
/*    - The id statement must be specified to link lines in the data file.  */
proc phreg data=RATS covs(aggregate);
  model (start,stop)*status(0)= trt / ties=breslow fconv=1e-04;
  id id;
run;


/* 4. Nonparametric Estimates */

/* 4(A). Here we compute the cumulative mean function estimates for       */
/*       control and treatment arms separately                            */
/*       - The baseline statement will create a SAS data set, CMFDATA0    */
/*         that contains the specified nonparametric estimates.           */
/*       - Some available keywords are                                    */
/*           cmf       = cumulative mean function (Nelson (2002))         */
/*           cumhaz    = cumulative hazard function                       */
/*           stdcmf    = standard error of the cumulative mean function   */
/*                       [this gives robust variance estimates]           */
/*           stdcumhaz = standard error of the cumulative hazard function */
/*                       [this gives Poisson variance estimates]          */
/*       - Survival statistics are computed based on the empirical        */
/*         cumulative hazards function.                                   */
proc phreg data=RATS(where = (trt = 0)) covs(aggregate);
  model (start,stop)*status(0)=  / ties=breslow;
  id id;
  baseline out=CMFDATA0
           cmf=cmf stdcumhaz=cmf_se_poisson stdcmf=cmf_se_robust;
run;
 
proc print data=CMFDATA0;
  var stop cmf cmf_se_poisson cmf_se_robust;
run;

proc phreg data=RATS(where = (trt = 1)) covs(aggregate);
  model (start,stop)*status(0)=  / ties=breslow;
  id id;
  baseline out=CMFDATA1
           cmf=cmf stdcumhaz=cmf_se_poisson stdcmf=cmf_se_robust;
run;
  
proc print data=CMFDATA1;
  var stop cmf cmf_se_poisson cmf_se_robust;
run;

data CMFDATA;  set CMFDATA0; trt = 0; run;
data CMFDATA1; set CMFDATA1; trt = 1; run;
proc append base=CMFDATA data=CMFDATA1 force; run;


/* Here we create plot of cumulative mean function estimates for both arms */
goptions rotate=landscape papersize=(11,8.5);

title;
legend1 label=none shape=symbol(2, .8)
        value=(font=times height=.8 'CONTROL' 'TREATED');
axis1 label=(height=1 font=times angle=90 'NELSON-AALEN ESTIMATE') minor=(n=1);
axis2 label=(height=1 font=times 'TIME (DAYS)') minor=(n=1);

proc gplot data=CMFDATA;
  plot cmf*stop=trt / legend=legend1 vaxis=axis1 haxis=axis2 cframe=ligr;
  symbol1 interpol=stepLJ height=1 line=3 color=blue;
  symbol2 interpol=stepLJ height=1 line=1 color=red;
run;


/* 4(B). Here we computed the estimates of the cumulative mean function   */
/*       from an Andersen-Gill model with treatment effect                */
data NEWDATA;
  input trt;
  datalines;
0
1 
;
run;

/* - 'covariates=NEWDATA' is added so that the estimates are computed for */
/*   the specific values in this data set.                                */
/* - If 'nomean' is excluded, the estimates are based on the sample mean  */
/*   of the treatment variable, which does not make sense in this context */ 
proc phreg data=RATS covs(aggregate);
  model (start,stop)*status(0)= trt / ties=breslow;
  id id;
  baseline covariates=NEWDATA out=CMFDATA
           cmf=cmf stdcumhaz=cmf_se_poisson stdcmf=cmf_se_robust / nomean;
run;

proc print data=CMFDATA;
  var trt stop cmf cmf_se_poisson cmf_se_robust;
run;