The Sheepster is an avid SAS programmer, but my statistician friends are now learning and using R more and more. So, that means the Sheepster has to learn R! :-) One of the things I tried to do was create a SAS Macro to create an R Data Frame from a SAS Dataset. Now, to be clear, I am creating the type of file that R generates with the SAVE command (using its ASCII option) and that the LOAD command then reads in. This generated file is a text file which can be transferred to the location of your R software.

It took quite a while of trawling through the R source code from the R Project website to come up with this SAS Macro, so please consider the code below a BETA version! Let me know if it works for you or if you have any improvements!

/* Echo macro logic, resolved macro variables and generated code to the log. */
options mprint mlogic symbolgen;

/* Build thirteen identical rows that exercise every column type the export */
/* macro handles: plain numeric, date, datetime, character and a numeric    */
/* carrying full double precision.                                          */
data TestData(drop=rep);
 /* The LENGTH/FORMAT order fixes the column order of the dataset. */
 length PATKEY 8 DOS 8 TEXT1 $ 50 PI 8;
 format DOS DATE9. DOSDT DATETIME.;

 /* The values never change, so they are assigned once before the loop. */
 PATKEY = 473573320;
 DOS    = '25APR1964'd;
 DOSDT  = '25APR1964:08:32:00'dt;
 TEXT1  = "NOW IS THE TIME!";
 PI     = 3.141592653589793;

 do rep = 1 to 13;
  output;
 end;
run;

/* List the generated rows so they can be compared with what R loads. */
proc print data=TestData;
run;

*----------------------------------------------------------------------------*;
* Macro Name: SASDS2RDA                                                      *;
*                                                                            *;
* SAS Version: 9.1.3                                                         *;
*                                                                            *;
* Purpose: This macro creates a text file containing the R-version of the    *;
*          XDR save ASCII format containing the data within the SAS dataset  *;
*          provided to it.                                                   *;
*                                                                            *;
* Parameters: (1) SDSIN : The name of the desired SAS Dataset.               *;
*             (2) KEEPVARS : A blank-delimited list of variables in the SAS  *;
*                            dataset named in SDSIN.                         *;
*             (3) R_DFNM : The name of the R Data Frame.  Please try to keep *;
*                          to 30 characters or less.                         *;
*             (4) OUTFIL : The output filename on disk that will contain the *;
*                          R-version of the XDR data.  Please surround this  *;
*                          with %str() to prevent problems.  Also, although  *;
*                          not strictly required, please use the extension   *;
*                          .rdata.                                           *;
*                                                                            *;
* Example:                                                                   *;
*          %SASDS2RDA(sdsin=MyData,                                          *;
*                     keepvars=PATKEY DOS,                                   *;
*                     r_dfnm=mydata1,                                        *;
*                     outfil=%str(C:\TEMP\mydata.rdata) )                    *;
*                                                                            *;
* Usage: Step 1: Run this SAS macro on your desired dataset.                 *;
*        Step 2: Download the file to the machine where you run R.           *;
*        Step 3: Issue the following command in R in order to load the data  *;
*                frame into the R environment:                               *;
*                                                                            *;
* > load(file="C:/TEMP/mydata.rdata")                                        *;
*                                                                            *;
* Notes:                                                                     *;
*  1. The name of the OUTFIL is not used in the creation of the R Dataframe  *;
*     name.  The name of the data frame comes from the R_DFNM parameter.     *;
*  2. The program handles DATETIME variables and applies the DATEPART()      *;
*     function to the value to write the value as a date to R.               *;
*  3. The program handles DATE type variables, but does NOT apply DATEPART().*;
*----------------------------------------------------------------------------*;
%macro SASDS2RDA(sdsin=,keepvars=,r_dfnm=,outfil=);

 %*-------------------------------------------------------*;
 %* Writes the variables KEEPVARS of dataset SDSIN to the *;
 %* text file OUTFIL as an R version 2 ASCII workspace    *;
 %* holding one data frame named R_DFNM.                  *;
 %*                                                       *;
 %* Numbers used as record flags in the output:           *;
 %*   1026 pairlist node with a tag                       *;
 %*   1    symbol             9   character string        *;
 %*   13   integer vector     14  numeric vector          *;
 %*   16   character vector                               *;
 %*   782  numeric vector with attributes and a class     *;
 %*   787  generic list with attributes and a class       *;
 %*   254  end of a pairlist                              *;
 %*-------------------------------------------------------*;

 %* Keep every working macro variable out of the caller scope. *;
 %local i m_dsid m_NbrVars m_rc VN0;

 %*-------------------------------------------------------*;
 %* File handle to the output text file.                  *;
 %*-------------------------------------------------------*;
 filename _outtxt "&outfil.";
 run;

 %*-------------------------------------------------------*;
 %* Determine the variables within the incoming SAS       *;
 %* dataset.  A one row copy is enough to read the names. *;
 %*-------------------------------------------------------*;
 data _tmp;
  set &sdsin.(keep=&keepvars. obs=1);
 run;
 %Let m_NbrVars=0;
 %Let m_dsid=%sysfunc(open(_tmp,i));
 %if &m_dsid. > 0 %then %do;
  %Let m_NbrVars=%sysfunc(attrn(&m_dsid.,nvars));
  %do i=1 %to &m_NbrVars.;
   %local VN&i.;
   %Let VN&i.=%sysfunc(varname(&m_dsid.,&i.)); /* Variable Name */
  %end;
  %Let m_rc=%sysfunc(close(&m_dsid.));
 %end;
 %Let VN0=&m_NbrVars.;

 proc datasets library=work nolist;
  delete _tmp;
 run;
 quit;

 %* Without a readable variable list nothing sensible can be written. *;
 %if &m_NbrVars. < 1 %then %do;
  %put ERROR: SASDS2RDA found no variables to export from &sdsin.;
  %goto cleanup;
 %end;

 %*-------------------------------------------------------*;
 %* Process the incoming SAS dataset.                     *;
 %*-------------------------------------------------------*;
 data _null_;

  /* A wide record length keeps long escaped strings on one line. */
  file _outtxt lrecl=32767 termstr=LF;

  /* Fix the type of every work variable up front.  All branches below are */
  /* compiled for every variable, so without this the type would be taken  */
  /* from whichever variable happens to be processed first.                */
  length AChar $ 1 ACharOctal $ 4 NumText $ 32
         NumValue InDateValue OutDateValue 8;

  /* Determine the length of the data frame name. */
  LenOfDataFrameName = length("&r_dfnm.");

  /* Output the RDA headers: magic, ASCII format, format version 2, */
  /* writer version and minimum reader version.                     */
  put "RDA2"
     /"A"
     /"2"
     /"134401"
     /"131840";

  /* Output the R Data Frame header: a tagged pairlist node whose tag is */
  /* the data frame name.                                                */
  put "1026"
     /"1"
     /"9";
  put LenOfDataFrameName;
  put "&r_dfnm.";

  /* Output the section headers: a list with one element per variable. */
  put "787"
     /"&VN0.";

  /* Process each variable in turn. */
  %do i=1 %to &VN0.;

   /* Determine the variable type (N=numeric, C=character).  Adjust if variable is a date (D) or datetime (T). */
   VarType=vtype(&&VN&i.);
   VarFmt=vformat(&&VN&i.);
   if VarType="N" then do;
    /* Check if this numeric variable has a date or datetime type format associated with it. */
    /* Only formats whose name contains DATE are recognised here.                            */
    if index(VarFmt,"DATETIME")>0 then do;
     VarType="T";
    end;
    else if index(VarFmt,"DATE")>0 then do;
     VarType="D";
    end;
   end;

   /* Process each row in turn for the variable of this iteration. */
   if VarType="C" then do;

    /* Output headers */
    put "16"
       /NbrObs;

    /* Output each character one at a time converting those characters to octal where necessary. */
    do row=1 to NbrObs;
     set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
     put "9";
     LenOfText=length(&&VN&i.);
     put LenOfText;
     do l=1 to LenOfText;
      AChar=strip(substr(&&VN&i.,l,1));
      RankCode=rank(AChar);
      /* Blanks, control and non-ASCII bytes are escaped.  The backslash   */
      /* (92) must be escaped as well, because the reader in R treats it   */
      /* as the start of an escape and would swallow the next character.   */
      if (RankCode<=32 | RankCode>126 | RankCode=92) then do;
       ACharOctal=strip("\" || put(RankCode,octal3.-L));
       put +(-1) ACharOctal@@;
      end;
      else do;
       put +(-1) AChar@@;
      end;
     end;
     put;
    end;

   end;
   else if VarType="N" then do;

    /* Output headers */
    put "14"
       /NbrObs;

    do row=1 to NbrObs;
     set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
     NumValue=&&VN&i.;
     /* R cannot read the SAS missing value dot, so write NA instead. */
     if missing(NumValue) then put "NA";
     else do;
      /* Ignore any attached format and write the value with as many */
      /* significant digits as BEST32. provides.                     */
      NumText=strip(put(NumValue,best32.));
      put NumText;
     end;
    end;

   end;
   else if VarType="D" | VarType="T" then do;

    /* DATE and DATETIME values are both written as R Date values.  */
    /* DATETIME values are reduced to their date with DATEPART().   */
    /* SAS counts days from 1/1/1960 and the R Date class counts    */
    /* days from 1/1/1970, which is a shift of 3653 days.           */

    /* Output headers */
    put "782"
       /NbrObs;

    do row=1 to NbrObs;
     set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
     NumValue=&&VN&i.;
     if missing(NumValue) then put "NA";
     else do;
      if VarType="T" then InDateValue=datepart(NumValue);
      else InDateValue=NumValue;
      OutDateValue=InDateValue - 3653;
      put OutDateValue;
     end;
    end;

    /* Output Footers -- these indicate to R that these are Date class values. */
    put "1026"
       /"1"
       /"9"
       /"5"
       /"class";
    put "16"
       /"1"
       /"9"
       /"4"
       /"Date";
    put "254";

   end;

  %end;

  /* Output the variable headers. */
  put "1026"
     /"1"
     /"9"
     /"5"
     /"names"
     /"16"
     /"&VN0.";
  %do i=1 %to &VN0.;
   put "9";
   LenOfVarName=length("&&VN&i.");
   put LenOfVarName;
   put "&&VN&i.";
  %end;

  /* Output the data frame headers.  The class symbol is written in full. */
  /* A back reference such as 767 (symbol number 2) only points at class  */
  /* when a date variable was written earlier in the file.                */
  put "1026"
     /"1"
     /"9"
     /"5"
     /"class"
     /"16"
     /"1"
     /"9"
     /"10"
     /"data.frame";

  /* Output the row names headers. */
  put "1026"
     /"1"
     /"9"
     /"9"
     /"row.names";
  /* Compact row names: an integer vector (13) of length 2 holding NA and */
  /* the negated number of rows.                                          */
  put "13"
     /"2"
     /"NA";
  NegOneNbrObs=-1*NbrObs;
  put NegOneNbrObs;

  /* Output ending of file values: close the attribute list and the */
  /* top level list.                                                */
  put "254"
     /"254";

  /* Stop processing...required for POINT=. */
  stop;

 run;

 %*-------------------------------------------------------*;
 %* Clear the filename.                                   *;
 %*-------------------------------------------------------*;
 %cleanup:
 filename _outtxt clear;
 run;

%mend SASDS2RDA;
/* Export every TestData column to an R workspace file holding the data frame testdata. */
%SASDS2RDA(outfil=%str(C:\TEMP\testdata.rdata),
           r_dfnm=testdata,
           sdsin=TestData,
           keepvars=PATKEY DOS DOSDT TEXT1 PI);