- Details
- Published: Friday, 17 April 2015 13:16
- Hits: 2565
The Sheepster is an avid SAS programmer, but my statistician friends are now learning and using R more and more. So, that means the Sheepster has to learn R! One of the things I tried to do was create a SAS Macro that creates an R Data Frame from a SAS Dataset. To be clear, the macro writes the same kind of file that R generates with the SAVE command (in its ASCII text format) and that the LOAD command then reads back in. The generated file is a plain text file, which can be transferred to the machine where your R software runs.
It took quite a while of trawling through the R source code from the R Project website to come up with this SAS Macro, so please consider the code below a BETA version! Let me know if it works for you or if you have any improvements!
/* Echo macro logic, symbol resolution and generated code to the log. */
options mlogic symbolgen mprint;

/* Build a small demonstration dataset: 13 identical rows holding one     */
/* plain numeric, one DATE, one DATETIME, one character and one           */
/* high-precision numeric column.                                         */
data TestData(keep=PATKEY DOS DOSDT TEXT1 PI);
  length PATKEY 8 DOS 8 TEXT1 $ 50 PI 8;
  format DOS DATE9. DOSDT DATETIME.;

  /* The values never change, so assign them once and emit them per row. */
  PATKEY = 473573320;
  DOS    = '25APR1964'd;
  DOSDT  = '25APR1964:08:32:00'dt;
  TEXT1  = "NOW IS THE TIME!";
  PI     = 3.141592653589793;

  do rowNbr = 1 to 13;
    output;
  end;
run;

/* List the demonstration rows in the output window. */
proc print data=TestData;
run;
*----------------------------------------------------------------------------*;
* Macro Name: SASDS2RDA *;
* *;
* SAS Version: 9.1.3 *;
* *;
* Purpose: This macro creates a text file containing the R-version of the *;
* XDR save ASCII format containing the data within the SAS dataset *;
* provided to it. *;
* *;
* Parameters: (1) SDSIN : The name of the desired SAS Dataset. *;
* (2) KEEPVARS : A blank-delimited list of variables in the SAS *;
* dataset named in SDSIN. *;
* (3) R_DFNM : The name of the R Data Frame. Please try to keep *;
* to 30 characters or less. *;
* (4) OUTFIL : The output filename on disk that will contain the *;
* R-version of the XDR data. Please surround this *;
* with %str() to prevent problems. Also, although *;
* not strictly required, please use the extension *;
* .rdata. *;
* *;
* Example: *;
* %SASDS2RDA(sdsin=MyData, *;
* keepvars=PATKEY DOS, *;
* r_dfnm=mydata1, *;
* outfil=%str(C:\TEMP\mydata.rdata) ) *;
* *;
* Usage: Step 1: Run this SAS macro on your desired dataset. *;
* Step 2: Download the file to the machine where you run R. *;
* Step 3: Issue the following command in R in order to load the data *;
* frame into the R environment: *;
* *;
* > load(file="C:\TEMP\mydata.rdata") *;
* *;
* Notes: *;
* 1. The name of the OUTFIL is not used in the creation of the R Dataframe *;
* name. The name of the data frame comes from the R_DFNM parameter. *;
* 2. The program handles DATETIME variables and applies the DATEPART() *;
* function to the value to write the value as a date to R. *;
* 3. The program handles DATE type variables, but does NOT apply DATEPART().*;
*----------------------------------------------------------------------------*;
%macro SASDS2RDA(sdsin=,keepvars=,r_dfnm=,outfil=);
  %*-------------------------------------------------------*;
  %* Writes the KEEPVARS columns of dataset SDSIN to the   *;
  %* text file OUTFIL in the ASCII save format read by the *;
  %* R load function, as one data frame named R_DFNM.      *;
  %*                                                       *;
  %* Working macro variables are declared local so that    *;
  %* same-named variables of the caller are not changed.   *;
  %*-------------------------------------------------------*;
  %local i m_dsid m_NbrVars m_rc VN0;

  %*-------------------------------------------------------*;
  %* File handle to the output text file.                  *;
  %*-------------------------------------------------------*;
  filename _outtxt "&outfil.";

  %*-------------------------------------------------------*;
  %* Determine the names of the kept variables, in dataset *;
  %* order, from a one-row copy of the incoming dataset.   *;
  %*-------------------------------------------------------*;
  data _tmp;
    set &sdsin.(keep=&keepvars. obs=1);
  run;

  %let m_dsid=%sysfunc(open(_tmp,i));
  %let m_NbrVars=%sysfunc(attrn(&m_dsid.,nvars));
  %let VN0=&m_NbrVars.;
  %do i=1 %to &m_NbrVars.;
    %local VN&i.;
    %let VN&i.=%sysfunc(varname(&m_dsid.,&i.)); /* Variable Name */
  %end;
  %let m_rc=%sysfunc(close(&m_dsid.));

  proc datasets library=work nolist;
    delete _tmp;
  run;
  quit;

  %*-------------------------------------------------------*;
  %* Process the incoming SAS dataset.                     *;
  %*-------------------------------------------------------*;
  data _null_;
    /* A wide record length keeps long, escaped character values on one line. */
    file _outtxt lrecl=32767 termstr=LF;

    /* Fix the types of the numeric work variables up front.  Every branch   */
    /* below is compiled for every variable, so without this the type would  */
    /* be taken from whichever kept variable happens to come first.          */
    length InDateValue OutDateValue NumValue 8 NumText $ 32;

    /* Determine the length of the data frame name. */
    LenOfDataFrameName = length("&r_dfnm.");

    /* Output the RDA headers */
    put "RDA2"
       /"A"
       /"2"
       /"134401"
       /"131840";

    /* Output the R Data Frame header: a tagged list entry whose tag is the */
    /* data frame name.                                                     */
    put "1026"
       /"1"
       /"9";
    put LenOfDataFrameName;
    put "&r_dfnm.";

    /* Output the section headers: a list with one element per variable. */
    put "787"
       /"&VN0.";

    /* Process each variable in turn. */
    %do i=1 %to &VN0.;
      /* Determine the variable type (N=numeric, C=character). Adjust if the */
      /* variable carries a date (D) or datetime (T) format.                 */
      VarType=vtype(&&VN&i.);
      VarFmt=vformat(&&VN&i.);
      if VarType="N" then do;
        /* DATETIME must be tested first because its name contains DATE. */
        if index(VarFmt,"DATETIME")>0 then VarType="T";
        else if index(VarFmt,"DATE")>0 then VarType="D";
      end;

      if VarType="C" then do;
        /* Output headers: character vector with one entry per row. */
        put "16"
           /NbrObs;
        do row=1 to NbrObs;
          set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
          put "9";
          /* LENGTHN returns 0 for a blank value, so a blank SAS string is   */
          /* written as an empty R string rather than as a single space.     */
          LenOfText=lengthn(&&VN&i.);
          put LenOfText;
          /* Output each character one at a time.  Blanks, control and       */
          /* non-ASCII characters are written as octal escapes, and so is    */
          /* the backslash (rank 92), because the R reader treats a          */
          /* backslash as the start of an escape sequence.                   */
          do l=1 to LenOfText;
            AChar=strip(substr(&&VN&i.,l,1));
            RankCode=rank(AChar);
            if (RankCode<=32 | RankCode>126 | RankCode=92) then do;
              ACharOctal=strip("\" || put(RankCode,octal3.-L));
              /* +(-1) backs over the blank that list output appends. */
              put +(-1) ACharOctal@@;
            end;
            else do;
              put +(-1) AChar@@;
            end;
          end;
          /* Release the held line (writes an empty line for an empty string). */
          put;
        end;
      end;
      else if VarType="N" then do;
        /* Output headers: real vector with one entry per row. */
        put "14"
           /NbrObs;
        do row=1 to NbrObs;
          set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
          NumValue=&&VN&i.;
          /* SAS missing values are written as NA, not as a period. */
          if missing(NumValue) then put "NA";
          else do;
            /* BEST32. keeps more significant digits than the BEST12. that  */
            /* plain list output would apply.                               */
            NumText=strip(put(NumValue,best32.));
            put NumText;
          end;
        end;
      end;
      else if VarType="D" or VarType="T" then do;
        /* DATE and DATETIME variables are both written as R Date values.    */
        /* DATETIME values are first reduced to their date part.  SAS counts */
        /* days from 1/1/1960 and R from 1/1/1970, a difference of 3653 days.*/
        /* Output headers: real vector carrying attributes. */
        put "782"
           /NbrObs;
        do row=1 to NbrObs;
          set &sdsin.(keep=&keepvars.) nobs=NbrObs point=row;
          if VarType="T" then InDateValue=datepart(&&VN&i.);
          else InDateValue=&&VN&i.;
          if missing(InDateValue) then put "NA";
          else do;
            OutDateValue=InDateValue - 3653;
            put OutDateValue;
          end;
        end;
        /* Output Footers -- a class attribute with the value Date, followed */
        /* by the end-of-attributes marker.                                  */
        put "1026"
           /"1"
           /"9"
           /"5"
           /"class";
        put "16"
           /"1"
           /"9"
           /"4"
           /"Date";
        put "254";
      end;
    %end;

    /* Output the variable headers: the names attribute of the data frame. */
    put "1026"
       /"1"
       /"9"
       /"5"
       /"names"
       /"16"
       /"&VN0.";
    %do i=1 %to &VN0.;
      put "9";
      LenOfVarName=length("&&VN&i.");
      put LenOfVarName;
      put "&&VN&i.";
    %end;

    /* Output the data frame headers: class attribute = data.frame.  The    */
    /* class symbol is written out in full instead of as a back-reference,  */
    /* because the position of that symbol in the reference table depends   */
    /* on whether any date or datetime variable was written earlier.        */
    put "1026"
       /"1"
       /"9"
       /"5"
       /"class"
       /"16"
       /"1"
       /"9"
       /"10"
       /"data.frame";

    /* Output the row names headers.  The value is an integer vector (type  */
    /* code 13) of length 2 holding NA and minus the number of rows.        */
    put "1026"
       /"1"
       /"9"
       /"9"
       /"row.names";
    put "13"
       /"2"
       /"NA";
    NegOneNbrObs=-1*NbrObs;
    put NegOneNbrObs;

    /* Output ending of file values. */
    put "254"
       /"254";

    /* Stop processing...required for POINT=. */
    stop;
  run;

  %*-------------------------------------------------------*;
  %* Clear the filename.                                   *;
  %*-------------------------------------------------------*;
  filename _outtxt clear;
%mend SASDS2RDA;
/* Export the demonstration dataset as an R data frame named testdata. */
%SASDS2RDA(sdsin=TestData, keepvars=PATKEY DOS DOSDT TEXT1 PI,
           r_dfnm=testdata, outfil=%str(C:\TEMP\testdata.rdata));