1
votes

I am trying to import a large CSV file (approximately 7k variables and 355 observations). Proc Import stops reading variable names after ~2k columns, I'm not really sure why. I have found that using infile will get me the whole csv into SAS, but the variable names are in the first row, and the variables are named v1-vn. I just need to take the variable names from the first row, and modify/rename my dataset using those.

So far I have: Used infile and transpose to get all of my variable names into one column in a separate dataset. Used proc sql to select this column into a list. Banged my head against my keyboard for a day and a half using macros and this list to try and modify the original variable names.

I have used the code below in my most recent (unsuccessful) attempt. Bear in mind that with ~7k variables I cannot rename them by hand, or even an appreciable fraction of them. I need to use do loops or macros in some way to do this, or else get infile to properly read in variable names.

data LabImportRaw;
    length v1-v6876 $300;
    infile 'C:\xxxxxxxxxxxx\LabImportListing.csv' delimiter=',' firstobs=2 missover lrecl=250000;
    input v1-v6876 ;
run;
data LabImportVNames;
    length v1-v6876 $300;
    infile 'C:\xxxxxxxxxx\LabImportListing.csv' delimiter=',' obs=1 missover lrecl=250000;
    input v1-v6876 ;
    Array VNames(6876) v1-v6876;
run;

proc transpose
data=LabImportVNames
Out=LabImportVNames;
var v1-v6876;
run;

*Create a list of new variable names;
proc sql;
 select Col1
 into :renamelist
 from LabImportVNames;
quit;

*Create Rename Macro;
%macro rename(oldvarlist, newvarlist);
  %let k=1;
  %let old = %scan(&oldvarlist, &k);
  %let new = %scan(&newvarlist, &k);
     %do %while(("&old" NE "") & ("&new" NE ""));
      rename &old = &new;
      %let k = %eval(&k + 1);
      %let old = %scan(&oldvarlist, &k);
      %let new = %scan(&newvarlist, &k);
  %end;
%mend;

*Do the renames;
proc datasets lib=work;
modify LabImportRaw;
%rename(v1-v6786, renamelist)
run;
3

3 Answers

0
votes

You're on the right track, but I wouldn't use the macro looping; just construct a renaming macro that works on 1 variable and call that 1000 times or whatever.

The below will work if the &renamelist fits into a macro variable; depending on your variable names it might not (I'd even say probably will not). You can get around that a few ways; you can shorten %rename to %r or something like that (saves 5 characters*number of variables), you can use filtering criteria to create two or more lists (the first 1000, the next 1000, etc.), or instead of using PROC SQL you can use a data step and write the macro calls to a temporary file, then include that.

%macro rename(oldvar,newvar);
  rename &oldvar.=&newvar.;
%mend rename;

proc sql;
 select cats('%rename(',_name_,',',Col1,')')
  into :renamelist separated by ' '
  from LabImportVNames;
quit;

proc datasets;
 modify LabImportRaw;
 &renamelist;
quit;
0
votes

I would do something different. Start reading all the variable names into one variable of a dataset:

data LabImportVNames;
    length var $300;
    infile 'MyPath\LabImportListing.csv' delimiter=',' obs=1  lrecl=250000;
    input var @@ ;
run;

then use this dataset to write your input code directly with the correct variable names (I just took your step and had sas write it with put statements), you write your code into an external file in two pieces because you need to go through the variable list twice and the second time you write into the file you append (option mod)

data _NULL_;
file "MyPath\ReadCSV.sas";
set LabImportVNames end=fine;
if _N_=1 then do;
put "data LabImportRaw;";
put "    length        ";
end;
put "       " var       ;
if fine then
put "   $300;";
run;    


data _NULL_;
file "MyPath\ReadCSV.sas" mod;
set LabImportVNames end=fine;
if  _N_=1 then do;
    put "infile 'C:\xxxxxxxxxxxx\LabImportListing.csv' delimiter=',' firstobs=2 missover lrecl=250000;";
    put "input                                                                                        ";
end;
put "      " var;
if fine then do; 
    put "    ;";
    put "run;";
end;
run;

and in the end you include the code:

%include "MyPath\ReadCSV.sas";
0
votes

You can store all the variables in macro variables by using the following method. You don't have to specify number of variables either. Just a bit customized for my own data but it should give you some clues.

%macro simport(inname,outname);
data vars&i;
    length v1-v10000 $10;
    infile "&inname" delimiter=',' obs=1 missover dsd lrecl=250000;
    input v1-v10000 ;
    Array VNames(10000) v1-v10000;
run;

proc transpose
data=vars
Out=vars;
var v1-v10000;
run;

/* You can only extract valid variables*/
data vars;
set  vars;
if col1^=' ' then output;
run;

data _null_;
set  vars end=eof;
call symput("var"||left(_n_),compress(COL1));
if eof then call symput("vobs",left(_n_));
run;
%put &vobs;
%put &var1;

data &outname;
    infile "&inname" delimiter=',' firstobs=2 missover DSD lrecl=250000;
%do i=1 %to &vobs;
    %let m=%sysfunc(mod((&i-1),6));
    %if &m=0 %then %do;
    informat &&var&i mmddyy10.;
    %end;
    %else %do;
    informat &&var&i best32.;
    %end;
%end;
%do i=1 %to &vobs;
    %let m=%sysfunc(mod((&i-1),6));
    %if &m=0 %then %do;
    format &&var&i mmddyy10.;
    %end;
    %else %do;
    format &&var&i best12.;
    %end;
%end;
    input 
%do i=1 %to &vobs;
        &&var&i 
%end;
;
run;
%mend simport;
options nomprint;
%simport(%str(E:\Users\test\Dropbox\TradingData\Stocks\Master\CSV\STOCK1.csv),Dstocks.master1);