1
votes

I am trying to parse a delimited dataset with over 300 fields. Instead of listing all the input fields like

    data test;
        /* Pipe-delimited source file, read with the DSD and STOPOVER options. */
        infile "delimited_filename.txt"
            dsd
            delimiter = "|"
            lrecl     = 32767
            stopover;

        /* One "name : informat" pair per field (modified list input). */
        input
            field_A : $200.
            field_B : $200.
            field_C : $200.
            /*continues on */
        ;

I am thinking I can dump all the field names into a file, read that file in as a SAS dataset, and use it to populate the input fields. This would also give me dynamic control if any of the field names change (are added or removed) in the dataset. What would be some good ways to accomplish this?

Thank you very much - I just started sas, still trying to wrap my head around it.

2
Sounds like you're on the correct path. Have you tried Proc Import or do you want more control? - Reeza
Yes, I use proc import to read in the file that defines the variable names, lengths, types, and labels. From my extensive googling, it seems the best way is to write a macro that reads in the header information, builds the data/infile/input statements in a macro variable, and then resolves that macro variable at the end of the macro. I haven't gotten it to work yet ... - adjfac
You'd have to post a sample of the file you have but you are on the correct track. You should also post the code you've tried but isn't working. - Reeza
I think I figured it out, posted the code below. - adjfac
Is the question how to write a SAS program to read a file when given the metadata about the file as data? If so you should provide an example of the metadata you have. To read a file you need to know the variable names, the order, the type (and for character variables the max length). You also need to know informat for any field (like a DATE or TIME field) that requires it to transform from text to stored value. You probably will also want to provide formats and labels. - Tom

2 Answers

0
votes

This worked for me: basically, use the macro language to "write" the DATA step as open code, and then run it.

Note: my indata_header_file contains 5 columns: Variable_Name, Variable_Length, Variable_Type, Variable_Label, and Notes.

/*
    ReadDsFromFile
    Generates and runs a DATA step that reads a pipe-delimited text file,
    taking the list of fields from the "Names" sheet of a header workbook.

    Parameters (positional):
      filename_to_process - placed verbatim after INFILE, so pass a quoted
                            path or a fileref.
      indata_header_file  - placed verbatim after DATAFILE= in PROC IMPORT,
                            so pass a quoted path.
      out_dsname          - name of the data set to create.

    The header sheet must provide the columns Variable_Name (character),
    Variable_Length (numeric) and Variable_Type (character). A type of CHAR
    (any case) yields a character field; anything else yields a numeric field.
    Other columns such as Variable_Label and Notes are not used.

    Side effects: creates or replaces WORK.DS_HEADER and &out_dsname.
*/
%macro ReadDsFromFile(filename_to_process, indata_header_file, out_dsname);

    /* Keep every working variable local so nothing in an outer scope is overwritten. */
    %local id nobs i rc col_name col_length col_type var_name var_length var_type;

    %put *** Processing file: &filename_to_process ...;

    /* Read in the header file */
    proc import OUT     = ds_header
            DATAFILE    = &indata_header_file.
            DBMS        = EXCEL REPLACE;        /* REPLACE flag */
            SHEET       = "Names";
            GETNAMES    = YES;
            MIXED       = NO;
            SCANTEXT    = YES;
    run;

    /* OPEN returns 0 on failure (for example when the import did not create the table). */
    %let id = %sysfunc(open(ds_header));
    %if &id. = 0 %then %do;
        %put ERROR: ReadDsFromFile could not open WORK.DS_HEADER. %qsysfunc(sysmsg());
        %return;
    %end;

    /* NLOBS counts non-deleted rows, which is how FETCHOBS numbers rows by default. */
    %let nobs       = %sysfunc(attrn(&id., NLOBS));

    /* Column positions do not change from row to row, so look them up once. */
    %let col_name   = %sysfunc(varnum(&id., Variable_Name));
    %let col_length = %sysfunc(varnum(&id., Variable_Length));
    %let col_type   = %sysfunc(varnum(&id., Variable_Type));

    /* VARNUM returns 0 for a column that does not exist. */
    %if &col_name. = 0 or &col_length. = 0 or &col_type. = 0 %then %do;
        %put ERROR: ReadDsFromFile needs columns Variable_Name, Variable_Length and Variable_Type in the header sheet.;
        %let rc = %sysfunc(close(&id.));
        %return;
    %end;

    /*
        Generates:
        data <out_dsname>;
            infile <filename_to_process> DSD delimiter="|" lrecl=32767 STOPOVER;
            input
                <field_name> : $<field_length>.      (character fields)
                <field_name> : <field_length>.       (all other fields)
                ...
            ;
        run;
    */
    data &out_dsname.;
        infile &filename_to_process.
            DSD delimiter="|" lrecl=32767 STOPOVER;
        input
        %do i = 1 %to &nobs.;
            %let rc         = %sysfunc(fetchobs(&id., &i.));
            %let var_name   = %sysfunc(getvarc(&id., &col_name.));
            %let var_length = %sysfunc(getvarn(&id., &col_length.));
            %let var_type   = %sysfunc(getvarc(&id., &col_type.));

            /*
                Two periods are needed after the length: the first only ends the
                macro variable reference, the second is the period that makes
                the token an informat.
            */
            %if %upcase(&var_type.) eq CHAR %then %do;
            &var_name. : $&var_length..
            %end;
            %else %do;
            &var_name. : &var_length..
            %end;
        %end;
        /* Release the header table before the generated step runs. */
        %let rc = %sysfunc(close(&id.));
        ;
    run;

%mend ReadDsFromFile;
0
votes

Sounds like you want to generate code based on metadata. A data step is actually a lot easier to code and debug than a macro. Let's assume you have metadata that describes the input data. For example, let's use the metadata about SASHELP.CARS. We can build our metadata from the existing DICTIONARY.COLUMNS metadata for that dataset. Let's set INFORMAT to the same value as FORMAT, since that table does not have an INFORMAT value assigned.

/*
   Build the metadata table VARLIST from DICTIONARY.COLUMNS for SASHELP.CARS.
   INFORMAT is filled with a copy of FORMAT.
   ORDER BY makes the row order explicit, so that a later step may process
   the table BY VARNUM without relying on the order SQL happens to return.
*/
proc sql noprint ;
 create table varlist as
   select memname,varnum,name,type,length,format,format as informat,label
   from dictionary.columns
   where libname='SASHELP' and memname='CARS'
   order by varnum
 ;
quit;

Now let's make a sample text file with the data in it.

/* Temporary fileref that holds the sample delimited text. */
filename mydata temp;

/* Write every row of SASHELP.CARS to MYDATA, one DSD-delimited line per row. */
data _null_;
  file mydata dsd ;
  set sashelp.cars ;
  put (_all_) (:);
run;

Now we just need to use the metadata to write a program that could read that data. All we really need to do is define the variables and then add a simple INPUT firstvar -- lastvar statement to read the data.

/* Temporary fileref that receives the generated DATA step. */
filename code temp;

/*
   Walk VARLIST in VARNUM order and write a DATA step to CODE:
   a DATA/INFILE header, one ATTRIB statement per variable, and finally
   an INPUT statement covering the first through the last variable.
*/
data _null_;
  set varlist end=last_row ;
  by varnum ;
  file code ;

  /* First row: remember the first variable name and emit the step header. */
  if _n_ = 1 then do ;
    range_start = name ;
    put 'data ' memname ';' ;
    put '  infile mydata dsd truncover lrecl=1000000;' ;
  end;
  retain range_start ;

  /* ATTRIB line; character variables get a $ in front of the length. */
  if type = 'char' then put '  attrib ' name 'length=$' length ;
  else put '  attrib ' name 'length=' length ;

  /* Optional attributes are written only when the metadata supplies them. */
  if not missing(informat) then put @10 informat= ;
  if not missing(format) then put @10 format= ;
  if not missing(label) then put @10 label= :$quote. ;
  put '  ;' ;

  /* Last row: read everything from the first variable through this one. */
  if last_row then put '  input ' range_start '-- ' name ';' / 'run;' ;
run;

Now we can just run the generated code using %INCLUDE.

/* Run the statements stored in fileref CODE; SOURCE2 echoes the included lines to the log. */
%include code / source2 ;