1
votes

I have the code below and would like to add a new column (let's call it flag) to the data set test, using IF statements based on percentile cutpoints held in macro variables (taken from the PROC UNIVARIATE step, step 1 of the macro). In the end, the flag should show which of the defined quantile ranges each record falls into, or take some other value if the record falls into none of them.

I am stuck at the step where I define the macro variables. How can I tell SAS to set Q1 to the value of pct10 from the PROC UNIVARIATE output?

/* Sample data: x = 1, 2, ..., 100 followed by two observations with missing x. */
data test;
  do x = 1 to 100 by 1;
    output;
  end;

  /* Reset x to missing and write that observation twice. */
  call missing(x);
  output;
  output;
run;

Example call (the macro declares keyword parameters, so they must be named): %quint(input=test, var=x, pcts=10 20 30 70)

/* QUINT (work in progress, as posted in the question).
   Parameters, as used in the body below:
     input= data set passed to PROC UNIVARIATE (data=&input)
     var=   analysis variable (var &var)
     pcts=  blank-separated percentile points passed to PCTLPTS=
   Intended result (not implemented yet): a copy of the input with a new
   column FLAG derived from macro variables Q1..Qn. */
%macro quint(input=,var=, pcts=);

/* calculate the cutpoints for the quintiles:
   the requested percentiles are written to data set QUINTILE in variables
   named with the prefix "pct" (PCTLPRE=pct) */
proc univariate data=&input;
  var &var;
  output out=quintile pctlpts=&pcts pctlpre=pct;
run;


/* write the quintiles to macro variables:
   the %DO loop generates one CALL SYMPUT (and runs one %PUT) per entry in &pcts */
data _null_;
set quintile;
%do i=1 %to %sysfunc(countw(&pcts));
/* NOTE(review): the second argument is cats("pct", <point>), i.e. the character
   string "pct10", "pct20", ... and not the value of the data set variable
   pct10 -- this is the problem the question asks about. */
call symput(cats("Q",&i),cats("pct",%scan(&pcts,&i,' ')));
/* NOTE(review): %PUT is a macro statement and is processed while the macro is
   still generating this DATA step, i.e. before CALL SYMPUT has executed, so
   Q&i is presumably not yet defined at this point -- confirm in the log. */
%put "&&Q&i";
%end;

run;

/* NOTE(review): the next line is a plain-text placeholder left by the author
   for the missing step; it is not SAS code. */
there should be data test with new column flag based on macrovariables created from proc univariate Q1 to Qx

%mend quint;
2

2 Answers

3
votes

No need for macro variables. Just use the real variables generated by PROC UNIVARIATE.

/* QUINT: assign each observation of &input a rank based on percentile cutpoints.
     input=   source data set
     output=  data set to create (all input variables plus &outvar)
     invar=   numeric variable to classify
     outvar=  name of the rank variable to create
     pcts=    blank-separated percentile points passed to PCTLPTS=
   Ranks: 0 when &invar is missing; otherwise the position of the first
   cutpoint that &invar does not exceed, or (number of cutpoints + 1) when
   &invar is greater than all of them.
   Side effect: creates/replaces the data set CUTPOINTS. */
%macro quint(input=,output=,invar=,outvar=, pcts=);

  /* Step 1: compute the requested percentiles of &invar and store them in
     CUTPOINTS as variables prefixed __pct. */
  proc univariate data=&input noprint;
    var &invar;
    output out=cutpoints pctlpre=__pct pctlpts=&pcts;
  run;

  /* Step 2: walk the cutpoints for every input observation. */
  data &output;
    /* Read the cutpoints once; they stay available for every later observation. */
    if _n_ = 1 then set cutpoints;
    drop __pct:;
    array _cut_[*] __pct:;

    set &input;

    /* The loop stops at the first cutpoint that is >= &invar; if none is,
       the index runs one past the last cutpoint. */
    if not missing(&invar) then
      do &outvar = 1 to dim(_cut_) while (&invar > _cut_[&outvar]);
      end;
    else &outvar = 0;
  run;

%mend quint;

So using your example data we could call the macro like this:

%quint(input=test,
       output=want,
       invar=x,
       outvar=rank,
       pcts=10 20 30 70)

And to test it let's see what the min/max values of X got assigned to each "rank".

/* For each rank, report the count and the smallest/largest x assigned to it. */
proc means data=want nway n min max;
  var x;
  class rank;
run;

Output:

enter image description here

1
votes

Another way of doing this I guess:

This assumes that the percentiles are specified in ascending order - else it can give wrong results.

/* QUINT: label each observation of &input with the first percentile point
   whose cutpoint lies strictly above the value of &var.
     input=  source data set
     var=    numeric variable to classify
     pcts=   blank-separated percentile points, in ascending order
             (integer points only: the default %SCAN delimiters would split
             a value such as 2.5)
   Result: data set PCTS = &input plus the column QUNITILE (spelling kept so
   existing consumers of the output are not broken), holding the matching
   percentile point. QUNITILE is missing when &var is missing or is not below
   the last cutpoint. The result is printed with PROC PRINT.
   Side effects: creates/replaces data sets QUINTILE and PCTS. */
%macro quint(input=,var=, pcts=);

  /* Keep the loop counter and the current point out of the caller's scope. */
  %local ii q;

  /* Percentile cutpoints of &var go to QUINTILE as pct<point> variables. */
  proc univariate data=&input noprint;
    var &var;
    output out=quintile pctlpts=&pcts pctlpre=pct;
  run;

  data pcts;
    /* Load the cutpoints once; they remain available for every observation. */
    if _N_ = 1 then set quintile;
    set &input;

    /* A missing value compares lower than any number, so without this guard
       every missing &var would fall into the lowest bucket. */
    if missing(&var) then qunitile = .;

    /* Generate one ELSE IF branch per percentile point, in the order given. */
    %let ii = 1;
    %do %while (%length(%scan(&pcts, &ii)) > 0);
      %let q = %scan(&pcts, &ii);
      else if &var < pct&q then qunitile = &q;
      %let ii = %eval(&ii + 1);
    %end;

    drop pct:;

  run;

  /* Name the data set explicitly instead of relying on the last one created. */
  proc print data=pcts;
  run;

%mend quint;

%quint(input=test,
       var=x,
       pcts=10 20 30 70);