0
votes

I want the data to be transposed (date) and arranged accordingly and fillin gaps Eg:enter image description here

/*COUNT ID BASED ON bucket and Sold_month*/
    PROC SQL;
CREATE TABLE want AS
SELECT bucket, Sold_month,COUNT(ID) as IID from TRANS_2 GROUP by bucket, 
Sold_month;
quit;

/*Format date*/
proc datasets ;
modify want;
FORMAT Sold_month YYMMN6.;
quit;

/*TRANSPOSE*/
PROC TRANSPOSE data=want out=wantf  (drop=_:)  prefix=D_ ;
by bucket;
var IID;
id Sold_month;
format IID best.;
run;

/*Sorting the dates Sold month*/
proc contents data=wantf out=col_names(keep=name) noprint;
run; 

proc sort data=col_names out=col_names_sorted;
 by name;
 run; 
 proc sql;
 create table col_names_sorted_n as
 select name from col_names_sorted where name<>'bucket';
 run;


 data _null_;
 set col_names_sorted_n;
 by name;
 retain sorted_cols;
 length sorted_cols $2500.;
 if _n_ = 1 then sorted_cols =name ;
 else sorted_cols = catx(' ', sorted_cols, name);
 call symput('sorted_cols', sorted_cols);
 run; 

  %put &sorted_cols;
  /* Final SOLD DATA- */
data output_sorted;
retain bucket &sorted_cols;
set wantf ;
run;

/*ADDING 0 to missing values*/
proc stdize data=output_sorted out=Transaction reponly missing=0;
run;

This works but I dont get the missing column names and rows which I want to get by seeing the maximum and minimum values of bucket(0-max) and sold_dt(all year_month from min year-month to max year-month). I dont want to manually enter all the columns/buckets but derive the max and min and do. But bucket should start from 0 to max available

1

1 Answers

1
votes

Might be easiest to just use PROC SUMMARY with COMPLETETYPES option.

proc summary data=TRANS_2 nway completetypes ;
  class bucket sold_month ;
  var id;
  format sold_month yymmn6. ;
  output out=tall n=iid;
run;

proc transpose data=tall out=want(drop=_name_) prefix=d_ ;
  by bucket ;
  id sold_month;
  var iid ;
run;

If you have totally missing BUCKET values, for example you have no records for BUCKET=4 at all, then you will need add some records. Might be easier to add them on the summarized table in which case you could skip the COMPLETETYPES and do it on your own. So summarize the data:

proc sql noprint;
  create table tall as
    select bucket
         , intnx('month',sold_month,0,'b') as sold_month format=yymmn6.
         , count(id) as IID
    from TRANS_2
    group by 1,2
  ;
  create table tall_dummy as 
    select max(bucket) as bucket
         , min(sold_month) as min   
         , max(sold_month) as max
    from tall
  ;
quit;

Generate table of all bucket numbers and and all months with zero for count.

data tall_dummy ;
  set tall_dummy ;
  keep bucket sold_month iid;
  iid=0;
  do bucket = 0 to bucket ;
    do i=0 to intck('month',min,max);
      sold_month=intnx('month',min,i,'b') ;
      output;
    end;
  end;
  format sold_month yymmn6.;
run;

Then merge with the actually bucket*month combinations and replace the zeros with actual counts.

data tall_full ;
  merge tall_dummy tall ;
  by bucket sold_month;
run; 

Then you can transpose.

proc transpose data=tall_full out=want(drop=_name_) prefix=d_ ;
  by bucket ;
  id sold_month;
  var iid ;
run;