0
votes

The purpose is to check if values for column 2 and 3 in file1 match with column 1 in file2. If any value match, then replace values in file2 for column 2 and 3 using the information of file1 columns 4 and 5.

file1

100,31431,37131,999991.70,2334362.30
100,31431,37471,111113.20,2334363.30
100,31433,36769,777775.60,2334361.90
102,31433,36853,333322.00,2334362.80

file2

3143137113 318512.50 2334387.50 100
3143137131 318737.50 2334387.50 100
3143137201 319612.50 2334387.50 100
3143137219 319837.50 2334387.50 100
3143137471 322987.50 2334387.50 100
3143137491 323237.50 2334387.50 100
3143336687 313187.50 2334412.50 100
3143336723 313637.50 2334412.50 100
3143336769 314212.50 2334412.50 100
3143336825 314912.50 2334412.50 100
3143336853 315262.50 2334412.50 102

Output desired

31431,37113,318512.50,2334387.50,100
31431,37131,999991.70,2334362.30,100
31431,37201,319612.50,2334387.50,100
31431,37219,319837.50,2334387.50,100
31431,37471,111113.20,2334363.30,100
31431,37491,323237.50,2334387.50,100
31433,36687,313187.50,2334412.50,100
31433,36723,313637.50,2334412.50,100
31433,36769,777775.60,2334361.90,100
31433,36825,314912.50,2334412.50,100
31433,36853,333322.00,2334362.80,102

I tried

awk -F[, ] 'FNR==NR{a[$1 $2]=$0;next}$1 in a{print $0 ,a[$1 $2]}' file1 file2

Thanks in advance

2
In your samples 3143137113 is not present in Input_file1 but still your shown output shows it as 31431,37113 please elaborate more that how it has come? - RavinderSingh13
Hi Ravinder, Value 3143137113 is not present in file1.. The purpose is to check if values in file1 are in file2 then, replace the columns 2 and 3 in file2 using values of columns 4 and 5 from file1 - OXXO

2 Answers

2
votes

Could you please try following.

awk '
BEGIN{
  OFS=","
}
FNR==NR{
  a[$2 $3]=$2 OFS $3
  b[$2 $3]=$4;c[$2 $3]=$5
  next
}
($1 in a){
  $2=b[$1]
  $3=c[$1];$1=a[$1]
  print
  next
}
{
  $1=$1
  sub(/^...../,"&,",$1)
  print
}
' FS=","   file1  FS=" " file2

Output will be as follows.

31431,37113,318512.50,2334387.50,100
31431,37131,999991.70,2334362.30,100
31431,37201,319612.50,2334387.50,100
31431,37219,319837.50,2334387.50,100
31431,37471,111113.20,2334363.30,100
31431,37491,323237.50,2334387.50,100
31433,36687,313187.50,2334412.50,100
31433,36723,313637.50,2334412.50,100
31433,36769,777775.60,2334361.90,100
31433,36825,314912.50,2334412.50,100
31433,36853,333322.00,2334362.80,102
1
votes

Try this:

$ awk -F, 'NR==FNR{tmp=$0;sub($1 FS,"",tmp);a[$2 $3]=tmp;next} $1 in a{print a[$1],$NF;next} {$1=substr($1,1,5) OFS substr($1,6,5);} 1' OFS=, file1 FS=' ' file2
31431,37113,318512.50,2334387.50,100
31431,37131,999991.70,2334362.30,100
31431,37201,319612.50,2334387.50,100
31431,37219,319837.50,2334387.50,100
31431,37471,111113.20,2334363.30,100
31431,37491,323237.50,2334387.50,100
31433,36687,313187.50,2334412.50,100
31433,36723,313637.50,2334412.50,100
31433,36769,777775.60,2334361.90,100
31433,36825,314912.50,2334412.50,100
31433,36853,333322.00,2334362.80,102

Above assumes $1 of file does not include regex characters, so to be accurate and safe, better use this:

awk -F, 'NR==FNR{$1="";a[$2 $3]=substr($0,2);next} $1 in a{print a[$1],$NF;next} {$1=substr($1,1,5) OFS substr($1,6,5);} 1' OFS=, file1 FS=' ' file2

However this one assumes the FS of file1 is 1 character only.

And that leads to another change/efficiency improvement:

awk -F, 'NR==FNR{a[$2 $3]=substr($0,length($1 FS)+1);next} $1 in a{print a[$1],$NF;next} {$1=substr($1,1,5) OFS substr($1,6,5);} 1' OFS=, file1 FS=' ' file2