1
votes

example: there is a json array column(type:string) from a hive table like:

"[{"filed":"name", "value":"alice"}, {"filed":"age", "value":"14"}......]"

how to convert it into :

name      age
alice     14

by hive sql? I've tried lateral view explode but it's not working. thanks a lot!

1
Is it STRING column type?leftjoin
yes,string typeyoare

1 Answers

1
votes

This is working example of how it can be parsed in Hive. Customize it yourself and debug on real data, see comments in the code:

with your_table as (
select stack(1,
1,
'[{"field":"name", "value":"alice"}, {"field":"age", "value":"14"}, {"field":"something_else", "value":"somevalue"}]'
) as (id,str) --one row table with id and string with json. Use your table instead of this example
)


select id, 
       max(case when field_map['field'] = 'name' then field_map['value'] end) as name,
       max(case when field_map['field'] = 'age'  then field_map['value'] end) as age        --do the same for all fields 
from
(
select t.id,
       t.str as original_string,
       str_to_map(regexp_replace(regexp_replace(trim(a.field),', +',','),'\\{|\\}|"','')) field_map --remove extra characters and convert to map
  from your_table t
       lateral view outer explode(split(regexp_replace(regexp_replace(str,'\\[|\\]',''),'\\},','}|'),'\\|')) a as field --remove [], replace "}," with '}|" and explode 
) s 
group by id --aggregate in single row
; 

Result:

OK
id      name    age
1       alice   14

One more approach using get_json_object:

with your_table as (
select stack(1,
1,
'[{"field":"name", "value":"alice"}, {"field":"age", "value":"14"}, {"field":"something_else", "value":"somevalue"}]'
) as (id,str) --one row table with id and string with json. Use your table instead of this example
)


select id, 
       max(case when field = 'name' then value end) as name,
       max(case when field = 'age'  then value end) as age        --do the same for all fields 
from
(
select t.id,
       get_json_object(trim(a.field),'$.field') field,
       get_json_object(trim(a.field),'$.value') value
  from your_table t
       lateral view outer explode(split(regexp_replace(regexp_replace(str,'\\[|\\]',''),'\\},','}|'),'\\|')) a as field --remove [], replace "}," with '}|" and explode 
) s 
group by id --aggregate in single row
;

Result:

OK
id      name    age
1       alice   14