I have a problem with a regular expression in hive, it does not recognize ";".
insert overwrite table prueba
SELECT
regexp_extract(col_value, '^(?:([^;]*)\;?){1}', 1) VARIABLE,
regexp_extract(col_value, '^(?:([^;]*)\;?){2}', 1) TipoType
from temp;
The error that occurs is:
H110 Unable to submit statement. Error while compiling statement:
FAILED: ParseException line 3:29 cannot recognize input near '^' ' (' '?'
In select expression [ERROR_STATUS]
Example Data:
VARIABLE;Tipo/Type;
FECHA;DATE;
ID_CLIENTE;CHAR;
CUS_TYPE;CHAR;
CUS_SUBTYPE;CHAR;
NUEVOTITU;NUMBER;
TITULAR;NUMBER;
BAJATITU;NUMBER;
.
.
.
Code:
drop table temp;
drop table prueba;
create table temp (col_value string);
LOAD DATA INPATH '/tmp/data/prueba.csv' OVERWRITE INTO TABLE temp;
create table prueba(variable string, tipotype string);
insert overwrite table prueba
SELECT
regexp_extract(col_value, '^(([^\;]*)\;){1}', 1) variable,
regexp_extract(col_value, '^(([^\;]*)\;){2}', 1) tipotype
from temp;
Temp table:
temp.col_value
Prueba table:
prueba.variable prueba.tipotype

(?:=>(. Try using^(([^;]*);?)with the limiting quantifiers. - Wiktor Stribiżew