It seems to me that Pig 0.11.0-cdh4.3.0
doesn't include PIG-2507.
You need to either patch and rebuild Pig to make it work (download the patch from https://issues.apache.org/jira/secure/attachment/12571848/PIG_2507.patch) or, as a workaround, create a custom UDF based on org.apache.pig.builtin.REPLACE.
For example:
package com.example;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigWarning;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
 * Pig UDF that replaces every match of a fixed regular expression in a chararray.
 *
 * <p>The regular expression is supplied once as a constructor argument rather than as a per-row
 * argument. Each call takes two arguments: the source string and the replacement string.
 */
public class MyReplace extends EvalFunc<String> {

    /** Regular expression to search for, exactly as passed to the constructor. */
    private final String searchString;

    /**
     * Compiled form of {@link #searchString}. Built lazily inside {@link #exec(Tuple)} so that an
     * invalid expression is reported as a per-row warning instead of failing construction.
     */
    private Pattern searchPattern;

    /**
     * Creates the UDF for a fixed search expression.
     *
     * @param searchString regular expression (java.util.regex syntax) whose matches are replaced
     */
    public MyReplace(String searchString) {
        this.searchString = searchString;
    }

    /**
     * Replaces all matches of the search expression in the first argument with the second argument.
     *
     * @param input tuple holding the source string at position 0 and the replacement at position 1;
     *     the replacement follows {@link java.util.regex.Matcher#replaceAll(String)} rules, so
     *     {@code $} and {@code \} are special characters
     * @return the source with every match replaced, or {@code null} when the tuple is null, has
     *     fewer than two fields, holds a null source or replacement, or processing fails
     * @throws IOException declared by the overridden method; failures inside the body are caught
     *     and reported through {@code warn} instead
     */
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() < 2) {
            return null;
        }
        try {
            String source = (String) input.get(0);
            String replacewith = (String) input.get(1);
            // Null in, null out: check explicitly rather than relying on a caught
            // NullPointerException (which produced an "error - null" warning).
            if (source == null || replacewith == null) {
                return null;
            }
            // Compile once and reuse; String.replaceAll would recompile the regex on every row.
            if (searchPattern == null) {
                searchPattern = Pattern.compile(searchString);
            }
            return searchPattern.matcher(source).replaceAll(replacewith);
        } catch (Exception e) {
            // Deliberately broad: a bad row (wrong type, bad pattern or replacement) is reported
            // as a warning and yields null instead of propagating.
            warn("Failed to process input; error - " + e.getMessage(), PigWarning.UDF_WARNING_1);
            return null;
        }
    }

    /**
     * Describes the result as a single unnamed chararray field.
     *
     * @param input schema of the arguments; not consulted
     * @return schema containing one {@code CHARARRAY} field
     */
    @Override
    public Schema outputSchema(Schema input) {
        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }

    /**
     * Declares the one supported argument signature: (chararray, chararray), mapped to this class.
     *
     * @return a single-element list holding the {@link FuncSpec} for that signature
     * @throws FrontendException declared by the overridden method
     */
    @Override
    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
        Schema s = new Schema();
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
        funcList.add(new FuncSpec(this.getClass().getName(), s));
        return funcList;
    }
}
Package it in a jar, then you can use it:
-- Make the jar that contains com.example.MyReplace available to this script.
register '/path/to/my.jar';
-- Bind the alias myReplace to MyReplace, passing ';' as its constructor argument.
DEFINE myReplace com.example.MyReplace(';');
A = load 'data' as (a:chararray);
-- Call the UDF with field a and ',' as its two arguments.
B = FOREACH A generate myReplace(a,',');
...