I want to write a Kettle step in Java that takes an input XML file and an XSLT file and outputs the transformed XML.
How do I do this? I have no idea where to begin, and there is not enough documentation on using Java with Pentaho on the web.
I want to write a Kettle step in Java that takes an input XML file and an XSLT file and outputs the transformed XML.
How do I do this? I have no idea where to begin, and there is not enough documentation on using Java with Pentaho on the web.
I found the answer. Here is the code for transforming XML using XSLT in a Kettle Java step:
import java.util.*;
import java.io.FileOutputStream;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
// Position of the input-XML file-name field within the input row; assigned in processRow.
private int infilenameIndex;
// Position of the XSLT file-name field within the input row; assigned in processRow.
private int xsltfilenameIndex;
// Position of the output file-name field within the input row; assigned in processRow.
private int outfilenameIndex;
/**
 * Handles one input row: reads the input, XSLT and output file names from the
 * row, runs the XSLT transformation and passes the row on to the next step.
 *
 * <p>On the first row the positions of the three file-name fields are resolved
 * once and cached in {@code infilenameIndex}, {@code xsltfilenameIndex} and
 * {@code outfilenameIndex}. The original guard was {@code first == false},
 * which skipped this resolution; {@code first} is presumably the step's
 * first-row flag declared by the surrounding class (not visible here).
 *
 * @param smi step meta interface (unused here)
 * @param sdi step data interface (unused here)
 * @return {@code false} when there are no more input rows, {@code true} otherwise
 * @throws KettleException if a required field is missing or the transformation fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    Object[] r = getRow();
    if (r == null) {
        // No more rows to read: signal completion to the following steps.
        setOutputDone();
        return false;
    }

    if (first) {
        first = false;
        // Resolve the field positions once; the layout is the same for every row.
        infilenameIndex = resolveFieldIndex("infilename");
        xsltfilenameIndex = resolveFieldIndex("xsltfilename");
        outfilenameIndex = resolveFieldIndex("outfilename");
    }

    // Read through the resolved indices so that parameter-configured field names are honoured.
    String infilename = getInputRowMeta().getString(r, infilenameIndex);
    String xsltfilename = getInputRowMeta().getString(r, xsltfilenameIndex);
    String outfilename = getInputRowMeta().getString(r, outfilenameIndex);

    Object[] outputRowData = RowDataUtil.resizeArray(r, data.outputRowMeta.size());
    transform(infilename, xsltfilename, outfilename);
    putRow(data.outputRowMeta, outputRowData);
    return true;
}

/**
 * Finds the position of a file-name field in the input row.
 *
 * <p>The field name is taken from the step parameter {@code parameterName};
 * when that parameter is not set (assumed to yield {@code null} or an empty
 * string - TODO confirm), the parameter name itself is used as the field name,
 * which keeps setups working that simply name their fields
 * {@code infilename}, {@code xsltfilename} and {@code outfilename}.
 *
 * @param parameterName name of the step parameter that holds the field name
 * @return index of the field in the input row meta
 * @throws KettleException if no such field exists in the input row
 */
private int resolveFieldIndex(String parameterName) throws KettleException {
    String fieldName = getParameter(parameterName);
    if (fieldName == null || fieldName.length() == 0) {
        fieldName = parameterName;
    }
    int index = getInputRowMeta().indexOfValue(fieldName);
    if (index < 0) {
        throw new KettleException("Field not found in the input row, check parameter '" + parameterName + "'!");
    }
    return index;
}
/**
 * Applies the stylesheet {@code xsltfilename} to the document {@code infilename}
 * and writes the result to {@code outfilename}.
 *
 * <p>The three arguments are handed to the JAXP stream classes as system ids.
 * Constructing a {@code StreamSource}/{@code StreamResult} from a system id only
 * stores the string and does not touch the file, so missing or unreadable files
 * surface when the stylesheet is compiled or the transformation runs; that is
 * the single place where errors are handled.
 *
 * @param infilename   system id of the XML document to transform
 * @param xsltfilename system id of the XSLT stylesheet
 * @param outfilename  system id of the file that receives the result
 * @throws KettleException if the stylesheet cannot be compiled or the transformation fails
 */
public void transform(String infilename, String xsltfilename, String outfilename) throws KettleException {
    logBasic("");
    logBasic("Transformerar " + infilename + " med " + xsltfilename + " till " + outfilename );
    logBasic("");

    javax.xml.transform.stream.StreamSource inss = new javax.xml.transform.stream.StreamSource(infilename);
    javax.xml.transform.stream.StreamSource xsltss = new javax.xml.transform.stream.StreamSource(xsltfilename);
    javax.xml.transform.stream.StreamResult outss = new javax.xml.transform.stream.StreamResult(outfilename);

    try {
        // Uses whichever JAXP implementation is configured; to force Saxon,
        // instantiate net.sf.saxon.TransformerFactoryImpl here instead.
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(xsltss);
        transformer.transform(inss, outss);
    } catch (Exception e) {
        // Step boundary: wrap every failure, keeping the cause and naming the files involved.
        throw new KettleException("XSLT transformation failed (input '" + infilename
            + "', stylesheet '" + xsltfilename + "', output '" + outfilename + "')", e);
    }
}
I can see how this would be preferable for large XML files as it allows for file/stream-based processing of the XML instead of holding the entire XML in a Pentaho variable. I maxed out my JVM heap due to a large generated XML dataset so I'm hopeful this code--or a variant--might allow me to not keep the full XML in memory... will report back!
Here's my refactoring/consolidation of the example code:
import java.util.*;
import java.io.FileOutputStream;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.*;
/**
 * Consumes one input row, runs the XSLT transformation described by its
 * {@code infilename}, {@code xsltfilename} and {@code outfilename} fields and
 * forwards the row, resized to the output row layout.
 *
 * @param smi step meta interface (unused here)
 * @param sdi step data interface (unused here)
 * @return {@code true} while rows are being processed, {@code false} once input is exhausted
 * @throws KettleException if a required field is empty or the transformation fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    final Object[] inputRow = getRow();
    if (inputRow != null) {
        final String sourcePath = getStringField(inputRow, "infilename");
        final String stylesheetPath = getStringField(inputRow, "xsltfilename");
        final String targetPath = getStringField(inputRow, "outfilename");

        final Object[] forwardedRow = RowDataUtil.resizeArray(inputRow, data.outputRowMeta.size());

        doXSLT(sourcePath, stylesheetPath, targetPath);
        putRow(data.outputRowMeta, forwardedRow);
        return true;
    }

    // End of input: tell downstream steps we are finished.
    setOutputDone();
    return false;
}
/**
 * Reads the input field named {@code paramName} from {@code row} as a string.
 *
 * @param row       the current input row
 * @param paramName name of the input field to read
 * @return the field's non-empty string value
 * @throws KettleException if the value is {@code null} or empty
 */
private String getStringField(Object[] row, String paramName) throws KettleException {
    final String fieldValue = get(Fields.In, paramName).getString(row);
    final boolean missing = fieldValue == null || fieldValue.length() == 0;
    if (missing) {
        throw new KettleException("XSLT setting '" + paramName + "' not found in input row");
    }
    return fieldValue;
}
/**
 * Transforms the XML file {@code infilename} with the stylesheet
 * {@code xsltfilename} and writes the result to {@code outfilename}.
 *
 * <p>All three arguments are treated as local file paths. The sources and the
 * result are built from {@link java.io.File} objects rather than by prefixing
 * {@code "file:///"}: plain concatenation does not percent-encode spaces or
 * other reserved characters, leaves backslashes in the URI, and anchors
 * relative paths at the filesystem root instead of the working directory.
 *
 * @param infilename   path of the XML file to transform
 * @param xsltfilename path of the XSLT stylesheet
 * @param outfilename  path of the file that receives the result
 * @throws KettleException if the stylesheet cannot be compiled or the transformation fails
 */
public void doXSLT(String infilename, String xsltfilename, String outfilename) throws KettleException {
    logBasic("");
    logBasic("Transforming XML file '" + infilename + "' with XSLT '" + xsltfilename + "' and target file '" + outfilename + "'" );
    logBasic("");
    try {
        StreamSource inss = new StreamSource(new java.io.File(infilename));
        StreamSource xsltss = new StreamSource(new java.io.File(xsltfilename));
        StreamResult outss = new StreamResult(new java.io.File(outfilename));

        // Uses whichever JAXP implementation is configured; to force Saxon,
        // instantiate net.sf.saxon.TransformerFactoryImpl here instead.
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer(xsltss);
        transformer.transform(inss, outss);
    } catch (Exception e) {
        // Step boundary: log, then wrap every failure so callers only see KettleException.
        logError("Error attempting XSLT: " + e);
        throw new KettleException(e);
    }
}