
I have a WordCount MapReduce job. When I run it from the Hadoop CLI it runs well and gives the expected output, but when I run the same job through Oozie it throws the error 'Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable'.

Here is the code:

package Drivers;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool
{

    public static void main(String[] args) throws Exception
    {
        int res = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception
    {
        Job job = Job.getInstance(getConf(), "Tool Job");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordMap.class);
        job.setReducerClass(RedForSum.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }


    // Mapper: tokenizes each input line and emits (word, 1)
    public static class WordMap extends Mapper<LongWritable,Text,Text,IntWritable>
    {
        @Override
        public void map(LongWritable k, Text v, Context con) throws IOException, InterruptedException
        {
            String line = v.toString();
            StringTokenizer t = new StringTokenizer(line);
            while (t.hasMoreTokens())
            {
                String word = t.nextToken();
                con.write(new Text(word), new IntWritable(1));
            }
        }
    }
    // Reducer: sums the counts emitted for each word
    public static class RedForSum extends Reducer<Text, IntWritable,Text,IntWritable>
    {
        @Override
        public void reduce(Text k, Iterable<IntWritable> vlist, Context con) throws IOException, InterruptedException
        {
            int tot = 0;
            for (IntWritable v : vlist)
                tot += v.get();
            con.write(k, new IntWritable(tot));
        }
    }
}
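
For context, this is roughly how I launch the same jar from the CLI, where run() wires up the mapper and reducer classes itself (the jar name and HDFS paths below are placeholders for my setup):

hadoop jar WordCount.jar Drivers.WordCount /user/cloudera/input /user/cloudera/output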

My workflow.xml is here:

<workflow-app xmlns="uri:oozie:workflow:0.1" name="map-reduce-wf">
    <start to="mr-node"/>
    <action name="mr-node">
        <map-reduce>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>mapreduce.mapper.class</name>
                    <value>Drivers.WordCount$WordMap</value>
                </property>
                <property>
                    <name>mapreduce.reducer.class</name>
                    <value>Drivers.WordCount$RedForSum</value>
                </property>
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.io.IntWritable</value>
                </property>
                <property>
                    <name>mapred.input.dir</name>
                    <value>${inputDir}</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>${outputDir}</value>
                </property>
            </configuration>
        </map-reduce>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
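
For reference, the job.properties passed on the command line supplies the parameters used above; mine looks roughly like this (host names, ports, and paths are placeholders for my Cloudera VM):

nameNode=hdfs://localhost:8020
jobTracker=localhost:8032
queueName=default
inputDir=${nameNode}/user/cloudera/input
outputDir=${nameNode}/user/cloudera/output
oozie.wf.application.path=${nameNode}/user/cloudera/wordcount-wf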

When I run it through Oozie with

oozie job -oozie http://localhost:11000/oozie -config /home/cloudera/job.properties -run

it throws the following error:

Error: java.io.IOException: Type mismatch in key from map: expected org.apache.hadoop.io.Text, received org.apache.hadoop.io.LongWritable
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1072)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:715)
    at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
    at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
    at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)

Can someone please let me know where I went wrong?

Thanks in advance.

1 Answer


The problem seems to be in the workflow XML: the property names should be mapreduce.map.class and mapreduce.reduce.class instead of mapreduce.mapper.class and mapreduce.reducer.class, respectively. The modified workflow should have these properties:

<property>
    <name>mapreduce.map.class</name>
    <value>Drivers.WordCount$WordMap</value>
</property>
<property>
    <name>mapreduce.reduce.class</name>
    <value>Drivers.WordCount$RedForSum</value>
</property>
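
This also explains the exact error you see: because mapreduce.mapper.class is not a recognized property, your WordMap class is never installed and the job falls back to the base org.apache.hadoop.mapreduce.Mapper, which is an identity mapper. With TextInputFormat the input key is the LongWritable byte offset of each line, so the identity mapper forwards LongWritable keys while the job declares Text as its output key class, producing the mismatch (note the Mapper.map and Mapper.run frames in your stack trace). Paraphrasing the default from the Hadoop source:

// org.apache.hadoop.mapreduce.Mapper: the default map() is an identity
// pass-through, so TextInputFormat's LongWritable offsets reach the
// output collector unchanged.
protected void map(KEYIN key, VALUEIN value, Context context)
        throws IOException, InterruptedException {
    context.write((KEYOUT) key, (VALUEOUT) value);
}

(On newer Hadoop releases, mapreduce.map.class and mapreduce.reduce.class are deprecated aliases of mapreduce.job.map.class and mapreduce.job.reduce.class, so either spelling should work.)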

For more on this, please refer here.