I am new to Hadoop's MapReduce. I have written a map reduce task and I am trying to run that on my local machine. But the job hangs after map 100%.
Below is the code, I don't understand what am I missing.
I have a custom key class
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
public class AirlineMonthKey implements WritableComparable<AirlineMonthKey>{
Text airlineName;
Text month;
public AirlineMonthKey(){
super();
}
public AirlineMonthKey(Text airlineName, Text month) {
super();
this.airlineName = airlineName;
this.month = month;
}
public Text getAirlineName() {
return airlineName;
}
public void setAirlineName(Text airlineName) {
this.airlineName = airlineName;
}
public Text getMonth() {
return month;
}
public void setMonth(Text month) {
this.month = month;
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.airlineName.readFields(in);
this.month.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
this.airlineName.write(out);
this.month.write(out);
}
@Override
public int compareTo(AirlineMonthKey airlineMonthKey) {
// TODO Auto-generated method stub
int diff = getAirlineName().compareTo(airlineMonthKey.getAirlineName());
if(diff != 0){
return diff;
}
int m1 = Integer.parseInt(getMonth().toString());
int m2 = Integer.parseInt(airlineMonthKey.getMonth().toString());
if(m1>m2){
return -1;
}
else
return 1;
}
}
and The mapper and the reducer class that uses the custom key as below.
package com.mapresuce.secondarysort;
import java.io.IOException;
import java.io.StringReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.opencsv.CSVReader;
public class FlightDelayByMonth {
public static class FlightDelayByMonthMapper extends
Mapper<Object, Text, AirlineMonthKey, Text> {
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String str = value.toString();
// Reading Line one by one from the input CSV.
CSVReader reader = new CSVReader(new StringReader(str));
String[] split = reader.readNext();
reader.close();
String airlineName = split[6];
String month = split[2];
String year = split[0];
String delayMinutes = split[37];
String cancelled = split[41];
if (!(airlineName.equals("") || month.equals("") || delayMinutes
.equals(""))) {
if (year.equals("2008") && cancelled.equals("0.00")) {
AirlineMonthKey airlineMonthKey = new AirlineMonthKey(
new Text(airlineName), new Text(month));
Text delay = new Text(delayMinutes);
context.write(airlineMonthKey, delay);
System.out.println("1");
}
}
}
}
public static class FlightDelayByMonthReducer extends
Reducer<AirlineMonthKey, Text, Text, Text> {
public void reduce(AirlineMonthKey key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
for(Text val : values){
context.write(new Text(key.getAirlineName().toString()+" "+key.getMonth().toString()), val);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage:<in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Average monthly flight dealy");
job.setJarByClass(FlightDelayByMonth.class);
job.setMapperClass(FlightDelayByMonthMapper.class);
job.setReducerClass(FlightDelayByMonthReducer.class);
job.setOutputKeyClass(AirlineMonthKey.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
Also I have created a job and configuration in the main. Don't know what I am missing. I am running all this in local environment.