1
votes

I am new to Hadoop's MapReduce. I have written a map reduce task and I am trying to run that on my local machine. But the job hangs after map 100%.

Below is the code, I don't understand what am I missing.

I have a custom key class

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class AirlineMonthKey implements WritableComparable<AirlineMonthKey>{

Text airlineName;
Text month;

public AirlineMonthKey(){
    super();
}

public AirlineMonthKey(Text airlineName, Text month) {
    super();
    this.airlineName = airlineName;
    this.month = month;
}

public Text getAirlineName() {
    return airlineName;
}

public void setAirlineName(Text airlineName) {
    this.airlineName = airlineName;
}

public Text getMonth() {
    return month;
}

public void setMonth(Text month) {
    this.month = month;
}

@Override
public void readFields(DataInput in) throws IOException {
    // TODO Auto-generated method stub
    this.airlineName.readFields(in);
    this.month.readFields(in);
}

@Override
public void write(DataOutput out) throws IOException {
    // TODO Auto-generated method stub
    this.airlineName.write(out);
    this.month.write(out);      
}

@Override
public int compareTo(AirlineMonthKey airlineMonthKey) {
    // TODO Auto-generated method stub
    int diff = getAirlineName().compareTo(airlineMonthKey.getAirlineName());
    if(diff != 0){
        return diff;
    }

    int m1 = Integer.parseInt(getMonth().toString());
    int m2 = Integer.parseInt(airlineMonthKey.getMonth().toString());

    if(m1>m2){
        return -1;
    }
    else 
        return 1;
}


}

and The mapper and the reducer class that uses the custom key as below.

package com.mapresuce.secondarysort;

import java.io.IOException;
import java.io.StringReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.opencsv.CSVReader;

public class FlightDelayByMonth {

public static class FlightDelayByMonthMapper extends
        Mapper<Object, Text, AirlineMonthKey, Text> {
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String str = value.toString();
        // Reading Line one by one from the input CSV.
        CSVReader reader = new CSVReader(new StringReader(str));
        String[] split = reader.readNext();
        reader.close();

        String airlineName = split[6];
        String month = split[2];
        String year = split[0];
        String delayMinutes = split[37];
        String cancelled = split[41];

        if (!(airlineName.equals("") || month.equals("") || delayMinutes
                .equals(""))) {
            if (year.equals("2008") && cancelled.equals("0.00")) {
                AirlineMonthKey airlineMonthKey = new AirlineMonthKey(
                        new Text(airlineName), new Text(month));
                Text delay = new Text(delayMinutes);
                context.write(airlineMonthKey, delay);
                System.out.println("1");
            }
        }

    }
}

public static class FlightDelayByMonthReducer extends
        Reducer<AirlineMonthKey, Text, Text, Text> {


    public void reduce(AirlineMonthKey key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {
        for(Text val : values){
            context.write(new Text(key.getAirlineName().toString()+" "+key.getMonth().toString()), val);
        }
    }
}

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {   
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage:<in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Average monthly flight dealy");
    job.setJarByClass(FlightDelayByMonth.class);
    job.setMapperClass(FlightDelayByMonthMapper.class);
    job.setReducerClass(FlightDelayByMonthReducer.class);
    job.setOutputKeyClass(AirlineMonthKey.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Also I have created a job and configuration in the main. Don't know what I am missing. I am running all this in local environment.

2

2 Answers

0
votes

Try with writing a custom implementation of toString, equals and hashcode in your AirlineMonthKey class.

Read below link.

http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/io/WritableComparable.html

It is important for key types to implement hashCode().

Hope this could help you.

0
votes

The issue was I had to use the default Constructor in the AirlineMonthKey (which I did) and initialize the instance variables in the custom key class (which I didn't).