
I am working with the Hadoop MapReduce API, executing a simple task to calculate the highest temperature in each year, as specified in a large input file of around 20 MB.

Everything is working: the mapper tasks run fine, the reducer tasks run fine, and the output file is generated correctly.

But the problem is that I cannot see anything on the Hadoop UI page: not the Job tab, not the job progress, and not even the Job History.

Here is my WordCount Java file:

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class WordCount extends Configured implements Tool{

    /**
     * Main function which calls the run method and passes the args using ToolRunner
     * @param args Two arguments input and output file paths
     * @throws Exception 
     */
    public static void main(String[] args) throws Exception{
        int exitCode = ToolRunner.run(new WordCount(), args);
        System.exit(exitCode);
    }

    /**
     * Run method which schedules the Hadoop Job
     * @param args Arguments passed in main function
     */
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.printf("Usage: %s needs two arguments <input> <output> files\n",
                    getClass().getSimpleName());
            return -1;
        }

        //Initialize the Hadoop job and set the jar as well as the name of the Job
        Job job = new Job();
        job.setJarByClass(WordCount.class);
        job.setJobName("WordCounter");

        //Add input and output file paths to job based on the arguments passed
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);


        //Set the MapClass and ReduceClass in the job
        job.setMapperClass(MapClass.class);
        job.setReducerClass(ReduceClass.class);

        //Wait for the job to complete and report whether it succeeded
        int returnValue = job.waitForCompletion(true) ? 0 : 1;

        if (job.isSuccessful()) {
            System.out.println("Job was successful");
        } else {
            System.out.println("Job was not successful");
        }

        return returnValue;
    }
}

Here are my Hadoop configuration files:

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/Users/bng/Documnents/hDir/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/Users/bng/Documnents/hDir/hdfs/data</value>
  </property>
</configuration>

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
   <property>
      <name>fs.defaultFS</name>
      <value>hdfs://localhost/</value>
   </property>
   <property>
        <name>dfs.http.address</name>
        <value>50070</value>
    </property>
</configuration>

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
     <property>
         <name>mapreduce.framework.name</name>
         <value>yarn</value>
     </property>
</configuration>

yarn-site.xml

<?xml version="1.0"?>
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>localhost</value>
    </property>
</configuration>

And here are the screenshots of my Hadoop UI:

[screenshots not included]

Attached here are the execution logs as shown on the STS console:

0    [main] WARN  org.apache.hadoop.util.NativeCodeLoader  - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
202  [main] INFO  org.apache.hadoop.conf.Configuration.deprecation  - session.id is deprecated. Instead, use dfs.metrics.session-id
203  [main] INFO  org.apache.hadoop.metrics.jvm.JvmMetrics  - Initializing JVM Metrics with processName=JobTracker, sessionId=
402  [main] WARN  org.apache.hadoop.mapreduce.JobSubmitter  - Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
408  [main] WARN  org.apache.hadoop.mapreduce.JobSubmitter  - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
419  [main] INFO  org.apache.hadoop.mapreduce.lib.input.FileInputFormat  - Total input paths to process : 1
471  [main] INFO  org.apache.hadoop.mapreduce.JobSubmitter  - number of splits:1
593  [main] INFO  org.apache.hadoop.mapreduce.JobSubmitter  - Submitting tokens for job: job_local1149743576_0001
764  [main] INFO  org.apache.hadoop.mapreduce.Job  - The url to track the job: http://localhost:8080/
766  [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - OutputCommitter set in config null
766  [main] INFO  org.apache.hadoop.mapreduce.Job  - Running job: job_local1149743576_0001
774  [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
815  [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Waiting for map tasks
816  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local1149743576_0001_m_000000_0
859  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
860  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : null
865  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Processing split: file:/Users/bng/Downloads/hadoop-2.6.4/files/input.txt:0+366
995  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - (EQUATOR) 0 kvi 26214396(104857584)
995  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - mapreduce.task.io.sort.mb: 100
995  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - soft limit at 83886080
998  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufvoid = 104857600
998  [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396; length = 6553600
1003 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
1010 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 
1011 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Starting flush of map output
1011 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Spilling map output
1011 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - bufstart = 0; bufend = 594; bufvoid = 104857600
1011 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - kvstart = 26214396(104857584); kvend = 26214172(104856688); length = 225/6553600
1020 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.MapTask  - Finished spill 0
1024 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local1149743576_0001_m_000000_0 is done. And is in the process of committing
1032 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map
1032 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local1149743576_0001_m_000000_0' done.
1033 [LocalJobRunner Map Task Executor #0] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local1149743576_0001_m_000000_0
1033 [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - map task executor complete.
1035 [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Waiting for reduce tasks
1035 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Starting task: attempt_local1149743576_0001_r_000000_0
1041 [pool-3-thread-1] INFO  org.apache.hadoop.yarn.util.ProcfsBasedProcessTree  - ProcfsBasedProcessTree currently is supported only on Linux.
1041 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Task  -  Using ResourceCalculatorProcessTree : null
1044 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.ReduceTask  - Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@6a57da8b
1058 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - MergerManager: memoryLimit=1336252800, maxSingleShuffleLimit=334063200, mergeThreshold=881926912, ioSortFactor=10, memToMemMergeOutputsThreshold=10
1060 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - attempt_local1149743576_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
1092 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.LocalFetcher  - localfetcher#1 about to shuffle output of map attempt_local1149743576_0001_m_000000_0 decomp: 710 len: 714 to MEMORY
1108 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput  - Read 710 bytes from map-output for attempt_local1149743576_0001_m_000000_0
1141 [localfetcher#1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - closeInMemoryFile -> map-output of size: 710, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->710
1142 [EventFetcher for fetching Map Completion Events] INFO  org.apache.hadoop.mapreduce.task.reduce.EventFetcher  - EventFetcher is interrupted.. Returning
1143 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 1 / 1 copied.
1143 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
1160 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 1 sorted segments
1160 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 1 segments left of total size: 702 bytes
1162 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merged 1 segments, 710 bytes to disk to satisfy reduce memory limit
1163 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 1 files, 714 bytes from disk
1165 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl  - Merging 0 segments, 0 bytes from memory into reduce
1165 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Merging 1 sorted segments
1166 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Merger  - Down to the last merge-pass, with 1 segments left of total size: 702 bytes
1167 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 1 / 1 copied.
1186 [pool-3-thread-1] INFO  org.apache.hadoop.conf.Configuration.deprecation  - mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
1193 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task:attempt_local1149743576_0001_r_000000_0 is done. And is in the process of committing
1195 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - 1 / 1 copied.
1195 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task attempt_local1149743576_0001_r_000000_0 is allowed to commit now
1196 [pool-3-thread-1] INFO  org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter  - Saved output of task 'attempt_local1149743576_0001_r_000000_0' to file:/Users/bng/Downloads/hadoop-2.6.4/files/output/_temporary/0/task_local1149743576_0001_r_000000
1197 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - reduce > reduce
1197 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.Task  - Task 'attempt_local1149743576_0001_r_000000_0' done.
1197 [pool-3-thread-1] INFO  org.apache.hadoop.mapred.LocalJobRunner  - Finishing task: attempt_local1149743576_0001_r_000000_0
1197 [Thread-10] INFO  org.apache.hadoop.mapred.LocalJobRunner  - reduce task executor complete.
1772 [main] INFO  org.apache.hadoop.mapreduce.Job  - Job job_local1149743576_0001 running in uber mode : false
1774 [main] INFO  org.apache.hadoop.mapreduce.Job  -  map 100% reduce 100%
1775 [main] INFO  org.apache.hadoop.mapreduce.Job  - Job job_local1149743576_0001 completed successfully
1784 [main] INFO  org.apache.hadoop.mapreduce.Job  - Counters: 30
    File System Counters
        FILE: Number of bytes read=2542
        FILE: Number of bytes written=495530
        FILE: Number of read operations=0
        FILE: Number of large read operations=0
        FILE: Number of write operations=0
    Map-Reduce Framework
        Map input records=1
        Map output records=57
        Map output bytes=594
        Map output materialized bytes=714
        Input split bytes=119
        Combine input records=0
        Combine output records=0
        Reduce input groups=47
        Reduce shuffle bytes=714
        Reduce input records=57
        Reduce output records=47
        Spilled Records=114
        Shuffled Maps =1
        Failed Shuffles=0
        Merged Map outputs=1
        GC time elapsed (ms)=10
        Total committed heap usage (bytes)=468713472
    Shuffle Errors
        BAD_ID=0
        CONNECTION=0
        IO_ERROR=0
        WRONG_LENGTH=0
        WRONG_MAP=0
        WRONG_REDUCE=0
    File Input Format Counters 
        Bytes Read=366
    File Output Format Counters 
        Bytes Written=430
Job was successful

Please suggest what is going wrong here.

The file should be named mapred-site.xml, if that is not just a typo here. – franklinsijo
@franklinsijo Yes, it's a typo. Updated the question. – KayV
Can you post the execution log of the job, which is printed to stdout? It looks like it is running in local mode. – franklinsijo
@franklinsijo Please see the updated question with the logs. – KayV
@KayV, please add `<property> <description>enabling log view from browser</description> <name>yarn.log-aggregation-enable</name> <value>true</value> </property>` to yarn-site.xml. – Ramesh Maharjan

1 Answer


The problem was with the job configuration when running through STS/Eclipse: the job ID job_local1149743576_0001 and the LocalJobRunner entries in the log show that the job was executed by the local framework rather than submitted to YARN, which is why nothing appeared in the UI. So I added the job configuration inside the run method, pointing it at the YARN ResourceManager and the HDFS defaultFS as follows:

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s needs two arguments <input> <output> files\n",
                getClass().getSimpleName());
        return -1;
    }
    Configuration configuration  = getConf();

    configuration.set("fs.defaultFS", "hdfs://172.20.12.168");
    configuration.set("mapreduce.jobtracker.address", "localhost:54311");
    configuration.set("mapreduce.framework.name", "yarn");
    configuration.set("yarn.resourcemanager.address", "127.0.0.1:8032");

    //Initialize the Hadoop job with the configuration populated above
    //(the no-arg Job() constructor creates a fresh Configuration and would
    //silently ignore the properties just set)
    Job job = Job.getInstance(configuration);
    job.setJarByClass(WordCount.class);
    job.setJobName("WordCounter");

    //Add input and output file paths to job based on the arguments passed
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);


    //Set the MapClass and ReduceClass in the job
    job.setMapperClass(MapClass.class);
    job.setReducerClass(ReduceClass.class);

    //Wait for the job to complete and report whether it succeeded
    int returnValue = job.waitForCompletion(true) ? 0 : 1;

    if (job.isSuccessful()) {
        System.out.println("Job was successful");
    } else {
        System.out.println("Job was not successful");
    }

    return returnValue;
}

After making the above changes, the Job tab, the job details, and the Job History all show up in the Hadoop UI.
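
For completeness, a cleaner variant avoids hard-coding addresses in source by letting ToolRunner populate the configuration and handing it to the job. The following is a minimal sketch, assuming the same MapClass and ReduceClass from the question and that the cluster's core-site.xml, mapred-site.xml, and yarn-site.xml are on the runtime classpath (for example by launching with the hadoop jar command and HADOOP_CONF_DIR set, rather than directly from STS):

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        // ToolRunner parses the generic options (-conf, -fs, -jt, -D key=value)
        // and stores the result in the Configuration returned by getConf()
        System.exit(ToolRunner.run(new WordCount(), args));
    }

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.printf("Usage: %s <input> <output>%n", getClass().getSimpleName());
            return -1;
        }

        // Passing getConf() is the crucial step: the job inherits fs.defaultFS,
        // mapreduce.framework.name and the yarn.resourcemanager.* settings from
        // the classpath config files plus any -D overrides, so it is submitted
        // to YARN instead of the in-process LocalJobRunner
        Job job = Job.getInstance(getConf(), "WordCounter");
        job.setJarByClass(WordCount.class);

        job.setMapperClass(MapClass.class);   // mapper/reducer classes from the question
        job.setReducerClass(ReduceClass.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }
}

Launched, for example, as hadoop jar wordcount.jar WordCount -D mapreduce.framework.name=yarn <input> <output>, the job is submitted to the ResourceManager and shows up in its web UI (port 8088 by default) and, after completion, in the Job History server.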