
I set up Hadoop on Ubuntu and followed all the necessary steps:

1. created the HDFS file system
2. moved the text files to the input directory
3. made sure I have the privileges to access all the directories

But when I run the simple word count example below, I get the errors shown further down. Here is the code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class wordcount {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        conf.addResource(new Path("/HADOOP_HOME/conf/core-site.xml"));
        conf.addResource(new Path("/HADOOP_HOME/conf/hdfs-site.xml"));

        Job job = new Job(conf, "wordcount");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setJarByClass(wordcount.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // FileInputFormat.addInputPath(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));

        FileInputFormat.setInputPaths(job, new Path("/user/gabriele/input"));
        FileOutputFormat.setOutputPath(job, new Path("/user/gabriele/output"));

        job.waitForCompletion(true);
    }

}

However, the input path is valid (I also checked it from the command line), and I can even view the files at that path from Eclipse itself, so please help me see where I am going wrong.

There was a suggested solution that said to add the following two lines:

config.addResource(new Path("/HADOOP_HOME/conf/core-site.xml"));
config.addResource(new Path("/HADOOP_HOME/conf/hdfs-site.xml"));

But it still does not work.
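To check whether those resource files are actually being picked up, one thing worth doing is printing the default filesystem the Configuration resolves to. Below is a minimal standalone sketch (the ConfCheck class name is made up, and the resource paths are the same ones used above; a resource file that does not exist on disk is skipped rather than raising an error, which is consistent with the run shown in this question):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ConfCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same resource paths as in the question; if these files do not
        // exist, the configuration silently keeps its defaults.
        conf.addResource(new Path("/HADOOP_HOME/conf/core-site.xml"));
        conf.addResource(new Path("/HADOOP_HOME/conf/hdfs-site.xml"));

        // If core-site.xml was loaded, this prints an hdfs:// URI; if it
        // prints file:///, job paths resolve against the local filesystem,
        // which matches the "file:/user/gabriele/input" in the errors below.
        System.out.println("fs.default.name = " + conf.get("fs.default.name"));
        System.out.println("default FS = " + FileSystem.get(conf).getUri());
        System.out.println("input exists = " + FileSystem.get(conf).exists(new Path("/user/gabriele/input")));
    }
}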

Here are the errors (Run As -> Run on Hadoop):

13/11/08 08:39:11 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
13/11/08 08:39:12 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/11/08 08:39:12 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
13/11/08 08:39:12 INFO mapred.JobClient: Cleaning up the staging area file:/tmp/hadoop-gabriele/mapred/staging/gabriele481581440/.staging/job_local481581440_0001
13/11/08 08:39:12 ERROR security.UserGroupInformation: PriviledgedActionException as:gabriele cause:org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/user/gabriele/input
Exception in thread "main" org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/user/gabriele/input
    at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:235)
    at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getSplits(FileInputFormat.java:252)
    at org.apache.hadoop.mapred.JobClient.writeNewSplits(JobClient.java:1054)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:1071)
    at org.apache.hadoop.mapred.JobClient.access$700(JobClient.java:179)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:983)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:936)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:936)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:550)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:580)
    at wordcount.main(wordcount.java:74)

Thanks

Are you sure your file is accessible (permissions) to your code? – Suvarna Pattayil

1 Answer


Unless your Hadoop installation really is rooted at /HADOOP_HOME, I suggest you change those lines so that HADOOP_HOME is replaced with the directory where Hadoop is actually installed (/usr/lib/hadoop, /opt/hadoop, or wherever you installed it):

conf.addResource(new Path("/usr/lib/hadoop/conf/core-site.xml"));
conf.addResource(new Path("/usr/lib/hadoop/conf/hdfs-site.xml"));

Or, in Eclipse, add the /usr/lib/hadoop/conf folder (or wherever you installed Hadoop) to the build classpath.
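For completeness, here is a minimal sketch of how the relevant part of main could then look, using the same imports as the code in the question. The /usr/lib/hadoop/conf location and the hdfs://localhost:9000 address are assumptions; substitute the values from your own installation and core-site.xml:

Configuration conf = new Configuration();

// Load the real site configs (adjust to wherever Hadoop is installed).
conf.addResource(new Path("/usr/lib/hadoop/conf/core-site.xml"));
conf.addResource(new Path("/usr/lib/hadoop/conf/hdfs-site.xml"));

// Equivalent alternative for this particular problem: point the client at
// the NameNode directly. hdfs://localhost:9000 is an assumed address; use
// the fs.default.name value from your own core-site.xml.
// conf.set("fs.default.name", "hdfs://localhost:9000");

Job job = new Job(conf, "wordcount");
job.setJarByClass(wordcount.class);

// With the config loaded, these now resolve as hdfs:// paths instead of the
// file:/user/gabriele/input the InvalidInputException was complaining about.
FileInputFormat.setInputPaths(job, new Path("/user/gabriele/input"));
FileOutputFormat.setOutputPath(job, new Path("/user/gabriele/output"));

(The WARN about "No job jar file set" in your log is a separate issue: it appears when running unpackaged classes straight from Eclipse and is harmless for a local test, but on a real cluster the job should be submitted as a packaged jar.)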