0
votes

I'm trying a simple program from the book "Hadoop in Action" that merges a series of files from the local file system into a single file in HDFS. The code snippet is the same as the one provided in the book.

import java.lang.*;
import java.util.*;
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;

public class PutMerge {

    public static void main(String[] args) throws IOException{
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        Path inputDir = new Path(args[0]); // First argument has the input directory 
        Path hdfsFile = new Path(args[1]); // Concatenated hdfs file name

        try {
            FileStatus[] inputFiles = local.listStatus(inputDir); // list of Local Files

            FSDataOutputStream out = hdfs.create(hdfsFile); // target file creation

            for (int i = 0; i<inputFiles.size; i++ {

                FSDataInputStream in = local.open(inputFiles[i].getPath());

                int bytesRead = 0;
                byte[] buff = new byte[256];

                while (bytesRead = (in.read(buff))>0) {
                    out.write(buff,0,bytesRead);
                }
                in.close();
            }
            out.close();

        } 
        catch(Exception e) {
            e.printStackTrace();
        }

    }
}

The program compiled successfully, but when I try to run it I get the following exception:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/commons/configuration/Configuration at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.(DefaultMetricsSystem.java:37) at org.apache.hadoop.metrics2.lib.DefaultMetricsSystem.(DefaultMetricsSystem.java:34) at org.apache.hadoop.security.UgiInstrumentation.create(UgiInstrumentation.java:51) at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:217) at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:185) at org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled(UserGroupInformation.java:237) at org.apache.hadoop.security.KerberosName.(KerberosName.java:79) at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:210) at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:185) at org.apache.hadoop.security.UserGroupInformation.isSecurityEnabled(UserGroupInformation.java:237) at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:482) at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:468) at org.apache.hadoop.fs.FileSystem$Cache$Key.(FileSystem.java:1519) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:1420) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:254) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:123) at PutMerge.main(PutMerge.java:16) Caused by: java.lang.ClassNotFoundException: org.apache.commons.configuration.Configuration at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at 
java.lang.ClassLoader.loadClass(ClassLoader.java:356) ... 17 more

Based on inputs from some of the posts, I added the commons package. My classpath definition is

/usr/java/jdk1.7.0_21:/data/commons-logging-1.1.2/commons-logging-1.1.2.jar:/data/hadoop-1.1.2/hadoop-core-1.1.2.jar:/data/commons-logging-1.1.2/commons-logging-adapters-1.1.2.jar:/data/commons-logging-1.1.2/commons-logging-api-1.1.2.jar:.

Any clue on why this is not working?

1

1 Answer

1
votes

You didn't include Apache Commons Configuration (the jar that provides org.apache.commons.configuration.Configuration) in your classpath.

Really though you shouldn't need to include much besides hadoop itself. Make sure you are running your jar with hadoop itself.

> hadoop jar myJar.jar