2
votes

I have collected some Twitter data with Apache Flume and stored it in the Hive metastore using this Hive query:

CREATE EXTERNAL TABLE tweets (

    id BIGINT,
    created_at STRING,
    source STRING,
    favorited BOOLEAN,
    retweet_count INT,
    retweeted_status STRUCT<
      text:STRING,
      user:STRUCT<screen_name:STRING,name:STRING>>,
    entities STRUCT<
      urls:ARRAY<STRUCT<expanded_url:STRING>>,
      user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,
      hashtags:ARRAY<STRUCT<text:STRING>>>,
    text STRING,
    user STRUCT<
      screen_name:STRING,
      name:STRING,
      friends_count:INT,
      followers_count:INT,
      statuses_count:INT,
      verified:BOOLEAN,
      utc_offset:INT,
      time_zone:STRING>,
    in_reply_to_screen_name STRING
    )
ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'
LOCATION '/user/flume/tweets';

Now I want to create the same table from a Java program. I have configured the Hive JDBC connection and I am able to create a simple table in Hive, but how do I create this table? I tried this Java code, but it is not working:

import java.sql.SQLException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.DriverManager;

public class HiveJdbcClient {
    private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

    public static void main(String[] args) throws SQLException {

    try {
        Class.forName(driverName);
    } catch (ClassNotFoundException e){
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.exit(1);
    }

    Connection con = DriverManager.getConnection("jdbc:hive://master:10000/default", "", "");
    Statement stmt = con.createStatement();
    String tableName = "tweets";
    stmt.executeQuery("drop table " + tableName);
    stmt.executeQuery("ADD JAR /home/dsri/hadoop_home/hive/lib/hive-serdes-1.0-SNAPSHOT.jar");

        ResultSet res = stmt.executeQuery("create table " + tableName + " (id BIGINT,created_at STRING,source STRING,favorited BOOLEAN,retweet_count INT,retweeted_status STRUCT<text:STRING,user:STRUCT<screen_name:STRING,name:STRING>>,entities STRUCT<urls:ARRAY<STRUCT<expanded_url:STRING>>,user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,hashtags:ARRAY<STRUCT<text:STRING>>>,text STRING,user STRUCT<screen_name:STRING,name:STRING,friends_count:INT,followers_count:INT,statuses_count:INT,verified:BOOLEAN,utc_offset:INT,time_zone:STRING>,in_reply_to_screen_name STRING) ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe' LOCATION '/user/flume/tweets'");

    // show tables

    String sql = "show tables '" + tableName + "'";
    System.out.println("Running: " + sql);
    res = stmt.executeQuery(sql);

    if (res.next()) {
        System.out.println(res.getString(1));
    }

    // describe table
    sql = "describe " + tableName;
    System.out.println("Running: " + sql);  
    res = stmt.executeQuery(sql);
    while (res.next()) {
        System.out.println(res.getString(1) + "\t" + res.getString(2));
    }
    // select * query
    sql = "select * from " + tableName;
    sql = "SELECT entities.user_mentions[0].screen_name , text , created_at FROM  " + tableName;

    System.out.println("Running: " + sql);
    res = stmt.executeQuery(sql);
    while (res.next()){
        System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
    }

    // regular hive query
    sql = "select count(1) from " + tableName;
    System.out.println("Running: " + sql);
    res = stmt.executeQuery(sql);
    while (res.next()){
        System.out.println(res.getString(1));
    }
    }
}

Here is my output from Eclipse:

Running: show tables 'tweets'
tweets
Running: describe tweets
id                      bigint              
created_at              string              
source                  string              
favorited               boolean             
retweet_count           int                 
retweeted_status        struct<text:string,user:struct<screen_name:string,name:string>>
entities                struct<urls:array<struct<expanded_url:string>>,user_mentions:array<struct<screen_name:string,name:string>>,hashtags:array<struct<text:string>>>
text                    string              
user                    struct<screen_name:string,name:string,friends_count:int,followers_count:int,statuses_count:int,verified:boolean,utc_offset:int,time_zone:string>
in_reply_to_screen_name string              
Running: SELECT entities.user_mentions[0].screen_name , text , created_at FROM  tweets
Exception in thread "main" java.sql.SQLException: Query returned non-zero code: 1, cause: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
    at org.apache.hadoop.hive.jdbc.HiveStatement.executeQuery(HiveStatement.java:194)
    at HiveJdbcClient.main(HiveJdbcClient.java:48)

And this is my Hive command-line session:

hive> show tables;
OK
Time taken: 0.009 seconds
hive> SELECT entities.user_mentions[0].screen_name , text , created_at FROM tweets;
FAILED: SemanticException [Error 10001]: Line 1:70 Table not found 'tweets'
hive> 

And this is my error log:

2014-08-13 11:45:25,442 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: STARTUP_MSG: /************************************************************ STARTUP_MSG: Starting DataNode STARTUP_MSG: host = master/192.168.1.149 STARTUP_MSG: args = [] STARTUP_MSG: version = 1.2.1 STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013 STARTUP_MSG: java = 1.7.0_65 ************************************************************/ 2014-08-13 11:45:26,499 INFO org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties 2014-08-13 11:45:26,550 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source MetricsSystem,sub=Stats registered. 2014-08-13 11:45:26,568 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s). 2014-08-13 11:45:26,568 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: DataNode metrics system started 2014-08-13 11:45:26,719 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source ugi registered. 2014-08-13 11:45:28,212 INFO org.apache.hadoop.ipc.Client: Retrying connect to server: master/192.168.1.149:9000. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS) 2014-08-13 11:45:29,569 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Registered FSDatasetStatusMBean 2014-08-13 11:45:29,580 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Opened data transfer server at 50010 2014-08-13 11:45:29,583 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Balancing bandwith is 1048576 bytes/s 2014-08-13 11:45:29,592 INFO org.apache.hadoop.util.NativeCodeLoader: Loaded the native-hadoop library 2014-08-13 11:45:29,749 INFO org.mortbay.log: Logging to org.slf4j.impl.Log4jLoggerAdapter(org.mortbay.log) via org.mortbay.log.Slf4jLog 2014-08-13 11:45:29,831 INFO org.apache.hadoop.http.HttpServer: Added global filtersafety (class=org.apache.hadoop.http.HttpServer$QuotingInputFilter) 2014-08-13 11:45:29,843 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: dfs.webhdfs.enabled = false 2014-08-13 11:45:29,843 INFO org.apache.hadoop.http.HttpServer: Port returned by webServer.getConnectors()[0].getLocalPort() before open() is -1. Opening the listener on 50075 2014-08-13 11:45:29,844 INFO org.apache.hadoop.http.HttpServer: listener.getLocalPort() returned 50075 webServer.getConnectors()[0].getLocalPort() returned 50075 2014-08-13 11:45:29,844 INFO org.apache.hadoop.http.HttpServer: Jetty bound to port 50075 2014-08-13 11:45:29,844 INFO org.mortbay.log: jetty-6.1.26 2014-08-13 11:45:29,909 WARN org.mortbay.log: Can't reuse /tmp/Jetty_0_0_0_0_50075_datanode____hwtdwq, using /tmp/Jetty_0_0_0_0_50075_datanode____hwtdwq_3155498714007559135 2014-08-13 11:45:30,222 INFO org.mortbay.log: Started [email protected]:50075 2014-08-13 11:45:30,228 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source jvm registered. 2014-08-13 11:45:30,229 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source DataNode registered. 2014-08-13 11:45:30,357 INFO org.apache.hadoop.ipc.Server: Starting SocketReader 2014-08-13 11:45:30,359 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source RpcDetailedActivityForPort50020 registered. 2014-08-13 11:45:30,359 INFO org.apache.hadoop.metrics2.impl.MetricsSourceAdapter: MBean for source RpcActivityForPort50020 registered. 
2014-08-13 11:45:30,360 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: dnRegistration = DatanodeRegistration(master:50010, storageID=DS-879125703-192.168.1.149-50010-1407740625881, infoPort=50075, ipcPort=50020) 2014-08-13 11:45:30,368 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Finished generating blocks being written report for 1 volumes in 0 seconds 2014-08-13 11:45:30,374 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: DatanodeRegistration(192.168.1.149:50010, storageID=DS-879125703-192.168.1.149-50010-1407740625881, infoPort=50075, ipcPort=50020)In DataNode.run, data = FSDataset{dirpath='/home/dsri/hadoop_home/hdfs/data/current'} 2014-08-13 11:45:30,376 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting 2014-08-13 11:45:30,376 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 50020: starting 2014-08-13 11:45:30,380 INFO org.apache.hadoop.ipc.Server: IPC Server handler 0 on 50020: starting 2014-08-13 11:45:30,380 INFO org.apache.hadoop.ipc.Server: IPC Server handler 1 on 50020: starting 2014-08-13 11:45:30,380 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: using BLOCKREPORT_INTERVAL of 3600000msec Initial delay: 0msec 2014-08-13 11:45:30,380 INFO org.apache.hadoop.ipc.Server: IPC Server handler 2 on 50020: starting 2014-08-13 11:45:30,386 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Starting Periodic block scanner 2014-08-13 11:45:30,412 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Finished asynchronous block report scan in 39ms 2014-08-13 11:45:30,426 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Generated rough (lockless) block report in 39 ms 2014-08-13 11:45:30,593 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_7678010998501164699_1225 2014-08-13 11:45:33,413 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: BlockReport of 67 blocks took 1 msec to generate and 31 msecs for RPC and NN processing 2014-08-13 11:46:04,361 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Receiving blk_6701685767905484268_1233 src: /192.168.1.149:41509 dest: /192.168.1.149:50010 2014-08-13 11:46:04,373 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:41509, dest: /192.168.1.149:50010, bytes: 4, op: HDFS_WRITE, cliID: DFSClient_NONMAPREDUCE_-31599147_1, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_6701685767905484268_1233, duration: 2171111 2014-08-13 11:46:04,374 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 0 for blk_6701685767905484268_1233 terminating 2014-08-13 11:46:09,433 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Scheduling blk_-2344105609722430974_1138 file /home/dsri/hadoop_home/hdfs/data/current/blk_-2344105609722430974 for deletion 2014-08-13 11:46:09,436 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted blk_-2344105609722430974_1138 at file /home/dsri/hadoop_home/hdfs/data/current/blk_-2344105609722430974 2014-08-13 11:55:30,921 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_-1108348089739196334_1172 2014-08-13 12:05:31,094 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_5256405114478241653_1210 2014-08-13 12:15:28,096 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Finished asynchronous block report scan in 13ms 2014-08-13 12:15:31,094 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: BlockReport of 67 blocks took 2 msec to generate and 9 msecs for RPC and 
NN processing 2014-08-13 12:15:31,344 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_8446056606266340583_1232 2014-08-13 12:25:31,536 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_5394846158629276502_1227 2014-08-13 12:35:30,768 INFO org.apache.hadoop.hdfs.server.datanode.DataBlockScanner: Verification succeeded blk_-7678390392278116211_1217 2014-08-13 12:36:48,828 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Receiving blk_9102789706813495699_1234 src: /192.168.1.149:41841 dest: /192.168.1.149:50010 2014-08-13 12:36:48,846 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:41841, dest: /192.168.1.149:50010, bytes: 43687, op: HDFS_WRITE, cliID: DFSClient_NONMAPREDUCE_1974987965_59, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_9102789706813495699_1234, duration: 14107906 2014-08-13 12:36:48,847 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 0 for blk_9102789706813495699_1234 terminating 2014-08-13 12:36:52,586 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Scheduling blk_9102789706813495699_1234 file /home/dsri/hadoop_home/hdfs/data/current/blk_9102789706813495699 for deletion 2014-08-13 12:36:52,588 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted blk_9102789706813495699_1234 at file /home/dsri/hadoop_home/hdfs/data/current/blk_9102789706813495699 2014-08-13 12:38:25,471 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Receiving blk_-8559721137063489651_1235 src: /192.168.1.149:41851 dest: /192.168.1.149:50010 2014-08-13 12:38:25,480 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:41851, dest: /192.168.1.149:50010, bytes: 43697, op: HDFS_WRITE, cliID: DFSClient_NONMAPREDUCE_-1880596955_73, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-8559721137063489651_1235, duration: 5524824 2014-08-13 12:38:25,481 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: PacketResponder 0 for blk_-8559721137063489651_1235 terminating 2014-08-13 12:38:28,618 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Scheduling blk_-8559721137063489651_1235 file /home/dsri/hadoop_home/hdfs/data/current/blk_-8559721137063489651 for deletion 2014-08-13 12:38:28,619 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Deleted blk_-8559721137063489651_1235 at file /home/dsri/hadoop_home/hdfs/data/current/blk_-8559721137063489651 2014-08-13 12:43:08,572 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41900, bytes: 4607, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-8117981694073621746_1024, duration: 17721602 2014-08-13 12:43:08,706 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41901, bytes: 7511, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-3374472984587794400_1025, duration: 1448070 2014-08-13 12:43:08,729 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41902, bytes: 2350, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-7381104273799398401_1026, duration: 
1533110 2014-08-13 12:43:08,740 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41903, bytes: 2302, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_6184311917214209923_1027, duration: 1381655 2014-08-13 12:43:08,752 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41904, bytes: 2147, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-2011448124616450527_1028, duration: 1404959 2014-08-13 12:43:08,764 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41905, bytes: 2294, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_898700835338503777_1029, duration: 1387014 2014-08-13 12:43:08,778 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41906, bytes: 4360, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-2958582349124052196_1030, duration: 1425325 2014-08-13 12:43:08,797 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41907, bytes: 5017, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-1045529804523469699_1031, duration: 1452300 2014-08-13 12:43:08,815 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41908, bytes: 11382, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: blk_-3752408366748461603_1032, duration: 1452505 2014-08-13 12:43:08,834 INFO org.apache.hadoop.hdfs.server.datanode.DataNode.clienttrace: src: /192.168.1.149:50010, dest: /192.168.1.149:41909, bytes: 2812, op: HDFS_READ, cliID: DFSClient_NONMAPREDUCE_-1203793785_100, offset: 0, srvID: DS-879125703-192.168.1.149-50010-1407740625881, blockid: 
What error are you getting? – Sachin Janani
I am getting this: Exception in thread "main" java.sql.SQLException: Query returned non-zero code: 40000, cause: FAILED: RuntimeException org.apache.hadoop.security.AccessControlException: org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="tmp":dsri:supergroup:rwxrwxr-x at org.apache.hadoop.hive.jdbc.HiveStatement.executeQuery(HiveStatement.java:194) at HiveJdbcClient.main(HiveJdbcClient.java:48) – Uttam Raj
That error is related to permissions. You need to run the program as a user that has the Hive permissions. – Sachin Janani
The table is being created, but it does not show up in Hive under "show tables;". – Uttam Raj
How do I do this from Eclipse on CentOS? – Uttam Raj

3 Answers

1
votes

As per your requirement, you want to create an external Hive table, but as per your code you are creating an internal (managed) table; the EXTERNAL keyword is missing:

 ResultSet res = stmt.executeQuery("create table " + tableName + " (id BIGINT,created_at STRING,source STRING,favorited BOOLEAN,retweet_count INT,retweeted_status STRUCT<text:STRING,user:STRUCT<screen_name:STRING,name:STRING>>,entities STRUCT<urls:ARRAY<STRUCT<expanded_url:STRING>>,user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,hashtags:ARRAY<STRUCT<text:STRING>>>,text STRING,user STRUCT<screen_name:STRING,name:STRING,friends_count:INT,followers_count:INT,statuses_count:INT,verified:BOOLEAN,utc_offset:INT,time_zone:STRING>,in_reply_to_screen_name STRING) ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe' LOCATION '/user/flume/tweets'");

Also, as per that link, you cannot create an external table through Hive JDBC; for that you need the Hive CliDriver class. But I would suggest first changing the above query in your code and trying again, roughly as sketched below.
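For reference, a minimal sketch of what the corrected statement could look like, keeping the rest of the question's connection code as it is (DDL is issued with execute() here, since a CREATE TABLE returns no result set):

// Sketch only: the same DDL as in the question, with the EXTERNAL keyword added.
String createTableDdl =
      "CREATE EXTERNAL TABLE " + tableName + " ("
    + " id BIGINT, created_at STRING, source STRING, favorited BOOLEAN, retweet_count INT,"
    + " retweeted_status STRUCT<text:STRING, user:STRUCT<screen_name:STRING, name:STRING>>,"
    + " entities STRUCT<urls:ARRAY<STRUCT<expanded_url:STRING>>,"
    + "   user_mentions:ARRAY<STRUCT<screen_name:STRING, name:STRING>>,"
    + "   hashtags:ARRAY<STRUCT<text:STRING>>>,"
    + " text STRING,"
    + " user STRUCT<screen_name:STRING, name:STRING, friends_count:INT, followers_count:INT,"
    + "   statuses_count:INT, verified:BOOLEAN, utc_offset:INT, time_zone:STRING>,"
    + " in_reply_to_screen_name STRING)"
    + " ROW FORMAT SERDE 'com.cloudera.hive.serde.JSONSerDe'"
    + " LOCATION '/user/flume/tweets'";
// A CREATE TABLE statement returns no rows, so execute() is used rather than executeQuery().
stmt.execute(createTableDdl);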

0
votes

Change to a local or remote metastore mode in hive-site.xml, not the embedded metastore; a sketch of such a configuration is shown after the link below.

http://saurzcode.in/2015/01/configure-mysql-metastore-hive/
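A rough sketch of the relevant hive-site.xml properties for a local, MySQL-backed metastore (the URL, user, and password below are placeholders for whatever you configure following the link above):

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://localhost:3306/hive_metastore?createDatabaseIfNotExist=true</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hiveuser</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>hivepassword</value>
</property>

For a remote metastore, hive.metastore.uris (for example thrift://master:9083) would additionally point every client at the same metastore service, so that the JDBC program and the Hive CLI see the same tables.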

0
votes

First you need to set a user/password in order to make changes to the DB structure; then you should replace stmt.executeQuery with stmt.execute, because the "create table" query will throw an exception since it returns no results. Both changes are sketched below.
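A minimal sketch of those two changes, reusing tableName and a createTableDdl string like the one sketched in the first answer (the user name and password here are placeholders; connect as an account that is allowed to write to the Hive warehouse directories in HDFS):

// Placeholder credentials instead of the empty strings used in the question.
Connection con = DriverManager.getConnection(
        "jdbc:hive://master:10000/default", "hiveuser", "hivepassword");
Statement stmt = con.createStatement();

// Statements that return no result set should go through execute(), not executeQuery().
stmt.execute("DROP TABLE IF EXISTS " + tableName);
stmt.execute("ADD JAR /home/dsri/hadoop_home/hive/lib/hive-serdes-1.0-SNAPSHOT.jar");
stmt.execute(createTableDdl);

// Queries that do return rows can keep using executeQuery().
ResultSet res = stmt.executeQuery("SELECT COUNT(1) FROM " + tableName);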