0
votes

I'm trying to get the aggregate values on numeric fields using Apache Drill 1.3.0 without installing Hadoop and Apache Drill on the file system.

Now the issue is- I would like to omit the header when reading a CSV file but I am not able to find any option to tell Apache Drill, please skipFirstLine using java program as I have not installed Apache Drill.

The program terminates with the error and throws NumberFormatException because of the column name.

pom.xml

<dependency>
<groupId>org.apache.drill.exec</groupId>
<artifactId>drill-jdbc</artifactId>
<version>1.3.0</version>
</dependency>

Sample CSV File:

"ffe92de2-e633-4741-bc4b-83e91dce665a.csv"

"A"
"100"

Sample Java Program:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;

/*
* Created By: Ashish Pancholi
* On: 16-12-2015
* */

public class DrillExample {
    private static final String DRILL_JDBC_LOCAL_URI = "jdbc:drill:zk=local";
    private Connection con = null;

    public DrillExample() throws SQLException {
        con = new org.apache.drill.jdbc.Driver().connect(DRILL_JDBC_LOCAL_URI, getDefaultProperties());

    }

    public int getArchiveNumFieldSUM(String filepath) throws Exception {
        //query to be executed
        String query = "select sum(cast(columns[0] as int)) from dfs.`"+filepath+"`";
        try {
            // creating statement
            Statement stmt = con.createStatement();
            // executing query
            ResultSet rs = stmt.executeQuery(query);
            // get the number of rows from the result set
            rs.next();
            return rs.getInt(1);
        } catch (Exception ex) {
            throw ex;
        }
    }

    private Properties getDefaultProperties() {
        final Properties properties = new Properties();
        properties.setProperty(org.apache.drill.exec.ExecConstants.HTTP_ENABLE, "false");
        return properties;
    }

    public void shutdownApacheDrill(){
        if (con != null) {
            try {
                con.close();
            } catch (SQLException e) {
                //ignore
            }
        }
    }

    public static void main(String[] arg){
        try {
            int count = 0;
            DrillExample local = new DrillExample();
            String filePath = "C:/Workarea/Project/xyz/Structured Data/ffe92de2-e633-4741-bc4b-83e91dce665a.csv";
            try {
                count = local.getArchiveNumFieldSUM(filePath);
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println(count);
        }catch(Exception ex){
            ex.printStackTrace();
        }
    }

}

I'm getting the following Error:

SYSTEM ERROR: NumberFormatException: A

Fragment 0:0

[Error Id: 72dec3db-8f28-462d-838d-dcb60663e389 on ashish:31010].
java.sql.SQLException: SYSTEM ERROR: NumberFormatException: A

Fragment 0:0

[Error Id: 72dec3db-8f28-462d-838d-dcb60663e389 on ashish:31010]
    at org.apache.drill.jdbc.impl.DrillCursor.nextRowInternally(DrillCursor.java:247)
    at org.apache.drill.jdbc.impl.DrillCursor.loadInitialSchema(DrillCursor.java:290)
    at org.apache.drill.jdbc.impl.DrillResultSetImpl.execute(DrillResultSetImpl.java:1359)
    at org.apache.drill.jdbc.impl.DrillResultSetImpl.execute(DrillResultSetImpl.java:74)
    at net.hydromatic.avatica.AvaticaConnection.executeQueryInternal(AvaticaConnection.java:404)
    at net.hydromatic.avatica.AvaticaStatement.executeQueryInternal(AvaticaStatement.java:351)
    at net.hydromatic.avatica.AvaticaStatement.executeQuery(AvaticaStatement.java:78)
    at org.apache.drill.jdbc.impl.DrillStatementImpl.executeQuery(DrillStatementImpl.java:97)
    at org.openskye.core.structured.DrillExample.getArchiveNumFieldSUM(DrillExample.java:30)
    at org.openskye.core.structured.DrillExample.main(DrillExample.java:61)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.CommandLineWrapper.main(CommandLineWrapper.java:130)
Caused by: org.apache.drill.common.exceptions.UserRemoteException: SYSTEM ERROR: NumberFormatException: A

Fragment 0:0

[Error Id: 72dec3db-8f28-462d-838d-dcb60663e389 on ashish:31010]
    at org.apache.drill.exec.rpc.user.QueryResultHandler.resultArrived(QueryResultHandler.java:118)
    at org.apache.drill.exec.rpc.user.UserClient.handleReponse(UserClient.java:112)
    at org.apache.drill.exec.rpc.BasicClientWithConnection.handle(BasicClientWithConnection.java:47)
    at org.apache.drill.exec.rpc.BasicClientWithConnection.handle(BasicClientWithConnection.java:32)
    at org.apache.drill.exec.rpc.RpcBus.handle(RpcBus.java:69)
    at org.apache.drill.exec.rpc.RpcBus$RequestEvent.run(RpcBus.java:400)
    at org.apache.drill.common.SerializedExecutor$RunnableProcessor.run(SerializedExecutor.java:105)
    at org.apache.drill.exec.rpc.RpcBus$SameExecutor.execute(RpcBus.java:264)
    at org.apache.drill.common.SerializedExecutor.execute(SerializedExecutor.java:142)
    at org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:298)
    at org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:269)
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:89)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
20:07:26.097 [USER-rpc-event-queue] INFO  o.a.d.j.i.DrillResultSetImpl$ResultsListener - [#1] Query failed: 
org.apache.drill.common.exceptions.UserRemoteException: SYSTEM ERROR: NumberFormatException: A

Fragment 0:0

[Error Id: 72dec3db-8f28-462d-838d-dcb60663e389 on ashish:31010]
    at org.apache.drill.exec.rpc.user.QueryResultHandler.resultArrived(QueryResultHandler.java:118) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.user.UserClient.handleReponse(UserClient.java:112) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.BasicClientWithConnection.handle(BasicClientWithConnection.java:47) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.BasicClientWithConnection.handle(BasicClientWithConnection.java:32) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.RpcBus.handle(RpcBus.java:69) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.RpcBus$RequestEvent.run(RpcBus.java:400) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.common.SerializedExecutor$RunnableProcessor.run(SerializedExecutor.java:105) [drill-common-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.RpcBus$SameExecutor.execute(RpcBus.java:264) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.common.SerializedExecutor.execute(SerializedExecutor.java:142) [drill-common-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:298) [drill-java-exec-1.3.0.jar:1.3.0]
    at org.apache.drill.exec.rpc.RpcBus$InboundHandler.decode(RpcBus.java:269) [drill-java-exec-1.3.0.jar:1.3.0]
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:89) [netty-codec-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:254) [netty-handler-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103) [netty-codec-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:242) [netty-codec-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.ChannelInboundHandlerAdapter.channelRead(ChannelInboundHandlerAdapter.java:86) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:847) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354) [netty-transport-4.0.27.Final.jar:4.0.27.Final]
    at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111) [netty-common-4.0.27.Final.jar:4.0.27.Final]
    at java.lang.Thread.run(Thread.java:744) [na:1.7.0_45]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
    at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:254)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
    at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:242)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
    at io.netty.channel.ChannelInboundHandlerAdapter.channelRead(ChannelInboundHandlerAdapter.java:86)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:339)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:324)
    at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:847)
    at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:511)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:468)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:382)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:354)
    at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:111)
    at java.lang.Thread.run(Thread.java:744)
20:07:26.105 [USER-rpc-event-queue] DEBUG o.a.drill.exec.rpc.user.UserClient - Sending response with Sender 1145498900
0

UPDATE:

I found one class which sets SkipFirstLine property but I am not able to understand where I have to pass it?

TextParsingSettings textParsingSettings = new TextParsingSettings();
textParsingSettings.setSkipFirstLine(true);
1

1 Answers

1
votes

Queries that change storage plugin configuration options can now be written with Drill 1.4 (check Release Notes)

You can modify your query:

String query = "SELECT sum(cast(A as int)) FROM TABLE(dfs.`"+filepath+"`(type => 'text', fieldDelimiter => '^', extractHeader => true))";

Note: I am using cast(A as int) not cast(columns[0] as int).

After extracting header, you need to use column names mentioned in header not columns[] to extract column names.