I'm running a python script to get error logs from CloudFront access logs which I might use to execute from Lambda service once it's successful. When I try to run the query I see that the Table name exists on the AWS Athena database, I'm unable to overwritten. So I am not getting the expected output. Below is the script which I am trying to execute.
Is there any other way? any suggestion?
#Athena configuration
s3_ouput = 's3://athena/athenatest/'
database = 's3_accesslog'
table = 'test_output1'
#Athena database and table definition
create_database = "CREATE DATABASE IF NOT EXISTS %s;" % (database)
create_table = \
"""CREATE EXTERNAL TABLE IF NOT EXISTS %s.%s (
`Date` DATE,
Time STRING,
Location STRING,
SCBytes BIGINT,
RequestIP STRING,
Method STRING,
Host STRING,
Uri STRING,
Status INT,
Referrer STRING,
UserAgent STRING,
UriQS STRING,
Cookie STRING,
ResultType STRING,
RequestId STRING,
HostHeader STRING,
Protocol STRING,
CSBytes BIGINT,
TimeTaken FLOAT,
XForwardFor STRING,
SSLProtocol STRING,
SSLCipher STRING,
ResponseResultType STRING,
CSProtocolVersion STRING,
FleStatus STRING,
FleEncryptedFields INT,
CPort INT,
TimeToFirstByte FLOAT,
XEdgeDetailedResult STRING,
ScContent STRING,
ScContentLen BIGINT,
ScRangeStart BIGINT,
ScRangeEnd BIGINT
)
PARTITIONED BY (
`l_shipdate` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '%s'
TBLPROPERTIES ('skip.header.line.count' = '2');""" % ( database, table, s3_input )
#Query definitions
query_1 = "SELECT * FROM %s.%s where CAST(status AS VARCHAR) like '404';" % (database, table)