
I'm trying to insert a dataframe into a Snowflake table using the pandas connector and am getting what looks like permission issues, while the "normal" Snowflake connector works fine.

import snowflake.connector as snow
from snowflake.connector.pandas_tools import write_pandas, pd_writer

# self.conn is an existing snowflake.connector connection created elsewhere in the class
cur = self.conn.cursor()
my_schema = "my_schema"
my_table = "my_table"
cur.execute(f"""INSERT INTO {my_schema}.{my_table}(load_date, some_id)
 values (current_timestamp, 'xxx')""")
write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema)

But I'm getting

  File "/Users/abohr/virtualenv/peak38/lib/python3.8/site-packages/snowflake/connector/errors.py", line 85, in default_errorhandler
    raise error_class(
snowflake.connector.errors.ProgrammingError: 001757 (42601): SQL compilation error:
Table '"my_schema"."my_table"' does not exist

The same connection runs the plain INSERT fine, but write_pandas then fails on the very same table.

I also tried

df.to_sql(..., method=pd_writer)

and get

pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting

Why is it talking about sqlite_master if I'm connecting to Snowflake? Do the pandas functions require a different kind of connection? My libs:

name = "snowflake-connector-python"
version = "2.3.3"
description = "Snowflake Connector for Python"
category = "main"
optional = false
python-versions = ">=3.5"


[[package]]
name = "pandas"
version = "1.0.5"
description = "Powerful data structures for data analysis, time series, and statistics"
category = "main"
optional = false
python-versions = ">=3.6.1"
on Python 3.8
Regarding your write_pandas() error - does the table exist? It might also help to provide the pertinent pieces of your code. – Mike Walton
You'll need to show code as Mike said. Are you sure the role that you are using to log into Snowflake with Python has access to the table? Regarding your df.to_sql issue, check out my answer at the following link, which explains why that doesn't work: stackoverflow.com/questions/64505552/… – Simon D
@SimonD - so I found the issue after staring at the error message more closely: '"my_schema"."my_table"'. I set quote_identifiers=False on write_pandas and it works. Why would the default behavior be to use an incompatible syntax? Surely I'm doing it wrong? – alexP_Keaton

2 Answers


I found my issue - I added quote_identifiers=False, i.e. write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema, quote_identifiers=False), and now it works. It still seems very wrong that the default behavior would break, so I remain suspicious.
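For what it's worth, the quoting behaviour explains the original error, rather than a permissions problem: by default write_pandas wraps the schema and table names in double quotes, and quoted identifiers in Snowflake are case-sensitive. An unquoted CREATE TABLE my_schema.my_table is stored as MY_SCHEMA.MY_TABLE, so the quoted, lowercase '"my_schema"."my_table"' really does not exist. A minimal sketch of the two workarounds, assuming conn is an open snowflake.connector connection and daily_epc_df is the DataFrame from the question:

from snowflake.connector.pandas_tools import write_pandas

# Option 1: keep the default quoting but pass the names exactly as Snowflake
# stores them (unquoted identifiers are folded to upper case at creation):
write_pandas(conn, daily_epc_df, table_name="MY_TABLE", schema="MY_SCHEMA")

# Option 2: tell write_pandas not to quote, so the identifiers resolve
# case-insensitively, just like the f-string INSERT in the question:
write_pandas(conn, daily_epc_df, table_name="my_table", schema="my_schema",
             quote_identifiers=False)

The df.to_sql error is a separate issue: pandas only treats a raw DBAPI connection as SQLite (hence the sqlite_master query), so to_sql with method=pd_writer needs an SQLAlchemy engine built with snowflake-sqlalchemy, as the answer linked in the comments explains.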


Hmm, I've done something I think is superior to the pandas approach ...

Tell me if you agree ... I call it Snowball:

CREATE OR REPLACE PROCEDURE DEMO_DB.PUBLIC.SNOWBALL(
    db_name STRING,
    schema_name STRING,
    snowball_table STRING,
    max_age_days FLOAT,
    limit FLOAT
  )
  RETURNS VARIANT
  LANGUAGE JAVASCRIPT
  COMMENT = 'Collects table and column stats.'
  EXECUTE AS OWNER
  AS
$$
 
var validLimit = Math.max(LIMIT, 0); // prevent SQL syntax error caused by negative numbers
var sqlGenerateInserts = `
WITH snowball_tables AS (
  SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
    FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.TABLES
  ),
snowball_columns AS (
  SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
    FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.COLUMNS
  ),
snowball AS (
  SELECT table_name, MAX(stats_run_date_time) AS stats_run_date_time
    FROM IDENTIFIER(?) -- <<SNOWBALL_TABLE>> table
   GROUP BY table_name
)
 
SELECT full_table_name, aprox_row_count,
    CONCAT (
      'INSERT INTO IDENTIFIER(''', ?, ''') ', -- SNOWBALL table
      '(table_name,total_rows,table_last_altered,table_created,table_bytes,col_name,',
      'col_data_type,col_hll,col_avg_length,col_null_cnt,col_min,col_max,col_top,col_mode,col_avg,stats_run_date_time)',
      'SELECT ''', full_table_name, ''' AS table_name, ',
      table_stats_sql,
      ', ARRAY_CONSTRUCT( ', col_name, ') AS col_name',
      ', ARRAY_CONSTRUCT( ', col_data_type, ') AS col_data_type',
      ', ARRAY_CONSTRUCT( ', col_hll, ') AS col_hll',
      ', ARRAY_CONSTRUCT( ', col_avg_length, ') AS col_avg_length',
      ', ARRAY_CONSTRUCT( ', col_null_cnt, ') AS col_null_cnt',
      ', ARRAY_CONSTRUCT( ', col_min, ') AS col_min',
      ', ARRAY_CONSTRUCT( ', col_max, ') AS col_max',
      ', ARRAY_CONSTRUCT( ', col_top, ') AS col_top',
      ', ARRAY_CONSTRUCT( ', col_MODE, ') AS col_MODE',
      ', ARRAY_CONSTRUCT( ', col_AVG, ') AS col_AVG',
      ', CURRENT_TIMESTAMP() AS stats_run_date_time ',
      ' FROM ', quoted_table_name
    ) AS insert_sql
FROM (
    SELECT
        tbl.full_table_name,
        tbl.row_count AS aprox_row_count,
        CONCAT ( '"', col.table_catalog, '"."',  col.table_schema, '"."',  col.table_name, '"' ) AS quoted_table_name,
        CONCAT (
          'COUNT(1) AS total_rows,''',
          IFNULL( tbl.last_altered::VARCHAR, 'NULL'), ''' AS table_last_altered,''',
          IFNULL( tbl.created::VARCHAR, 'NULL'), ''' AS table_created,',
          IFNULL( tbl.bytes::VARCHAR, 'NULL'), ' AS table_bytes' ) AS table_stats_sql,
        LISTAGG (
          CONCAT ('''', col.full_table_name, '.', col.column_name, '''' ), ', '
          ) AS col_name,
        LISTAGG ( CONCAT('''', col.data_type, '''' ), ', ' ) AS col_data_type,
        LISTAGG ( CONCAT( ' HLL(', '"', col.column_name, '"',') ' ), ', ' ) AS col_hll,
        LISTAGG ( CONCAT( ' AVG(ZEROIFNULL(LENGTH(', '"', col.column_name, '"','))) ' ), ', ' ) AS col_avg_length,
        LISTAGG ( CONCAT( ' SUM( IFF( ', '"', col.column_name, '"',' IS NULL, 1, 0) ) ' ), ', ') AS col_null_cnt,
        LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MODE(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_MODE,
        LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MIN(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', '  ) AS col_min,
        LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MAX(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_max,
        LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' AVG(', '"', col.column_name,'"',') ' ), 'NULL' ), ', ' ) AS col_AVG,
        LISTAGG ( CONCAT ( ' APPROX_TOP_K(', '"', col.column_name, '"', ', 100, 10000)' ), ', ' ) AS col_top
    FROM snowball_tables tbl JOIN snowball_columns col ON col.full_table_name = tbl.full_table_name
    LEFT OUTER JOIN snowball sb ON sb.table_name = tbl.full_table_name
    WHERE (tbl.table_catalog, tbl.table_schema) = (?, ?)
         AND ( sb.table_name IS NULL OR sb.stats_run_date_time < TIMESTAMPADD(DAY, - FLOOR(?), CURRENT_TIMESTAMP()) )
         --AND tbl.row_count > 0 -- NB: also excludes views (table_type = 'VIEW')
    GROUP BY tbl.full_table_name, aprox_row_count, quoted_table_name, table_stats_sql, stats_run_date_time
    ORDER BY stats_run_date_time NULLS FIRST )
    LIMIT ` + validLimit; 
 
var tablesAnalysed = [];
var currentSql;
 
try {
  currentSql = sqlGenerateInserts;
  var generateInserts = snowflake.createStatement( {
          sqlText: currentSql,
          binds: [
              `"${DB_NAME}".information_schema.tables`,
              `"${DB_NAME}".information_schema.columns`,
              SNOWBALL_TABLE, SNOWBALL_TABLE,
              DB_NAME, SCHEMA_NAME, MAX_AGE_DAYS, LIMIT
            ]
        } );
 
  var insertStatements = generateInserts.execute();
  // loop over generated INSERT statements and execute them
  while (insertStatements.next()) {
    var tableName = insertStatements.getColumnValue('FULL_TABLE_NAME');
    currentSql = insertStatements.getColumnValue('INSERT_SQL');
    var insertStatement = snowflake.createStatement( {
            sqlText: currentSql,
            binds: [ SNOWBALL_TABLE ]
          } );
    var insertResult = insertStatement.execute();
    tablesAnalysed.push(tableName);
  }
 
  return { result: "SUCCESS", analysedTables: tablesAnalysed };
 
}
catch (err)  {
  return {
      error: err,
      analysedTables: tablesAnalysed,
      sql: currentSql
  };
}
 
$$;
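For orientation, a hypothetical usage sketch follows. The stats table name (SNOWBALL_STATS) and the column types are assumptions inferred from the INSERT statement the procedure generates, so adjust them to your environment:

-- Hypothetical target table for the collected stats; the name and column
-- types are inferred from the generated INSERT, not defined by the procedure.
CREATE TABLE IF NOT EXISTS DEMO_DB.PUBLIC.SNOWBALL_STATS (
  table_name           STRING,
  total_rows           NUMBER,
  table_last_altered   STRING,
  table_created        STRING,
  table_bytes          NUMBER,
  col_name             ARRAY,
  col_data_type        ARRAY,
  col_hll              ARRAY,
  col_avg_length       ARRAY,
  col_null_cnt         ARRAY,
  col_min              ARRAY,
  col_max              ARRAY,
  col_top              ARRAY,
  col_mode             ARRAY,
  col_avg              ARRAY,
  stats_run_date_time  TIMESTAMP_LTZ
);

-- Collect stats for up to 100 tables in DEMO_DB.PUBLIC that have no stats yet
-- or whose stats are older than 30 days.
CALL DEMO_DB.PUBLIC.SNOWBALL('DEMO_DB', 'PUBLIC', 'DEMO_DB.PUBLIC.SNOWBALL_STATS', 30, 100);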