Currently, the Python Client has no support for loading data from file with a schema auto-detection flag (I plan on doing a pull request to add this support but still I'd like to talk to the maintainers what their opinions are on this implementation).
There are still some ways to work this around. I didn't find a very elegant solution so far but nevertheless this code allows you to add schema detection as input flag:
from google.cloud.bigquery import Client
import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'path/your/json.key'
import google.cloud.bigquery.table as mtable
def _configure_job_metadata(metadata,
allow_jagged_rows,
allow_quoted_newlines,
create_disposition,
encoding,
field_delimiter,
ignore_unknown_values,
max_bad_records,
quote_character,
skip_leading_rows,
write_disposition):
load_config = metadata['configuration']['load']
if allow_jagged_rows is not None:
load_config['allowJaggedRows'] = allow_jagged_rows
if allow_quoted_newlines is not None:
load_config['allowQuotedNewlines'] = allow_quoted_newlines
if create_disposition is not None:
load_config['createDisposition'] = create_disposition
if encoding is not None:
load_config['encoding'] = encoding
if field_delimiter is not None:
load_config['fieldDelimiter'] = field_delimiter
if ignore_unknown_values is not None:
load_config['ignoreUnknownValues'] = ignore_unknown_values
if max_bad_records is not None:
load_config['maxBadRecords'] = max_bad_records
if quote_character is not None:
load_config['quote'] = quote_character
if skip_leading_rows is not None:
load_config['skipLeadingRows'] = skip_leading_rows
if write_disposition is not None:
load_config['writeDisposition'] = write_disposition
load_config['autodetect'] = True
mtable._configure_job_metadata = _configure_job_metadata
bq_client = Client()
ds = bq_client.dataset('dataset_name')
ds.table = lambda: mtable.Table('table_name', ds)
table = ds.table()
with open(source_file_name, 'rb') as source_file:
job = table.upload_from_file(
source_file, source_format='text/csv')