I am writing a Cloud Function to:
- Export a Cloud SQL (postgresql) DB to a file in a Cloud Storage bucket
- Import it back into another Cloud SQL instance/DB (still postgresql)
Note:
I want this code to run on its own every night to copy a production DB to a staging environment, so I'm planning to trigger it using Cloud Scheduler.
If you have a better/easier solution to pull this off within GCP, I'm all ears :)
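
For context, here is a rough sketch of how I intend to create the nightly trigger with the Cloud Scheduler client library. The job name, schedule, region, function URL and service account below are placeholders rather than my actual setup, and creating the job from the console or `gcloud` would work just as well:

```python
# Rough sketch only: a Cloud Scheduler job that calls the function's HTTP
# endpoint every night. All names and URLs below are placeholders.
from google.cloud import scheduler_v1


def create_nightly_trigger(project: str, region: str = "europe-west1"):
    client = scheduler_v1.CloudSchedulerClient()
    parent = f"projects/{project}/locations/{region}"
    job = {
        "name": f"{parent}/jobs/clone-db-nightly",
        "schedule": "0 2 * * *",  # every night at 02:00
        "time_zone": "Etc/UTC",
        "http_target": {
            # Placeholder URL of the deployed clone_db function.
            "uri": f"https://{region}-{project}.cloudfunctions.net/clone_db",
            # Authenticate the call; the exact auth setup depends on how the
            # function is secured.
            "oidc_token": {
                "service_account_email": f"scheduler-invoker@{project}.iam.gserviceaccount.com"
            },
        },
    }
    return client.create_job(parent=parent, job=job)
```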
Here's my code (the actual function is `clone_db` at the bottom of the file):
from os import getenv
from datetime import datetime
from time import sleep
from googleapiclient import discovery
from googleapiclient.errors import HttpError
from oauth2client.client import GoogleCredentials
from google.cloud import storage
GS_BUCKET = getenv("GS_BUCKET")
GS_FOLDER = "sql-exports"
GS_EXPORT_PATH = f"gs://{GS_BUCKET}/{GS_FOLDER}"
def __sql_file_name(db: str, timestamp: datetime):
return f"{db}-{timestamp.strftime('%Y-%m-%d')}.sql.gz"
def __sql_file_uri(db: str, timestamp: datetime):
return f"{GS_EXPORT_PATH}/{__sql_file_name(db, timestamp)}"
def __export_source_db(service, project: str, timestamp: datetime, instance: str, db: str):
context = {
"exportContext": {
"kind": "sql#exportContext",
"fileType": "SQL",
"uri": __sql_file_uri(db, timestamp),
"databases": [db],
}
}
return service.instances().export(project=project, instance=instance, body=context).execute()
def __import_target_db(service, project: str, timestamp: datetime, instance: str, db: str):
context = {
"importContext": {
"kind": "sql#importContext",
"fileType": "SQL",
"uri": __sql_file_uri(db, timestamp),
"database": db,
}
}
return service.instances().import_(project=project, instance=instance, body=context).execute()
def __drop_db(service, project: str, instance: str, db: str):
try:
return service.databases().delete(project=project, instance=instance, database=db).execute()
except HttpError as e:
if e.resp.status == 404:
return {"status": "DONE"}
else:
raise e
def __create_db(service, project: str, instance: str, db: str):
database = {
"name": db,
"project": project,
"instance": instance,
}
return service.databases().insert(project=project, instance=instance, body=database).execute()
def __update_export_permissions(file_name: str):
client = storage.Client()
file = client.get_bucket(GS_BUCKET).get_blob(f"{GS_FOLDER}/{file_name}")
file.acl.user(getenv("TARGET_DB_SERVICE_ACCOUNT")).grant_read()
file.acl.save()
def __delete_sql_file(file_name: str):
client = storage.Client()
bucket = client.get_bucket(GS_BUCKET)
bucket.delete_blob(f"{GS_FOLDER}/{file_name}")
def __wait_for(operation_type, operation, service, project):
if operation["status"] in ("PENDING", "RUNNING", "UNKNOWN"):
print(f"{operation_type} operation in {operation['status']} status. Waiting for completion...")
while operation['status'] != "DONE":
sleep(1)
operation = service.operations().get(project=project, operation=operation['name']).execute()
print(f"{operation_type} operation completed!")
def clone_db(_):
credentials = GoogleCredentials.get_application_default()
service = discovery.build('sqladmin', 'v1beta4', credentials=credentials)
# Project ID of the project that contains the instance to be exported.
project = getenv('PROJECT_ID')
# Cloud SQL instance ID. This does not include the project ID.
source = {
"instance": getenv("SOURCE_INSTANCE_ID"),
"db": getenv("SOURCE_DB_NAME")
}
timestamp = datetime.utcnow()
print(f"Exporting database {source['instance']}:{source['db']} to Cloud Storage...")
operation = __export_source_db(service, project, timestamp, **source)
__wait_for("Export", operation, service, project)
print("Updating exported file permissions...")
__update_export_permissions(__sql_file_name(source["db"], timestamp))
print("Done.")
target = {
"instance": getenv("TARGET_INSTANCE_ID"),
"db": getenv("TARGET_DB_NAME")
}
print(f"Dropping target database {target['instance']}:{target['db']}")
operation = __drop_db(service, project, **target)
__wait_for("Drop", operation, service, project)
print(f"Creating database {target['instance']}:{target['db']}...")
operation = __create_db(service, project, **target)
__wait_for("Creation", operation, service, project)
print(f"Importing data into {target['instance']}:{target['db']}...")
operation = __import_target_db(service, project, timestamp, **target)
__wait_for("Import", operation, service, project)
print("Deleting exported SQL file")
__delete_sql_file(__sql_file_name(source["db"], timestamp))
print("Done.")
Things work perfectly fine until I try to import the exported data into my target instance.
When it calls `import_`, the function fails with the following error:
Error: function crashed. Details:
<HttpError 403 when requesting https://www.googleapis.com/sql/v1beta4/projects/<project_id>/instances/<instance_id>/import?alt=json returned "The service account does not have the required permissions for the bucket.">
I have read about this error in many other Q&As here and on the web, but I can't figure out how to make things work.
Here's what I have done:
- The Cloud Function runs as my "Compute Engine default service account", which has the Project Editor role set in IAM
- The target Cloud SQL instance's service account is added to the bucket's permissions as a Storage Object Admin (a sketch of that bucket-level grant is shown right after this list). I have tried various other role combinations (legacy reader/owner, Storage Object Viewer, ...) to no avail
- As you can see in the function's code, I am specifically granting read access on the exported file to the target instance's service account, and it is correctly reflected on the object's permissions in Cloud Storage
- I have tried disabling object-level permissions for this bucket and made sure the permissions from my first point above were correctly set, but that did not work either
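
To make that second point concrete, here is roughly what the bucket-level grant looks like when done programmatically. The role is the same Storage Object Admin I set in the console, and `instances().get()` is only there to double-check which service account the target instance actually uses; treat this as a sketch, not my production code:

```python
# Sketch: bucket-level grant for the target instance's service account,
# mirroring what I configured by hand in the console.
from os import getenv
from google.cloud import storage


def grant_bucket_access(service, project: str, instance: str):
    # Look up the service account the target Cloud SQL instance runs as.
    instance_info = service.instances().get(project=project, instance=instance).execute()
    sa_email = instance_info["serviceAccountEmailAddress"]

    client = storage.Client()
    bucket = client.get_bucket(getenv("GS_BUCKET"))

    # Add an IAM binding on the bucket itself (not on a single object).
    policy = bucket.get_iam_policy()
    policy["roles/storage.objectAdmin"].add(f"serviceAccount:{sa_email}")
    bucket.set_iam_policy(policy)
```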
Interestingly, when I manually import the same file into the same instance from the GCP Cloud SQL console, things work perfectly well.
Once it's done, I can see that my exported file's permissions have been updated to include the instance's service account as a Reader, just as I ended up doing in my code to try and reproduce that behaviour.
So what am I missing here?
Which permissions should I set, for which service account, for this to work?