I am trying to recreate a schema evolution case (backward compatibility) with avro-python3.
I have two schemas:
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
# Writer schema: record "CustomerV1" in namespace "com.example".
# Five fields without defaults (first_name, last_name, age, height, weight)
# plus the boolean "automated_email", which defaults to true.
schema_v1 = avro.schema.Parse("""
{
"type": "record",
"namespace": "com.example",
"name": "CustomerV1",
"fields": [
{ "name": "first_name", "type": "string", "doc": "First Name of Customer" },
{ "name": "last_name", "type": "string", "doc": "Last Name of Customer" },
{ "name": "age", "type": "int", "doc": "Age at the time of registration" },
{ "name": "height", "type": "float", "doc": "Height at the time of registration in cm" },
{ "name": "weight", "type": "float", "doc": "Weight at the time of registration in kg" },
{ "name": "automated_email", "type": "boolean", "default": true, "doc": "Field indicating if the user is enrolled in marketing emails" }
]
}
""")
# Reader schema: record "CustomerV2" in namespace "com.example".
# Compared with schema_v1 it drops "automated_email" and adds two fields that
# both carry defaults: the nullable "phone_number" and the string "email".
# NOTE(review): the record name here ("CustomerV2") differs from the one in
# schema_v1 ("CustomerV1"). The Avro specification resolves two record schemas
# only when their names match, so this mismatch is the likely cause of the
# SchemaResolutionException from the Python reader -- confirm by giving both
# schemas the same record name.
schema_v2 = avro.schema.Parse("""
{
"type": "record",
"namespace": "com.example",
"name": "CustomerV2",
"fields": [
{ "name": "first_name", "type": "string", "doc": "First Name of Customer" },
{ "name": "last_name", "type": "string", "doc": "Last Name of Customer" },
{ "name": "age", "type": "int", "doc": "Age at the time of registration" },
{ "name": "height", "type": "float", "doc": "Height at the time of registration in cm" },
{ "name": "weight", "type": "float", "doc": "Weight at the time of registration in kg" },
{ "name": "phone_number", "type": ["null", "string"], "default": null, "doc": "optional phone number"},
{ "name": "email", "type": "string", "default": "[email protected]", "doc": "email address"}
]
}
""")
The second schema doesn't have the `automated_email` field but has two additional fields: `phone_number` and `email`.
According to the Avro schema evolution rules, if I write an Avro record with schema_v1:
# Write one customer record to customer_v1.avro using schema_v1.
# The file is opened in a `with` block so the handle is released even if
# append() raises (e.g. when the datum does not validate against the schema).
with open("customer_v1.avro", "wb") as out_file:
    writer = DataFileWriter(out_file, DatumWriter(), schema_v1)
    writer.append({
        "first_name": "John",
        "last_name": "Doe",
        "age": 34,
        "height": 178.0,
        "weight": 75.0,
        "automated_email": True,
    })
    # close() flushes the buffered records into the container file.
    writer.close()
... I can read it with schema_v2, provided there are default values for the fields that are missing from the written data:
# Read customer_v1.avro back, asking Avro to resolve each record against
# schema_v2 (the reader schema), and print every decoded record.
# The file is opened in a `with` block so the handle is released even when
# iteration raises.
with open("customer_v1.avro", "rb") as in_file:
    reader = DataFileReader(in_file, DatumReader(reader_schema=schema_v2))
    for record in reader:
        print(record)
    reader.close()
But I get the following error:
SchemaResolutionException: Schemas do not match.
I know this works in Java. This is an example from a video course. Is there a way to make it work in Python?