Avro Schema Compatibility Test
How to make schema compatibility check part of your build time.
Key Take Aways:
- Unlike thrift Avro Serialized data has no tag or schema information.
- Absence of Schema in avro compels one to know the shema with which it was written in order for it to read correctly.
- Schema compatibility is needed to support schema evolution and read and write with different schema.
Lets take Employee Record:
{
"name": "com.big.data.avro.schema.Employee",
"type": "record",
"fields": [{
"name": "emp_id",
"type": "int",
"doc": "employee Id of the employee"
},
{
"name": "emp_name",
"type": "string",
"doc": "employee name of the employee"
},
{
"name": "emp_country",
"type": "string",
"doc": "country of residence"
},
{ "name": "bonus",
"type": ["null", "long"],
"default": null,
"doc": "bonus received on a yearly basis"
},
{
"name": "subordinates",
"type": ["null", {"type": "map", "values": "string"}],
"default": null,
"doc": "map of subordinates Name and Designation"
},
{
"name": "departments",
"type":["null", {"type":"array", "items":"string" }],
"default":null,
"doc": "Departments under the employee"
}
]
}
[addToAppearHere]
Lets take the scenario of evolving schema and try to see if we can perform schema compatibility test at build time or not.
import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
public class AvroSchemaCompatibilityTest {
@Test
public void testCompatibility() throws Exception {
File avroSchemaPath = new File("src/main/avro/Employee.avsc").getAbsoluteFile();
// Parse the avsc file and form the Schema Object
Schema newSchema = new Schema.Parser().parse(avroSchemaPath);
// Old schema dosent have department array
Schema oldSchema = new Schema.Parser().parse("{\n" +
" \"name\": \"com.big.data.avro.schema.Employee\",\n" +
" \"type\": \"record\",\n" +
" \"fields\": [{\n" +
" \"name\": \"emp_id\",\n" +
" \"type\": \"int\",\n" +
" \"doc\": \"employee Id of the employee\"\n" +
" },\n" +
" {\n" +
" \"name\": \"emp_name\",\n" +
" \"type\": \"string\",\n" +
" \"doc\": \"employee name of the employee\"\n" +
" },\n" +
" {\n" +
" \"name\": \"emp_country\",\n" +
" \"type\": \"string\",\n" +
" \"doc\": \"country of residence\"\n" +
" },\n" +
" { \"name\": \"bonus\",\n" +
" \"type\": [\"null\", \"long\"],\n" +
" \"default\": null,\n" +
" \"doc\": \"bonus received on a yearly basis\"\n" +
" },\n" +
" {\n" +
" \"name\": \"subordinates\",\n" +
" \"type\": [\"null\", {\"type\": \"map\", \"values\": \"string\"}],\n" +
" \"default\": null,\n" +
" \"doc\": \"map of subordinates Name and Designation\"\n" +
" }\n" +
" ]\n" +
" }");
// Test compatibility of newSchema as the reader schema, old schema as the writer schema
SchemaCompatibility.SchemaPairCompatibility compatResult =
SchemaCompatibility.checkReaderWriterCompatibility(newSchema, oldSchema);
Assert.assertTrue(SchemaCompatibility.schemaNameEquals(newSchema,oldSchema));
Assert.assertNotNull(compatResult);
Assert.assertEquals(SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE,
compatResult.getType());
}
}
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
</plugin>
</plugins>
</build>