Avro Schema Compatibility Test

By
on Mar 28, 2017
in Avro

How to make a schema compatibility check part of your build.

Key Takeaways:

Unlike Thrift, Avro serialized data has no tag or schema information.
The absence of a schema in Avro data compels the reader to know the schema with which the data was written in order to read it correctly.
Schema compatibility is needed to support schema evolution, i.e. reading and writing with different schemas.

Let's take an Employee record:

{
    "name": "com.big.data.avro.schema.Employee",
    "type": "record",
    "fields": [{
            "name": "emp_id",
            "type": "int",
            "doc": "employee Id of the employee"
        },
        {
             "name": "emp_name",
             "type": "string",
             "doc": "employee name of the employee"
        },
        {
              "name": "emp_country",
              "type": "string",
              "doc": "country of residence"
        },
        {      "name": "bonus",
               "type": ["null", "long"],
               "default": null,
               "doc": "bonus received on a yearly basis"
        },
       {
               "name": "subordinates",
               "type": ["null", {"type": "map", "values": "string"}],
               "default": null,
               "doc": "map of subordinates Name and Designation"
        },
        {
               "name": "departments",
               "type":["null", {"type":"array", "items":"string" }],
               "default":null,
               "doc": "Departments under the employee"
         }
        ]
 }

[addToAppearHere]

Let's take the scenario of an evolving schema and see whether we can perform a schema compatibility test at build time.


import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;


/**
 * Build-time guard for the evolution of the {@code Employee} Avro schema.
 *
 * <p>The current schema is parsed from {@code src/main/avro/Employee.avsc} and checked as the
 * reader schema against the previous revision of the schema (embedded below, without the
 * {@code departments} field) acting as the writer schema.
 */
public class AvroSchemaCompatibilityTest {

    /** Path of the current schema file; relative paths resolve against the test's working directory. */
    private static final String CURRENT_SCHEMA_PATH = "src/main/avro/Employee.avsc";

    /** Previous revision of the Employee schema: it does not have the departments array. */
    private static final String OLD_SCHEMA_JSON =
            "{\n" +
            "    \"name\": \"com.big.data.avro.schema.Employee\",\n" +
            "    \"type\": \"record\",\n" +
            "    \"fields\": [{\n" +
            "            \"name\": \"emp_id\",\n" +
            "            \"type\": \"int\",\n" +
            "            \"doc\": \"employee Id of the employee\"\n" +
            "        },\n" +
            "        {\n" +
            "             \"name\": \"emp_name\",\n" +
            "             \"type\": \"string\",\n" +
            "             \"doc\": \"employee name of the employee\"\n" +
            "        },\n" +
            "        {\n" +
            "              \"name\": \"emp_country\",\n" +
            "              \"type\": \"string\",\n" +
            "              \"doc\": \"country of residence\"\n" +
            "        },\n" +
            "        {      \"name\": \"bonus\",\n" +
            "               \"type\": [\"null\", \"long\"],\n" +
            "               \"default\": null,\n" +
            "               \"doc\": \"bonus received on a yearly basis\"\n" +
            "        },\n" +
            "       {\n" +
            "               \"name\": \"subordinates\",\n" +
            "               \"type\": [\"null\", {\"type\": \"map\", \"values\": \"string\"}],\n" +
            "               \"default\": null,\n" +
            "               \"doc\": \"map of subordinates Name and Designation\"\n" +
            "        }\n" +
            "        ]\n" +
            " }";

    /**
     * Verifies that data written with the old schema can be read with the current schema.
     *
     * @throws Exception if the schema file cannot be read or either schema fails to parse
     */
    @Test
    public void testCompatibility() throws Exception {
        File avroSchemaPath = new File(CURRENT_SCHEMA_PATH).getAbsoluteFile();

        // Parse the avsc file and form the Schema object.
        Schema newSchema = new Schema.Parser().parse(avroSchemaPath);

        // A separate parser is used on purpose: both schemas declare the same record name, and a
        // single Schema.Parser refuses to define the same named type twice.
        Schema oldSchema = new Schema.Parser().parse(OLD_SCHEMA_JSON);

        // Test compatibility with newSchema as the reader schema and oldSchema as the writer schema.
        SchemaCompatibility.SchemaPairCompatibility compatResult =
                SchemaCompatibility.checkReaderWriterCompatibility(newSchema, oldSchema);

        Assert.assertTrue(
                "Reader and writer schema names do not match: "
                        + newSchema.getFullName() + " vs " + oldSchema.getFullName(),
                SchemaCompatibility.schemaNameEquals(newSchema, oldSchema));
        Assert.assertNotNull("Compatibility check returned no result", compatResult);

        // Surface Avro's own explanation so a failing build says why the schemas are incompatible.
        Assert.assertEquals(
                "Schema in " + avroSchemaPath + " cannot read data written with the old schema: "
                        + compatResult.getDescription(),
                SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE,
                compatResult.getType());
    }
}

Dependencies

 <!-- Dependencies used by the compatibility test shown earlier in this article.
      NOTE(review): no <version> is declared for the Avro artifact; presumably it is supplied by
      a parent POM or a dependencyManagement section. Confirm before copying this fragment.
      NOTE(review): the test imports org.junit, but no JUnit dependency is listed here;
      presumably it is inherited as well. Confirm. -->
 <dependencies>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
        </dependency>
    </dependencies>

    <!-- Build plugins.
         NOTE(review): the avro-maven-plugin is declared without <version>, <executions> or
         <configuration>; presumably these come from a pluginManagement section elsewhere. Confirm. -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.avro</groupId>
                <artifactId>avro-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

Tags: Avro, avro compatibility test, avro definition, Avro Pojo, avro schema evolution