diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 7a49f32729ec1f..dc770f7fc83a61 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -5,12 +5,12 @@ on: branches: - master paths: - - "metadata-integration" + - "metadata-integration/**" pull_request: branches: - "**" paths: - - "metadata-integration" + - "metadata-integration/**" release: types: [published] @@ -28,15 +28,22 @@ jobs: runs-on: ubuntu-latest steps: - uses: acryldata/sane-checkout-action@v3 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - uses: actions/cache@v4 + with: + path: | + ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('**/requirements.txt') }} + - name: Install dependencies + run: ./metadata-ingestion/scripts/install_deps.sh - name: Set up JDK 17 uses: actions/setup-java@v4 with: distribution: "zulu" java-version: 17 - uses: gradle/actions/setup-gradle@v3 - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - name: check ${{ matrix.command }} jar run: | ./gradlew :metadata-integration:java:${{ matrix.command }}:build --info diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index cec3164f10d6cc..42861cf235b56f 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -95,6 +95,11 @@ test { finalizedBy jacocoTestReport } +// no submodule depends on datahub-schematron:cli +// and tests there are the ones checking python-java compatibility +test.dependsOn tasks.getByPath(":metadata-integration:java:datahub-schematron:cli:test") +test.dependsOn tasks.getByPath(":metadata-integration:java:datahub-schematron:lib:test") + task checkShadowJar(type: Exec) { commandLine 'sh', '-c', 'scripts/check_jar.sh' } diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java new file mode 100644 index 00000000000000..d6522c2d84670f --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java @@ -0,0 +1,942 @@ +package io.datahubproject.schematron.converters.avro; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.*; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; +import org.apache.avro.Schema; +import org.testng.annotations.*; + +@Test(groups = "unit") +class AvroSchemaConverterTest { + + private AvroSchemaConverter avroSchemaConverter = AvroSchemaConverter.builder().build(); + private DataPlatformUrn dataPlatformUrn = + DataPlatformUrn.createFromString("urn:li:dataPlatform:foo"); + + AvroSchemaConverterTest() throws URISyntaxException {} + + @Test(groups = "basic") + void testPrimitiveTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("primitive_types.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=PrimitiveType].[type=int].intField", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=PrimitiveType].[type=union].intFieldV2", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=PrimitiveType].[type=union].[type=int].intFieldV2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=PrimitiveType].[type=null].nullField", + "null", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NullType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=PrimitiveType].[type=union].nullFieldV2", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=PrimitiveType].[type=long].longField", + "long", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=PrimitiveType].[type=float].floatField", + "float", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=PrimitiveType].[type=double].doubleField", + "double", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=PrimitiveType].[type=string].stringField", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=PrimitiveType].[type=boolean].booleanField", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=PrimitiveType].[type=int].nullableIntField", + "int", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=PrimitiveType].[type=long].nullableLongField", + "long", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=PrimitiveType].[type=string].nullableStringField", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=PrimitiveType].[type=enum].status", + "Enum", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType()))); + } + + @Test(groups = "basic") + void testComplexMaps() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_maps.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 15); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=MapType].[type=map].mapOfString", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("ComplexType")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableComplexType", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=MapType].[type=map].[type=array].mapOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=MapType].[type=map].[type=map].mapOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("int")))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=int].mapOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexArrays() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_arrays.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 16); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=ArrayType].[type=array].arrayOfString", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=ArrayType].[type=array].[type=map].arrayOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("ComplexType"))))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=ArrayType].[type=array].[type=array].arrayOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=int].arrayOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=boolean].arrayOfUnion", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableRecord", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(15), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexStructs() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_structs.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 13); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField", + "ComplexStruct", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=string].fieldString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=int].fieldInt", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=boolean].fieldBoolean", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=array].fieldArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].fieldUnion", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=string].fieldUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=int].fieldUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldNullableMap", + "map", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + } + + @Test(groups = "basic") + void testComplexUnions() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("complex_unions.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=UnionType].[type=union].fieldUnionNullablePrimitives", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionNullablePrimitives", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=UnionType].[type=union].[type=int].fieldUnionNullablePrimitives", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=UnionType].[type=union].[type=boolean].fieldUnionNullablePrimitives", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=UnionType].[type=union].fieldUnionComplexTypes", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=UnionType].[type=union].[type=map].fieldUnionComplexTypes", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=UnionType].[type=union].fieldUnionPrimitiveAndComplex", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionPrimitiveAndComplex", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex", + "ComplexTypeRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=string].complexField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=int].complexField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testLogicalTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("logical_types.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 9); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalField", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"scale\":2,\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutScale", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutPrecisionAndScale", + "bytes", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())), + "{\"logicalType\":\"decimal\"}"); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMillisField", + "long(timestamp-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-millis\"}"); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMicrosField", + "long(timestamp-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=LogicalTypes].[type=int].dateField", + "int(date)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new DateType())), + "{\"logicalType\":\"date\"}"); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=LogicalTypes].[type=int].timeMillisField", + "int(time-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-millis\"}"); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=LogicalTypes].[type=long].timeMicrosField", + "long(time-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-micros\"}"); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=LogicalTypes].[type=string].uuidField", + "string(uuid)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())), + "{\"logicalType\":\"uuid\"}"); + } + + @Test(groups = "basic") + void testUsersRecord() throws IOException { + // this is a test case got during the Hudi integration + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("users_record.avsc"), false, false, dataPlatformUrn, null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 20); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=users_record].[type=string]._hoodie_commit_time", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=users_record].[type=string]._hoodie_commit_seqno", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=users_record].[type=string]._hoodie_record_key", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=users_record].[type=string]._hoodie_partition_path", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=users_record].[type=string]._hoodie_file_name", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=users_record].[type=string].user_id", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=users_record].[type=string].name", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=users_record].[type=address].address", + "address", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=users_record].[type=address].address.[type=string].street", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=users_record].[type=address].address.[type=string].city", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=users_record].[type=address].address.[type=string].country", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=users_record].[type=address].address.[type=string].postal_code", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=users_record].[type=address].address.[type=long].created_at", + "long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=users_record].[type=contact].contact", + "contact", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=users_record].[type=contact].contact.[type=string].email", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(15), + "[version=2.0].[type=users_record].[type=contact].contact.[type=string].phone", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(16), + "[version=2.0].[type=users_record].[type=long].created_at", + "long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(17), + "[version=2.0].[type=users_record].[type=long].updated_at", + "long(timestamp-micros)", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(18), + "[version=2.0].[type=users_record].[type=map].[type=int].props", + "int", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(19), + "[version=2.0].[type=users_record].[type=string].country", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + } + + private void assertSchemaField( + SchemaField field, + String expectedPath, + String expectedNativeType, + boolean expectedNullable, + boolean expectedIsPartOfKey, + SchemaFieldDataType expectedType) { + assertSchemaField( + field, + expectedPath, + expectedNativeType, + expectedNullable, + expectedIsPartOfKey, + expectedType, + null); + } + + private void assertSchemaField( + SchemaField field, + String expectedPath, + String expectedNativeType, + boolean expectedNullable, + boolean expectedIsPartOfKey, + SchemaFieldDataType expectedType, + String expectedJsonProps) { + assertEquals(field.getFieldPath(), expectedPath); + assertEquals(field.getNativeDataType(), expectedNativeType); + assertEquals(field.isNullable(), expectedNullable); + assertEquals(field.isIsPartOfKey(), expectedIsPartOfKey); + assertEquals(field.getType(), expectedType); + if (expectedJsonProps != null) { + assertEquals(field.getJsonProps(), expectedJsonProps); + } + } + + private Schema readAvroSchema(String schemaFileName) throws IOException { + String schemaPath = getClass().getClassLoader().getResource(schemaFileName).getPath(); + File schemaFile = new File(schemaPath); + return new Schema.Parser().parse(schemaFile); + } +} diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc deleted file mode 100644 index 81f8b0e54b11e0..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile.avsc +++ /dev/null @@ -1,456 +0,0 @@ -{ - "type": "record", - "name": "CustomerProfile", - "namespace": "com.example.customer", - "doc": "A complex customer profile schema demonstrating various union types and optional fields", - "fields": [ - { - "name": "customerId", - "type": { - "type": "string", - "logicalType": "uuid" - }, - "doc": "Unique identifier for the customer" - }, - { - "name": "identificationDocument", - "type": [ - "null", - { - "type": "record", - "name": "Passport", - "fields": [ - { - "name": "passportNumber", - "type": "string" - }, - { - "name": "expiryDate", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "DriversLicense", - "fields": [ - { - "name": "licenseNumber", - "type": "string" - }, - { - "name": "state", - "type": "string" - }, - { - "name": "validUntil", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "NationalID", - "fields": [ - { - "name": "idNumber", - "type": "string" - }, - { - "name": "country", - "type": "string" - } - ] - } - ], - "default": null, - "doc": "Customer's identification document - can be passport, driver's license, or national ID" - }, - { - "name": "contactInfo", - "type": { - "type": "record", - "name": "ContactInformation", - "fields": [ - { - "name": "primaryContact", - "type": [ - { - "type": "record", - "name": "EmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - }, - { - "type": "record", - "name": "PhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "PhoneType", - "symbols": [ - "MOBILE", - "LANDLINE" - ] - } - } - ] - } - ], - "doc": "Primary contact method - either email or phone" - }, - { - "name": "alternativeContacts", - "type": { - "type": "array", - "items": [ - "null", - "EmailContact", - "PhoneContact" - ] - }, - "default": [], - "doc": "List of alternative contact methods" - } - ] - } - }, - { - "name": "addresses", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Address", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "AddressType", - "symbols": [ - "RESIDENTIAL", - "BUSINESS", - "SHIPPING" - ] - }, - "default": "RESIDENTIAL" - }, - { - "name": "street", - "type": "string" - }, - { - "name": "city", - "type": "string" - }, - { - "name": "state", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "country", - "type": "string" - }, - { - "name": "postalCode", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "validationStatus", - "type": [ - "null", - { - "type": "record", - "name": "AddressValidation", - "fields": [ - { - "name": "isValid", - "type": "boolean" - }, - { - "name": "verificationDate", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "verificationMethod", - "type": { - "type": "enum", - "name": "VerificationMethod", - "symbols": [ - "MANUAL", - "AUTOMATED" - ] - } - } - ] - } - ], - "default": null - } - ] - } - }, - "doc": "Customer's addresses with validation information" - }, - { - "name": "preferences", - "type": { - "type": "map", - "values": [ - "null", - "string", - "boolean", - { - "type": "record", - "name": "FrequencyPreference", - "fields": [ - { - "name": "frequency", - "type": { - "type": "enum", - "name": "Frequency", - "symbols": [ - "DAILY", - "WEEKLY", - "MONTHLY" - ] - } - }, - { - "name": "enabled", - "type": "boolean", - "default": true - }, - { - "name": "lastUpdated", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - ] - }, - "doc": "Customer preferences with various possible value types" - }, - { - "name": "subscriptionHistory", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Subscription", - "fields": [ - { - "name": "planName", - "type": "string" - }, - { - "name": "startDate", - "type": { - "type": "long", - "logicalType": "date" - } - }, - { - "name": "endDate", - "type": [ - "null", - { - "type": "long", - "logicalType": "date" - } - ], - "default": null - }, - { - "name": "status", - "type": { - "type": "enum", - "name": "SubscriptionStatus", - "symbols": [ - "ACTIVE", - "CANCELLED", - "EXPIRED", - "SUSPENDED" - ] - } - }, - { - "name": "paymentMethod", - "type": [ - "null", - { - "type": "record", - "name": "PaymentMethod", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "PaymentType", - "symbols": [ - "CREDIT_CARD", - "DEBIT_CARD", - "BANK_TRANSFER", - "DIGITAL_WALLET" - ] - } - }, - { - "name": "lastFourDigits", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "expiryDate", - "type": [ - "null", - { - "type": "long", - "logicalType": "date" - } - ], - "default": null - } - ] - } - ], - "default": null - } - ] - } - } - ], - "default": null, - "doc": "Historical record of customer subscriptions" - }, - { - "name": "metadata", - "type": { - "type": "map", - "values": [ - "null", - "string", - "long", - "boolean", - { - "type": "record", - "name": "MetadataValue", - "fields": [ - { - "name": "value", - "type": [ - "null", - "string", - "long", - "boolean" - ], - "default": null - }, - { - "name": "timestamp", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "source", - "type": "string" - } - ] - } - ] - }, - "doc": "Flexible metadata storage with various possible value types" - }, - { - "name": "tags", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Tag", - "fields": [ - { - "name": "name", - "type": "string" - }, - { - "name": "value", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "score", - "type": [ - "null", - "double" - ], - "default": null - }, - { - "name": "addedAt", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - } - ], - "default": null, - "doc": "Optional tags associated with the customer profile" - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc deleted file mode 100644 index b8c7654ea072a2..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/CustomerProfile2.avsc +++ /dev/null @@ -1,244 +0,0 @@ -{ - "type": "record", - "name": "CustomerProfile2", - "namespace": "com.example.customer", - "doc": "A complex customer profile schema demonstrating various union types and optional fields", - "fields": [ - { - "name": "customerId", - "type": { - "type": "string", - "logicalType": "uuid" - }, - "doc": "Unique identifier for the customer" - }, - { - "name": "identificationDocument", - "type": [ - "null", - { - "type": "record", - "name": "Passport", - "fields": [ - { - "name": "passportNumber", - "type": "string" - }, - { - "name": "expiryDate", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "DriversLicense", - "fields": [ - { - "name": "licenseNumber", - "type": "string" - }, - { - "name": "state", - "type": "string" - }, - { - "name": "validUntil", - "type": { - "type": "long", - "logicalType": "date" - } - } - ] - }, - { - "type": "record", - "name": "NationalID", - "fields": [ - { - "name": "idNumber", - "type": "string" - }, - { - "name": "country", - "type": "string" - } - ] - } - ], - "default": null, - "doc": "Customer's identification document" - }, - { - "name": "contactInfo", - "type": { - "type": "record", - "name": "ContactInformation", - "fields": [ - { - "name": "primaryEmailContact", - "type": [ - "null", - { - "type": "record", - "name": "PrimaryEmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - } - ], - "default": null - }, - { - "name": "primaryPhoneContact", - "type": [ - "null", - { - "type": "record", - "name": "PrimaryPhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "PhoneType", - "symbols": [ - "MOBILE", - "LANDLINE" - ] - } - } - ] - } - ], - "default": null - }, - { - "name": "alternativeEmailContacts", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "AlternativeEmailContact", - "fields": [ - { - "name": "emailAddress", - "type": "string" - }, - { - "name": "isVerified", - "type": "boolean", - "default": false - } - ] - } - }, - "default": [] - }, - { - "name": "alternativePhoneContacts", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "AlternativePhoneContact", - "fields": [ - { - "name": "countryCode", - "type": "string" - }, - { - "name": "number", - "type": "string" - }, - { - "name": "type", - "type": "PhoneType" - } - ] - } - }, - "default": [] - } - ] - } - }, - { - "name": "preferences", - "type": { - "type": "record", - "name": "Preferences", - "fields": [ - { - "name": "simplePreferences", - "type": { - "type": "map", - "values": [ - "null", - "string", - "boolean" - ] - }, - "default": {} - }, - { - "name": "frequencyPreferences", - "type": { - "type": "map", - "values": { - "type": "record", - "name": "FrequencyPreference", - "fields": [ - { - "name": "frequency", - "type": { - "type": "enum", - "name": "Frequency", - "symbols": [ - "DAILY", - "WEEKLY", - "MONTHLY" - ] - } - }, - { - "name": "enabled", - "type": "boolean", - "default": true - }, - { - "name": "lastUpdated", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - } - ] - } - }, - "default": {} - } - ] - } - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc deleted file mode 100644 index c796878c32ae41..00000000000000 --- a/metadata-integration/java/datahub-schematron/lib/src/test/resources/FlatUser.avsc +++ /dev/null @@ -1,45 +0,0 @@ -{ - "type": "record", - "name": "FlatUser", - "namespace": "com.example", - "fields": [ - { - "name": "id", - "type": "int", - "doc": "The unique identifier for a user", - "default": -1, - "metadata": { - "key1": "value1", - "key2": "value2" - } - }, - { - "name": "username", - "type": "string", - "doc": "The username of the user" - }, - { - "name": "email", - "type": "string", - "doc": "The email of the user" - }, - { - "name": "age", - "type": "int", - "doc": "The age of the user" - }, - { - "name": "isActive", - "type": "boolean", - "doc": "Whether the user is active or not" - }, - { - "name": "registrationDate", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - }, - "doc": "The registration date of the user" - } - ] -} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc new file mode 100644 index 00000000000000..8e8bcdaa0a7dce --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_arrays.avsc @@ -0,0 +1,87 @@ +{ + "type": "record", + "name": "ArrayType", + "fields": [ + { + "name": "arrayOfString", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "arrayOfMap", + "type": { + "type": "array", + "items": { + "type": "map", + "values": "string" + } + } + }, + { + "name": "arrayOfRecord", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "ComplexType", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + } + } + }, + { + "name": "arrayOfArray", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "arrayOfUnion", + "type": { + "type": "array", + "items": ["string", "int", "boolean"] + } + }, + { + "name": "arrayOfNullableString", + "type": { + "type": "array", + "items": ["null", "string"] + } + }, + { + "name": "arrayOfNullableRecord", + "type": { + "type": "array", + "items": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc new file mode 100644 index 00000000000000..baedae1b9dcc15 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_maps.avsc @@ -0,0 +1,87 @@ +{ + "type": "record", + "name": "MapType", + "fields": [ + { + "name": "mapOfString", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "mapOfComplexType", + "type": { + "type": "map", + "values": { + "type": "record", + "name": "ComplexType", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + } + } + }, + { + "name": "mapOfNullableString", + "type": { + "type": "map", + "values": ["null", "string"] + } + }, + { + "name": "mapOfNullableComplexType", + "type": { + "type": "map", + "values": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + }, + { + "name": "mapOfArray", + "type": { + "type": "map", + "values": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "mapOfMap", + "type": { + "type": "map", + "values": { + "type": "map", + "values": "int" + } + } + }, + { + "name": "mapOfUnion", + "type": { + "type": "map", + "values": ["null", "string", "int"] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc new file mode 100644 index 00000000000000..7f5824192d3062 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_structs.avsc @@ -0,0 +1,76 @@ +{ + "type": "record", + "name": "StructType", + "fields": [ + { + "name": "structField", + "type": { + "type": "record", + "name": "ComplexStruct", + "fields": [ + { + "name": "fieldString", + "type": "string" + }, + { + "name": "fieldInt", + "type": "int" + }, + { + "name": "fieldBoolean", + "type": "boolean" + }, + { + "name": "fieldMap", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "fieldRecord", + "type": { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + } + }, + { + "name": "fieldArray", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "fieldUnion", + "type": [ + "null", + "string", + "int" + ] + }, + { + "name": "fieldNullableMap", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc new file mode 100644 index 00000000000000..1a35f1cfa0e6d6 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/complex_unions.avsc @@ -0,0 +1,60 @@ +{ + "type": "record", + "name": "UnionType", + "fields": [ + { + "name": "fieldUnionNullablePrimitives", + "type": [ + "null", + "string", + "int", + "boolean" + ] + }, + { + "name": "fieldUnionComplexTypes", + "type": [ + "null", + { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + }, + { + "type": "map", + "values": "string" + } + ] + }, + { + "name": "fieldUnionPrimitiveAndComplex", + "type": [ + "null", + "string", + { + "type": "record", + "name": "ComplexTypeRecord", + "fields": [ + { + "name": "complexField1", + "type": "string" + }, + { + "name": "complexField2", + "type": "int" + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc new file mode 100644 index 00000000000000..24919d82149653 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/logical_types.avsc @@ -0,0 +1,72 @@ +{ + "type": "record", + "name": "LogicalTypes", + "fields": [ + { + "name": "decimalField", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9, + "scale": 2 + } + }, + { + "name": "decimalFieldWithoutScale", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9 + } + }, + { + "name": "decimalFieldWithoutPrecisionAndScale", + "type": { + "type": "bytes", + "logicalType": "decimal" + } + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "name": "timeMicrosField", + "type": { + "type": "long", + "logicalType": "time-micros" + } + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc new file mode 100644 index 00000000000000..c618299748fab1 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/primitive_types.avsc @@ -0,0 +1,62 @@ +{ + "type": "record", + "name": "PrimitiveType", + "fields": [ + { + "name": "intField", + "type": "int" + }, + { + "name": "intFieldV2", + "type": ["int"] + }, + { + "name": "nullField", + "type": "null" + }, + { + "name": "nullFieldV2", + "type": ["null"] + }, + { + "name": "longField", + "type": "long" + }, + { + "name": "floatField", + "type": "float" + }, + { + "name": "doubleField", + "type": "double" + }, + { + "name": "stringField", + "type": "string" + }, + { + "name": "booleanField", + "type": "boolean" + }, + { + "name": "nullableIntField", + "type": ["null", "int"] + }, + { + "name": "nullableLongField", + "type": ["null", "long"] + }, + { + "name": "nullableStringField", + "type": ["null", "string"] + }, + { + "name": "status", + "type": { + "type": "enum", + "name": "StatusEnum", + "symbols": ["ACTIVE", "INACTIVE", "PENDING"] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc new file mode 100644 index 00000000000000..bd46ae715a4810 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/users_record.avsc @@ -0,0 +1,195 @@ +{ + "type": "record", + "name": "users_record", + "namespace": "hoodie.users", + "fields": [ + { + "name": "_hoodie_commit_time", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_commit_seqno", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_record_key", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_partition_path", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "_hoodie_file_name", + "type": [ + "null", + "string" + ], + "doc": "", + "default": null + }, + { + "name": "user_id", + "type": "string" + }, + { + "name": "name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "address", + "type": [ + "null", + { + "type": "record", + "name": "address", + "namespace": "hoodie.users.users_record", + "fields": [ + { + "name": "street", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "postal_code", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "created_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "contact", + "type": [ + "null", + { + "type": "record", + "name": "contact", + "namespace": "hoodie.users.users_record", + "fields": [ + { + "name": "email", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "phone", + "type": [ + "null", + "string" + ], + "default": null + } + ] + } + ], + "default": null + }, + { + "name": "created_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + }, + { + "name": "updated_at", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-micros" + } + ], + "default": null + }, + { + "name": "props", + "type": [ + "null", + { + "type": "map", + "values": [ + "null", + "int" + ] + } + ], + "default": null + }, + { + "name": "country", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file