diff --git a/CHANGELOG.md b/CHANGELOG.md index 309aae46..90b4eb27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,15 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - - Added support for `sqlserver` (#196) - +- Added support for `sqlserver` (#196) - `datacontract export --format dbml`: Export to [Database Markup Language (DBML)](https://dbml.dbdiagram.io/home/) (#135) - `datacontract export --format avro`: Now supports config map on field level for logicalTypes and default values [Custom Avro Properties](./README.md#custom-avro-properties) - `datacontract import --format avro`: Now supports importing logicalType and default definition on avro files [Custom Avro Properties](./README.md#custom-avro-properties) +- Added support for the `bigqueryType` config option when testing BigQuery tables ### Fixed - Fixed jsonschema export for models with empty object-typed fields (#218) +- Fixed testing BigQuery tables that contain `BOOL` fields ## [0.10.4] - 2024-05-17 diff --git a/datacontract/breaking/breaking.py b/datacontract/breaking/breaking.py index a8bca6d5..d228d734 100644 --- a/datacontract/breaking/breaking.py +++ b/datacontract/breaking/breaking.py @@ -256,13 +256,13 @@ def field_breaking_changes( ) ) continue - - if field_definition_field == "items" and old_field.type == 'array' and new_field.type == 'array': + + if field_definition_field == "items" and old_field.type == "array" and new_field.type == "array": results.extend( field_breaking_changes( old_field=old_value, new_field=new_value, - composition=composition + ['items'], + composition=composition + ["items"], new_path=new_path, include_severities=include_severities, ) diff --git a/datacontract/cli.py b/datacontract/cli.py index e7690747..053f6bc8 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -2,6 +2,7 @@ from importlib import metadata from pathlib import Path from typing import Iterable, Optional +from typing import List import typer from click import Context @@ -10,15 +11,14 @@ from rich.table import Table from typer.core import TyperGroup from typing_extensions import Annotated -from typing import List -from datacontract.catalog.catalog import create_index_html, create_data_contract_html +from datacontract.catalog.catalog import create_index_html, \ + create_data_contract_html from datacontract.data_contract import DataContract -from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException - +from datacontract.init.download_datacontract_file import \ + download_datacontract_file, FileExistsException from datacontract.publish.publish import publish_to_datamesh_manager - console = Console() @@ -230,10 +230,17 @@ class ImportFormat(str, Enum): @app.command(name="import") def import_( format: Annotated[ImportFormat, typer.Option(help="The format of the source file.")], - source: Annotated[Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.")] = None, + source: Annotated[ + Optional[str], typer.Option(help="The path to the file or Glue Database that should be imported.") + ] = None, bigquery_project: Annotated[Optional[str], typer.Option(help="The bigquery project id.")] = None, bigquery_dataset: Annotated[Optional[str], typer.Option(help="The bigquery dataset id.")] = None, - bigquery_table: Annotated[Optional[List[str]], typer.Option(help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset).")] = None, + bigquery_table: 
Annotated[ + Optional[List[str]], + typer.Option( + help="List of table ids to import from the bigquery API (repeat for multiple table ids, leave empty for all tables in the dataset)." + ), + ] = None, ): """ Create a data contract from the given source location. Prints to stdout. diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index b6fea588..d2415f1f 100644 --- a/datacontract/data_contract.py +++ b/datacontract/data_contract.py @@ -6,11 +6,13 @@ import yaml from pyspark.sql import SparkSession -from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes +from datacontract.breaking.breaking import models_breaking_changes, \ + quality_breaking_changes from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import ( check_that_datacontract_contains_valid_server_configuration, ) -from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema +from datacontract.engines.fastjsonschema.check_jsonschema import \ + check_jsonschema from datacontract.engines.soda.check_soda_execute import check_soda_execute from datacontract.export.avro_converter import to_avro_schema_json from datacontract.export.avro_idl_converter import to_avro_idl @@ -18,6 +20,7 @@ from datacontract.export.dbml_converter import to_dbml_diagram from datacontract.export.dbt_converter import to_dbt_models_yaml, \ to_dbt_sources_yaml, to_dbt_staging_sql +from datacontract.export.go_converter import to_go_types from datacontract.export.great_expectations_converter import \ to_great_expectations from datacontract.export.html_export import to_html @@ -25,16 +28,16 @@ from datacontract.export.odcs_converter import to_odcs_yaml from datacontract.export.protobuf_converter import to_protobuf from datacontract.export.pydantic_converter import to_pydantic_model_str -from datacontract.export.go_converter import to_go_types from datacontract.export.rdf_converter import to_rdf_n3 from datacontract.export.sodacl_converter import to_sodacl_yaml from datacontract.export.sql_converter import to_sql_ddl, to_sql_query from datacontract.export.terraform_converter import to_terraform from datacontract.imports.avro_importer import import_avro -from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json +from datacontract.imports.bigquery_importer import import_bigquery_from_api, \ + import_bigquery_from_json from datacontract.imports.glue_importer import import_glue -from datacontract.imports.sql_importer import import_sql from datacontract.imports.jsonschema_importer import import_jsonschema +from datacontract.imports.sql_importer import import_sql from datacontract.integration.publish_datamesh_manager import \ publish_datamesh_manager from datacontract.integration.publish_opentelemetry import publish_opentelemetry @@ -42,12 +45,17 @@ from datacontract.lint.linters.description_linter import DescriptionLinter from datacontract.lint.linters.example_model_linter import ExampleModelLinter from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter -from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter +from datacontract.lint.linters.field_reference_linter import \ + FieldReferenceLinter from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter -from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter -from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter -from 
datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity -from datacontract.model.data_contract_specification import DataContractSpecification, Server +from datacontract.lint.linters.quality_schema_linter import \ + QualityUsesSchemaLinter +from datacontract.lint.linters.valid_constraints_linter import \ + ValidFieldConstraintsLinter +from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \ + Severity +from datacontract.model.data_contract_specification import \ + DataContractSpecification, Server from datacontract.model.exceptions import DataContractException from datacontract.model.run import Run, Check @@ -331,9 +339,13 @@ def export(self, export_format, model: str = "all", rdf_base: str = None, sql_se model_name, model_value = self._check_models_for_export(data_contract, model, export_format) found_server = data_contract.servers.get(self._server) if found_server is None: - raise RuntimeError(f"Export to {export_format} requires selecting a bigquery server from the data contract.") - if found_server.type != 'bigquery': - raise RuntimeError(f"Export to {export_format} requires selecting a bigquery server from the data contract.") + raise RuntimeError( + f"Export to {export_format} requires selecting a bigquery server from the data contract." + ) + if found_server.type != "bigquery": + raise RuntimeError( + f"Export to {export_format} requires selecting a bigquery server from the data contract." + ) return to_bigquery_json(model_name, model_value, found_server) if export_format == "dbml": found_server = data_contract.servers.get(self._server) @@ -392,7 +404,9 @@ def _get_examples_server(self, data_contract, run, tmp_dir): run.log_info(f"Using {server} for testing the examples") return server - def _check_models_for_export(self, data_contract: DataContractSpecification, model: str, export_format: str) -> typing.Tuple[str, str]: + def _check_models_for_export( + self, data_contract: DataContractSpecification, model: str, export_format: str + ) -> typing.Tuple[str, str]: if data_contract.models is None: raise RuntimeError(f"Export to {export_format} requires models in the data contract.") @@ -415,7 +429,14 @@ def _check_models_for_export(self, data_contract: DataContractSpecification, mod return model_name, model_value - def import_from_source(self, format: str, source: typing.Optional[str] = None, bigquery_tables: typing.Optional[typing.List[str]] = None, bigquery_project: typing.Optional[str] = None, bigquery_dataset: typing.Optional[str] = None) -> DataContractSpecification: + def import_from_source( + self, + format: str, + source: typing.Optional[str] = None, + bigquery_tables: typing.Optional[typing.List[str]] = None, + bigquery_project: typing.Optional[str] = None, + bigquery_dataset: typing.Optional[str] = None, + ) -> DataContractSpecification: data_contract_specification = DataContract.init() if format == "sql": @@ -430,7 +451,9 @@ def import_from_source(self, format: str, source: typing.Optional[str] = None, b if source is not None: data_contract_specification = import_bigquery_from_json(data_contract_specification, source) else: - data_contract_specification = import_bigquery_from_api(data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset) + data_contract_specification = import_bigquery_from_api( + data_contract_specification, bigquery_tables, bigquery_project, bigquery_dataset + ) else: print(f"Import format {format} not supported.") diff --git a/datacontract/engines/soda/connections/duckdb.py 
b/datacontract/engines/soda/connections/duckdb.py index 1b973528..b76c7df2 100644 --- a/datacontract/engines/soda/connections/duckdb.py +++ b/datacontract/engines/soda/connections/duckdb.py @@ -87,8 +87,7 @@ def setup_s3_connection(con, server): s3_endpoint = server.endpointUrl.removeprefix("http://").removeprefix("https://") if server.endpointUrl.startswith("http://"): use_ssl = "false" - url_style = 'path' - + url_style = "path" if s3_access_key_id is not None: con.sql(f""" diff --git a/datacontract/engines/soda/connections/sqlserver.py b/datacontract/engines/soda/connections/sqlserver.py index f4511999..8272bd32 100644 --- a/datacontract/engines/soda/connections/sqlserver.py +++ b/datacontract/engines/soda/connections/sqlserver.py @@ -1,8 +1,10 @@ import os import yaml + from datacontract.model.data_contract_specification import Server + def to_sqlserver_soda_configuration(server: Server) -> str: """Serialize server config to soda configuration. @@ -19,21 +21,21 @@ def to_sqlserver_soda_configuration(server: Server) -> str: encrypt: false trust_server_certificate: false driver: ODBC Driver 18 for SQL Server - """ + """ # with service account key, using an external json file soda_configuration = { f"data_source {server.type}": { "type": "sqlserver", "host": server.host, "port": str(server.port), - "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ''), - "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ''), + "username": os.getenv("DATACONTRACT_SQLSERVER_USERNAME", ""), + "password": os.getenv("DATACONTRACT_SQLSERVER_PASSWORD", ""), "database": server.database, "schema": server.schema_, "trusted_connection": os.getenv("DATACONTRACT_SQLSERVER_TRUSTED_CONNECTION", False), "trust_server_certificate": os.getenv("DATACONTRACT_SQLSERVER_TRUST_SERVER_CERTIFICATE", False), - "encrypt": os.getenv("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True), - "driver": server.driver + "encrypt": os.getenv("DATACONTRACT_SQLSERVER_ENCRYPTED_CONNECTION", True), + "driver": server.driver, } } diff --git a/datacontract/export/avro_converter.py b/datacontract/export/avro_converter.py index 6d960d2a..a051e14e 100644 --- a/datacontract/export/avro_converter.py +++ b/datacontract/export/avro_converter.py @@ -83,6 +83,7 @@ def to_avro_type(field: Field, field_name: str) -> str | dict: else: return "bytes" + def to_avro_logical_type(type: str) -> str: if type in ["timestamp", "timestamp_tz"]: return "timestamp-millis" @@ -91,4 +92,4 @@ def to_avro_logical_type(type: str) -> str: elif type in ["date"]: return "date" else: - return "" \ No newline at end of file + return "" diff --git a/datacontract/export/bigquery_converter.py b/datacontract/export/bigquery_converter.py index a8e210aa..ca66eb47 100644 --- a/datacontract/export/bigquery_converter.py +++ b/datacontract/export/bigquery_converter.py @@ -5,24 +5,21 @@ from datacontract.model.data_contract_specification import Model, Field, Server from datacontract.model.exceptions import DataContractException + def to_bigquery_json(model_name: str, model_value: Model, server: Server) -> str: bigquery_table = to_bigquery_schema(model_name, model_value, server) return json.dumps(bigquery_table, indent=2) + def to_bigquery_schema(model_name: str, model_value: Model, server: Server) -> dict: return { "kind": "bigquery#table", - "tableReference": { - "datasetId": server.dataset, - "projectId": server.project, - "tableId": model_name - }, + "tableReference": {"datasetId": server.dataset, "projectId": server.project, "tableId": model_name}, "description": 
model_value.description, - "schema": { - "fields": to_fields_array(model_value.fields) - } + "schema": {"fields": to_fields_array(model_value.fields)}, } + def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]: bq_fields = [] for field_name, field in fields.items(): @@ -32,24 +29,25 @@ def to_fields_array(fields: Dict[str, Field]) -> List[Dict[str, Field]]: def to_field(field_name: str, field: Field) -> dict: - bq_type = map_type_to_bigquery(field.type, field_name) bq_field = { "name": field_name, "type": bq_type, "mode": "REQUIRED" if field.required else "NULLABLE", - "description": field.description + "description": field.description, } # handle arrays - if field.type == 'array': - bq_field["mode"] = 'REPEATED' - if field.items.type == 'object': + if field.type == "array": + bq_field["mode"] = "REPEATED" + if field.items.type == "object": # in case the array type is a complex object, we want to copy all its fields bq_field["fields"] = to_fields_array(field.items.fields) else: # otherwise we make up a structure that gets us a single field of the specified type - bq_field["fields"] = to_fields_array({ f"{field_name}_1": Field(type=field.items.type, required=False, description="")}) + bq_field["fields"] = to_fields_array( + {f"{field_name}_1": Field(type=field.items.type, required=False, description="")} + ) # all of these can carry other fields elif bq_type.lower() in ["record", "struct"]: bq_field["fields"] = to_fields_array(field.fields) @@ -65,6 +63,7 @@ def to_field(field_name: str, field: Field) -> dict: return bq_field + def map_type_to_bigquery(type_str: str, field_name: str) -> str: logger = logging.getLogger(__name__) if type_str.lower() in ["string", "varchar", "text"]: @@ -78,7 +77,7 @@ def map_type_to_bigquery(type_str: str, field_name: str) -> str: elif type_str == "float": return "FLOAT" elif type_str == "boolean": - return "BOOLEAN" + return "BOOL" elif type_str.lower() in ["timestamp", "timestamp_tz"]: return "TIMESTAMP" elif type_str == "date": @@ -94,7 +93,9 @@ def map_type_to_bigquery(type_str: str, field_name: str) -> str: elif type_str == "struct": return "STRUCT" elif type_str == "null": - logger.info(f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING.") + logger.info( + f"Can't properly map {field_name} to bigquery Schema, as 'null' is not supported as a type. Mapping it to STRING." 
+ ) return "STRING" else: raise DataContractException( diff --git a/datacontract/export/dbml_converter.py b/datacontract/export/dbml_converter.py index 9f711846..2b917022 100644 --- a/datacontract/export/dbml_converter.py +++ b/datacontract/export/dbml_converter.py @@ -1,14 +1,15 @@ from datetime import datetime from importlib.metadata import version +from typing import Tuple + import pytz -from datacontract.export.sql_type_converter import convert_to_sql_type + import datacontract.model.data_contract_specification as spec -from typing import Tuple +from datacontract.export.sql_type_converter import convert_to_sql_type def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Server) -> str: - - result = '' + result = "" result += add_generated_info(contract, server) + "\n" result += generate_project_info(contract) + "\n" @@ -18,18 +19,21 @@ def to_dbml_diagram(contract: spec.DataContractSpecification, server: spec.Serve return result + def add_generated_info(contract: spec.DataContractSpecification, server: spec.Server) -> str: tz = pytz.timezone("UTC") now = datetime.now(tz) formatted_date = now.strftime("%b %d %Y") datacontract_cli_version = get_version() - dialect = 'Logical Datacontract' if server is None else server.type + dialect = "Logical Datacontract" if server is None else server.type generated_info = """ Generated at {0} by datacontract-cli version {1} for datacontract {2} ({3}) version {4} Using {5} Types for the field types - """.format(formatted_date, datacontract_cli_version, contract.info.title, contract.id, contract.info.version, dialect) + """.format( + formatted_date, datacontract_cli_version, contract.info.title, contract.id, contract.info.version, dialect + ) comment = """/* {0} @@ -47,22 +51,25 @@ def add_generated_info(contract: spec.DataContractSpecification, server: spec.Se {1} """.format(comment, note) + def get_version() -> str: try: return version("datacontract_cli") except Exception: return "" + def generate_project_info(contract: spec.DataContractSpecification) -> str: return """Project "{0}" {{ Note: "{1}" }}\n - """.format(contract.info.title, ' '.join(contract.info.description.splitlines())) + """.format(contract.info.title, " ".join(contract.info.description.splitlines())) + def generate_table(model_name: str, model: spec.Model, server: spec.Server) -> str: result = """Table "{0}" {{ Note: "{1}" - """.format(model_name, ' '.join(model.description.splitlines())) + """.format(model_name, " ".join(model.description.splitlines())) references = [] @@ -79,31 +86,31 @@ def generate_table(model_name: str, model: spec.Model, server: spec.Server) -> s if len(references) > 0: for ref in references: result += "Ref: {0}\n".format(ref) - + result += "\n" return result -def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]: +def generate_field(field_name: str, field: spec.Field, model_name: str, server: spec.Server) -> Tuple[str, str]: field_attrs = [] if field.primary: - field_attrs.append('pk') + field_attrs.append("pk") if field.unique: - field_attrs.append('unique') - + field_attrs.append("unique") + if field.required: - field_attrs.append('not null') + field_attrs.append("not null") else: - field_attrs.append('null') + field_attrs.append("null") if field.description: - field_attrs.append('Note: "{0}"'.format(' '.join(field.description.splitlines()))) + field_attrs.append('Note: "{0}"'.format(" ".join(field.description.splitlines()))) field_type = field.type if server is None else 
convert_to_sql_type(field, server.type) - field_str = '"{0}" "{1}" [{2}]'.format(field_name, field_type, ','.join(field_attrs)) + field_str = '"{0}" "{1}" [{2}]'.format(field_name, field_type, ",".join(field_attrs)) ref_str = None if (field.references) is not None: # we always assume many to one, as datacontract doesn't really give us more info diff --git a/datacontract/export/html_export.py b/datacontract/export/html_export.py index dbf3a843..4cd8f1df 100644 --- a/datacontract/export/html_export.py +++ b/datacontract/export/html_export.py @@ -2,9 +2,9 @@ import logging from importlib.metadata import version +import jinja_partials import pytz import yaml -import jinja_partials from jinja2 import Environment, PackageLoader, select_autoescape from datacontract.model.data_contract_specification import \ diff --git a/datacontract/export/jsonschema_converter.py b/datacontract/export/jsonschema_converter.py index a17a5644..6a48e3b9 100644 --- a/datacontract/export/jsonschema_converter.py +++ b/datacontract/export/jsonschema_converter.py @@ -1,7 +1,8 @@ import json from typing import Dict -from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field +from datacontract.model.data_contract_specification import \ + DataContractSpecification, Model, Field def to_jsonschemas(data_contract_spec: DataContractSpecification): @@ -18,12 +19,11 @@ def to_jsonschema_json(model_key, model_value: Model) -> str: def to_jsonschema(model_key, model_value: Model) -> dict: - - model = { + model = { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "properties": to_properties(model_value.fields), - "required": to_required(model_value.fields) + "required": to_required(model_value.fields), } if model_value.title: model["title"] = model_value.title @@ -89,10 +89,10 @@ def to_property(field: Field) -> dict: if field.classification: property["classification"] = field.classification - # TODO: all constraints return property + def to_required(fields: Dict[str, Field]): required = [] for field_name, field in fields.items(): diff --git a/datacontract/export/sql_type_converter.py b/datacontract/export/sql_type_converter.py index dedb5d79..4e7ca3c0 100644 --- a/datacontract/export/sql_type_converter.py +++ b/datacontract/export/sql_type_converter.py @@ -1,3 +1,4 @@ +from datacontract.export.bigquery_converter import map_type_to_bigquery from datacontract.model.data_contract_specification import Field @@ -12,6 +13,8 @@ def convert_to_sql_type(field: Field, server_type: str) -> str: return convert_to_duckdb(field) elif server_type == "sqlserver": return convert_type_to_sqlserver(field) + elif server_type == "bigquery": + return convert_type_to_bigquery(field) return field.type @@ -148,15 +151,15 @@ def convert_to_duckdb(field: Field) -> None | str: if type is None: return None if type.lower() in ["string", "varchar", "text"]: - return "VARCHAR" # aliases: VARCHAR, CHAR, BPCHAR, STRING, TEXT, VARCHAR(n) STRING(n), TEXT(n) + return "VARCHAR" # aliases: VARCHAR, CHAR, BPCHAR, STRING, TEXT, VARCHAR(n) STRING(n), TEXT(n) if type.lower() in ["timestamp", "timestamp_tz"]: - return "TIMESTAMP WITH TIME ZONE" # aliases: TIMESTAMPTZ + return "TIMESTAMP WITH TIME ZONE" # aliases: TIMESTAMPTZ if type.lower() in ["timestamp_ntz"]: return "DATETIME" # timestamp with microsecond precision (ignores time zone), aliases: TIMESTAMP if type.lower() in ["date"]: return "DATE" if type.lower() in ["time"]: - return "TIME" # TIME WITHOUT TIME ZONE + return "TIME" # TIME WITHOUT TIME ZONE if 
type.lower() in ["number", "decimal", "numeric"]: # precision and scale not supported by data contract return "DECIMAL" @@ -186,12 +189,12 @@ def convert_type_to_sqlserver(field: Field) -> None | str: return None # If provided sql-server config type, prefer it over default mapping - if sqlserver_type := get_type_config(field, 'sqlserverType'): + if sqlserver_type := get_type_config(field, "sqlserverType"): return sqlserver_type field_type = field_type.lower() if field_type in ["string", "varchar", "text"]: - if field.format == 'uuid': + if field.format == "uuid": return "uniqueidentifier" return "varchar" if field_type in ["timestamp", "timestamp_tz"]: @@ -224,11 +227,26 @@ def convert_type_to_sqlserver(field: Field) -> None | str: if field_type in ["bytes"]: return "binary" if field_type in ["array"]: - raise NotImplementedError('SQLServer does not support array types.') + raise NotImplementedError("SQLServer does not support array types.") return None + +def convert_type_to_bigquery(field: Field) -> None | str: + """Convert from supported datacontract types to equivalent bigquery types""" + field_type = field.type + if not field_type: + return None + + # If provided bigquery config type, prefer it over default mapping + if bigquery_type := get_type_config(field, "bigqueryType"): + return bigquery_type + + field_type = field_type.lower() + return map_type_to_bigquery(field_type, field.title) + + def get_type_config(field: Field, config_attr: str) -> dict[str, str] | None: """Retrieve type configuration if provided in datacontract.""" if not field.config: return None - return field.config.get(config_attr, None) \ No newline at end of file + return field.config.get(config_attr, None) diff --git a/datacontract/imports/avro_importer.py b/datacontract/imports/avro_importer.py index 9a30b282..6265fde1 100644 --- a/datacontract/imports/avro_importer.py +++ b/datacontract/imports/avro_importer.py @@ -37,26 +37,28 @@ def import_avro(data_contract_specification: DataContractSpecification, source: return data_contract_specification + def handle_config_avro_custom_properties(field, imported_field): - if field.get_prop('logicalType') is not None: + if field.get_prop("logicalType") is not None: if imported_field.config is None: imported_field.config = {} - imported_field.config["avroLogicalType"] = field.get_prop('logicalType') - + imported_field.config["avroLogicalType"] = field.get_prop("logicalType") + if field.default is not None: if imported_field.config is None: imported_field.config = {} imported_field.config["avroDefault"] = field.default + def import_record_fields(record_fields): imported_fields = {} for field in record_fields: imported_field = Field() imported_field.required = True imported_field.description = field.doc - + handle_config_avro_custom_properties(field, imported_field) - + # Determine field type and handle nested structures if field.type.type == "record": imported_field.type = "object" diff --git a/datacontract/imports/bigquery_importer.py b/datacontract/imports/bigquery_importer.py index 8ff2b8db..3ab0aad6 100644 --- a/datacontract/imports/bigquery_importer.py +++ b/datacontract/imports/bigquery_importer.py @@ -1,14 +1,16 @@ import json - from typing import List +from google.cloud import bigquery + from datacontract.model.data_contract_specification import \ DataContractSpecification, Model, Field from datacontract.model.exceptions import DataContractException -from google.cloud import bigquery -def import_bigquery_from_json(data_contract_specification: 
DataContractSpecification, source: str) -> DataContractSpecification: +def import_bigquery_from_json( + data_contract_specification: DataContractSpecification, source: str +) -> DataContractSpecification: try: with open(source, "r") as file: bigquery_schema = json.loads(file.read()) @@ -22,7 +24,13 @@ def import_bigquery_from_json(data_contract_specification: DataContractSpecifica ) return convert_bigquery_schema(data_contract_specification, bigquery_schema) -def import_bigquery_from_api(data_contract_specification: DataContractSpecification, bigquery_tables: List[str], bigquery_project: str, bigquery_dataset: str) -> DataContractSpecification: + +def import_bigquery_from_api( + data_contract_specification: DataContractSpecification, + bigquery_tables: List[str], + bigquery_project: str, + bigquery_dataset: str, +) -> DataContractSpecification: client = bigquery.Client(project=bigquery_project) if bigquery_tables is None: @@ -33,14 +41,14 @@ def import_bigquery_from_api(data_contract_specification: DataContractSpecificat api_table = client.get_table("{}.{}.{}".format(bigquery_project, bigquery_dataset, table)) except ValueError as e: - raise DataContractException( + raise DataContractException( type="schema", result="failed", name="Invalid table name for bigquery API", reason=f"Tablename {table} is invalid for the bigquery API", original_exception=e, engine="datacontract", - ) + ) if api_table is None: raise DataContractException( @@ -55,6 +63,7 @@ def import_bigquery_from_api(data_contract_specification: DataContractSpecificat return data_contract_specification + def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]: table_names = [] api_tables = client.list_tables(dataset) @@ -63,7 +72,10 @@ def fetch_table_names(client: bigquery.Client, dataset: str) -> List[str]: return table_names -def convert_bigquery_schema(data_contract_specification: DataContractSpecification, bigquery_schema: dict) -> DataContractSpecification: + +def convert_bigquery_schema( + data_contract_specification: DataContractSpecification, bigquery_schema: dict +) -> DataContractSpecification: if data_contract_specification.models is None: data_contract_specification.models = {} @@ -73,10 +85,7 @@ def convert_bigquery_schema(data_contract_specification: DataContractSpecificati # what exactly leads to friendlyName being set table_id = bigquery_schema.get("tableReference").get("tableId") - data_contract_specification.models[table_id] = Model( - fields=fields, - type='table' - ) + data_contract_specification.models[table_id] = Model(fields=fields, type="table") # Copy the description, if it exists if bigquery_schema.get("description") is not None: @@ -88,6 +97,7 @@ def convert_bigquery_schema(data_contract_specification: DataContractSpecificati return data_contract_specification + def import_table_fields(table_fields): imported_fields = {} for field in table_fields: @@ -95,7 +105,7 @@ def import_table_fields(table_fields): imported_fields[field_name] = Field() imported_fields[field_name].required = field.get("mode") == "REQUIRED" imported_fields[field_name].description = field.get("description") - + if field.get("type") == "RECORD": imported_fields[field_name].type = "object" imported_fields[field_name].fields = import_table_fields(field.get("fields")) @@ -106,7 +116,9 @@ def import_table_fields(table_fields): # This is a range of date/datetime/timestamp but multiple values # So we map it to an array imported_fields[field_name].type = "array" - imported_fields[field_name].items = Field(type = 
map_type_from_bigquery(field["rangeElementType"].get("type"))) + imported_fields[field_name].items = Field( + type=map_type_from_bigquery(field["rangeElementType"].get("type")) + ) else: # primitive type imported_fields[field_name].type = map_type_from_bigquery(field.get("type")) @@ -115,7 +127,7 @@ def import_table_fields(table_fields): # spec it is only valid for strings if field.get("maxLength") is not None: imported_fields[field_name].maxLength = int(field.get("maxLength")) - + if field.get("type") == "NUMERIC" or field.get("type") == "BIGNUMERIC": if field.get("precision") is not None: imported_fields[field_name].precision = int(field.get("precision")) @@ -125,6 +137,7 @@ def import_table_fields(table_fields): return imported_fields + def map_type_from_bigquery(bigquery_type_str: str): if bigquery_type_str == "STRING": return "string" diff --git a/datacontract/imports/jsonschema_importer.py b/datacontract/imports/jsonschema_importer.py index c8ef2977..9ee8f279 100644 --- a/datacontract/imports/jsonschema_importer.py +++ b/datacontract/imports/jsonschema_importer.py @@ -11,55 +11,55 @@ def convert_json_schema_properties(properties, is_definition=False): fields = {} for field_name, field_schema in properties.items(): field_kwargs = {} - field_type = field_schema.get('type') + field_type = field_schema.get("type") # Determine if the field is required and set the type to the non-null option if applicable - if isinstance(field_type, list) and 'null' in field_type: - field_kwargs['required'] = False - non_null_types = [t for t in field_type if t != 'null'] + if isinstance(field_type, list) and "null" in field_type: + field_kwargs["required"] = False + non_null_types = [t for t in field_type if t != "null"] if non_null_types: field_type = non_null_types[0] else: field_type = None else: - field_kwargs['required'] = True + field_kwargs["required"] = True # Set the non-null type if field_type: - field_kwargs['type'] = field_type + field_kwargs["type"] = field_type for key, value in field_schema.items(): match key: - case 'title': - field_kwargs['title'] = value - case 'type': + case "title": + field_kwargs["title"] = value + case "type": pass # type is already handled above - case 'format': - field_kwargs['format'] = value - case 'description': - field_kwargs['description'] = value - case 'pattern': - field_kwargs['pattern'] = value - case 'minLength': - field_kwargs['minLength'] = value - case 'maxLength': - field_kwargs['maxLength'] = value - case 'minimum': - field_kwargs['minimum'] = value - case 'exclusiveMinimum': - field_kwargs['exclusiveMinimum'] = value - case 'maximum': - field_kwargs['maximum'] = value - case 'exclusiveMaximum': - field_kwargs['exclusiveMaximum'] = value - case 'enum': - field_kwargs['enum'] = value - case 'tags': - field_kwargs['tags'] = value - case 'properties': - field_kwargs['fields'] = convert_json_schema_properties(value) - case 'items': - field_kwargs['items'] = convert_json_schema_properties(value) + case "format": + field_kwargs["format"] = value + case "description": + field_kwargs["description"] = value + case "pattern": + field_kwargs["pattern"] = value + case "minLength": + field_kwargs["minLength"] = value + case "maxLength": + field_kwargs["maxLength"] = value + case "minimum": + field_kwargs["minimum"] = value + case "exclusiveMinimum": + field_kwargs["exclusiveMinimum"] = value + case "maximum": + field_kwargs["maximum"] = value + case "exclusiveMaximum": + field_kwargs["exclusiveMaximum"] = value + case "enum": + field_kwargs["enum"] = value + case 
"tags": + field_kwargs["tags"] = value + case "properties": + field_kwargs["fields"] = convert_json_schema_properties(value) + case "items": + field_kwargs["items"] = convert_json_schema_properties(value) field = Field(**field_kwargs) fields[field_name] = field @@ -78,53 +78,53 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so validator(json_schema) model = Model( - description=json_schema.get('description'), - type=json_schema.get('type'), - title=json_schema.get('title'), - fields=convert_json_schema_properties(json_schema.get('properties', {})) + description=json_schema.get("description"), + type=json_schema.get("type"), + title=json_schema.get("title"), + fields=convert_json_schema_properties(json_schema.get("properties", {})), ) - data_contract_specification.models[json_schema.get('title', 'default_model')] = model - - if 'definitions' in json_schema: - for def_name, def_schema in json_schema['definitions'].items(): + data_contract_specification.models[json_schema.get("title", "default_model")] = model + + if "definitions" in json_schema: + for def_name, def_schema in json_schema["definitions"].items(): definition_kwargs = {} - + for key, value in def_schema.items(): match key: - case 'domain': - definition_kwargs['domain'] = value - case 'title': - definition_kwargs['title'] = value - case 'description': - definition_kwargs['description'] = value - case 'type': - definition_kwargs['type'] = value - case 'enum': - definition_kwargs['enum'] = value - case 'format': - definition_kwargs['format'] = value - case 'minLength': - definition_kwargs['minLength'] = value - case 'maxLength': - definition_kwargs['maxLength'] = value - case 'pattern': - definition_kwargs['pattern'] = value - case 'minimum': - definition_kwargs['minimum'] = value - case 'exclusiveMinimum': - definition_kwargs['exclusiveMinimum'] = value - case 'maximum': - definition_kwargs['maximum'] = value - case 'exclusiveMaximum': - definition_kwargs['exclusiveMaximum'] = value - case 'pii': - definition_kwargs['pii'] = value - case 'classification': - definition_kwargs['classification'] = value - case 'tags': - definition_kwargs['tags'] = value - case 'properties': - definition_kwargs['fields'] = convert_json_schema_properties(value, is_definition=True) + case "domain": + definition_kwargs["domain"] = value + case "title": + definition_kwargs["title"] = value + case "description": + definition_kwargs["description"] = value + case "type": + definition_kwargs["type"] = value + case "enum": + definition_kwargs["enum"] = value + case "format": + definition_kwargs["format"] = value + case "minLength": + definition_kwargs["minLength"] = value + case "maxLength": + definition_kwargs["maxLength"] = value + case "pattern": + definition_kwargs["pattern"] = value + case "minimum": + definition_kwargs["minimum"] = value + case "exclusiveMinimum": + definition_kwargs["exclusiveMinimum"] = value + case "maximum": + definition_kwargs["maximum"] = value + case "exclusiveMaximum": + definition_kwargs["exclusiveMaximum"] = value + case "pii": + definition_kwargs["pii"] = value + case "classification": + definition_kwargs["classification"] = value + case "tags": + definition_kwargs["tags"] = value + case "properties": + definition_kwargs["fields"] = convert_json_schema_properties(value, is_definition=True) definition = Definition(name=def_name, **definition_kwargs) data_contract_specification.definitions[def_name] = definition @@ -134,7 +134,7 @@ def import_jsonschema(data_contract_specification: 
DataContractSpecification, so type="schema", name="Parse json schema", reason=f"Failed to parse json schema from {source}: {e}", - engine="datacontract" + engine="datacontract", ) except Exception as e: @@ -146,5 +146,4 @@ def import_jsonschema(data_contract_specification: DataContractSpecification, so original_exception=e, ) - return data_contract_specification diff --git a/datacontract/imports/sql_importer.py b/datacontract/imports/sql_importer.py index 10e8108c..6248f145 100644 --- a/datacontract/imports/sql_importer.py +++ b/datacontract/imports/sql_importer.py @@ -1,6 +1,7 @@ from simple_ddl_parser import parse_from_file -from datacontract.model.data_contract_specification import DataContractSpecification, Model, Field +from datacontract.model.data_contract_specification import \ + DataContractSpecification, Model, Field def import_sql(data_contract_specification: DataContractSpecification, format: str, source: str): @@ -45,7 +46,7 @@ def map_type_from_sql(sql_type: str): return None sql_type_normed = sql_type.lower().strip() - + if sql_type_normed.startswith("varchar"): return "varchar" elif sql_type_normed.startswith("string"): @@ -69,6 +70,6 @@ def map_type_from_sql(sql_type: str): elif sql_type_normed == "datetime2": return "timestamp_ntz" elif sql_type_normed == "datetimeoffset": - return "timestamp_tz" + return "timestamp_tz" else: return "variant" diff --git a/datacontract/model/data_contract_specification.py b/datacontract/model/data_contract_specification.py index 67799879..33d60d78 100644 --- a/datacontract/model/data_contract_specification.py +++ b/datacontract/model/data_contract_specification.py @@ -120,38 +120,45 @@ class Quality(pyd.BaseModel): type: str = None specification: str | object = None + class Availability(pyd.BaseModel): description: Optional[str] = None percentage: Optional[str] = None + class Retention(pyd.BaseModel): description: Optional[str] = None period: Optional[str] = None unlimited: Optional[bool] = None timestampField: Optional[str] = None + class Latency(pyd.BaseModel): description: Optional[str] = None threshold: Optional[str] = None sourceTimestampField: Optional[str] = None processedTimestampField: Optional[str] = None + class Freshness(pyd.BaseModel): description: Optional[str] = None threshold: Optional[str] = None timestampField: Optional[str] = None + class Frequency(pyd.BaseModel): description: Optional[str] = None type: Optional[str] = None interval: Optional[str] = None cron: Optional[str] = None + class Support(pyd.BaseModel): description: Optional[str] = None time: Optional[str] = None responseTime: Optional[str] = None + class Backup(pyd.BaseModel): description: Optional[str] = None interval: Optional[str] = None @@ -159,6 +166,7 @@ class Backup(pyd.BaseModel): recoveryTime: Optional[str] = None recoveryPoint: Optional[str] = None + class ServiceLevel(pyd.BaseModel): availability: Optional[Availability] = None retention: Optional[Retention] = None @@ -168,6 +176,7 @@ class ServiceLevel(pyd.BaseModel): support: Optional[Support] = None backup: Optional[Backup] = None + class DataContractSpecification(pyd.BaseModel): dataContractSpecification: str = None id: str = None diff --git a/tests/fixtures/bigquery/export/bq.txt b/tests/fixtures/bigquery/export/bq.txt new file mode 100644 index 00000000..a65dbeba --- /dev/null +++ b/tests/fixtures/bigquery/export/bq.txt @@ -0,0 +1,8 @@ +# When using the bq CLI tool to create a table in BigQuery, you only need the schema fields. 
+jq '.schema.fields' tests/fixtures/bigquery/export/bq_table_schema.json > schema_fields.json + +# set the correct project_id, dataset_id and table_id +bq mk --table project_id:dataset_id.table_id schema_fields.json + +# upload some test data +bq query --use_legacy_sql=false 'INSERT INTO `project_id:dataset_id.table_id` (string_field, required_string_field, maxlength_string_field, maxlength_required_string_field, varchar_field, text_field, bytes_field, int_field, integer_field, long_field, bigint_field, float_field, boolean_field, timestamp_field, timestamp_tz_field, timestamp_ntz_field, date_field, number_field, decimal_field, numeric_field, double_field, null_field, object_field, record_field, struct_field, string_array_field, int_array_field, complex_array_field) VALUES ("sample string", "required string", "sample maxlength string", "required maxlength string", "sample varchar", "sample text", FROM_BASE64("Ynl0ZXMgZGF0YQ=="), 123, 456, 789012345678, 987654321, 123.45, true, "2023-05-26T12:00:00Z", "2023-05-26T12:00:00Z", "12:00:00", "2023-05-26", 12.345, 12.345, 12.345, 12.345, "sample null value", STRUCT("required subfield", "optional subfield"), STRUCT(true, DATE "2023-05-26"), STRUCT(FROM_BASE64("Ynl0ZXMgZGF0YQ=="), 123), [STRUCT("sample string 1")], [STRUCT(123)], [STRUCT(true, BIGNUMERIC "12.345")]), ("another sample", "another required string", "another sample maxlength string", "another required maxlength string", "another sample varchar", "another sample text", FROM_BASE64("YW5vdGhlciBieXRlcyBkYXRh"), 789, 1011, 121314151617, 1617181920, 678.90, false, "2024-05-26T12:00:00Z", "2024-05-26T12:00:00Z", "13:00:00", "2024-05-26", 67.890, 67.890, 67.890, 67.890, "another null value", STRUCT("another required subfield", "another optional subfield"), STRUCT(false, DATE "2024-05-26"), STRUCT(FROM_BASE64("YW5vdGhlciBieXRlcyBkYXRh"), 456), [STRUCT("sample string 2")], [STRUCT(456)], [STRUCT(false, BIGNUMERIC "67.890")]);' diff --git a/tests/test_breaking.py b/tests/test_breaking.py index c370a230..b299539d 100644 --- a/tests/test_breaking.py +++ b/tests/test_breaking.py @@ -223,6 +223,7 @@ def test_definition_updated(): assert "field_description_updated" not in output assert "field_tags_updated" not in output + def test_array_fields_updated(): result = runner.invoke( app, @@ -241,12 +242,12 @@ def test_array_fields_updated(): assert "in models.DataType.fields.Records.items.fields.Field1.pii" in output assert "changed from `false` to `true`" in output - assert "field_classification_updated" in output - assert "changed from `Unclassified` to `classified`" in output - - assert "field_type_updated" in output - assert "changed from `string` to `int`" in output + assert "field_classification_updated" in output + assert "changed from `Unclassified` to `classified`" in output + + assert "field_type_updated" in output + assert "changed from `string` to `int`" in output assert "field_description_removed" not in output assert "field_tags_removed" not in output - assert "field_enum_removed" not in output \ No newline at end of file + assert "field_enum_removed" not in output diff --git a/tests/test_download_datacontract_file.py b/tests/test_download_datacontract_file.py index 72f517b6..5b9d1170 100644 --- a/tests/test_download_datacontract_file.py +++ b/tests/test_download_datacontract_file.py @@ -34,8 +34,9 @@ def test_download_datacontract_file_file_exists(tmp_path): def test_download_datacontract_file_overwrite_file(tmp_path): datacontract_test_path = tmp_path / "datacontract.yaml" 
runner.invoke(app, ["init", str(datacontract_test_path)]) - result = runner.invoke(app, - ["init", str(datacontract_test_path), "--template", _custom_template_url, "--overwrite"]) + result = runner.invoke( + app, ["init", str(datacontract_test_path), "--template", _custom_template_url, "--overwrite"] + ) assert result.exit_code == 0 _compare_test_datacontract_with(str(datacontract_test_path), _custom_template_url) diff --git a/tests/test_export_avro.py b/tests/test_export_avro.py index 99044cad..a8e744ed 100644 --- a/tests/test_export_avro.py +++ b/tests/test_export_avro.py @@ -5,7 +5,8 @@ from datacontract.cli import app from datacontract.export.avro_converter import to_avro_schema_json -from datacontract.model.data_contract_specification import DataContractSpecification +from datacontract.model.data_contract_specification import \ + DataContractSpecification logging.basicConfig(level=logging.DEBUG, force=True) @@ -26,6 +27,7 @@ def test_to_avro_schema(): assert json.loads(result) == json.loads(expected_avro_schema) + def test_to_avro_schema_with_logicalTypes(): data_contract = DataContractSpecification.from_file("fixtures/avro/export/datacontract_logicalType.yaml") with open("fixtures/avro/export/datacontract_logicalType.avsc") as file: diff --git a/tests/test_export_dbml.py b/tests/test_export_dbml.py index 407daa67..d2584938 100644 --- a/tests/test_export_dbml.py +++ b/tests/test_export_dbml.py @@ -1,6 +1,6 @@ +import logging from datetime import datetime from importlib.metadata import version -import logging import pytz from typer.testing import CliRunner @@ -16,11 +16,15 @@ def test_cli(): result = runner.invoke(app, ["export", "./fixtures/dbml/datacontract.yaml", "--format", "dbml"]) assert result.exit_code == 0 + def test_cli_with_server(): runner = CliRunner() - result = runner.invoke(app, ["export", "./fixtures/dbml/datacontract.yaml", "--format", "dbml", "--server", "production"]) + result = runner.invoke( + app, ["export", "./fixtures/dbml/datacontract.yaml", "--format", "dbml", "--server", "production"] + ) assert result.exit_code == 0 + def test_dbml_export(): data_contract = DataContract(data_contract_file="fixtures/dbml/datacontract.yaml") assert data_contract.lint(enabled_linters="none").has_passed() @@ -80,13 +84,14 @@ def test_dbml_export(): }} Ref: line_items.order_id > orders.order_id """.format(formatted_date, datacontract_cli_version) - + assert result.strip() == expected.strip() + def test_dbml_export_with_server(): - data_contract = DataContract(data_contract_file="fixtures/dbml/datacontract.yaml", server='production') + data_contract = DataContract(data_contract_file="fixtures/dbml/datacontract.yaml", server="production") assert data_contract.lint(enabled_linters="none").has_passed() - + result = data_contract.export("dbml") tz = pytz.timezone("UTC") @@ -142,5 +147,5 @@ def test_dbml_export_with_server(): }} Ref: line_items.order_id > orders.order_id """.format(formatted_date, datacontract_cli_version) - + assert result.strip() == expected.strip() diff --git a/tests/test_export_html.py b/tests/test_export_html.py index 5e245a83..d6514ae6 100644 --- a/tests/test_export_html.py +++ b/tests/test_export_html.py @@ -20,14 +20,17 @@ def test_cli(): def test_cli_with_output(tmp_path: Path): runner = CliRunner() - result = runner.invoke(app, [ - "export", - "./fixtures/export/datacontract.yaml", - "--format", - "html", - "--output", - tmp_path / "datacontract.html" - ]) + result = runner.invoke( + app, + [ + "export", + "./fixtures/export/datacontract.yaml", + "--format", 
+ "html", + "--output", + tmp_path / "datacontract.html", + ], + ) assert result.exit_code == 0 assert os.path.exists(tmp_path / "datacontract.html") diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py index 1dea202b..b6392b5d 100644 --- a/tests/test_import_avro.py +++ b/tests/test_import_avro.py @@ -227,6 +227,7 @@ def test_import_avro_nested_records_with_arrays(): assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + def test_import_avro_logicalTypes(): result = DataContract().import_from_source("avro", "fixtures/avro/data/logicalTypes.avsc") @@ -270,4 +271,4 @@ def test_import_avro_logicalTypes(): """ print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) - assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() \ No newline at end of file + assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() diff --git a/tests/test_import_bigquery.py b/tests/test_import_bigquery.py index 289d1986..49be8f8f 100644 --- a/tests/test_import_bigquery.py +++ b/tests/test_import_bigquery.py @@ -1,8 +1,8 @@ import logging -import yaml +from unittest.mock import patch +import yaml from typer.testing import CliRunner -from unittest.mock import patch from datacontract.cli import app from datacontract.data_contract import DataContract @@ -34,7 +34,8 @@ def test_import_bigquery_schema(): assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() -@patch('google.cloud.bigquery.Client.get_table') + +@patch("google.cloud.bigquery.Client.get_table") def test_import_from_api(mock_client): # Set up mocks # mock_table = Mock() @@ -70,15 +71,15 @@ def test_import_from_api(mock_client): # 'numTotalLogicalBytes': '0', # 'numActiveLogicalBytes': '0', # 'numLongTermLogicalBytes': '0'} - + # mock_client.response_value = mock_table - # Call the API Import + # Call the API Import # result = DataContract().import_from_source(format="bigquery", source=None, tables=["Test_One"], bt_project_id= - # "project_id", bt_dataset_id="dataset_id") + # "project_id", bt_dataset_id="dataset_id") # print("Result:\n", result) # TODO: This really should have a proper test, but I've not been able to set the mocks up # correctly – maybe there's some help to be had? # Anyway, the serialized dict above is a real response as captured from the Bigquery API and should # be sufficient to check the behavior of this way of importing things - assert True is True \ No newline at end of file + assert True is True diff --git a/tests/test_test_examples_json.py b/tests/test_test_examples_json.py index 45edc87a..f2225e42 100644 --- a/tests/test_test_examples_json.py +++ b/tests/test_test_examples_json.py @@ -22,6 +22,7 @@ def test_json(): print(run.result) assert run.result == "passed" + def test_with_service_level(): data_contract = DataContract(data_contract_file="fixtures/examples/datacontract_servicelevels.yaml", examples=True) run = data_contract.test() diff --git a/tests/test_test_s3_json_remote.py b/tests/test_test_s3_json_remote.py index b4703301..ffe6947c 100644 --- a/tests/test_test_s3_json_remote.py +++ b/tests/test_test_s3_json_remote.py @@ -9,6 +9,7 @@ datacontract = "fixtures/s3-json-remote/datacontract.yaml" + # Disabled, as this test fails when another local s3 test runs, not clear why. 
# Maybe with env variables or the DuckDB connection... def _test_test_s3_json(): diff --git a/tests/test_test_sqlserver.py b/tests/test_test_sqlserver.py index f1b2a429..07169dbf 100644 --- a/tests/test_test_sqlserver.py +++ b/tests/test_test_sqlserver.py @@ -15,6 +15,7 @@ sql_server = SqlServerContainer() SQL_SERVER_PORT: int = 1433 + @pytest.fixture(scope="module", autouse=True) def mssql_container(request): sql_server.start() @@ -24,7 +25,8 @@ def remove_container(): request.addfinalizer(remove_container) -@pytest.mark.skipif(not os.getenv('CI'), reason="Skipping test outside CI/CD environment") + +@pytest.mark.skipif(not os.getenv("CI"), reason="Skipping test outside CI/CD environment") def test_test_sqlserver(mssql_container, monkeypatch): monkeypatch.setenv("DATACONTRACT_SQLSERVER_USERNAME", sql_server.SQLSERVER_USER) monkeypatch.setenv("DATACONTRACT_SQLSERVER_PASSWORD", sql_server.SQLSERVER_PASSWORD)