Skip to content

Commit

Permalink
Merge pull request #2 from harrystech/ynaim94/deng-2960/modify-types-…
Browse files Browse the repository at this point in the history
…airtable

Modifying the singer types and add more tests
  • Loading branch information
ynaim94-harrys authored Jul 8, 2024
2 parents 7176b9e + 30f4afb commit 23cc819
Show file tree
Hide file tree
Showing 8 changed files with 386 additions and 149 deletions.
271 changes: 142 additions & 129 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pytest-cov = "^5.0.0"
types-python-slugify = "^8.0.2.20240310"

commitizen = "^3.26.0"
requests-mock = "^1.12.1"
[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "$version"
Expand Down
9 changes: 8 additions & 1 deletion tap_airtable/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,20 @@ def get_bases(self, base_ids: Optional[list[str]] = None) -> list[AirtableBase]:
AirtableTable(
table["id"],
table["name"],
[AirtableField(field["type"], field["id"], field["name"]) for field in table["fields"]],
[AirtableField(self.map_field_type(field), field["id"], field["name"], field["type"] == "formula") for field in table["fields"]]
)
)
bases.append(AirtableBase(base["id"], base["name"], tables))

return bases

def map_field_type(self, field: dict[str, Any]) -> Any:
    """Return the type descriptor used for an Airtable field schema entry.

    Args:
        field: One field entry from a table schema. It must contain
            ``"type"``; for formula fields the result type is read from
            ``field["options"]["result"]["type"]`` when that path exists.

    Returns:
        ``field["type"]`` unchanged for non-formula fields. For a formula
        field with a declared result type, the two-element list
        ``[result_type, "formula"]``. For a formula field without a usable
        result type, the plain string ``"formula"``.
    """
    field_type = field["type"]
    if field_type != "formula":
        return field_type

    # The schema entry may omit "options" or carry a null "result"
    # (e.g. for an invalid formula); treat both as "no result type"
    # instead of raising KeyError/TypeError.
    result = (field.get("options") or {}).get("result") or {}
    result_type = result.get("type")
    if result_type is None:
        return field_type
    return [result_type, "formula"]

def get_records(self, base_id: str, table_id: str, page_size: int = 100) -> Iterable[dict[str, Any]]:
offset: dict[str, str] = {}
while True:
Expand Down
15 changes: 12 additions & 3 deletions tap_airtable/entities.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Any, cast
from typing import Any, Union, cast

from singer_sdk import typing as th
from slugify import slugify
Expand All @@ -9,13 +9,19 @@

@dataclass
class AirtableField:
field_type: str
field_type: Union[str, list[str]]
id: str
name: str
is_formula: bool = False

@property
def singer_type(self) -> type[th.JSONTypeHelper[Any]]:
return cast(type[th.JSONTypeHelper[Any]], AIRTABLE_TO_SINGER_MAPPING[self.field_type])
if isinstance(self.field_type, list) and self.is_formula:
# Make it the union of each type in the list
return cast(type[th.JSONTypeHelper[Any]], th.CustomType({'type': [type_ for field_type in self.field_type for type_ in AIRTABLE_TO_SINGER_MAPPING[field_type].type_dict['type']]}))
else:
return cast(type[th.JSONTypeHelper[Any]], AIRTABLE_TO_SINGER_MAPPING[self.field_type])


def to_singer_property(self) -> th.Property[Any]:
return th.Property(slugify(self.name, separator="_"), self.singer_type, required=False)
Expand All @@ -34,6 +40,9 @@ def to_singer_schema(self) -> th.PropertiesList:
*(field.to_singer_property() for field in self.fields),
)

def get_formula_fields(self) -> list[str]:
    """Return the names of the fields in ``self.fields`` flagged ``is_formula``.

    The names are returned in the same order as the fields appear.
    """
    formula_only = (candidate for candidate in self.fields if candidate.is_formula)
    return [candidate.name for candidate in formula_only]


@dataclass
class AirtableBase:
Expand Down
20 changes: 18 additions & 2 deletions tap_airtable/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,31 @@ class BaseAirtableStream(Stream):
original_airtable_table: AirtableTable
base_id: str
replication_key = None

def get_records(
    self, context: Optional[dict[str, Any]]
) -> Iterable[Union[dict[str, Any], tuple[dict[str, Any], dict[str, Any]]]]:
    """Yield one flat dict per Airtable record, with slugified keys.

    Each record's ``"fields"`` mapping is merged over the record's remaining
    top-level keys. Values of formula fields are first passed through
    ``_handle_special_values``.

    Args:
        context: Not used by this implementation.

    Yields:
        A dict whose keys are ``slugify(key, separator="_")`` of the merged
        record/field keys.
    """
    client = AirtableClient(self.config["token"])
    # Build the lookup once: membership is tested for every field of every record.
    formula_fields = frozenset(self.original_airtable_table.get_formula_fields())

    for record in client.get_records(self.base_id, self.original_airtable_table.id):
        # Build a fresh mapping rather than rewriting the dict being iterated.
        fields = {
            key: self._handle_special_values(value) if key in formula_fields else value
            for key, value in record.pop("fields", {}).items()
        }
        yield {slugify(key, separator="_"): value for key, value in {**record, **fields}.items()}


def _handle_special_values(self, value: Any) -> Any:
if isinstance(value, dict):
if 'error' in value and value['error'] == '#ERROR!':
value = '#ERROR!'
elif 'specialValue' in value:
if value['specialValue'] == 'NaN':
value = str(float('nan'))
elif value['specialValue'] == 'Infinity':
value = str(float('inf'))
return value

def airtable_stream_factory(table_base_id: str, table: AirtableTable) -> type[BaseAirtableStream]:
class AirtableStream(BaseAirtableStream):
Expand Down
32 changes: 18 additions & 14 deletions tap_airtable/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

from singer_sdk import typing as th

# Define custom types for Airtable-specific fields
AirtableThumbnail = th.ObjectType(
th.Property("url", th.StringType), th.Property("height", th.NumberType), th.Property("width", th.NumberType)
th.Property("url", th.StringType),
th.Property("height", th.NumberType),
th.Property("width", th.NumberType)
)

AirtableThumbnailSet = th.ObjectType(
Expand Down Expand Up @@ -31,39 +34,40 @@
th.Property("profilePicUrl", th.StringType),
)

AIRTABLE_TO_SINGER_MAPPING: dict[str, Any] = {
# Mapping of Airtable field types to Singer field types
AIRTABLE_TO_SINGER_MAPPING: dict[str, th.JSONTypeHelper[Any]] = {
"singleLineText": th.StringType,
"email": th.StringType,
"url": th.StringType,
"multilineText": th.StringType,
"number": th.NumberType,
"percent": th.StringType,
"currency": th.StringType,
"percent": th.NumberType, # Percent values can be treated as numbers
"currency": th.NumberType, # Currency values can be treated as numbers
"singleSelect": th.StringType,
"multipleSelects": th.ArrayType(th.StringType),
"singleCollaborator": th.StringType,
"singleCollaborator": th.StringType, # Simplified to StringType for the ID or name
"multipleCollaborators": th.ArrayType(AirtableCollaborator),
"multipleRecordLinks": th.ArrayType(th.StringType),
"date": th.DateType,
"dateTime": th.DateTimeType,
"phoneNumber": th.StringType,
"multipleAttachments": th.ArrayType(AirtableAttachment),
"checkbox": th.BooleanType,
"formula": th.StringType,
"formula": th.StringType, # Formulas can be of various types; simplified to StringType
"createdTime": th.DateTimeType,
"rollup": th.StringType,
"count": th.StringType,
"lookup": th.StringType,
"rollup": th.CustomType({'type': ['number', 'string']}), # Rollups can be either string or number
"count": th.IntegerType, # Counts are numeric
"lookup": th.StringType, # Lookups can be various types; simplified to StringType
"multipleLookupValues": th.ArrayType(th.StringType),
"autoNumber": th.StringType,
"autoNumber": th.IntegerType, # AutoNumbers are numeric
"barcode": th.StringType,
"rating": th.StringType,
"rating": th.IntegerType, # Ratings are typically numeric
"richText": th.StringType,
"duration": th.StringType,
"duration": th.NumberType, # Duration can be treated as a number (seconds)
"lastModifiedTime": th.DateTimeType,
"button": th.StringType,
"createdBy": th.StringType,
"lastModifiedBy": th.StringType,
"createdBy": th.StringType, # CreatedBy can be a user ID or name
"lastModifiedBy": th.StringType, # LastModifiedBy can be a user ID or name
"externalSyncSource": th.StringType,
"aiText": th.StringType,
}
125 changes: 125 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pytest
import requests
import requests_mock
from tap_airtable.client import AirtableClient, NonRetryableError

@pytest.fixture
def airtable_client():
    """Provide an AirtableClient constructed with a placeholder token."""
    client = AirtableClient(token="fake_token")
    return client

@pytest.fixture
def requests_mock_adapter():
    """Yield an active ``requests_mock.Mocker`` for the duration of a test."""
    with requests_mock.Mocker() as mocker:
        yield mocker

def test_get_with_200_response(airtable_client, requests_mock_adapter):
    """``_get`` returns a response whose JSON body matches the mocked payload."""
    endpoint_url = "https://api.airtable.com/v0/test_endpoint"
    requests_mock_adapter.get(endpoint_url, text='{"data": "test"}')

    payload = airtable_client._get("test_endpoint").json()

    assert payload == {"data": "test"}

def test_get_with_429_response(airtable_client, requests_mock_adapter):
    """A mocked 429 reply is expected to surface as ``requests.HTTPError``."""
    endpoint_url = "https://api.airtable.com/v0/test_endpoint"
    requests_mock_adapter.get(endpoint_url, status_code=429)

    with pytest.raises(requests.HTTPError):
        airtable_client._get("test_endpoint")

def test_get_with_400_response(airtable_client, requests_mock_adapter):
    """A mocked 400 reply is expected to raise ``NonRetryableError``."""
    endpoint_url = "https://api.airtable.com/v0/test_endpoint"
    requests_mock_adapter.get(endpoint_url, status_code=400, text='Bad Request')

    with pytest.raises(NonRetryableError):
        airtable_client._get("test_endpoint")

def test_get_base_schema(airtable_client, requests_mock_adapter):
    """``_get_base_schema`` returns the ``"tables"`` list of the mocked payload."""
    tables_payload = {"tables": [{"id": "tbl123", "name": "Test Table", "fields": []}]}
    requests_mock_adapter.get(
        "https://api.airtable.com/v0/meta/bases/base123/tables",
        json=tables_payload,
    )

    returned_tables = airtable_client._get_base_schema("base123")

    assert returned_tables == tables_payload["tables"]

def test_map_field_type_formula_with_result(airtable_client):
    """A formula field with a declared result maps to ``[result_type, "formula"]``."""
    referenced_ids = [
        "fldf82WQBqZkvrYWR",
        "fldiGss4dvzDqPuqr",
        "fldbJtC0hYnUHSYuy",
        "fld99YG0EGM0uvn1q",
        "fldWlAQ5dZUtq77Vy",
        "fldjz4RFBLMLQHDxz",
        "fldQsBEv0OV0yR91f",
        "fld8Rysm79Q0TwNhh",
        "fldmHDA67quleaFef",
        "fldLFfTFEPthOEAhf",
        "fldJecvcAVQ2R6y9I",
        "fldJtx7D2Zt8YvHHY",
    ]
    formula_options = {
        "isValid": "true",
        "formula": "SUM({fldf82WQBqZkvrYWR},{fldiGss4dvzDqPuqr},{fldbJtC0hYnUHSYuy}, {fld99YG0EGM0uvn1q},{fldWlAQ5dZUtq77Vy},{fldjz4RFBLMLQHDxz},{fldQsBEv0OV0yR91f}, {fld8Rysm79Q0TwNhh}, {fldmHDA67quleaFef},{fldLFfTFEPthOEAhf}, {fldJecvcAVQ2R6y9I}, {fldJtx7D2Zt8YvHHY} )",
        "referencedFieldIds": referenced_ids,
        "result": {
            "type": "currency",
            "options": {"precision": 0, "symbol": "$"},
        },
    }
    formula_field = {"type": "formula", "options": formula_options}

    mapped = airtable_client.map_field_type(formula_field)

    assert mapped == ["currency", "formula"]

def test_map_field_type_formula_without_result(airtable_client):
    """A formula field whose options carry no result maps to plain ``"formula"``."""
    bare_formula = {"type": "formula", "options": {}}
    mapped = airtable_client.map_field_type(bare_formula)
    assert mapped == "formula"

def test_map_field_type_non_formula(airtable_client):
    """A non-formula field maps to its own ``"type"`` value."""
    plain_field = {"type": "text"}
    mapped = airtable_client.map_field_type(plain_field)
    assert mapped == "text"

# Example test for get_records, assuming pagination and mocking two pages of records
def test_get_records_pagination(airtable_client, requests_mock_adapter):
    """Records from two consecutive mocked pages are yielded in order."""
    first_page = {"records": [{"id": "rec1"}], "offset": "page_2"}
    last_page = {"records": [{"id": "rec2"}]}
    # requests_mock serves the listed responses one per successive request.
    responses = [
        {'json': first_page, 'status_code': 200},
        {'json': last_page, 'status_code': 200},
    ]
    requests_mock_adapter.get("https://api.airtable.com/v0/base123/table123", responses)

    fetched = list(airtable_client.get_records("base123", "table123"))

    assert len(fetched) == 2
    assert fetched[0]["id"] == "rec1"
    assert fetched[1]["id"] == "rec2"

@pytest.fixture
def mock_response():
    """Provide a base-listing payload containing two bases."""
    listed_bases = [
        {"id": "base1", "name": "Base 1"},
        {"id": "base2", "name": "Base 2"},
    ]
    return {"bases": listed_bases}

def test_get_bases_success(airtable_client, mock_response):
    """``get_bases`` returns one entry per listed base, in listing order."""
    bases_url = "https://api.airtable.com/v0/meta/bases"
    with requests_mock.Mocker() as mocker:
        # The listing endpoint is consulted first ...
        mocker.get(bases_url, json=mock_response)

        # ... then one schema endpoint per listed base; registering them keeps
        # get_bases from raising NoMockAddress.
        for listed in mock_response["bases"]:
            schema_payload = {"tables": [{"id": "tbl123", "name": "Test Table", "fields": []}]}
            mocker.get(f"{bases_url}/{listed['id']}/tables", json=schema_payload)

        bases = airtable_client.get_bases()
        assert len(bases) == 2
        first, second = bases[0], bases[1]
        assert first.id == "base1"
        assert second.id == "base2"

def test_get_bases_missing_ids(airtable_client, mock_response):
    """Requesting a base id absent from the listing raises ``ValueError``."""
    with requests_mock.Mocker() as mocker:
        mocker.get("https://api.airtable.com/v0/meta/bases", json=mock_response)

        with pytest.raises(ValueError) as excinfo:
            airtable_client.get_bases(base_ids=["base3"])

        # Inspect the message only after the raises block has captured it.
        message = str(excinfo.value)
        assert "Base ids missing {'base3'}" in message

def test_get_bases_http_error(airtable_client):
    """A mocked 500 on the base listing is expected to raise ``requests.HTTPError``."""
    with requests_mock.Mocker() as mocker:
        mocker.get("https://api.airtable.com/v0/meta/bases", status_code=500)

        with pytest.raises(requests.HTTPError):
            airtable_client.get_bases()

def test_get_bases_non_retryable_error(airtable_client):
    """A mocked 400 on the base listing raises ``NonRetryableError`` with the server reply."""
    with requests_mock.Mocker() as mocker:
        mocker.get("https://api.airtable.com/v0/meta/bases", status_code=400, text="Bad Request")

        with pytest.raises(NonRetryableError) as excinfo:
            airtable_client.get_bases()

        # Inspect the message only after the raises block has captured it.
        message = str(excinfo.value)
        assert "Server response: 400, Bad Request" in message
62 changes: 62 additions & 0 deletions tests/test_streams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import math
import pytest
from unittest.mock import MagicMock, patch
from singer_sdk import Tap

from tap_airtable.streams import airtable_stream_factory
from tap_airtable.entities import AirtableTable, AirtableField

@pytest.fixture
def mock_tap():
    """Provide a ``MagicMock`` standing in for the tap passed to a stream."""
    tap_double = MagicMock()
    return tap_double

@pytest.fixture
def airtable_field_normal():
    """Provide a non-formula ``singleLineText`` field named ``NormalField``."""
    return AirtableField(
        field_type="singleLineText",
        id="1",
        name="NormalField",
        is_formula=False,
    )

@pytest.fixture
def airtable_field_formula():
    """Provide a formula field named ``FormulaField``."""
    return AirtableField(
        field_type="formula",
        id="2",
        name="FormulaField",
        is_formula=True,
    )

@pytest.fixture
def airtable_table(airtable_field_normal, airtable_field_formula):
    """Provide a table holding the normal field followed by the formula field."""
    table_fields = [airtable_field_normal, airtable_field_formula]
    return AirtableTable(id="tbl123", name="TestTable", fields=table_fields)

@pytest.fixture
def base_airtable_stream(mock_tap, airtable_table):
    """Provide a stream instance for the test table, configured with a fake token."""
    stream_class = airtable_stream_factory("app123", airtable_table)
    stream_instance = stream_class(tap=mock_tap)
    # Set the config directly so get_records can read config["token"].
    stream_instance._config = {"token": "fake_token"}
    return stream_instance

@pytest.fixture
def mock_airtable_client():
    """Patch ``AirtableClient`` as referenced by ``tap_airtable.streams`` and yield the mock."""
    with patch('tap_airtable.streams.AirtableClient') as patched_client:
        yield patched_client

def test_get_records_normal_field(base_airtable_stream, mock_airtable_client):
    """A non-formula field value is emitted unchanged under its slugified key."""
    mock_airtable_client.return_value.get_records.return_value = [
        {"id": "rec1", "fields": {"NormalField": "Value1"}}
    ]

    records = list(base_airtable_stream.get_records(None))

    # The leftover debug print of records[0] was removed; the assertions
    # already report the offending value on failure.
    assert len(records) == 1
    assert records[0]["normalfield"] == "Value1"

def test_get_records_formula_field_special_values(base_airtable_stream, mock_airtable_client):
    """Dict-shaped error/NaN/Infinity formula results are emitted as strings."""
    airtable_payload = [
        {"id": "rec1", "fields": {"FormulaField": {"error": "#ERROR!"}}},
        {"id": "rec2", "fields": {"FormulaField": {"specialValue": "NaN"}}},
        {"id": "rec3", "fields": {"FormulaField": {"specialValue": "Infinity"}}},
    ]
    expected_values = ["#ERROR!", str(float('nan')), str(float('inf'))]
    mock_airtable_client.return_value.get_records.return_value = airtable_payload

    emitted = list(base_airtable_stream.get_records(None))

    assert len(emitted) == 3
    assert [row["formulafield"] for row in emitted] == expected_values

def test_handle_special_values(base_airtable_stream):
    """``_handle_special_values`` maps each special dict to a string and passes others through."""
    cases = [
        ({"error": "#ERROR!"}, "#ERROR!"),
        ({"specialValue": "NaN"}, str(float('nan'))),
        ({"specialValue": "Infinity"}, str(float('inf'))),
        ("NormalValue", "NormalValue"),
    ]
    for raw_value, expected in cases:
        assert base_airtable_stream._handle_special_values(raw_value) == expected

0 comments on commit 23cc819

Please sign in to comment.