Merge pull request #2 from harrystech/ynaim94/deng-2960/modify-types-airtable

Modifying the singer types and add more tests
harrystech · Jul 8, 2024 · 23cc819 · 23cc819
2 parents 7176b9e + 30f4afb
commit 23cc819
Show file tree

Hide file tree

Showing 8 changed files with 386 additions and 149 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ pytest-cov = "^5.0.0"
 types-python-slugify = "^8.0.2.20240310"
 
 commitizen = "^3.26.0"
+requests-mock = "^1.12.1"
 [tool.commitizen]
 name = "cz_conventional_commits"
 tag_format = "$version"

diff --git a/tap_airtable/client.py b/tap_airtable/client.py
@@ -59,13 +59,20 @@ def get_bases(self, base_ids: Optional[list[str]] = None) -> list[AirtableBase]:
                     AirtableTable(
                         table["id"],
                         table["name"],
-                        [AirtableField(field["type"], field["id"], field["name"]) for field in table["fields"]],
+                        [AirtableField(self.map_field_type(field), field["id"], field["name"], field["type"] == "formula") for field in table["fields"]]
                     )
                 )
             bases.append(AirtableBase(base["id"], base["name"], tables))
 
         return bases
 
+    def map_field_type(self, field: dict[str, Any]) -> Any:
+        field_type = field["type"]
+        if field_type == "formula" and "result" in field["options"]:
+            result_type = [field["options"]["result"]["type"], "formula"]
+            return result_type
+        return field_type
+
     def get_records(self, base_id: str, table_id: str, page_size: int = 100) -> Iterable[dict[str, Any]]:
         offset: dict[str, str] = {}
         while True:

diff --git a/tap_airtable/entities.py b/tap_airtable/entities.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Any, cast
+from typing import Any, Union, cast
 
 from singer_sdk import typing as th
 from slugify import slugify
@@ -9,13 +9,19 @@
 
 @dataclass
 class AirtableField:
-    field_type: str
+    field_type: Union[str, list[str]]
     id: str
     name: str
+    is_formula: bool = False
 
     @property
     def singer_type(self) -> type[th.JSONTypeHelper[Any]]:
-        return cast(type[th.JSONTypeHelper[Any]], AIRTABLE_TO_SINGER_MAPPING[self.field_type])
+        if isinstance(self.field_type, list) and self.is_formula:
+            # Make it the union of each type in the list
+            return cast(type[th.JSONTypeHelper[Any]], th.CustomType({'type': [type_  for field_type in self.field_type for type_ in AIRTABLE_TO_SINGER_MAPPING[field_type].type_dict['type']]}))
+        else:
+            return cast(type[th.JSONTypeHelper[Any]], AIRTABLE_TO_SINGER_MAPPING[self.field_type])
+
 
     def to_singer_property(self) -> th.Property[Any]:
         return th.Property(slugify(self.name, separator="_"), self.singer_type, required=False)
@@ -34,6 +40,9 @@ def to_singer_schema(self) -> th.PropertiesList:
             *(field.to_singer_property() for field in self.fields),
         )
 
+    def get_formula_fields(self) -> list[str]:
+        return [field.name for field in self.fields if field.is_formula]
+
 
 @dataclass
 class AirtableBase:

diff --git a/tap_airtable/streams.py b/tap_airtable/streams.py
@@ -15,15 +15,31 @@ class BaseAirtableStream(Stream):
     original_airtable_table: AirtableTable
     base_id: str
     replication_key = None
-
+    
     def get_records(
         self, context: Optional[dict[str, Any]]
     ) -> Iterable[Union[dict[str, Any], tuple[dict[str, Any], dict[str, Any]]]]:
         client = AirtableClient(self.config["token"])
+        formula_fields = self.original_airtable_table.get_formula_fields()
+
         for record in client.get_records(self.base_id, self.original_airtable_table.id):
             fields = record.pop("fields", {})
+            for key, value in fields.items():
+                if key in formula_fields:
+                    value = self._handle_special_values(value)
+                fields[key] = value
             yield {slugify(key, separator="_"): value for key, value in {**record, **fields}.items()}
-
+
+    def _handle_special_values(self, value: Any) -> Any:
+        if isinstance(value, dict):
+            if 'error' in value and value['error'] == '#ERROR!':
+                value = '#ERROR!'
+            elif 'specialValue' in value:
+                if value['specialValue'] == 'NaN':
+                    value = str(float('nan'))
+                elif value['specialValue'] == 'Infinity':
+                    value = str(float('inf'))
+        return value
 
 def airtable_stream_factory(table_base_id: str, table: AirtableTable) -> type[BaseAirtableStream]:
     class AirtableStream(BaseAirtableStream):

diff --git a/tap_airtable/types.py b/tap_airtable/types.py
@@ -2,8 +2,11 @@
 
 from singer_sdk import typing as th
 
+# Define custom types for Airtable-specific fields
 AirtableThumbnail = th.ObjectType(
-    th.Property("url", th.StringType), th.Property("height", th.NumberType), th.Property("width", th.NumberType)
+    th.Property("url", th.StringType), 
+    th.Property("height", th.NumberType), 
+    th.Property("width", th.NumberType)
 )
 
 AirtableThumbnailSet = th.ObjectType(
@@ -31,39 +34,40 @@
     th.Property("profilePicUrl", th.StringType),
 )
 
-AIRTABLE_TO_SINGER_MAPPING: dict[str, Any] = {
+# Mapping of Airtable field types to Singer field types
+AIRTABLE_TO_SINGER_MAPPING: dict[str, th.JSONTypeHelper[Any]] = {
     "singleLineText": th.StringType,
     "email": th.StringType,
     "url": th.StringType,
     "multilineText": th.StringType,
     "number": th.NumberType,
-    "percent": th.StringType,
-    "currency": th.StringType,
+    "percent": th.NumberType,  # Percent values can be treated as numbers
+    "currency": th.NumberType,  # Currency values can be treated as numbers
     "singleSelect": th.StringType,
     "multipleSelects": th.ArrayType(th.StringType),
-    "singleCollaborator": th.StringType,
+    "singleCollaborator": th.StringType,  # Simplified to StringType for the ID or name
     "multipleCollaborators": th.ArrayType(AirtableCollaborator),
     "multipleRecordLinks": th.ArrayType(th.StringType),
     "date": th.DateType,
     "dateTime": th.DateTimeType,
     "phoneNumber": th.StringType,
     "multipleAttachments": th.ArrayType(AirtableAttachment),
     "checkbox": th.BooleanType,
-    "formula": th.StringType,
+    "formula": th.StringType,  # Formulas can be of various types; simplified to StringType
     "createdTime": th.DateTimeType,
-    "rollup": th.StringType,
-    "count": th.StringType,
-    "lookup": th.StringType,
+    "rollup": th.CustomType({'type': ['number', 'string']}),  # Rollups can be either string or number
+    "count": th.IntegerType,  # Counts are numeric
+    "lookup": th.StringType,  # Lookups can be various types; simplified to StringType
     "multipleLookupValues": th.ArrayType(th.StringType),
-    "autoNumber": th.StringType,
+    "autoNumber": th.IntegerType,  # AutoNumbers are numeric
     "barcode": th.StringType,
-    "rating": th.StringType,
+    "rating": th.IntegerType,  # Ratings are typically numeric
     "richText": th.StringType,
-    "duration": th.StringType,
+    "duration": th.NumberType,  # Duration can be treated as a number (seconds)
     "lastModifiedTime": th.DateTimeType,
     "button": th.StringType,
-    "createdBy": th.StringType,
-    "lastModifiedBy": th.StringType,
+    "createdBy": th.StringType,  # CreatedBy can be a user ID or name
+    "lastModifiedBy": th.StringType,  # LastModifiedBy can be a user ID or name
     "externalSyncSource": th.StringType,
     "aiText": th.StringType,
 }
diff --git a/tests/test_client.py b/tests/test_client.py
@@ -0,0 +1,125 @@
+import pytest
+import requests
+import requests_mock
+from tap_airtable.client import AirtableClient, NonRetryableError
+
+@pytest.fixture
+def airtable_client():
+    return AirtableClient(token="fake_token")
+
+@pytest.fixture
+def requests_mock_adapter():
+    with requests_mock.Mocker() as m:
+        yield m
+
+def test_get_with_200_response(airtable_client, requests_mock_adapter):
+    requests_mock_adapter.get("https://api.airtable.com/v0/test_endpoint", text='{"data": "test"}')
+    response = airtable_client._get("test_endpoint")
+    assert response.json() == {"data": "test"}
+
+def test_get_with_429_response(airtable_client, requests_mock_adapter):
+    requests_mock_adapter.get("https://api.airtable.com/v0/test_endpoint", status_code=429)
+    with pytest.raises(requests.HTTPError):
+        airtable_client._get("test_endpoint")
+
+def test_get_with_400_response(airtable_client, requests_mock_adapter):
+    requests_mock_adapter.get("https://api.airtable.com/v0/test_endpoint", status_code=400, text='Bad Request')
+    with pytest.raises(NonRetryableError):
+        airtable_client._get("test_endpoint")
+
+def test_get_base_schema(airtable_client, requests_mock_adapter):
+    mock_response = {"tables": [{"id": "tbl123", "name": "Test Table", "fields": []}]}
+    requests_mock_adapter.get("https://api.airtable.com/v0/meta/bases/base123/tables", json=mock_response)
+    schema = airtable_client._get_base_schema("base123")
+    assert schema == mock_response["tables"]
+
+def test_map_field_type_formula_with_result(airtable_client):
+    field = {"type": "formula",
+            "options": {
+                "isValid": "true",
+                "formula": "SUM({fldf82WQBqZkvrYWR},{fldiGss4dvzDqPuqr},{fldbJtC0hYnUHSYuy}, {fld99YG0EGM0uvn1q},{fldWlAQ5dZUtq77Vy},{fldjz4RFBLMLQHDxz},{fldQsBEv0OV0yR91f}, {fld8Rysm79Q0TwNhh}, {fldmHDA67quleaFef},{fldLFfTFEPthOEAhf}, {fldJecvcAVQ2R6y9I}, {fldJtx7D2Zt8YvHHY} )",
+                "referencedFieldIds": [
+                    "fldf82WQBqZkvrYWR",
+                    "fldiGss4dvzDqPuqr",
+                    "fldbJtC0hYnUHSYuy",
+                    "fld99YG0EGM0uvn1q",
+                    "fldWlAQ5dZUtq77Vy",
+                    "fldjz4RFBLMLQHDxz",
+                    "fldQsBEv0OV0yR91f",
+                    "fld8Rysm79Q0TwNhh",
+                    "fldmHDA67quleaFef",
+                    "fldLFfTFEPthOEAhf",
+                    "fldJecvcAVQ2R6y9I",
+                    "fldJtx7D2Zt8YvHHY"
+                ],
+                "result": {
+                    "type": "currency",
+                    "options": { "precision": 0, "symbol": "$" }
+                }
+            },
+    }
+    assert airtable_client.map_field_type(field) == ["currency", "formula"]
+
+def test_map_field_type_formula_without_result(airtable_client):
+    field = {"type": "formula", "options": {}}
+    assert airtable_client.map_field_type(field) == "formula"
+
+def test_map_field_type_non_formula(airtable_client):
+    field = {"type": "text"}
+    assert airtable_client.map_field_type(field) == "text"
+
+# Example test for get_records, assuming pagination and mocking two pages of records
+def test_get_records_pagination(airtable_client, requests_mock_adapter):
+    page_1 = {"records": [{"id": "rec1"}], "offset": "page_2"}
+    page_2 = {"records": [{"id": "rec2"}]}
+    requests_mock_adapter.get("https://api.airtable.com/v0/base123/table123", [{'json': page_1, 'status_code': 200}, {'json': page_2, 'status_code': 200}])
+
+    records = list(airtable_client.get_records("base123", "table123"))
+    assert len(records) == 2
+    assert records[0]["id"] == "rec1"
+    assert records[1]["id"] == "rec2"
+
+@pytest.fixture
+def mock_response():
+    return {
+        "bases": [
+            {"id": "base1", "name": "Base 1"},
+            {"id": "base2", "name": "Base 2"}
+        ]
+    }
+
+def test_get_bases_success(airtable_client, mock_response):
+    with requests_mock.Mocker() as m:
+        # Mock the initial request for getting bases
+        m.get("https://api.airtable.com/v0/meta/bases", json=mock_response)
+
+        # Add mock responses for each base's schema request
+        for base in mock_response["bases"]:
+            mock_base_schema = {"tables": [{"id": "tbl123", "name": "Test Table", "fields": []}]}
+            m.get(f"https://api.airtable.com/v0/meta/bases/{base['id']}/tables", json=mock_base_schema)
+
+        # Now calling get_bases should not raise NoMockAddress
+        bases = airtable_client.get_bases()
+        assert len(bases) == 2
+        assert bases[0].id == "base1"
+        assert bases[1].id == "base2"
+
+def test_get_bases_missing_ids(airtable_client, mock_response):
+    with requests_mock.Mocker() as m:
+        m.get("https://api.airtable.com/v0/meta/bases", json=mock_response)
+        with pytest.raises(ValueError) as excinfo:
+            airtable_client.get_bases(base_ids=["base3"])
+        assert "Base ids missing {'base3'}" in str(excinfo.value)
+
+def test_get_bases_http_error(airtable_client):
+    with requests_mock.Mocker() as m:
+        m.get("https://api.airtable.com/v0/meta/bases", status_code=500)
+        with pytest.raises(requests.HTTPError):
+            airtable_client.get_bases()
+
+def test_get_bases_non_retryable_error(airtable_client):
+    with requests_mock.Mocker() as m:
+        m.get("https://api.airtable.com/v0/meta/bases", status_code=400, text="Bad Request")
+        with pytest.raises(NonRetryableError) as excinfo:
+            airtable_client.get_bases()
+        assert "Server response: 400, Bad Request" in str(excinfo.value)
diff --git a/tests/test_streams.py b/tests/test_streams.py
@@ -0,0 +1,62 @@
+import math
+import pytest
+from unittest.mock import MagicMock, patch
+from singer_sdk import Tap
+
+from tap_airtable.streams import airtable_stream_factory
+from tap_airtable.entities import AirtableTable, AirtableField
+
+@pytest.fixture
+def mock_tap():
+    return MagicMock()
+
+@pytest.fixture
+def airtable_field_normal():
+    return AirtableField(field_type="singleLineText", id="1", name="NormalField", is_formula=False)
+
+@pytest.fixture
+def airtable_field_formula():
+    return AirtableField(field_type="formula", id="2", name="FormulaField", is_formula=True)
+
+@pytest.fixture
+def airtable_table(airtable_field_normal, airtable_field_formula):
+    return AirtableTable(id="tbl123", name="TestTable", fields=[airtable_field_normal, airtable_field_formula])
+
+@pytest.fixture
+def base_airtable_stream(mock_tap, airtable_table):
+    stream = airtable_stream_factory("app123", airtable_table)
+    instance = stream(tap=mock_tap)
+    instance._config = {"token": "fake_token"}
+    return instance
+
+@pytest.fixture
+def mock_airtable_client():
+    with patch('tap_airtable.streams.AirtableClient') as mock:
+        yield mock
+
+def test_get_records_normal_field(base_airtable_stream, mock_airtable_client):
+    mock_airtable_client.return_value.get_records.return_value = [
+        {"id": "rec1", "fields": {"NormalField": "Value1"}}
+    ]
+    records = list(base_airtable_stream.get_records(None))
+    assert len(records) == 1
+    print(records[0])
+    assert records[0]["normalfield"] == "Value1"
+
+def test_get_records_formula_field_special_values(base_airtable_stream, mock_airtable_client):
+    mock_airtable_client.return_value.get_records.return_value = [
+        {"id": "rec1", "fields": {"FormulaField": {"error": "#ERROR!"}}},
+        {"id": "rec2", "fields": {"FormulaField": {"specialValue": "NaN"}}},
+        {"id": "rec3", "fields": {"FormulaField": {"specialValue": "Infinity"}}}
+    ]
+    records = list(base_airtable_stream.get_records(None))
+    assert len(records) == 3
+    assert records[0]["formulafield"] == "#ERROR!"
+    assert records[1]["formulafield"] == str(float('nan'))
+    assert records[2]["formulafield"] == str(float('inf'))  
+
+def test_handle_special_values(base_airtable_stream):
+    assert base_airtable_stream._handle_special_values({"error": "#ERROR!"}) == "#ERROR!"
+    assert base_airtable_stream._handle_special_values({"specialValue": "NaN"}) == str(float('nan'))
+    assert base_airtable_stream._handle_special_values({"specialValue": "Infinity"}) == str(float('inf'))
+    assert base_airtable_stream._handle_special_values("NormalValue") == "NormalValue"