forked from amundsen-io/amundsen
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implements Table Detail and Popular Tables APIs (#15)
- Loading branch information
Showing
3 changed files
with
353 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,57 @@ | ||
[TBD] | ||
Create a new Atlas client instance (update the host and credentials to match your setup). | ||
```python | ||
from atlasclient.client import Atlas | ||
client = Atlas(host='localhost', port=21000, username='admin', password='admin') | ||
``` | ||
|
||
### Create a Super Type Entity | ||
Since Atlas stores most of the metadata about tables, databases, columns, etc., | ||
we need a super entity type that can be used to select only the tables. | ||
|
||
[Atlas Proxy](https://github.com/lyft/amundsenmetadatalibrary/blob/master/metadata_service/proxy/atlas_proxy.py) uses | ||
`Table` as super entity type. | ||
```python | ||
TABLE_ENTITY = 'Table' | ||
``` | ||
|
||
Create the new type named by `TABLE_ENTITY` (defined above) using the script below. | ||
```python | ||
# Definition of the new super type: it is named by TABLE_ENTITY and
# declares "DataSet" as its own super type.
table_entity_def = {
    "name": TABLE_ENTITY,
    "superTypes": ["DataSet"],
}
typedef_dict = {"entityDefs": [table_entity_def]}

# Register the new type definition through the Atlas client.
client.typedefs.create(data=typedef_dict)
``` | ||
|
||
### Add required fields | ||
We need to add some extra fields to Atlas in order to get all the information needed for the Amundsen frontend. | ||
Adding those extra attributes to the super type entity definition is a handy way to keep them in one place. | ||
|
||
[TBD - How to add attributes definition] | ||
|
||
### Assign superType to entity definitions | ||
Assign the newly created `TABLE_ENTITY` entity as a super type to the entity definitions you want to behave like tables. | ||
In the code snippet below, `hive_table` and `rdbms_table` would be affected. | ||
```python | ||
# Entity types that should behave like table entities for the Amundsen Atlas Proxy
atlas_tables = ['hive_table', 'rdbms_table']
entities_to_update = []
for t in client.typedefs:
    for e in t.entityDefs:
        if e.name not in atlas_tables:
            continue
        superTypes = e.superTypes  # Get a property first to inflate the relational objects
        if TABLE_ENTITY in superTypes:
            # Already assigned: re-running this script must not add a duplicate super type
            continue
        ent_dict = e._data
        # Build a new list rather than appending to the one owned by the client object
        ent_dict["superTypes"] = list(superTypes) + [TABLE_ENTITY]
        entities_to_update.append(ent_dict)

typedef_dict = {
    "entityDefs": entities_to_update
}
# Only call Atlas when at least one entity definition actually changed
if entities_to_update:
    client.typedefs.update(data=typedef_dict)
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
import unittest | ||
|
||
from atlasclient.exceptions import BadRequest | ||
from mock import patch, MagicMock | ||
|
||
from metadata_service.entity.popular_table import PopularTable | ||
from metadata_service.entity.table_detail import (Table) | ||
from metadata_service.exception import NotFoundException | ||
from metadata_service.proxy.atlas_proxy import AtlasProxy | ||
|
||
|
||
class TestAtlasProxy(unittest.TestCase):
    """Unit tests for ``AtlasProxy`` run against a mocked Atlas driver."""

    def setUp(self):
        """Build a proxy with a mocked driver and two table entity fixtures."""
        # The Atlas client class is patched while the proxy is constructed; the
        # driver is then replaced by a MagicMock that each test configures.
        with patch('metadata_service.proxy.atlas_proxy.Atlas'):
            self.proxy = AtlasProxy(host='DOES_NOT_MATTER', port=0)
            self.proxy._driver = MagicMock()

        self.db = 'TEST_DB'
        self.cluster = 'TEST_CLUSTER'
        self.schema = 'TEST_SCHEMA'
        self.name = 'TEST_TABLE'
        self.table_uri = f'{self.db}://{self.cluster}.{self.schema}/{self.name}'

        self.entity1 = self._make_entity(guid='1', update_time=123,
                                         qualified_name='Table1_Qualified', name='Table1')
        # NOTE(review): the second fixture also uses the name 'Table1'. This looks
        # like a copy-paste leftover, but it is kept as-is because no assertion
        # below reads this attribute directly -- confirm before changing.
        self.entity2 = self._make_entity(guid='2', update_time=234,
                                         qualified_name='Table2_Qualified', name='Table1')

        self.entities = {
            'entities': [
                self.entity1,
                self.entity2,
            ]
        }

    def _make_entity(self, guid, update_time, qualified_name, name):
        """Return a table entity dict shaped like the fixtures these tests need.

        The ``relationshipAttributes`` block is stored both at the top level of
        the entity and inside its ``attributes`` dict.

        :param guid: value stored under the ``guid`` key
        :param update_time: value stored under the ``updateTime`` key
        :param qualified_name: value stored under ``attributes.qualifiedName``
        :param name: value stored under ``attributes.name``
        :return: the assembled entity dict
        """
        relationships = {
            'relationshipAttributes': {
                'columns': []
            }
        }
        entity = {
            'guid': guid,
            'updateTime': update_time,
            'attributes': {
                'qualifiedName': qualified_name,
                'schema': self.schema,
                'name': name,
                'db': {
                    'guid': '-100',
                    'qualifiedName': self.db
                }
            }
        }
        entity.update(relationships)
        entity['attributes'].update(relationships)
        return entity

    def test_extract_table_uri_info(self):
        """The table URI is split into its db, cluster, schema and name parts."""
        table_info = self.proxy._extract_info_from_uri(table_uri=self.table_uri)
        self.assertDictEqual(table_info, {
            'db': self.db,
            'cluster': self.cluster,
            'schema': self.schema,
            'name': self.name
        })

    def test_get_ids_from_basic_search(self):
        """The guids of all entities in a basic search response are returned."""
        basic_search_response = MagicMock()
        basic_search_response._data = self.entities

        self.proxy._driver.search_basic = MagicMock(return_value=[basic_search_response])
        response = self.proxy._get_ids_from_basic_search(params={})
        expected = ['1', '2']
        self.assertListEqual(response, expected)

    def test_get_table(self):
        """A complete entity is converted into the expected ``Table``."""
        unique_attr_response = MagicMock()
        unique_attr_response.entity = self.entity1

        self.proxy._driver.entity_unique_attribute = MagicMock(return_value=unique_attr_response)
        response = self.proxy.get_table(table_uri=self.table_uri)

        expected = Table(database=self.db,
                         cluster=self.cluster,
                         schema=self.schema,
                         name=self.name,
                         columns=self.entity1['relationshipAttributes']['columns'],
                         last_updated_timestamp=self.entity1['updateTime'])
        self.assertEqual(str(expected), str(response))

    def test_get_table_not_found(self):
        """A driver failure during lookup surfaces as ``NotFoundException``."""
        self.proxy._driver.entity_unique_attribute = MagicMock(side_effect=Exception('Boom!'))

        # Only the call under test sits inside assertRaises, so an error raised
        # by the mock setup cannot satisfy the assertion by accident.
        with self.assertRaises(NotFoundException):
            self.proxy.get_table(table_uri=self.table_uri)

    def test_get_table_missing_info(self):
        """An entity without ``relationshipAttributes`` raises ``BadRequest``."""
        # Build a copy without the top-level relationship block instead of
        # popping the key from the shared fixture.
        incomplete_entity = {key: value for key, value in self.entity1.items()
                             if key != 'relationshipAttributes'}
        unique_attr_response = MagicMock()
        unique_attr_response.entity = incomplete_entity

        self.proxy._driver.entity_unique_attribute = MagicMock(return_value=unique_attr_response)

        with self.assertRaises(BadRequest):
            self.proxy.get_table(table_uri=self.table_uri)

    @patch.object(AtlasProxy, '_get_ids_from_basic_search')
    def test_get_popular_tables(self, mock_basic_search):
        """Popular tables carry the db and cluster read from the db entity."""
        entity1 = MagicMock()
        entity1.attributes = self.entity1['attributes']

        entity2 = MagicMock()
        entity2.attributes = self.entity2['attributes']

        bulk_ent_collection = MagicMock()
        bulk_ent_collection.entities = [entity1, entity2]

        self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_ent_collection])

        db_entity = MagicMock()
        db_entity.entity = {'attributes': {
            'qualifiedName': self.db,
            'cluster': self.cluster
        }}

        self.proxy._driver.entity_guid = MagicMock(return_value=db_entity)

        response = self.proxy.get_popular_tables(num_entries=2)

        expected = [
            PopularTable(database=self.db, cluster=self.cluster, schema=self.schema,
                         name=self.entity1['attributes']['qualifiedName']),
            PopularTable(database=self.db, cluster=self.cluster, schema=self.schema,
                         name=self.entity2['attributes']['qualifiedName']),
        ]

        self.assertEqual(repr(response), repr(expected))

    @patch.object(AtlasProxy, '_get_ids_from_basic_search')
    def test_get_popular_tables_without_db(self, mock_basic_search):
        """Entities lacking a ``db`` attribute yield empty database and cluster."""
        # Work on copies of the attribute dicts so the shared fixtures keep
        # their 'db' entry.
        attrs_ent1 = dict(self.entity1['attributes'])
        attrs_ent1.pop('db')
        entity1 = MagicMock()
        entity1.attributes = attrs_ent1

        attrs_ent2 = dict(self.entity2['attributes'])
        attrs_ent2.pop('db')
        entity2 = MagicMock()
        entity2.attributes = attrs_ent2

        bulk_ent_collection = MagicMock()
        bulk_ent_collection.entities = [entity1, entity2]

        self.proxy._driver.entity_bulk = MagicMock(return_value=[bulk_ent_collection])
        response = self.proxy.get_popular_tables(num_entries=2)

        expected = [
            PopularTable(database='', cluster='', schema=self.schema, name=attrs_ent1['qualifiedName']),
            PopularTable(database='', cluster='', schema=self.schema, name=attrs_ent2['qualifiedName']),
        ]

        self.assertEqual(repr(response), repr(expected))
|
||
|
||
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()