-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ingest/qlik): Qlik cloud connector integration #9682
feat(ingest/qlik): Qlik cloud connector integration #9682
Conversation
metadata-ingestion/src/datahub/ingestion/source/qlik_cloud/qlik_api.py
Outdated
Show resolved
Hide resolved
spaces.append( | ||
Space( | ||
id=space[Constant.ID], | ||
name=space[Constant.NAME], |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also - are all of these fields guaranteed to be in the API response?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
metadata-ingestion/setup.py
Outdated
@@ -626,6 +627,7 @@ | |||
"gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource", | |||
"sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource", | |||
"fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource", | |||
"qlik-cloud = datahub.ingestion.source.qlik_cloud.qlik_cloud:QlikCloudSource", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does it make sense to call this `qlik`, or is there a reason you called it `qlik-cloud`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's make it qlik-sense, as they also have a product named qlik-view.
metadata-ingestion/src/datahub/ingestion/source/qlik_cloud/config.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_cloud/config.py
Outdated
Show resolved
Hide resolved
…into Qlik-Connector-Integration
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
haven't finished going through the code yet
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/tests/integration/qlik_sense/golden_test_platform_instace_ingest.json
Outdated
Show resolved
Hide resolved
metadata-ingestion/docs/sources/qlik-sense/qlik-sense_recipe.yml
Outdated
Show resolved
Hide resolved
yield MetadataChangeProposalWrapper( | ||
entityUrn=chart_urn, | ||
aspect=ChartInfoClass( | ||
title=chart.qId, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see any other metadata that we can use to get a proper title for the chart.
Below is an example of the chart metadata response that we are getting:
{
"qInfo": {"qId": "pKtjchL", "qType": "barchart"},
"qMeta": {"privileges": ["read"]},
"qHyperCube": {
"qSize": {"qcx": 3, "qcy": 9},
"qDimensionInfo": [
{
"qFallbackTitle": "_FIVETRAN_SYNCED.Week",
"qApprMaxGlyphCount": 3,
"qCardinal": 9,
"qSortIndicator": "A",
"qGroupFallbackTitles": ["_FIVETRAN_SYNCED.Week"],
"qGroupPos": 0,
"qStateCounts": {
"qLocked": 0,
"qSelected": 0,
"qOption": 9,
"qDeselected": 0,
"qAlternative": 0,
"qExcluded": 0,
"qSelectedExcluded": 0,
"qLockedExcluded": 0,
},
"qTags": ["$weeknumber", "$cyclic", "$numeric", "$integer"],
"qDimensionType": "N",
"qGrouping": "N",
"qNumFormat": {"qType": "U", "qnDec": 0, "qUseThou": 0},
"qIsAutoFormat": True,
"qGroupFieldDefs": ["_FIVETRAN_SYNCED.autoCalendar.Week"],
"qMin": 1,
"qMax": 53,
"qContinuousAxes": True,
"qIsCyclic": True,
"qDerivedField": True,
"qAttrExprInfo": [],
"qAttrDimInfo": [],
"qCardinalities": {
"qCardinal": 9,
"qHypercubeCardinal": 9,
"qAllValuesCardinal": -1,
},
"autoSort": True,
"cId": "PGzANP",
"othersLabel": "Others",
},
{
"qFallbackTitle": "_FIVETRAN_SYNCED.Week",
"qApprMaxGlyphCount": 3,
"qCardinal": 9,
"qSortIndicator": "A",
"qGroupFallbackTitles": ["_FIVETRAN_SYNCED.Week"],
"qGroupPos": 0,
"qStateCounts": {
"qLocked": 0,
"qSelected": 0,
"qOption": 9,
"qDeselected": 0,
"qAlternative": 0,
"qExcluded": 0,
"qSelectedExcluded": 0,
"qLockedExcluded": 0,
},
"qTags": ["$numeric", "$integer"],
"qDimensionType": "N",
"qGrouping": "N",
"qNumFormat": {"qType": "U", "qnDec": 0, "qUseThou": 0},
"qIsAutoFormat": True,
"qGroupFieldDefs": ["_FIVETRAN_SYNCED.autoCalendar.Week"],
"qMin": 1,
"qMax": 53,
"qDerivedField": True,
"qAttrExprInfo": [],
"qAttrDimInfo": [],
"qCardinalities": {
"qCardinal": 9,
"qHypercubeCardinal": 9,
"qAllValuesCardinal": -1,
},
"autoSort": True,
"cId": "mpZPuW",
"othersLabel": "Others",
},
],
"qMeasureInfo": [
{
"qFallbackTitle": "Sum([_FIVETRAN_SYNCED.autoCalendar.Date])",
"qApprMaxGlyphCount": 12,
"qCardinal": 0,
"qSortIndicator": "D",
"qNumFormat": {
"qType": "D",
"qnDec": 0,
"qUseThou": 0,
"qFmt": "M/D/YYYY",
},
"qMin": 45291,
"qMax": 1813801836,
"qIsAutoFormat": True,
"qAttrExprInfo": [],
"qAttrDimInfo": [],
"qTrendLines": [],
"autoSort": True,
"cId": "jSpLmS",
"numFormatFromTemplate": True,
}
],
"qEffectiveInterColumnSortOrder": [0, 1, 2],
"qPivotDataPages": [],
"qStackedDataPages": [],
"qMode": "S",
"qNoOfLeftDims": -1,
"qTreeNodesOnDim": [],
"qColumnOrder": [0, 1, 2],
"columnOrder": [],
},
"script": "",
"refLine": {"refLines": [], "dimRefLines": []},
"showTitles": True,
"title": "",
"subtitle": "",
"footnote": "",
"disableNavMenu": False,
"showDetails": True,
"showDetailsExpression": False,
"showDisclaimer": True,
"barGrouping": {"grouping": "grouped"},
"orientation": "vertical",
"scrollbar": "miniChart",
"scrollStartPos": 0,
"gridLine": {"auto": True, "spacing": 2},
"dataPoint": {
"showLabels": False,
"showSegmentLabels": False,
"showTotalLabels": True,
},
"color": {
"auto": True,
"mode": "primary",
"formatting": {"numFormatFromTemplate": True},
"useBaseColors": "off",
"paletteColor": {"index": 6},
"useDimColVal": True,
"useMeasureGradient": True,
"persistent": True,
"expressionIsColor": True,
"expressionLabel": "",
"measureScheme": "sg",
"reverseScheme": False,
"dimensionScheme": "12",
"autoMinMax": True,
"measureMin": 0,
"measureMax": 10,
},
"legend": {"show": True, "dock": "auto", "showTitle": True},
"dimensionAxis": {
"continuousAuto": True,
"show": "all",
"label": "auto",
"dock": "near",
"axisDisplayMode": "auto",
"maxVisibleItems": 10,
},
"preferContinuousAxis": True,
"measureAxis": {
"show": "all",
"dock": "near",
"spacing": 1,
"autoMinMax": True,
"minMax": "min",
"min": 0,
"max": 10,
},
"tooltip": {
"auto": True,
"hideBasic": False,
"chart": {"style": {"size": "medium"}},
"data": {},
},
"visualization": "barchart",
"version": "1.34.7",
"components": [],
"showMiniChartForContinuousAxis": True,
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the object has a title set in Qlik, we should figure out how to extract that. Otherwise, let's set the names to "Object 1", "Object 2" etc
Also, the dimensions and measures should get extracted into the `InputFieldsClass` aspect on the chart.
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/websocket_connection.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/config.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/data_classes.py
Outdated
Show resolved
Hide resolved
…amjagtap639/datahub into Qlik-Connector-Integration
@@ -74,6 +75,21 @@ def _get_dataset(self, dataset_id: str) -> Optional[QlikDataset]: | |||
) | |||
return None | |||
|
|||
def get_user_name(self, user_id: str) -> Optional[str]: | |||
try: | |||
if user_id in self.users: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not necessary, but we could use `functools.lru_cache` here instead
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using functools.lru_cache was causing some issues with the API requests.
yield MetadataChangeProposalWrapper( | ||
entityUrn=chart_urn, | ||
aspect=ChartInfoClass( | ||
title=chart.qId, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the object has a title set in Qlik, we should figure out how to extract that. Otherwise, let's set the names to "Object 1", "Object 2" etc
Also, the dimensions and measures should get extracted into the `InputFieldsClass` aspect on the chart.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- let's get chart InputFields extracted too
- fix Qlik dataset urn generation
- (pending) dataset <- chart lineage
api_key: "QLIK_API_KEY" | ||
|
||
# Optional - filter for certain space names instead of ingesting everything. | ||
# Mention 'personal_space' if entities of personal space need to ingest |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if there's hierarchy e.g. "My Space -> Nested Space -> App", does full path need to be specified?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This filter only works at the space level, and spaces cannot be nested.
|
||
def _gen_qlik_dataset_urn(self, dataset_identifier: str) -> str: | ||
return builder.make_dataset_urn_with_platform_instance( | ||
name=dataset_identifier, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On my side I don't see any indentation error.
aspect=UpstreamLineage( | ||
upstreams=[ | ||
Upstream( | ||
dataset=upstream_dataset_urn, type=DatasetLineageType.COPY |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we also generate fineGrainedLineage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can treat this as a one-to-one mapping of tables and columns and generate fineGrainedLineage from it.
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py
Show resolved
Hide resolved
custom_properties = { | ||
"Dimension": str(chart.qDimension), | ||
"Measure": str(chart.qMeasure), | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To generate the above fields for charts we need the parent urn (dataset urn) of each field, which we are not able to generate here as of now.
def _gen_dashboard_info_workunit(self, sheet: Sheet) -> MetadataWorkUnit: | ||
def _gen_dashboard_info_workunit( | ||
self, sheet: Sheet, app_id: str | ||
) -> MetadataWorkUnit: | ||
dashboard_urn = self._gen_dashboard_urn(sheet.id) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we know if the `sheet.id` is globally unique?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The public sheet id is unique among all objects present internally; I don't know if it is unique globally.
The private sheet id is not in UUID format (e.g. 'ULVLjqu'), so we can consider the private sheet id as not globally unique.
https://community.qlik.com/t5/New-to-Qlik-Sense/Sheet-Id-from-QMC-vs-URL/td-p/1630337
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
Outdated
Show resolved
Hide resolved
metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py
Outdated
Show resolved
Hide resolved
for table_node_qri in response.json()[Constant.GRAPH][Constant.NODES]: | ||
table_node_qri = quote(table_node_qri, safe="") | ||
response = self.session.get( | ||
f"{self.rest_api_url}/lineage-graphs/nodes/{app_qri}/actions/expand?node={table_node_qri}&level=FIELD" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if the table node is derived from multiple snowflake datasets, does this handle it correctly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are using the lineage graph API only to get the QRIs of the tables used by the app.
The actual table details, and the dataset/data connection each table is derived from, are fetched from the WebSocket JSON API.
I only see a single data connection's details for each table, so I don't think we will see a situation in which one table is derived from multiple Snowflake datasets.
Below is the response we get for the tables used by a single Qlik app:
{ "qLayout": { "qInfo": {"qId": "LoadModel", "qType": "LoadModel"}, "tables": [ { "dataconnectorName": "Google_BigQuery_harshal-playground-306419", "dataconnectorPrefix": "test_space:", "boxType": "blackbox", "databaseName": "", "ownerName": "", "tableName": "test_table", "tableAlias": "test_table", "loadProperties": { "filterInfo": {"filterClause": "", "filterType": 1} }, "key": "Google_BigQuery_harshal-playground-306419:::test_table", "fields": [ { "alias": "name", "name": "name", "selected": True, "checked": True, "id": "dsd.test_table.name", } ], "connectionInfo": { "name": "Google_BigQuery_harshal-playground-306419", "displayName": "Google_BigQuery_harshal-playground-306419", "id": "bb5be407-d3d3-4f19-858c-e71d593f09ae", "type": { "provider": "QvOdbcConnectorPackage.exe", "type": "custom", "name": "QvOdbcConnectorPackage", "displayName": "Qlik® ODBC Connector Package", "isStandardConnector": False, "isIframeCompatible": True, "needsConnect": True, "connectDialog": "/customdata/64/QvOdbcConnectorPackage/web/standalone/connect-dialog.html?locale=en-US", "selectDialog": "/customdata/64/QvOdbcConnectorPackage/web/standalone/select-dialog.html?locale=en-US", "selectAddData": "/customdata/64/QvOdbcConnectorPackage/web/standalone/select-adddata.html?locale=en-US", "credentialsDialog": "/customdata/64/QvOdbcConnectorPackage/web/standalone/credentials-dialog.html?locale=en-US", "update": "/customdata/64/QvOdbcConnectorPackage/web/standalone/loadModelUpdate.js", "architecture": {"text": "Common.undefinedbit"}, "connectorMain": "QvOdbcConnectorPackage.webroot/connector-main-iframe", }, "typeName": "QvOdbcConnectorPackage.exe", "privileges": [ "change_owner", "change_space", "delete", "list", "read", "update", ], "sourceConnectorID": "gbq", "dataconnectorPrefix": "test_space:", "isInAppSpace": True, "space": "659d0e41d1b0ecce6eebc9b1", "connectionString": 'CUSTOM CONNECT TO 
"provider=QvOdbcConnectorPackage.exe;driver=gbq;OAuthMechanism=0;SupportOldClient=true;Catalog_Old=harshal-playground-306419;separateCredentials=false;Catalog=harshal-playground-306419;Min_TLS=1.2;SQLDialect=1;RowsFetchedPerBlock=16384;DefaultStringColumnLength=65535;AllowLargeResults=false;EnableHTAPI=false;HTAPI_MinResultsSize=1000;HTAPI_MinActivationRatio=3;allowNonSelectQueries=false;QueryTimeout=30;Timeout=300;useBulkReader=true;bulkFetchSize=50;rowBatchSize=1;bulkFetchColumnMode=true;maxStringLength=4096;logSQLStatements=false;"', "hasEditableSeparatedCredentials": False, "canDelete": True, "connectorImagePath": "https://iq37k6byr9lgam8.us.qlikcloud.com/customdata/64/QvOdbcConnectorPackage/web/gbq-square.png", "connectorDisplayName": "Google BigQuery", "dbInfo": {}, }, "id": "dsd.test_table", "tableGroupId": "", "connectorProperties": { "tableQualifiers": [ "harshal-playground-306419", "test_dataset", ], }, "selectStatement": "SELECT name\nFROM
harshal-playground-306419.
test_dataset.
test_table;", "caching": {"enabled": True, "type": "qvd"}, }, { "dataconnectorName": "DataFiles", "dataconnectorPrefix": "test_space:", "boxType": "load-file", "databaseName": "IPL_Matches_2022.csv", "ownerName": "TYPE9_CSV", "tableName": "IPL_Matches_2022", "tableAlias": "IPL_Matches_2022", "key": "DataFiles:IPL_Matches_2022.csv:TYPE9_CSV:IPL_Matches_2022", "fields": [ { "id": "dsd.IPL_Matches_2022.City", "name": "City", "alias": "City", "selected": True, }, { "id": "dsd.IPL_Matches_2022.Date", "name": "Date", "alias": "Date", "selected": True, }, ], "connectionInfo": { "type": {"isStorageProvider": False}, "id": "87d7bc7e-77d8-40dc-a251-3a35ec107b4e", "name": "DataFiles", "typeName": "qix-datafiles.exe", "sourceConnectorID": "qix-datafiles.exe", "connectionString": 'CUSTOM CONNECT TO "provider=qix-datafiles.exe;path=test_space:datafiles;"', "space": "659d0e41d1b0ecce6eebc9b1", "dataconnectorPrefix": "test_space:", "caching": {"enabled": True, "type": "qvd"}, }, "id": "dsd.IPL_Matches_2022", "loadSelectStatement": "LOAD [ID] AS [ID],\n\t[City] AS [City],\n\t[Date] AS [Date],\n\t[Season] AS [Season],\n\t[MatchNumber] AS [MatchNumber],\n\t[Team1] AS [Team1],\n\t[Team2] AS [Team2],\n\t[Venue] AS [Venue],\n\t[TossWinner] AS [TossWinner],\n\t[TossDecision] AS [TossDecision],\n\t[SuperOver] AS [SuperOver],\n\t[WinningTeam] AS [WinningTeam],\n\t[WonBy] AS [WonBy],\n\t[Margin] AS [Margin],\n\t[method] AS [method],\n\t[Player_of_Match] AS [Player_of_Match],\n\t[Team1Players] AS [Team1Players],\n\t[Team2Players] AS [Team2Players],\n\t[Umpire1] AS [Umpire1],\n\t[Umpire2] AS [Umpire2]\nFROM [lib://DataFiles/IPL_Matches_2022.csv]\n(txt, codepage is 28591, embedded labels, delimiter is ',', msq);\n", "formatSpec": "(txt, codepage is 28591, embedded labels, delimiter is ',', msq)", "caching": {"enabled": True, "type": "qvd"}, }, ], "schemaVersion": 2.1, } }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly looks good
For the table -> chart lineage, it sounds like we can't get in depth information about the tables/columns used yet. As such, can we simply add all tables within the app as upstreams of the chart? It will be a superset of the correct lineage, but it's better than having no lineage.
Done |
…into Qlik-Connector-Integration
…into Qlik-Connector-Integration
Checklist