Skip to content

Commit

Permalink
Merge pull request #21 from mara/Handle-duplicate-ids
Browse files Browse the repository at this point in the history
Merge release 4.0.0 changes for including the Ad-group-id
  • Loading branch information
gathineou authored Jul 17, 2019
2 parents f771167 + 207d8cd commit 4cdcce9
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 65 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
*.egg-info/
__pycache__
.idea
.vscode
.venv/
build
dist
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## 4.0.0 (2019-07-05)

- Comply with the specifications: an ad is uniquely identified by the combination of Ad ID and Ad Group ID, not by the Ad ID alone.
- Add the `ignore_removed_campaigns` config option to skip downloading data that belongs to removed campaigns.

**required changes**

- The file format changed to `v5`. Adapt ETL scripts that process the output data.
- The Ad ID is no longer unique in any output file; use the Ad ID together with the Ad Group ID as the key.
- Ad performance data sets now include the Ad Group ID.

## 3.0.0 (2019-04-13)

- Change MARA_XXX variables to functions to defer imports until they are needed
Expand Down
7 changes: 6 additions & 1 deletion google_ads_downloader/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def redownload_window() -> str:

def output_file_version() -> str:
"""A suffix that is added to output files, denoting a version of the data format"""
return 'v4'
return 'v5'


def max_retries() -> int:
Expand All @@ -62,3 +62,8 @@ def max_retries() -> int:
def retry_backoff_factor() -> int:
"""How many seconds to wait between retries (is multiplied with retry count)"""
return 5


def ignore_removed_campaigns() -> bool:
"""Whether to ignore campaigns with status 'REMOVED'"""
return False
127 changes: 64 additions & 63 deletions google_ads_downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import shutil
import sys
import io
import tempfile
import json
import time
Expand Down Expand Up @@ -84,8 +85,8 @@ def _fetch_client_customers(self):

def download_data():
"""Creates an AdWordsApiClient and downloads the data"""
logger = logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logging.info('Adwords API version: '+str(config.api_version()))

Expand All @@ -100,22 +101,32 @@ def download_data_sets(api_client: AdWordsApiClient):
api_client: AdWordsApiClient
"""

predicates = [{'field': 'Status',
'operator': 'IN',
'values': ['ENABLED',
'PAUSED',
'DISABLED']
}, {
'field': 'Impressions',
'operator': 'GREATER_THAN',
'values': [0]
}]

if config.ignore_removed_campaigns():
predicates.append({
'field': 'CampaignStatus',
'operator': 'NOT_EQUALS',
'values': 'REMOVED'
})

download_performance(api_client,
PerformanceReportType.AD_PERFORMANCE_REPORT,
fields=['Date', 'Id', 'Device', 'AdNetworkType2',
fields=['Date', 'Id', 'AdGroupId', 'Device', 'AdNetworkType2',
'ActiveViewImpressions', 'AveragePosition',
'Clicks', 'Conversions', 'ConversionValue',
'Cost', 'Impressions'],
predicates=[{'field': 'Status',
'operator': 'IN',
'values': ['ENABLED',
'PAUSED',
'DISABLED']
}, {
'field': 'Impressions',
'operator': 'GREATER_THAN',
'values': [0]
}]
predicates=predicates
)

download_account_structure(api_client)
Expand All @@ -132,7 +143,6 @@ def download_performance(api_client: AdWordsApiClient,
performance_report_type: A PerformanceReportType object
fields: A list of fields to be included in the report
predicates: A list of filters for the report
redownload_window: The number of days the performance is redownloaded
"""
client_customer_ids = api_client.client_customers.keys()

Expand Down Expand Up @@ -194,7 +204,7 @@ def get_performance_for_single_day(api_client: AdWordsApiClient,
fields=fields,
predicates=predicates,
)
report_list.extend(_convert_report_to_list(report))
report_list.extend(list(report))
return report_list


Expand Down Expand Up @@ -224,7 +234,8 @@ def download_account_structure(api_client: AdWordsApiClient):
ad_group_attributes = get_ad_group_attributes(api_client, client_customer_id)
ad_data = get_ad_data(api_client, client_customer_id)

for ad_id, ad_data_dict in ad_data.items():
for ad_data_dict in ad_data:
ad_id = ad_data_dict['Ad ID']
campaign_id = ad_data_dict['Campaign ID']
ad_group_id = ad_data_dict['Ad group ID']
currency_code = client_customer['Currency Code']
Expand Down Expand Up @@ -272,10 +283,7 @@ def get_campaign_attributes(api_client: AdWordsApiClient, client_customer_id: in
'PAUSED',
'REMOVED']
})
report_list = _convert_report_to_list(report)

return {row['Campaign ID']: parse_labels(row['Labels']) for row in
report_list}
return {row['Campaign ID']: parse_labels(row['Labels']) for row in report}


def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
Expand All @@ -300,13 +308,11 @@ def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: in
'PAUSED',
'REMOVED']
})
report_list = _convert_report_to_list(report)

return {row['Ad group ID']: parse_labels(row['Labels']) for row in
report_list}
return {row['Ad group ID']: parse_labels(row['Labels']) for row in report}


def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> [{}]:
"""Downloads the ad data from the Google AdWords API for a given client_customer_id
https://developers.google.com/adwords/api/docs/appendix/reports/ad-performance-report
Expand All @@ -318,30 +324,42 @@ def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
A list of dictionaries, one per ad, each of the form {key: value}
"""
logging.info('get ad data for account {}'.format(client_customer_id))
ad_data = {}

api_client.SetClientCustomerId(client_customer_id)

predicates = [
{
'field': 'Status',
'operator': 'IN',
'values': ['ENABLED',
'PAUSED',
'DISABLED']
}
]

if config.ignore_removed_campaigns():
predicates.append({
'field': 'CampaignStatus',
'operator': 'NOT_EQUALS',
'values': 'REMOVED'
})

report = _download_adwords_report(api_client,
report_type='AD_PERFORMANCE_REPORT',
fields=['Id', 'AdGroupId', 'AdGroupName',
'CampaignId', 'CampaignName',
'Labels', 'Headline', 'AdType',
'Status'],
predicates={'field': 'Status',
'operator': 'IN',
'values': ['ENABLED',
'PAUSED',
'DISABLED']
})
report_list = _convert_report_to_list(report)
predicates=predicates)

for row in report_list:
ad_data = []
for row in report:
attributes = parse_labels(row['Labels'])
if row['Ad type'] is not None:
attributes = {**attributes, 'Ad type': row['Ad type']}
if row['Ad state'] is not None:
attributes = {**attributes, 'Ad state': row['Ad state']}
ad_data[row['Ad ID']] = {**row, 'attributes': attributes}
ad_data.append({**row, 'attributes': attributes})

return ad_data

Expand All @@ -350,7 +368,7 @@ def _download_adwords_report(api_client: AdWordsApiClient,
report_type: str,
fields: [str],
predicates: {},
current_date: datetime = None) -> []:
current_date: datetime = None) -> csv.DictReader:
"""Downloads a Google Ads report from the Google Ads API
Args:
Expand All @@ -371,7 +389,7 @@ def _download_adwords_report(api_client: AdWordsApiClient,
'reportName': '{}_#'.format(report_type),
'dateRangeType': 'CUSTOM_DATE',
'reportType': report_type,
'downloadFormat': 'TSV',
'downloadFormat': 'CSV',
'selector': {
'fields': fields,
'predicates': predicates
Expand All @@ -393,11 +411,14 @@ def _download_adwords_report(api_client: AdWordsApiClient,
while True:
retry_count += 1
try:
report = report_downloader.DownloadReportAsString(report_filter,
skip_report_header=False,
skip_column_header=False,
skip_report_summary=False)
return report
report = io.StringIO()
report_downloader.DownloadReport(report_filter,
output=report,
skip_report_header=True,
skip_column_header=False,
skip_report_summary=True)
report.seek(0)
return csv.DictReader(report)
except errors.AdWordsReportError as e:
if retry_count < config.max_retries():

Expand Down Expand Up @@ -435,7 +456,7 @@ def __init__(self, client_type=None, client_id=None, client_secret=None,
self.auth_uri = auth_uri
self.token_uri = token_uri

def Build(self):
def build(self):
"""Builds a client config dictionary used in the OAuth 2.0 flow."""
if all((self.client_type, self.client_id, self.client_secret,
self.auth_uri, self.token_uri)):
Expand All @@ -459,7 +480,7 @@ def refresh_oauth_token():
client_config = ClientConfigBuilder(
client_type=ClientConfigBuilder.CLIENT_TYPE_WEB, client_id=config.oauth2_client_id(),
client_secret=config.oauth2_client_secret())
flow = InstalledAppFlow.from_client_config(client_config.Build(),
flow = InstalledAppFlow.from_client_config(client_config.build(),
scopes=['https://www.googleapis.com/auth/adwords'])
flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'
authorize_url, _ = flow.authorization_url(prompt='consent')
Expand Down Expand Up @@ -493,26 +514,6 @@ def parse_labels(labels: str) -> {str: str}:
return labels


def _convert_report_to_list(report: str) -> [{}]:
"""Converts a Google AdWords report to a list of dictionaries
Args:
report: A Google AdWords report as a string
Returns:
A list containing dictionaries with the data from the report
"""
# Discard the first line as it only contains meta information.
# The last two lines only display summaries
rows = list(csv.reader(report.split('\n')[1:-2], dialect='excel-tab'))

# The second line holds the column names
keys = rows[0]

return [dict(zip(keys, row)) for row in rows[1:]]


def ensure_data_directory(relative_path: Path = None) -> Path:
"""Checks if a directory in the data dir path exists. Creates it if necessary
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='google-ads-performance-downloader',
version='3.0.0',
version='4.0.0',
description="Downloads data from the Google Adwords Api to local files",

install_requires=[
Expand Down

0 comments on commit 4cdcce9

Please sign in to comment.