diff --git a/.gitignore b/.gitignore index e68a888..94b0ec9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ *.egg-info/ __pycache__ .idea +.vscode .venv/ +build +dist diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c39c68..dda3e89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 4.0.0 (2019-07-05) + +- Compatible with specifications: a unique identifier is an Ad ID + Ad Group ID. +- Add option to ignore downloading of data related to removed campaigns + +**required changes** + +- The file format changed to `v5`. Adapt etl scripts that process the output data. + - Ad ID no longer unique in any files + - Ad performance datasets now include Ad Group Id + ## 3.0.0 (2019-04-13) - Change MARA_XXX variables to functions to delay importing of imports diff --git a/google_ads_downloader/config.py b/google_ads_downloader/config.py index 7d5a72c..cdc8c73 100644 --- a/google_ads_downloader/config.py +++ b/google_ads_downloader/config.py @@ -51,7 +51,7 @@ def redownload_window() -> str: def output_file_version() -> str: """A suffix that is added to output files, denoting a version of the data format""" - return 'v4' + return 'v5' def max_retries() -> int: @@ -62,3 +62,8 @@ def max_retries() -> int: def retry_backoff_factor() -> int: """How many seconds to wait between retries (is multiplied with retry count)""" return 5 + + +def ignore_removed_campaigns() -> bool: + """Whether to ignore campaigns with status 'REMOVED'""" + return False diff --git a/google_ads_downloader/downloader.py b/google_ads_downloader/downloader.py index ff3953c..daa0c82 100644 --- a/google_ads_downloader/downloader.py +++ b/google_ads_downloader/downloader.py @@ -7,6 +7,7 @@ import re import shutil import sys +import io import tempfile import json import time @@ -84,8 +85,8 @@ def _fetch_client_customers(self): def download_data(): """Creates an AdWordsApiClient and downloads the data""" - logger = logging.basicConfig(level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logging.info('Adwords API version: '+str(config.api_version())) @@ -100,22 +101,32 @@ def download_data_sets(api_client: AdWordsApiClient): api_client: AdWordsApiClient """ + + predicates = [{'field': 'Status', + 'operator': 'IN', + 'values': ['ENABLED', + 'PAUSED', + 'DISABLED'] + }, { + 'field': 'Impressions', + 'operator': 'GREATER_THAN', + 'values': [0] + }] + + if config.ignore_removed_campaigns(): + predicates.append({ + 'field': 'CampaignStatus', + 'operator': 'NOT_EQUALS', + 'values': 'REMOVED' + }) + download_performance(api_client, PerformanceReportType.AD_PERFORMANCE_REPORT, - fields=['Date', 'Id', 'Device', 'AdNetworkType2', + fields=['Date', 'Id', 'AdGroupId', 'Device', 'AdNetworkType2', 'ActiveViewImpressions', 'AveragePosition', 'Clicks', 'Conversions', 'ConversionValue', 'Cost', 'Impressions'], - predicates=[{'field': 'Status', - 'operator': 'IN', - 'values': ['ENABLED', - 'PAUSED', - 'DISABLED'] - }, { - 'field': 'Impressions', - 'operator': 'GREATER_THAN', - 'values': [0] - }] + predicates=predicates ) download_account_structure(api_client) @@ -132,7 +143,6 @@ def download_performance(api_client: AdWordsApiClient, performance_report_type: A PerformanceReportType object fields: A list of fields to be included in the report predicates: A list of filters for the report - redownload_window: The number of days the performance is redownloaded """ client_customer_ids = api_client.client_customers.keys() @@ -194,7 +204,7 @@ def get_performance_for_single_day(api_client: AdWordsApiClient, fields=fields, predicates=predicates, ) - report_list.extend(_convert_report_to_list(report)) + report_list.extend(list(report)) return report_list @@ -224,7 +234,8 @@ def download_account_structure(api_client: AdWordsApiClient): ad_group_attributes = get_ad_group_attributes(api_client, client_customer_id) ad_data = get_ad_data(api_client, client_customer_id) - for ad_id, ad_data_dict in ad_data.items(): + for ad_data_dict in ad_data: + ad_id = ad_data_dict['Ad ID'] campaign_id = ad_data_dict['Campaign ID'] ad_group_id = ad_data_dict['Ad group ID'] currency_code = client_customer['Currency Code'] @@ -272,10 +283,7 @@ def get_campaign_attributes(api_client: AdWordsApiClient, client_customer_id: in 'PAUSED', 'REMOVED'] }) - report_list = _convert_report_to_list(report) - - return {row['Campaign ID']: parse_labels(row['Labels']) for row in - report_list} + return {row['Campaign ID']: parse_labels(row['Labels']) for row in report} def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: int) -> {}: @@ -300,13 +308,11 @@ def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: in 'PAUSED', 'REMOVED'] }) - report_list = _convert_report_to_list(report) - return {row['Ad group ID']: parse_labels(row['Labels']) for row in - report_list} + return {row['Ad group ID']: parse_labels(row['Labels']) for row in report} -def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}: +def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> [{}]: """Downloads the ad data from the Google AdWords API for a given client_customer_id https://developers.google.com/adwords/api/docs/appendix/reports/ad-performance-report @@ -318,30 +324,42 @@ def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}: A dictionary of the form {ad_id: {key: value}} """ logging.info('get ad data for account {}'.format(client_customer_id)) - ad_data = {} api_client.SetClientCustomerId(client_customer_id) + + predicates = [ + { + 'field': 'Status', + 'operator': 'IN', + 'values': ['ENABLED', + 'PAUSED', + 'DISABLED'] + } + ] + + if config.ignore_removed_campaigns(): + predicates.append({ + 'field': 'CampaignStatus', + 'operator': 'NOT_EQUALS', + 'values': 'REMOVED' + }) + report = _download_adwords_report(api_client, report_type='AD_PERFORMANCE_REPORT', fields=['Id', 'AdGroupId', 'AdGroupName', 'CampaignId', 'CampaignName', 'Labels', 'Headline', 'AdType', 'Status'], - predicates={'field': 'Status', - 'operator': 'IN', - 'values': ['ENABLED', - 'PAUSED', - 'DISABLED'] - }) - report_list = _convert_report_to_list(report) + predicates=predicates) - for row in report_list: + ad_data = [] + for row in report: attributes = parse_labels(row['Labels']) if row['Ad type'] is not None: attributes = {**attributes, 'Ad type': row['Ad type']} if row['Ad state'] is not None: attributes = {**attributes, 'Ad state': row['Ad state']} - ad_data[row['Ad ID']] = {**row, 'attributes': attributes} + ad_data.append({**row, 'attributes': attributes}) return ad_data @@ -350,7 +368,7 @@ def _download_adwords_report(api_client: AdWordsApiClient, report_type: str, fields: [str], predicates: {}, - current_date: datetime = None) -> []: + current_date: datetime = None) -> csv.DictReader: """Downloads an Google Ads report from the Google Ads API Args: @@ -371,7 +389,7 @@ def _download_adwords_report(api_client: AdWordsApiClient, 'reportName': '{}_#'.format(report_type), 'dateRangeType': 'CUSTOM_DATE', 'reportType': report_type, - 'downloadFormat': 'TSV', + 'downloadFormat': 'CSV', 'selector': { 'fields': fields, 'predicates': predicates @@ -393,11 +411,14 @@ def _download_adwords_report(api_client: AdWordsApiClient, while True: retry_count += 1 try: - report = report_downloader.DownloadReportAsString(report_filter, - skip_report_header=False, - skip_column_header=False, - skip_report_summary=False) - return report + report = io.StringIO() + report_downloader.DownloadReport(report_filter, + output=report, + skip_report_header=True, + skip_column_header=False, + skip_report_summary=True) + report.seek(0) + return csv.DictReader(report) except errors.AdWordsReportError as e: if retry_count < config.max_retries(): @@ -435,7 +456,7 @@ def __init__(self, client_type=None, client_id=None, client_secret=None, self.auth_uri = auth_uri self.token_uri = token_uri - def Build(self): + def build(self): """Builds a client config dictionary used in the OAuth 2.0 flow.""" if all((self.client_type, self.client_id, self.client_secret, self.auth_uri, self.token_uri)): @@ -459,7 +480,7 @@ def refresh_oauth_token(): client_config = ClientConfigBuilder( client_type=ClientConfigBuilder.CLIENT_TYPE_WEB, client_id=config.oauth2_client_id(), client_secret=config.oauth2_client_secret()) - flow = InstalledAppFlow.from_client_config(client_config.Build(), + flow = InstalledAppFlow.from_client_config(client_config.build(), scopes=['https://www.googleapis.com/auth/adwords']) flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob' authorize_url, _ = flow.authorization_url(prompt='consent') @@ -493,26 +514,6 @@ def parse_labels(labels: str) -> {str: str}: return labels -def _convert_report_to_list(report: str) -> [{}]: - """Converts a Google AdWords report to a list of dictionaries - - Args: - report: A Google AdWords report as a string - - Returns: - A list containing dictionaries with the data from the report - - """ - # Discard the first line as it only contains meta information. - # The last two lines only display summaries - rows = list(csv.reader(report.split('\n')[1:-2], dialect='excel-tab')) - - # The second line holds the column names - keys = rows[0] - - return [dict(zip(keys, row)) for row in rows[1:]] - - def ensure_data_directory(relative_path: Path = None) -> Path: """Checks if a directory in the data dir path exists. Creates it if necessary diff --git a/setup.py b/setup.py index 7bfb4ef..768f03d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='google-ads-performance-downloader', - version='3.0.0', + version='4.0.0', description="Downloads data from the Google Adwords Api to local files", install_requires=[