01_google_search_console.py
import pickle
import pandas as pd
import time
import os
from datetime import datetime, timedelta
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from dotenv import load_dotenv
load_dotenv()
print('Script 1: Google Search Console export')
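# GSC_SITE_URL is read from a .env file and must match a verified Search Console
# property, e.g. "https://www.example.com/" or "sc-domain:example.com".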
timestr = time.strftime("%Y%m%d-%H%M%S")
SITE_URL = os.environ.get("GSC_SITE_URL")
print(SITE_URL)
print(timestr)
# There are only two OAuth scopes for the Google Search Console API.
# For the most part, all you will need is `.readonly`, but if you want to modify data in Google Search Console,
# you will need the second scope listed below.
# Read more: https://developers.google.com/webmaster-tools/search-console-api-original/v3/
OAUTH_SCOPE = ('https://www.googleapis.com/auth/webmasters.readonly', 'https://www.googleapis.com/auth/webmasters')
# Redirect URI for installed apps
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
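# Note: REDIRECT_URI is defined but never referenced below; run_console() handles
# the out-of-band redirect itself, so the constant is kept here for reference only.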
# This auth flow walks you through the web auth flow the first time you run the script and stores the credentials in a file.
# Every subsequent time you run the script, it will use the "pickled" credentials stored in config_file/credentials.pickle.
try:
    credentials = pickle.load(open("config_file/credentials.pickle", "rb"))
except (OSError, IOError):
    flow = InstalledAppFlow.from_client_secrets_file(
        "config_file/client_secret_555711575796-j821ki3bi9glb8tnp787i4r08b1od81i.apps.googleusercontent.com.json",
        scopes=OAUTH_SCOPE)
    credentials = flow.run_console()
    pickle.dump(credentials, open("config_file/credentials.pickle", "wb"))
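# To force a fresh OAuth consent screen, delete config_file/credentials.pickle and rerun.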
# Connect to Search Console Service using the credentials
webmasters_service = build('webmasters', 'v3', credentials=credentials)
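# The Search Analytics API caps each response at 25,000 rows, so each day is
# paged through with startRow until a response comes back with no 'rows' key.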
maxRows = 25000
i = 0
output_rows = []
# Manual dates
#start_date = datetime.strptime("2020-01-15", "%Y-%m-%d")
#end_date = datetime.strptime("2020-01-16", "%Y-%m-%d")
# Automatic dates
# To split the year into 4 chunks:
# 370 - 277
# 276 - 183
# 182 - 89
# 88 - 1
start_date = datetime.now() + timedelta(days=-10)
end_date = datetime.now() + timedelta(days=-3)
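# The window ends 3 days back, presumably to allow for the short delay before
# Search Console data is finalized.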
#print(start_date)
#print(end_date)
def date_range(start_date, end_date, delta=timedelta(days=1)):
    """
    The range is inclusive, so both start_date and end_date will be returned.

    Args:
        start_date: The datetime object representing the first day in the range.
        end_date: The datetime object representing the last day in the range.
        delta: A datetime.timedelta instance, specifying the step interval. Defaults to one day.

    Yields:
        Each datetime object in the range.
    """
    current_date = start_date
    while current_date <= end_date:
        yield current_date
        current_date += delta
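# Query one day at a time so each row can be tagged with its date; the pause
# between days is presumably there to stay well under the API's per-site query quota.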
for date in date_range(start_date, end_date):
    date = date.strftime("%Y-%m-%d")
    print(date)
    print('pausing 25 seconds')
    time.sleep(25)
    i = 0
    while True:
        request = {
            'startDate': date,
            'endDate': date,
            'dimensions': ["query", "page", "country", "device"],
            'searchType': "web",
            'rowLimit': maxRows,
            'startRow': i * maxRows
        }
        response = webmasters_service.searchanalytics().query(siteUrl=SITE_URL, body=request).execute()
        print()
        if response is None:
            print("there is no response")
            break
        if 'rows' not in response:
            print("no rows in response")
            break
        else:
            # 'keys' is ordered to match the 'dimensions' list in the request
            for row in response['rows']:
                keyword = row['keys'][0]
                page = row['keys'][1]
                country = row['keys'][2]
                device = row['keys'][3]
                output_row = [date, keyword, page, country, device, row['clicks'], row['impressions'], row['ctr'], row['position']]
                output_rows.append(output_row)
            i = i + 1
# Save the file with the date in the name
#root = 'output'
#df = pd.DataFrame(output_rows, columns=['date','query','page', 'country', 'device', 'clicks', 'impressions', 'ctr', 'avg_position'])
#df.to_csv(root + '/' + f"{timestr}-gsc.csv")
# Save the file without the date in the name
root = 'output_data/'
if not os.path.exists(root):
    os.makedirs(root)
df = pd.DataFrame(output_rows, columns=['date', 'query', 'page', 'country', 'device', 'clicks', 'impressions', 'ctr', 'avg_position'])
df.to_csv(root + '01_google_search_console.csv', index=False, sep=';')
# Hand off to the second script in the pipeline
#os.system('python3 2_google_search_console.py')
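# Usage sketch (assumes a config_file/ directory holding the client secret JSON
# referenced above, and a .env file defining GSC_SITE_URL):
#   python3 01_google_search_console.py
# Output lands in output_data/01_google_search_console.csv, semicolon-separated.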