-
Notifications
You must be signed in to change notification settings - Fork 0
/
update_data.py
102 lines (88 loc) · 3.59 KB
/
update_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import gspread
import logging
import numpy as np
import os, sys
import pandas as pd
import requests
import subprocess
from oauth2client.service_account import ServiceAccountCredentials
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def read_existing_data(file_path):
"""Reads existing data from a CSV file to compare with newly fetched data."""
if os.path.exists(file_path):
logging.info("Existing data file found. Reading data...")
e_df = pd.read_csv(file_path)
logging.info(f"Data read successfully.")
return e_df
else:
logging.info("No existing data file found.")
return pd.DataFrame()
def download_sheet(sheet_id, range_name):
"""Downloads data from Google Sheets and returns a DataFrame."""
try:
# Define the scope
scope = ['https://www.googleapis.com/auth/spreadsheets']
# Add credentials to the account
creds = ServiceAccountCredentials.from_json_keyfile_name('omoku-analysis-cred_key.json', scope)
# Authorize the clientsheet
client = gspread.authorize(creds)
# Get the instance of the Spreadsheet
sheet = client.open_by_key(sheet_id)
# Get the sheet by name
worksheet = sheet.get_worksheet(0)
# Get all records of the data
data = worksheet.get_all_records()
# Convert to DataFrame
df = pd.DataFrame(data).astype(str)
df.replace('', np.nan, inplace=True)
df = df.astype({'Power_time': 'float', 'Outages':'float'})
logging.info(f"New data downloaded successfully.")
return df
except gspread.exceptions.APIError as e:
logging.error(f"API error occurred: {e}")
raise
except requests.exceptions.RequestException as e:
logging.error(f"Request failed: {e}")
raise
def save_to_csv(df, file_path):
if df.empty:
logging.error("No data to save: DataFrame is empty.")
raise ValueError("Empty DataFrame")
try:
df.to_csv(file_path, index=False)
logging.info(f"Data saved successfully.")
except Exception as e:
logging.error(f"Failed to save data: {e}")
raise
def git_commit_push():
"""Sets git configurations, commits, and pushes updated CSV file to GitHub."""
try:
subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True)
subprocess.run(['git', 'config', '--global', 'user.email', '[email protected]'], check=True)
subprocess.run(['git', 'add', 'omoku_data.csv'], check=True)
subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True)
subprocess.run(['git', 'push'], check=True)
logging.info(f"Data updated successfully.")
except subprocess.CalledProcessError as e:
if "nothing to commit" in str(e.stderr):
logging.info("No changes to commit.")
else:
logging.error(f"Failed to commit or push changes: {e}")
raise
if __name__ == "__main__":
try:
SHEET_ID = '1dVa6SGm1j-z20NUDUlWSJgQffXzvJZ_a33wT_O5EOUk'
RANGE_NAME = 'data'
FILE_PATH = 'omoku_data.csv'
existing_data = read_existing_data(FILE_PATH)
new_data = download_sheet(SHEET_ID, RANGE_NAME)
if new_data.equals(existing_data):
logging.info("No new entry for new data. Nothing to update.")
sys.exit(0)
else:
save_to_csv(new_data, FILE_PATH)
git_commit_push()
except Exception as e:
logging.critical(f"An unexpected error occurred: {e}")
raise