-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
65 lines (50 loc) · 1.88 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pickle
import requests
import zipfile
import re
import xlsxwriter
def save_obj(obj, name):
    """
    This function serializes an object to disk as a pickle file.
    :param obj: object to serialize
    :param name: path of the pickle file without its extension; '.pkl' is appended.
    :return: -
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'wb') as pickle_file:
        # Always write with the newest protocol this interpreter supports.
        writer = pickle.Pickler(pickle_file, pickle.HIGHEST_PROTOCOL)
        writer.dump(obj)
def load_obj(name):
    """
    This function reads back an object stored in a pickle file.
    NOTE: unpickling can run arbitrary code, so only load files from a trusted source.
    :param name: path of the pickle file without its extension; '.pkl' is appended.
    :return: the object stored in the pickle file
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as pickle_file:
        loaded = pickle.load(pickle_file)
        return loaded
# Matches the id that follows one of the known URL markers ("/folders/",
# "/file/d/", "id="), each in plain and percent-encoded form.
# Written as raw strings so "\w" reaches the regex engine untouched instead of
# being parsed as an (invalid) string escape sequence.
__fid_ptrn = re.compile(
    r"(?<=/folders/)([\w-]+)"
    r"|(?<=%2Ffolders%2F)([\w-]+)"
    r"|(?<=/file/d/)([\w-]+)"
    r"|(?<=%2Ffile%2Fd%2F)([\w-]+)"
    r"|(?<=id=)([\w-]+)"
    r"|(?<=id%3D)([\w-]+)"
)
__gdrive_url = "https://docs.google.com/uc?export=download"


def download_file_from_google_drive(url, destination):
    """
    This function downloads a file from google drive and stores it locally.
    :param url: google drive url that contains the file id (after '/file/d/',
                '/folders/' or 'id=', plain or percent-encoded).
    :param destination: path of the local file the downloaded bytes are written to.
    :return: -
    :raises ValueError: if no file id can be found in the url.
    :raises requests.HTTPError: if the download request answers with an HTTP error
                                status (nothing is written to destination then).
    """
    m = __fid_ptrn.search(url)
    if m is None:
        raise ValueError(f'Could not identify google drive file id in {url}.')
    # The markers are look-behinds, so the whole match is exactly the id.
    file_id = m.group()

    # The session is a context manager: its pooled connections are released
    # even when the download fails half-way.
    with requests.Session() as session:
        response = session.get(__gdrive_url, params={'id': file_id}, stream=True)
        try:
            token = _get_confirm_token(response)
            if token:
                # A 'download_warning' cookie carries a token that has to be sent
                # back as 'confirm'; the first response is no longer needed.
                response.close()
                params = {'id': file_id, 'confirm': token}
                response = session.get(__gdrive_url, params=params, stream=True)
            # Fail loudly instead of saving an HTTP error page as the file.
            response.raise_for_status()
            _save_response_content(response, destination)
        finally:
            # Streamed responses keep their connection until explicitly closed.
            response.close()
def _get_confirm_token(response):
    """
    This function looks for the download confirmation token in the cookies of a response.
    :param response: object whose ``cookies`` attribute offers ``items()`` yielding (name, value) pairs.
    :return: value of the first cookie whose name starts with 'download_warning', None if there is none.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    # next() with a default stops at the first hit and yields None when nothing matches.
    return next(warning_values, None)
def _save_response_content(response, destination):
    """
    This function streams the body of a response into a local file.
    :param response: object offering ``iter_content(chunk_size)`` that yields byte chunks.
    :param destination: path of the file to write; it is created or overwritten.
    :return: -
    """
    chunk_size = 32768
    with open(destination, "wb") as out_file:
        for piece in response.iter_content(chunk_size):
            if not piece:
                # empty keep-alive chunks carry no data
                continue
            out_file.write(piece)
def unzip_file(file_path, target_dir):
    """
    This function extracts every member of a zip archive into a directory.
    :param file_path: path of the zip archive to read.
    :param target_dir: directory the archive members are extracted into.
    :return: -
    """
    with zipfile.ZipFile(file_path, mode='r') as archive:
        archive.extractall(path=target_dir)