-
Notifications
You must be signed in to change notification settings - Fork 0
/
clery_grabber.py
62 lines (58 loc) · 2.8 KB
/
clery_grabber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# REVISION HISTORY:
# Mar 2018: Adam Ross Nelson - Added sleep to give page load time.
# Jan 2018: Adam Ross Nelson - GitHub ReBuild
# Aug 2017: Adam Ross Nelson - Initial Build
#
# Quickly grabs Clery data files from
# https://ope.ed.gov/campussafety/#/datafile/list
#
# Requires selenium and geckodriver-v0.19.1-win64.zip from
# https://github.com/mozilla/geckodriver/releases
#
# Requires geckodriver from
# https://github.com/mozilla/geckodriver/releases
#
# Use Stata to build a research-ready panel dataset; see:
# https://github.com/adamrossnelson/CleryData
#
# Intended for use with IPEDS panel data files built from
# https://github.com/adamrossnelson/StataIPEDSAll
import sys
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
# Start a Firefox session through geckodriver. Everything below needs the
# `browser` handle, so if the driver cannot start we print setup guidance
# and stop.
try:
    browser = webdriver.Firefox()
except WebDriverException:
    print('\n\n There was an error. Verify Firefox is properly installed.')
    print(' Verify geckodriver installation: \n https://github.com/mozilla/geckodriver/releases')
    print(' Windows: place version of geckodriver.exe appropriate for your system in a sytem path location')
    print(' Report issues at: https://github.com/adamrossnelson/CleryData/issues', end='\n\n')
    # Exit with a non-zero status so a shell or calling script can tell the
    # run failed (a bare sys.exit() reports success).
    sys.exit(1)
# Visit the Clery data website.
# At time of last successful test, the Clery data website was:
# https://ope.ed.gov/campussafety/#/datafile/list
try:
    browser.get('https://ope.ed.gov/campussafety/#/datafile/list')
except WebDriverException:
    # Report errors and next steps if website not available.
    print('\n\n There was an error. Verify web address is stil current: \n https://ope.ed.gov/campussafety/#/datafile/list')
    print(' Verify working internet connection.')
    print(' If web address out of date, report issues at: https://github.com/adamrossnelson/CleryData/issues', end='\n\n')
    # The page never loaded, so there is nothing for the user to do in the
    # browser: close Firefox and stop geckodriver instead of leaving them
    # running after the script ends.
    browser.quit()
    # Non-zero status signals the failure to the caller.
    sys.exit(1)
# Wait for the download links to appear, then click each one.
#
# The lookup is retried a bounded number of times: if the selector never
# matches (for example because the site's markup changed) the script reports
# the problem once and exits instead of polling forever.
MAX_POLL_ATTEMPTS = 30
POLL_INTERVAL_SECONDS = 1

for _ in range(MAX_POLL_ATTEMPTS):
    # Sleep before each lookup to give the page time to load and to reduce
    # demand on the server.
    sleep(POLL_INTERVAL_SECONDS)
    # find_elements(By.CSS_SELECTOR, ...) replaces the
    # find_elements_by_css_selector helper, which Selenium 4 removed.
    elems = browser.find_elements(By.CSS_SELECTOR, "ul.file-list li:first-child a")
    if elems:
        break
else:
    # Every attempt came back empty.
    print('\n\n There was an error. Possible change in css selector syntax.')
    print(' Report issues at: \n https://github.com/adamrossnelson/CleryData/issues', end='\n\n')
    # The browser is intentionally left open here, matching the success path.
    sys.exit(1)

print('\n\n Succes: Visit Firefox window to complete file downloads.')
print(' Thanks for using: https://github.com/adamrossnelson/CleryData.', end='\n\n')
for elem in elems:
    # Each click opens Firefox's download dialog, which the user completes.
    # See also: https://stackoverflow.com/questions/1176348/access-to-file-download-dialog-in-firefox
    elem.click()