From 70c63e3c38e64130e6de74171a35778be3ba2f33 Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:09:48 -0700 Subject: [PATCH 01/13] toDateTime --- feedVersionsOverlap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 868e36e..5c4add9 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -49,10 +49,9 @@ def findStartAndEndDates(updatedScheduledService): # convert an dictionary of strings to a dictionary of datetime objects def convertToDateTime(scheduled_service): - updatedScheduledService = {} for date in scheduled_service: - official_date = datetime.datetime.strptime(str(date), '%Y-%m-%d') + official_date = toDateTime(date) updatedScheduledService[official_date] = scheduled_service[date] return updatedScheduledService @@ -101,6 +100,8 @@ def writeToCSV (filename, status): headerRow = ['ID', 'currentSha1', 'nextSha1', 'originalStart', 'originalEnd', 'updatedStart', 'updatedEnd', 'overlapStart', 'overlapEnd', 'overlapDifference', 'gapStart', 'gapEnd', 'gapDifference', 'startDifference'] +def toDateTime(value): + return datetime.datetime.strptime(str(value), '%Y-%m-%d') with open(filename, 'w') as f: writer = csv.DictWriter(f, fieldnames=headerRow) writer.writeheader() From 78bb8ac8a096a1f331dcdacfc3d040745c8ab10b Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:09:56 -0700 Subject: [PATCH 02/13] use fetched_at --- feedVersionsOverlap.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 5c4add9..4041a29 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -69,7 +69,7 @@ def findAverageServiceHours(updatedScheduledService): # find and interpret schedule by # determining correct start and end dates # converting to datetime objects -def interpretSchedule(element): +def interpretSchedule(feedVersion, element): if 'data' in element and not 'error' in element['data']: sha1 = element['feed_version_sha1'] identification = element['id'] @@ -83,6 +83,10 @@ def interpretSchedule(element): updatedStart = cleanTails(updatedScheduledService, False) updatedEnd = cleanTails(updatedScheduledService, True) + fetchedAt = toDateTime(feedVersion['fetched_at'][:10]) + if (fetchedAt - updatedStart).days > 7: + print "\tset updatedStart %s to fetchedAt %s"%(updatedStart, fetchedAt) + updatedStart = fetchedAt rowInfo = { "ID": identification, @@ -91,6 +95,7 @@ def interpretSchedule(element): "originalEnd": end, "updatedStart": updatedStart, "updatedEnd": updatedEnd, + "fetchedAt": fetchedAt } if (sha1, id, updatedStart, updatedEnd): From 72354949461c8d780babdc67ff8c9f78e7f75ad3 Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:10:19 -0700 Subject: [PATCH 03/13] writeToCSV header --- feedVersionsOverlap.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 4041a29..9dd3896 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -101,14 +101,14 @@ def interpretSchedule(feedVersion, element): if (sha1, id, updatedStart, updatedEnd): return rowInfo -def writeToCSV (filename, status): - headerRow = ['ID', 'currentSha1', 'nextSha1', 'originalStart', 'originalEnd', 'updatedStart', 'updatedEnd', 'overlapStart', - 'overlapEnd', 'overlapDifference', 'gapStart', 'gapEnd', 'gapDifference', 'startDifference'] def toDateTime(value): return datetime.datetime.strptime(str(value), '%Y-%m-%d') + +def writeToCSV (filename, status, header=None): + header = header or status[0].keys() with open(filename, 'w') as f: - writer = csv.DictWriter(f, fieldnames=headerRow) + writer = csv.DictWriter(f, fieldnames=header) writer.writeheader() for elem in status: writer.writerow(elem) From 6a092ae7ec5510b9094fb0643c8cdc7f72b661ff Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:10:26 -0700 Subject: [PATCH 04/13] findOverlap2 --- feedVersionsOverlap.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 9dd3896..4a70b8b 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -115,6 +115,38 @@ def writeToCSV (filename, status, header=None): # find overlaps and gaps in feed versions +def findOverlap2(interpretedSchedule): + interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['fetchedAt'])) + status = [] + for current,next_ in zip(interpretedSchedule[:-1], interpretedSchedule[1:]): + print "\n\n" + print "current:" + print current + print "next:" + print next_ + + overlapDays = (current['updatedEnd'] - next_['updatedStart']).days + totalTime = (next_['updatedEnd'] - current['updatedStart']).days + overlapPercent = float(overlapDays) / float(totalTime) + + overlapObject = { + "currentSha1": current['currentSha1'], + "nextSha1": next_['currentSha1'], + "originalStart": current['originalStart'].strftime('%Y-%m-%d'), + "originalEnd": current['originalEnd'].strftime('%Y-%m-%d'), + "updatedStart": current['updatedStart'].strftime('%Y-%m-%d'), + "updatedEnd": current['updatedEnd'].strftime('%Y-%m-%d'), + "overlap": overlapDays, + "startDifference": (next_['updatedStart'] - current['updatedStart']).days, + "fetchedDifference": (next_['fetchedAt'] - current['fetchedAt']).days, + "overlapPercent": overlapPercent, + } + print "overlapObject:" + print overlapObject + status.append(overlapObject) + + return status + def findOverlap (interpretedSchedule): interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['updatedStart'], x['updatedEnd'])) From 839de79dad0734e9228ae88a10744e15d63eac4d Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:10:59 -0700 Subject: [PATCH 05/13] fetch feed versions --- feedVersionsOverlap.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 4a70b8b..65dfadc 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -251,16 +251,28 @@ def getFeedService (onestop_id): ('per_page', 'false') ) - reqService = requests.get('https://transit.land/api/v1/feed_version_infos/', params=params) - serviceJS = json.loads(reqService.text) + feedVersionInfos = requests.get('https://transit.land/api/v1/feed_version_infos/', params=params).json()['feed_version_infos'] interpretedSchedule = [] - - for element in serviceJS['feed_version_infos']: - schedule = interpretSchedule(element) - + for feedVersionInfo in feedVersionInfos: + sha1 = feedVersionInfo['feed_version_sha1'] + print "sha1: ", sha1 + feedVersion = requests.get('https://transit.land/api/v1/feed_versions/%s'%sha1).json() + if feedVersion is None: + print "no feed_version" + continue + if feedVersionInfo is None: + print "no feed_version_info" + continue + if feedVersion.get('tags') and feedVersion['tags'].get('gtfs_data_exchange'): + print "gtfs_data_exchange feed; skipping" + continue + schedule = interpretSchedule(feedVersion, feedVersionInfo) if schedule: interpretedSchedule.append(schedule) + else: + print "error processing schedule" + continue status, overlapAverage, gapAverage, startDifferenceAverage = findOverlap(interpretedSchedule) writeToCSV("%s.csv"%onestop_id, status) From 074c06865b1c2cd9ff8d5127a3b0bea1e75cc79f Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:11:39 -0700 Subject: [PATCH 06/13] calculate overlap percentage and fetched at difference average --- feedVersionsOverlap.py | 43 ++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 65dfadc..e61668a 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -113,7 +113,6 @@ def writeToCSV (filename, status, header=None): for elem in status: writer.writerow(elem) - # find overlaps and gaps in feed versions def findOverlap2(interpretedSchedule): interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['fetchedAt'])) @@ -149,7 +148,7 @@ def findOverlap2(interpretedSchedule): def findOverlap (interpretedSchedule): - interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['updatedStart'], x['updatedEnd'])) + interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['fetchedAt'])) currentIndex = 0 nextIndex = 1 @@ -245,6 +244,7 @@ def findOverlap (interpretedSchedule): # get feedversion with scheduled stops, and find overlap and gap averages for each feed def getFeedService (onestop_id): + print "===== %s ====="%(onestop_id) params = ( ('feed_onestop_id', onestop_id), ('type', 'FeedVersionInfoStatistics'), @@ -274,24 +274,35 @@ def getFeedService (onestop_id): print "error processing schedule" continue - status, overlapAverage, gapAverage, startDifferenceAverage = findOverlap(interpretedSchedule) - writeToCSV("%s.csv"%onestop_id, status) - with open('%s.json'%onestop_id, 'w') as f: - f.write(json.dumps(status, default=lambda x:str(x))) + overlaps = findOverlap2(interpretedSchedule) + overlapPercentAverage = sum(i['overlapPercent'] for i in overlaps) / float(len(overlaps)) + fetchedAtDifferenceAverage = sum(i['fetchedDifference'] for i in overlaps) / float(len(overlaps)) - averageOneStopInformation = { + return { 'onestop_id': onestop_id, - 'overlapAverage': overlapAverage, - 'gapAverage': gapAverage, - 'startDifferenceAverage': startDifferenceAverage + 'overlapPercentAverage': overlapPercentAverage, + 'fetchedAtDifferenceAverage': fetchedAtDifferenceAverage } - print overlapAverage - print gapAverage - print startDifferenceAverage - return averageOneStopInformation - - + # status, overlapAverage, gapAverage, startDifferenceAverage = findOverlap(interpretedSchedule) + # + # header = ['ID', 'currentSha1', 'nextSha1', 'originalStart', 'originalEnd', 'updatedStart', 'updatedEnd', 'overlapStart', + # 'overlapEnd', 'overlapDifference', 'gapStart', 'gapEnd', 'gapDifference', 'startDifference'] + # writeToCSV("%s.csv"%onestop_id, status, header=header) + # with open('%s.json'%onestop_id, 'w') as f: + # f.write(json.dumps(status, default=lambda x:str(x))) + # + # averageOneStopInformation = { + # 'onestop_id': onestop_id, + # 'overlapAverage': overlapAverage, + # 'gapAverage': gapAverage, + # 'startDifferenceAverage': startDifferenceAverage + # } + # + # print overlapAverage + # print gapAverage + # print startDifferenceAverage + # return averageOneStopInformation # call function with onestop_id as parameter def main(): From 64b941ececfda125c42b3aa7b74d165e0b348aba Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:11:47 -0700 Subject: [PATCH 07/13] use bbox --- feedVersionsOverlap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index e61668a..6ea0320 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -306,8 +306,8 @@ def getFeedService (onestop_id): # call function with onestop_id as parameter def main(): - per_page = int(sys.argv[1]) - feeds = requests.get('https://transit.land/api/v1/feeds', params={'per_page': per_page}).json()['feeds'] + params = {'per_page': 10, 'bbox': '-123.321533,36.826875,-120.786438,38.629745'} + feeds = requests.get('https://transit.land/api/v1/feeds', params=params).json()['feeds'] allFeedsInformation = [] for feed in feeds: print feed['onestop_id'] From 87138dc78706e8404d72ede7eff5fc72ea523064 Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:11:54 -0700 Subject: [PATCH 08/13] write csv --- feedVersionsOverlap.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 6ea0320..41753de 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -312,18 +312,9 @@ def main(): for feed in feeds: print feed['onestop_id'] allFeedsInformation.append(getFeedService(feed['onestop_id'])) - filename = 'allFeeds4.csv' - headerRow = ['onestop_id', 'overlapAverage', 'gapAverage', 'startDifferenceAverage'] - - with open(filename, 'w') as f: - writer = csv.DictWriter(f, fieldnames=headerRow) - writer.writeheader() - for elem in allFeedsInformation: - writer.writerow(elem) - - + writeToCSV(filename, allFeedsInformation) if __name__ == "__main__": - main() \ No newline at end of file + main() From 18e313a0c80bc4e5f5fa3d56c2e1240a61604b2c Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:20:22 -0700 Subject: [PATCH 09/13] apikey --- feedVersionsOverlap.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 41753de..77fc759 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -7,6 +7,7 @@ ## threshold for detecting a tail THRESHOLD = 0.15 +APIKEY = sys.argv[1] # remove tail end def cleanTails(updatedScheduledService, end): @@ -248,7 +249,8 @@ def getFeedService (onestop_id): params = ( ('feed_onestop_id', onestop_id), ('type', 'FeedVersionInfoStatistics'), - ('per_page', 'false') + ('per_page', 'false'), + ('apikey', APIKEY) ) feedVersionInfos = requests.get('https://transit.land/api/v1/feed_version_infos/', params=params).json()['feed_version_infos'] @@ -257,7 +259,8 @@ def getFeedService (onestop_id): for feedVersionInfo in feedVersionInfos: sha1 = feedVersionInfo['feed_version_sha1'] print "sha1: ", sha1 - feedVersion = requests.get('https://transit.land/api/v1/feed_versions/%s'%sha1).json() + params = {'apikey': APIKEY} + feedVersion = requests.get('https://transit.land/api/v1/feed_versions/%s'%sha1, params=params).json() if feedVersion is None: print "no feed_version" continue @@ -306,7 +309,7 @@ def getFeedService (onestop_id): # call function with onestop_id as parameter def main(): - params = {'per_page': 10, 'bbox': '-123.321533,36.826875,-120.786438,38.629745'} + params = {'per_page': 10, 'bbox': '-123.321533,36.826875,-120.786438,38.629745', 'apikey': APIKEY} feeds = requests.get('https://transit.land/api/v1/feeds', params=params).json()['feeds'] allFeedsInformation = [] for feed in feeds: From 3b4028c2daacc7bacad80957f7f7d6f75ef4243c Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:51:38 -0700 Subject: [PATCH 10/13] simple rate limit --- feedVersionsOverlap.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 77fc759..1ab2914 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -1,3 +1,4 @@ +import time import sys import requests import json @@ -253,6 +254,7 @@ def getFeedService (onestop_id): ('apikey', APIKEY) ) + time.sleep(0.125) feedVersionInfos = requests.get('https://transit.land/api/v1/feed_version_infos/', params=params).json()['feed_version_infos'] interpretedSchedule = [] @@ -260,6 +262,8 @@ def getFeedService (onestop_id): sha1 = feedVersionInfo['feed_version_sha1'] print "sha1: ", sha1 params = {'apikey': APIKEY} + + time.sleep(0.125) feedVersion = requests.get('https://transit.land/api/v1/feed_versions/%s'%sha1, params=params).json() if feedVersion is None: print "no feed_version" @@ -310,6 +314,8 @@ def getFeedService (onestop_id): # call function with onestop_id as parameter def main(): params = {'per_page': 10, 'bbox': '-123.321533,36.826875,-120.786438,38.629745', 'apikey': APIKEY} + + time.sleep(0.125) feeds = requests.get('https://transit.land/api/v1/feeds', params=params).json()['feeds'] allFeedsInformation = [] for feed in feeds: From abfedf665c51ca92d1516b97c141e1c631c753f5 Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 15:51:43 -0700 Subject: [PATCH 11/13] use 30 day threshold --- feedVersionsOverlap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 1ab2914..4007bca 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -86,7 +86,7 @@ def interpretSchedule(feedVersion, element): updatedEnd = cleanTails(updatedScheduledService, True) fetchedAt = toDateTime(feedVersion['fetched_at'][:10]) - if (fetchedAt - updatedStart).days > 7: + if (fetchedAt - updatedStart).days > 30: print "\tset updatedStart %s to fetchedAt %s"%(updatedStart, fetchedAt) updatedStart = fetchedAt From 830e1dee45d98077e54c6ca213c2796a06f64568 Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 16:38:42 -0700 Subject: [PATCH 12/13] fetch, then process, then compare --- feedVersionsOverlap.py | 73 ++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index 4007bca..ed8c74a 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -74,7 +74,7 @@ def findAverageServiceHours(updatedScheduledService): def interpretSchedule(feedVersion, element): if 'data' in element and not 'error' in element['data']: sha1 = element['feed_version_sha1'] - identification = element['id'] + print "interpret:", sha1 scheduled_service = element['data']['scheduled_service'] @@ -85,13 +85,25 @@ def interpretSchedule(feedVersion, element): updatedStart = cleanTails(updatedScheduledService, False) updatedEnd = cleanTails(updatedScheduledService, True) + if not feedVersion.get('fetched_at'): + print "\tno fetched_at, skipping" + return fetchedAt = toDateTime(feedVersion['fetched_at'][:10]) if (fetchedAt - updatedStart).days > 30: print "\tset updatedStart %s to fetchedAt %s"%(updatedStart, fetchedAt) updatedStart = fetchedAt + print "\tfetchedAt:", fetchedAt + print "\toriginalStart:", start + print "\tupdatedStart:", updatedStart + print "\tstart shift:", (updatedStart - start).days + print "\toriginalEnd:", end + print "\tupdatedEnd:", updatedEnd + print "\tend shift:", (updatedEnd - end).days + print "\toriginal duration:", (end - start).days + print "\tupdated duration:", (updatedEnd - updatedStart).days + rowInfo = { - "ID": identification, "currentSha1": sha1, "originalStart": start, "originalEnd": end, @@ -100,7 +112,7 @@ def interpretSchedule(feedVersion, element): "fetchedAt": fetchedAt } - if (sha1, id, updatedStart, updatedEnd): + if (sha1, updatedStart, updatedEnd): return rowInfo @@ -120,15 +132,18 @@ def findOverlap2(interpretedSchedule): interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['fetchedAt'])) status = [] for current,next_ in zip(interpretedSchedule[:-1], interpretedSchedule[1:]): - print "\n\n" - print "current:" + print "compare %s -> %s"%(current['currentSha1'], next_['currentSha1']) + print "\tcurrent:" print current - print "next:" + print "\tnext:" print next_ overlapDays = (current['updatedEnd'] - next_['updatedStart']).days totalTime = (next_['updatedEnd'] - current['updatedStart']).days overlapPercent = float(overlapDays) / float(totalTime) + print "\toverlapDays:", overlapDays + print "\ttotalTime:", totalTime + print "\toverlapPercent:", overlapPercent overlapObject = { "currentSha1": current['currentSha1'], @@ -142,8 +157,6 @@ def findOverlap2(interpretedSchedule): "fetchedDifference": (next_['fetchedAt'] - current['fetchedAt']).days, "overlapPercent": overlapPercent, } - print "overlapObject:" - print overlapObject status.append(overlapObject) return status @@ -176,7 +189,6 @@ def findOverlap (interpretedSchedule): # status.append("Overlap: " + str(difference) + " " + str(start) + " and " + str(end)) overlapObject = { - "ID": current['ID'], "currentSha1": current['currentSha1'], "nextSha1": next['currentSha1'], "originalStart": current['originalStart'].strftime('%Y-%m-%d'), @@ -203,7 +215,6 @@ def findOverlap (interpretedSchedule): # status.append("Gap: " + str(difference) + " " + str(start) + " and " + str(end)) gapObject = { - "ID": current['ID'], "currentSha1": current['currentSha1'], "nextSha1": next['currentSha1'], "originalStart": current['originalStart'].strftime('%Y-%m-%d'), @@ -245,7 +256,7 @@ def findOverlap (interpretedSchedule): return status, overlapAverage, gapAverage, startDifferenceAverage # get feedversion with scheduled stops, and find overlap and gap averages for each feed -def getFeedService (onestop_id): +def getFeedVersions (onestop_id): print "===== %s ====="%(onestop_id) params = ( ('feed_onestop_id', onestop_id), @@ -257,31 +268,43 @@ def getFeedService (onestop_id): time.sleep(0.125) feedVersionInfos = requests.get('https://transit.land/api/v1/feed_version_infos/', params=params).json()['feed_version_infos'] - interpretedSchedule = [] + data = [] for feedVersionInfo in feedVersionInfos: sha1 = feedVersionInfo['feed_version_sha1'] - print "sha1: ", sha1 + print "sha1:", sha1 params = {'apikey': APIKEY} time.sleep(0.125) feedVersion = requests.get('https://transit.land/api/v1/feed_versions/%s'%sha1, params=params).json() if feedVersion is None: - print "no feed_version" + print "\tno feed_version" continue if feedVersionInfo is None: - print "no feed_version_info" + print "\tno feed_version_info" continue if feedVersion.get('tags') and feedVersion['tags'].get('gtfs_data_exchange'): - print "gtfs_data_exchange feed; skipping" + print "\tgtfs_data_exchange feed; skipping" continue + data.append([feedVersion, feedVersionInfo]) + + return data + +def processFeed(onestop_id): + # Get feed_versions and feed_version_infos + fvs = getFeedVersions(onestop_id) + + # Interpret and adjust the schedules + interpretedSchedules = [] + for feedVersion, feedVersionInfo in fvs: schedule = interpretSchedule(feedVersion, feedVersionInfo) if schedule: - interpretedSchedule.append(schedule) + interpretedSchedules.append(schedule) else: - print "error processing schedule" + print "\terror processing schedule; skipping" continue - overlaps = findOverlap2(interpretedSchedule) + # Calculate overlap statistics + overlaps = findOverlap2(interpretedSchedules) overlapPercentAverage = sum(i['overlapPercent'] for i in overlaps) / float(len(overlaps)) fetchedAtDifferenceAverage = sum(i['fetchedDifference'] for i in overlaps) / float(len(overlaps)) @@ -290,7 +313,6 @@ def getFeedService (onestop_id): 'overlapPercentAverage': overlapPercentAverage, 'fetchedAtDifferenceAverage': fetchedAtDifferenceAverage } - # status, overlapAverage, gapAverage, startDifferenceAverage = findOverlap(interpretedSchedule) # # header = ['ID', 'currentSha1', 'nextSha1', 'originalStart', 'originalEnd', 'updatedStart', 'updatedEnd', 'overlapStart', @@ -311,16 +333,19 @@ def getFeedService (onestop_id): # print startDifferenceAverage # return averageOneStopInformation + # call function with onestop_id as parameter def main(): - params = {'per_page': 10, 'bbox': '-123.321533,36.826875,-120.786438,38.629745', 'apikey': APIKEY} - + params = { + 'per_page': 1, + 'bbox': '-123.321533,36.826875,-120.786438,38.629745', + 'apikey': APIKEY + } time.sleep(0.125) feeds = requests.get('https://transit.land/api/v1/feeds', params=params).json()['feeds'] allFeedsInformation = [] for feed in feeds: - print feed['onestop_id'] - allFeedsInformation.append(getFeedService(feed['onestop_id'])) + allFeedsInformation.append(processFeed(feed['onestop_id'])) filename = 'allFeeds4.csv' writeToCSV(filename, allFeedsInformation) From ef03305a91073ec199463e4cf73d26b3ee4a609e Mon Sep 17 00:00:00 2001 From: Ian Rees Date: Fri, 1 Sep 2017 17:01:40 -0700 Subject: [PATCH 13/13] Full run --- feedVersionsOverlap.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/feedVersionsOverlap.py b/feedVersionsOverlap.py index ed8c74a..48349e4 100644 --- a/feedVersionsOverlap.py +++ b/feedVersionsOverlap.py @@ -102,6 +102,7 @@ def interpretSchedule(feedVersion, element): print "\tend shift:", (updatedEnd - end).days print "\toriginal duration:", (end - start).days print "\tupdated duration:", (updatedEnd - updatedStart).days + print "" rowInfo = { "currentSha1": sha1, @@ -132,7 +133,7 @@ def findOverlap2(interpretedSchedule): interpretedSchedule = sorted(interpretedSchedule, key = lambda x: (x['fetchedAt'])) status = [] for current,next_ in zip(interpretedSchedule[:-1], interpretedSchedule[1:]): - print "compare %s -> %s"%(current['currentSha1'], next_['currentSha1']) + print "compare: %s -> %s"%(current['currentSha1'], next_['currentSha1']) print "\tcurrent:" print current print "\tnext:" @@ -144,6 +145,7 @@ def findOverlap2(interpretedSchedule): print "\toverlapDays:", overlapDays print "\ttotalTime:", totalTime print "\toverlapPercent:", overlapPercent + print "" overlapObject = { "currentSha1": current['currentSha1'], @@ -337,7 +339,7 @@ def processFeed(onestop_id): # call function with onestop_id as parameter def main(): params = { - 'per_page': 1, + 'per_page': 'false', 'bbox': '-123.321533,36.826875,-120.786438,38.629745', 'apikey': APIKEY }