diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..6274d00 Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md index d892a54..6e60ee1 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pip3 install axeman ``` $ axeman -h usage: axeman [-h] [-f LOG_FILE] [-s START_OFFSET] [-l] [-u CTL_URL] - [-o OUTPUT_DIR] [-v] [-c CONCURRENCY_COUNT] + [-z CTL_OFFSET] [-o OUTPUT_DIR] [-v] [-c CONCURRENCY_COUNT] Pull down certificate transparency list information @@ -29,9 +29,11 @@ optional arguments: -s START_OFFSET Skip N number of lists before starting -l List all available certificate lists -u CTL_URL Retrieve this CTL only + -z CTL_OFFSET The CTL offset to start at -o OUTPUT_DIR The output directory to store certificates in -v Print out verbose/debug info -c CONCURRENCY_COUNT The number of concurrent downloads to run at a time + ``` ## Demo diff --git a/axeman/__init__.py b/axeman/__init__.py index d107145..1702bd2 100644 --- a/axeman/__init__.py +++ b/axeman/__init__.py @@ -1,4 +1,4 @@ -__version__ = '1.14' +__version__ = '1.15' if __name__ == "__main__": from .core import main diff --git a/axeman/certlib.py b/axeman/certlib.py index 93abcdc..710f8bf 100644 --- a/axeman/certlib.py +++ b/axeman/certlib.py @@ -6,11 +6,11 @@ from OpenSSL import crypto -CTL_LISTS = 'https://www.gstatic.com/ct/log_list/log_list.json' +CTL_LISTS = 'https://www.gstatic.com/ct/log_list/v2/log_list.json' -CTL_INFO = "http://{}/ct/v1/get-sth" +CTL_INFO = "{}/ct/v1/get-sth" -DOWNLOAD = "http://{}/ct/v1/get-entries?start={}&end={}" +DOWNLOAD = "{}/ct/v1/get-entries?start={}&end={}" from construct import Struct, Byte, Int16ub, Int64ub, Enum, Bytes, Int24ub, this, GreedyBytes, GreedyRange, Terminated, Embedded @@ -42,13 +42,15 @@ async def retrieve_all_ctls(session=None): async with session.get(CTL_LISTS) as response: ctl_lists = await response.json() - logs = ctl_lists['logs'] + operators = ctl_lists['operators'] - for log in logs: - if log['url'].endswith('/'): - log['url'] = log['url'][:-1] - owner = _get_owner(log, ctl_lists['operators']) - log['operated_by'] = owner + logs = list() + for operator in operators: + for log in operator['logs']: + if log['url'].endswith('/'): + log['url'] = log['url'][:-1] + log['operated_by'] = operator['name'] + logs.append(log) return logs diff --git a/axeman/core.py b/axeman/core.py index d67a4c2..9cb11dd 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -6,6 +6,8 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) import sys +import time +import json import math import base64 import os @@ -37,6 +39,7 @@ async def download_worker(session, log_info, work_deque, download_queue): logging.debug("[{}] Queueing up blocks {}-{}...".format(log_info['url'], start, end)) + for x in range(3): try: async with session.get(certlib.DOWNLOAD.format(log_info['url'], start, end)) as response: @@ -74,7 +77,7 @@ async def queue_monitor(log_info, work_deque, download_results_queue): await asyncio.sleep(2) async def retrieve_certificates(loop, url=None, ctl_offset=0, output_directory='/tmp/', concurrency_count=DOWNLOAD_CONCURRENCY): - async with aiohttp.ClientSession(loop=loop, conn_timeout=10) as session: + async with aiohttp.ClientSession(loop=loop, timeout = aiohttp.ClientTimeout(total=10)) as session: ctl_logs = await certlib.retrieve_all_ctls(session) if url: @@ -143,9 +146,8 @@ async def processing_coro(download_results_queue, output_dir="/tmp"): logging.debug("Got a chunk of {}. Mapping into process pool".format(process_pool.pool_workers)) - for entry in entries_iter: - csv_storage = '{}/certificates/{}'.format(output_dir, entry['log_info']['url'].replace('/', '_')) + csv_storage = '{}/certificates/{}'.format(output_dir, entry['log_info']['url'].replace('https://', '')) if not os.path.exists(csv_storage): print("[{}] Making dir...".format(os.getpid())) os.makedirs(csv_storage) @@ -236,20 +238,24 @@ def process_worker(result_info): return True async def get_certs_and_print(): - async with aiohttp.ClientSession(conn_timeout=5) as session: + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5)) as session: ctls = await certlib.retrieve_all_ctls(session) - print("Found {} CTLs...".format(len(ctls))) + output = [] for log in ctls: try: log_info = await certlib.retrieve_log_info(log, session) except: continue - print(log['description']) - print(" \- URL: {}".format(log['url'])) - print(" \- Owner: {}".format(log_info['operated_by'])) - print(" \- Cert Count: {}".format(locale.format("%d", log_info['tree_size']-1, grouping=True))) - print(" \- Max Block Size: {}\n".format(log_info['block_size'])) + output.append({ + "description": log['description'], + "url": log['url'], + "owner": log_info['operated_by'], + "cert_count": log_info['tree_size']-1, + "max_block_size": log_info['block_size'] + }) + + print(json.dumps(output, indent=4)) def main(): loop = asyncio.get_event_loop() @@ -272,7 +278,8 @@ def main(): parser.add_argument('-v', dest="verbose", action="store_true", help="Print out verbose/debug info") - parser.add_argument('-c', dest='concurrency_count', action='store', default=50, type=int, help="The number of concurrent downloads to run at a time") + parser.add_argument('-c', dest='concurrency_count', action='store', default=DOWNLOAD_CONCURRENCY, type=int, + help="The number of concurrent downloads to run at a time, default "+str(DOWNLOAD_CONCURRENCY)) args = parser.parse_args()