
Commit

Rework logging management (#261)
* Add keyword parameter for query_tweets to specify log level

* Add CLI argument for loglevel (WIP)

* Specify logger name

Co-authored-by: Ahmet Taspinar <[email protected]>
LinqLover and taspinar authored Jul 22, 2020
1 parent 90e50f2 commit b0dcc3c
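
Net effect of the commit: the shared twitterscraper/ts_logger.py module is deleted, and each module instead fetches the package logger by the name 'twitterscraper', so callers tune verbosity through the standard logging API. A minimal consumer-side sketch of the intended usage after this change (the query string and limit below are illustrative placeholders, not part of the commit):

    import logging

    from twitterscraper import query_tweets

    # The library now logs through the named logger 'twitterscraper',
    # so it is configured like any stdlib logger:
    logging.basicConfig()
    logging.getLogger('twitterscraper').setLevel(logging.DEBUG)

    tweets = query_tweets('#python', limit=10)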
Showing 4 changed files with 29 additions and 25 deletions.
twitterscraper/__init__.py — 1 change: 0 additions & 1 deletion
@@ -15,4 +15,3 @@
 from twitterscraper.query import query_user_info
 from twitterscraper.tweet import Tweet
 from twitterscraper.user import User
-from twitterscraper.ts_logger import logger as ts_logger
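
This removes the ts_logger re-export from the package namespace, so "from twitterscraper import ts_logger" no longer works. A hedged sketch of the replacement, assuming the caller only needed the logger object:

    import logging

    # Same logger object the library writes to after this commit:
    ts_logger = logging.getLogger('twitterscraper')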
twitterscraper/main.py — 26 changes: 20 additions & 6 deletions
@@ -1,17 +1,19 @@
 """
 This is a command line application that allows you to scrape twitter!
 """
-import csv
-import json
 import argparse
 import collections
+import csv
 import datetime as dt
+import json
+import logging
 from os.path import isfile
 from pprint import pprint
-from twitterscraper.query import query_tweets
-from twitterscraper.query import query_tweets_from_user
-from twitterscraper.query import query_user_info
-from twitterscraper.ts_logger import logger
+
+from twitterscraper.query import (query_tweets, query_tweets_from_user,
+                                  query_user_info)
+
+logger = logging.getLogger('twitterscraper')
 
 
 class JSONEncoder(json.JSONEncoder):
@@ -39,6 +41,12 @@ def valid_date(s):
         msg = "Not a valid date: '{0}'.".format(s)
         raise argparse.ArgumentTypeError(msg)
 
+def valid_loglevel(level):
+    try:
+        return logging._checkLevel(level)
+    except (ValueError, TypeError) as ex:
+        raise argparse.ArgumentTypeError(ex)
+
 def main():
     try:
         parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
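
valid_loglevel delegates to logging._checkLevel, a private stdlib helper that maps a level name or number to its numeric value. A quick sketch of the behaviour the validator inherits (private API, so details may vary across Python versions):

    import logging

    logging._checkLevel('DEBUG')    # -> 10
    logging._checkLevel(42)         # -> 42; any int is passed through
    # logging._checkLevel('TRACE')  # raises ValueError (unknown level name)
    # logging._checkLevel(None)     # raises TypeError (not an int or str)

Wrapping both exceptions in ArgumentTypeError turns a bad --loglevel value into a normal argparse usage error instead of a traceback.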
@@ -91,9 +99,15 @@ def main():
         parser.add_argument("-p", "--poolsize", type=int, default=20, help="Specify the number of parallel process you want to run. \n"
                             "Default value is set to 20. \nYou can change this number if you have more computing power available. \n"
                             "Set to 1 if you dont want to run any parallel processes.", metavar='\b')
+        parser.add_argument("--loglevel", type=valid_loglevel, default=logging.INFO, help="Specify the level for logging. \n"
+                            "Must be a valid value from https://docs.python.org/2/library/logging.html#logging-levels. \n"
+                            "Default log level is set to INFO.")
         parser.add_argument("-dp", "--disableproxy", action="store_true", default=False, help="Set this flag if you want to disable use of proxy servers when scrapping tweets and user profiles. \n")
         args = parser.parse_args()
 
+        logging.basicConfig()
+        logger.setLevel(args.loglevel)
+
         if isfile(args.output) and not args.dump and not args.overwrite:
             logger.error("Output file already exists! Aborting.")
             exit(-1)
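The two added lines near parse_args() divide the work: logging.basicConfig() installs a handler on the root logger (records from 'twitterscraper' propagate up to it), while setLevel applies the user-chosen threshold to the named logger only. A standalone sketch of that interaction:

    import logging

    logging.basicConfig()  # root handler formats and prints propagated records
    logger = logging.getLogger('twitterscraper')
    logger.setLevel(logging.INFO)

    logger.debug('suppressed: below the INFO threshold')
    logger.info('printed as INFO:twitterscraper:...')
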
twitterscraper/query.py — 15 changes: 9 additions & 6 deletions
@@ -1,20 +1,23 @@
 from __future__ import division
 
+import datetime as dt
 import json
-import requests
-import urllib
 import logging
 import random
-import datetime as dt
 
+import sys
+import urllib
 from functools import partial
+from itertools import cycle
+
+import requests
 from billiard.pool import Pool
 from bs4 import BeautifulSoup
-from itertools import cycle
 
 from twitterscraper.tweet import Tweet
-from twitterscraper.ts_logger import logger
 from twitterscraper.user import User
 
+logger = logging.getLogger('twitterscraper')
+
 #from fake_useragent import UserAgent
 #ua = UserAgent()
 #HEADER = {'User-Agent': ua.random}
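Because logging.getLogger returns the same object for the same name, main.py and query.py now share one logger without importing anything from each other, which is what the "Specify logger name" commit message refers to:

    import logging

    a = logging.getLogger('twitterscraper')  # as in main.py
    b = logging.getLogger('twitterscraper')  # as in query.py
    assert a is b  # one shared logger; no ts_logger module needed
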
twitterscraper/ts_logger.py — 12 changes: 0 additions & 12 deletions

This file was deleted.
