From b221304c73a9dce06e70a14e981fedef4bf6035c Mon Sep 17 00:00:00 2001 From: Tom Shortall Date: Sat, 24 Sep 2011 12:17:26 +0100 Subject: [PATCH 1/3] add error handling options, enhanced output add -j option to skip past the first comment stored (to get around broken data) add -c option to continue on errors rename a variable to avoid conflict with model name give more informative output including progress report give more informative error reporting give more informative output on http errors --- disqus/management/commands/disqus_export.py | 91 ++++++++++++++++----- 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/disqus/management/commands/disqus_export.py b/disqus/management/commands/disqus_export.py index f7f3f4a..a11ed68 100644 --- a/disqus/management/commands/disqus_export.py +++ b/disqus/management/commands/disqus_export.py @@ -1,9 +1,12 @@ from optparse import make_option import os.path +import sys +import urllib2 from django.conf import settings from django.contrib import comments from django.contrib.sites.models import Site +from django.core.exceptions import ObjectDoesNotExist from django.core.management.base import NoArgsCommand from django.utils import simplejson as json @@ -18,6 +21,12 @@ class Command(NoArgsCommand): make_option('-s', '--state-file', action="store", dest="state_file", help="Saves the state of the export in the given file " + "and auto-resumes from this file if possible."), + make_option('-j', '--jump-store', action="store_true", dest="jump_store", + help="If --state-file is specified then skip the first " + "comment in the state file. Used to get around corrupt comments."), + make_option('-c', '--continue-on-error', action="store_true", dest="continue_on_error", + help="If an error is encountered print a warning and continue. 
" + + "Default behaviour is to quit."), ) help = 'Export comments from contrib.comments to DISQUS' requires_model_validation = False @@ -56,13 +65,17 @@ def handle(self, **options): verbosity = int(options.get('verbosity')) dry_run = bool(options.get('dry_run')) state_file = options.get('state_file') + jump_store = bool(options.get('jump_store')) + continue_on_error = bool(options.get('continue_on_error')) last_exported_id = None if state_file is not None and os.path.exists(state_file): last_exported_id = self._get_last_state(state_file) + if jump_store: + last_exported_id += 1 - comments = self._get_comments_to_export(last_exported_id) - comments_count = comments.count() + exp_comments = self._get_comments_to_export(last_exported_id) + comments_count = exp_comments.count() if verbosity >= 1: print "Exporting %d comment(s)" % comments_count @@ -74,8 +87,8 @@ def handle(self, **options): if not comments_count: return - # Get a list of all forums for an API key. Each API key can have - # multiple forums associated. This application only supports the one + # Get a list of all forums for an API key. Each API key can have + # multiple forums associated. This application only supports the one # set in the DISQUS_WEBSITE_SHORTNAME variable forum_list = client.get_forum_list(user_api_key=settings.DISQUS_API_KEY) try: @@ -91,20 +104,44 @@ def handle(self, **options): user_api_key=settings.DISQUS_API_KEY, forum_id=forum['id']) - for comment in comments: + for i, comment in enumerate(exp_comments): if verbosity >= 1: - print "Exporting comment '%s'" % comment + print "Exporting comment %s/%s (%s%%)" % \ + (i+1, comments_count, round((float(i)/comments_count)*100, 2)), + + comment_str = None + try: + # Try to find a thread with the comments URL. 
+ url_path = comment.content_object.get_absolute_url() + comment_str = str(comment).replace('\n', '').replace('\r', '') + except (AttributeError, ObjectDoesNotExist), e: + print + print 'ERROR: %s' % e + if comment_str is not None: + print ' Comment contents: "%s"' % (str(comment)[:30],) + if comment.content_object is not None: + model = comment.content_object.__class__ + else: + model = comment.content_object + print ' Comment pk: %s. Content object: %s (model: %s - pk: %s)' % \ + (comment.pk, comment.content_object, model, comment.object_pk,) + if continue_on_error: + continue + else: + sys.exit(1) + + if verbosity >= 1: + print ": '%s'" % (comment_str,) - # Try to find a thread with the comments URL. url = 'http://%s%s' % ( current_site.domain, - comment.content_object.get_absolute_url()) + url_path) thread = client.get_thread_by_url( url=url, forum_api_key=forum_api_key) # if no thread with the URL could be found, we create a new one. - # to do this, we first need to create the thread and then + # to do this, we first need to create the thread and then # update the thread with a URL. if not thread: thread = client.thread_by_identifier( @@ -119,15 +156,31 @@ def handle(self, **options): # name and email are optional in contrib.comments but required # in DISQUS. 
If they are not set, dummy values will be used - client.create_post( - forum_api_key=forum_api_key, - thread_id=thread['id'], - message=comment.comment.encode('utf-8'), - author_name=comment.userinfo.get('name', - 'nobody').encode('utf-8'), - author_email=comment.userinfo.get('email', - 'nobody@example.org'), - author_url=comment.userinfo.get('url', ''), - created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) + try: + client.create_post( + forum_api_key=forum_api_key, + thread_id=thread['id'], + message=comment.comment.encode('utf-8'), + author_name=comment.userinfo.get('name', + 'nobody').encode('utf-8'), + author_email=comment.userinfo.get('email', + 'nobody@example.org'), + author_url=comment.userinfo.get('url', ''), + created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) + except urllib2.HTTPError, e: + if verbosity == 0: + sys.exit(1) + print e + print dir(e) + print 'HTTP: args: ', e.args + print 'HTTP: code:', e.code + print 'HTTP: errno:', e.errno + print 'HTTP: geturl:', e.geturl() + print 'HTTP: headers:', e.headers + print 'HTTP: msg:', e.msg + print 'HTTP: strerror:', e.strerror + print 'HTTP: url:', e.url + sys.exit(1) + if state_file is not None: self._save_state(state_file, comment.pk) From 7391b157e1581c1f4f46ac887e996b13e2d115b5 Mon Sep 17 00:00:00 2001 From: Tom Shortall Date: Sat, 24 Sep 2011 13:26:26 +0100 Subject: [PATCH 2/3] add log to file option and make dry run more useful add option to specify a log file to which all errors will be output --dry-run now will do full data processing but stop short of uploading data --- disqus/management/commands/disqus_export.py | 180 +++++++++++--------- 1 file changed, 99 insertions(+), 81 deletions(-) diff --git a/disqus/management/commands/disqus_export.py b/disqus/management/commands/disqus_export.py index a11ed68..59edd62 100644 --- a/disqus/management/commands/disqus_export.py +++ b/disqus/management/commands/disqus_export.py @@ -27,6 +27,9 @@ class Command(NoArgsCommand): make_option('-c', 
'--continue-on-error', action="store_true", dest="continue_on_error", help="If an error is encountered print a warning and continue. " + "Default behaviour is to quit."), + make_option('-l', '--log-file', action="store", dest="log_file", + help="Log file - if specified details of data errors " + + "will be stored here."), ) help = 'Export comments from contrib.comments to DISQUS' requires_model_validation = False @@ -67,6 +70,7 @@ def handle(self, **options): state_file = options.get('state_file') jump_store = bool(options.get('jump_store')) continue_on_error = bool(options.get('continue_on_error')) + log_file = options.get('log_file') last_exported_id = None if state_file is not None and os.path.exists(state_file): @@ -78,11 +82,9 @@ def handle(self, **options): comments_count = exp_comments.count() if verbosity >= 1: print "Exporting %d comment(s)" % comments_count - - # if this is a dry run, we output the comments and exit if dry_run: - print comments - return + print "DRY RUN" + # if no comments were found we also exit if not comments_count: return @@ -104,83 +106,99 @@ def handle(self, **options): user_api_key=settings.DISQUS_API_KEY, forum_id=forum['id']) - for i, comment in enumerate(exp_comments): - if verbosity >= 1: - print "Exporting comment %s/%s (%s%%)" % \ - (i+1, comments_count, round((float(i)/comments_count)*100, 2)), - - comment_str = None - try: - # Try to find a thread with the comments URL. - url_path = comment.content_object.get_absolute_url() - comment_str = str(comment).replace('\n', '').replace('\r', '') - except (AttributeError, ObjectDoesNotExist), e: - print - print 'ERROR: %s' % e - if comment_str is not None: - print ' Comment contents: "%s"' % (str(comment)[:30],) - if comment.content_object is not None: - model = comment.content_object.__class__ - else: - model = comment.content_object - print ' Comment pk: %s. 
Content object: %s (model: %s - pk: %s)' % \ - (comment.pk, comment.content_object, model, comment.object_pk,) - if continue_on_error: + with open(log_file, 'a') as log_file_fd: + for i, comment in enumerate(exp_comments): + if verbosity >= 1: + print "Exporting comment %s/%s (%s%%)" % \ + (i+1, comments_count, round((float(i)/comments_count)*100, 2)), + + comment_str = None + try: + # Try to find a thread with the comments URL. + url_path = comment.content_object.get_absolute_url() + comment_str = str(comment).replace('\n', '').replace('\r', '') + except (AttributeError, ObjectDoesNotExist), e: + output = [] + if verbosity >= 1: + print + line_1 = 'ERROR (comment pk: %s): %s' % (comment.pk, e) + print line_1 + output.append(line_1) + + if comment_str is not None: + output.append('Comment contents: "%s"' % (str(comment)[:30],)) + if comment.content_object is not None: + model = comment.content_object.__class__ + else: + model = comment.content_object + output.append(' Comment pk: %s. Content object: %s (model: %s - pk: %s)' % \ + (comment.pk, comment.content_object, model, comment.object_pk,)) + if verbosity >= 1: + # we earlier printed the first line to output regardless of verbosity + print '\n '.join(output[1:]) + if log_file: + log_file_fd.write('\n '.join(output)) + log_file_fd.write('\n') + if continue_on_error: + continue + else: + sys.exit(1) + + if verbosity >= 1: + print ": '%s'" % (comment_str,) + + if dry_run: + self._save_state(state_file, comment.pk) continue - else: + + url = 'http://%s%s' % ( + current_site.domain, + url_path) + thread = client.get_thread_by_url( + url=url, + forum_api_key=forum_api_key) + + # if no thread with the URL could be found, we create a new one. + # to do this, we first need to create the thread and then + # update the thread with a URL. 
+ if not thread: + thread = client.thread_by_identifier( + forum_api_key=forum_api_key, + identifier=unicode(comment.content_object), + title=unicode(comment.content_object), + )['thread'] + client.update_thread( + forum_api_key=forum_api_key, + thread_id=thread['id'], + url=url) + + # name and email are optional in contrib.comments but required + # in DISQUS. If they are not set, dummy values will be used + try: + client.create_post( + forum_api_key=forum_api_key, + thread_id=thread['id'], + message=comment.comment.encode('utf-8'), + author_name=comment.userinfo.get('name', + 'nobody').encode('utf-8'), + author_email=comment.userinfo.get('email', + 'nobody@example.org'), + author_url=comment.userinfo.get('url', ''), + created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) + except urllib2.HTTPError, e: + if verbosity == 0: + sys.exit(1) + print e + print dir(e) + print 'HTTP: args: ', e.args + print 'HTTP: code:', e.code + print 'HTTP: errno:', e.errno + print 'HTTP: geturl:', e.geturl() + print 'HTTP: headers:', e.headers + print 'HTTP: msg:', e.msg + print 'HTTP: strerror:', e.strerror + print 'HTTP: url:', e.url sys.exit(1) - if verbosity >= 1: - print ": '%s'" % (comment_str,) - - url = 'http://%s%s' % ( - current_site.domain, - url_path) - thread = client.get_thread_by_url( - url=url, - forum_api_key=forum_api_key) - - # if no thread with the URL could be found, we create a new one. - # to do this, we first need to create the thread and then - # update the thread with a URL. - if not thread: - thread = client.thread_by_identifier( - forum_api_key=forum_api_key, - identifier=unicode(comment.content_object), - title=unicode(comment.content_object), - )['thread'] - client.update_thread( - forum_api_key=forum_api_key, - thread_id=thread['id'], - url=url) - - # name and email are optional in contrib.comments but required - # in DISQUS. 
If they are not set, dummy values will be used - try: - client.create_post( - forum_api_key=forum_api_key, - thread_id=thread['id'], - message=comment.comment.encode('utf-8'), - author_name=comment.userinfo.get('name', - 'nobody').encode('utf-8'), - author_email=comment.userinfo.get('email', - 'nobody@example.org'), - author_url=comment.userinfo.get('url', ''), - created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) - except urllib2.HTTPError, e: - if verbosity == 0: - sys.exit(1) - print e - print dir(e) - print 'HTTP: args: ', e.args - print 'HTTP: code:', e.code - print 'HTTP: errno:', e.errno - print 'HTTP: geturl:', e.geturl() - print 'HTTP: headers:', e.headers - print 'HTTP: msg:', e.msg - print 'HTTP: strerror:', e.strerror - print 'HTTP: url:', e.url - sys.exit(1) - - if state_file is not None: - self._save_state(state_file, comment.pk) + if state_file is not None: + self._save_state(state_file, comment.pk) From 4ec1ca517eee6a451f243a19dff372db0198807f Mon Sep 17 00:00:00 2001 From: Tom Shortall Date: Mon, 26 Sep 2011 11:47:46 +0100 Subject: [PATCH 3/3] fix bug when log file wasn't specified --- disqus/management/commands/disqus_export.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/disqus/management/commands/disqus_export.py b/disqus/management/commands/disqus_export.py index 59edd62..916effd 100644 --- a/disqus/management/commands/disqus_export.py +++ b/disqus/management/commands/disqus_export.py @@ -1,5 +1,5 @@ from optparse import make_option -import os.path +import os import sys import urllib2 @@ -71,6 +71,8 @@ def handle(self, **options): jump_store = bool(options.get('jump_store')) continue_on_error = bool(options.get('continue_on_error')) log_file = options.get('log_file') + if not log_file: + log_file = os.devnull last_exported_id = None if state_file is not None and os.path.exists(state_file):