diff --git a/disqus/management/commands/disqus_export.py b/disqus/management/commands/disqus_export.py index f7f3f4a..916effd 100644 --- a/disqus/management/commands/disqus_export.py +++ b/disqus/management/commands/disqus_export.py @@ -1,9 +1,12 @@ from optparse import make_option -import os.path +import os +import sys +import urllib2 from django.conf import settings from django.contrib import comments from django.contrib.sites.models import Site +from django.core.exceptions import ObjectDoesNotExist from django.core.management.base import NoArgsCommand from django.utils import simplejson as json @@ -18,6 +21,15 @@ class Command(NoArgsCommand): make_option('-s', '--state-file', action="store", dest="state_file", help="Saves the state of the export in the given file " + "and auto-resumes from this file if possible."), + make_option('-j', '--jump-store', action="store_true", dest="jump_store", + help="If --state-file is specified then skip the first " + "comment in the state file. Used to get around corrupt comments."), + make_option('-c', '--continue-on-error', action="store_true", dest="continue_on_error", + help="If an error is encountered print a warning and continue. " + + "Default behaviour is to quit."), + make_option('-l', '--log-file', action="store", dest="log_file", + help="Log file - if specified details of data errors " + + "will be stored here."), ) help = 'Export comments from contrib.comments to DISQUS' requires_model_validation = False @@ -56,26 +68,31 @@ def handle(self, **options): verbosity = int(options.get('verbosity')) dry_run = bool(options.get('dry_run')) state_file = options.get('state_file') + jump_store = bool(options.get('jump_store')) + continue_on_error = bool(options.get('continue_on_error')) + log_file = options.get('log_file') + if not log_file: + log_file = os.devnull last_exported_id = None if state_file is not None and os.path.exists(state_file): last_exported_id = self._get_last_state(state_file) + if jump_store: + last_exported_id += 1 - comments = self._get_comments_to_export(last_exported_id) - comments_count = comments.count() + exp_comments = self._get_comments_to_export(last_exported_id) + comments_count = exp_comments.count() if verbosity >= 1: print "Exporting %d comment(s)" % comments_count - - # if this is a dry run, we output the comments and exit if dry_run: - print comments - return + print "DRY RUN" + # if no comments were found we also exit if not comments_count: return - # Get a list of all forums for an API key. Each API key can have - # multiple forums associated. This application only supports the one + # Get a list of all forums for an API key. Each API key can have + # multiple forums associated. This application only supports the one # set in the DISQUS_WEBSITE_SHORTNAME variable forum_list = client.get_forum_list(user_api_key=settings.DISQUS_API_KEY) try: @@ -91,43 +108,99 @@ def handle(self, **options): user_api_key=settings.DISQUS_API_KEY, forum_id=forum['id']) - for comment in comments: - if verbosity >= 1: - print "Exporting comment '%s'" % comment - - # Try to find a thread with the comments URL. - url = 'http://%s%s' % ( - current_site.domain, - comment.content_object.get_absolute_url()) - thread = client.get_thread_by_url( - url=url, - forum_api_key=forum_api_key) - - # if no thread with the URL could be found, we create a new one. - # to do this, we first need to create the thread and then - # update the thread with a URL. - if not thread: - thread = client.thread_by_identifier( - forum_api_key=forum_api_key, - identifier=unicode(comment.content_object), - title=unicode(comment.content_object), - )['thread'] - client.update_thread( - forum_api_key=forum_api_key, - thread_id=thread['id'], - url=url) - - # name and email are optional in contrib.comments but required - # in DISQUS. If they are not set, dummy values will be used - client.create_post( - forum_api_key=forum_api_key, - thread_id=thread['id'], - message=comment.comment.encode('utf-8'), - author_name=comment.userinfo.get('name', - 'nobody').encode('utf-8'), - author_email=comment.userinfo.get('email', - 'nobody@example.org'), - author_url=comment.userinfo.get('url', ''), - created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) - if state_file is not None: - self._save_state(state_file, comment.pk) + with open(log_file, 'a') as log_file_fd: + for i, comment in enumerate(exp_comments): + if verbosity >= 1: + print "Exporting comment %s/%s (%s%%)" % \ + (i+1, comments_count, round((float(i)/comments_count)*100, 2)), + + comment_str = None + try: + # Try to find a thread with the comments URL. + url_path = comment.content_object.get_absolute_url() + comment_str = str(comment).replace('\n', '').replace('\r', '') + except (AttributeError, ObjectDoesNotExist), e: + output = [] + if verbosity >= 1: + print + line_1 = 'ERROR (comment pk: %s): %s' % (comment.pk, e) + print line_1 + output.append(line_1) + + if comment_str is not None: + output.append('Comment contents: "%s"' % (str(comment)[:30],)) + if comment.content_object is not None: + model = comment.content_object.__class__ + else: + model = comment.content_object + output.append(' Comment pk: %s. Content object: %s (model: %s - pk: %s)' % \ + (comment.pk, comment.content_object, model, comment.object_pk,)) + if verbosity >= 1: + # we earlier printed the first line to output regardless of verbosity + print '\n '.join(output[1:]) + if log_file: + log_file_fd.write('\n '.join(output)) + log_file_fd.write('\n') + if continue_on_error: + continue + else: + sys.exit(1) + + if verbosity >= 1: + print ": '%s'" % (comment_str,) + + if dry_run: + self._save_state(state_file, comment.pk) + continue + + url = 'http://%s%s' % ( + current_site.domain, + url_path) + thread = client.get_thread_by_url( + url=url, + forum_api_key=forum_api_key) + + # if no thread with the URL could be found, we create a new one. + # to do this, we first need to create the thread and then + # update the thread with a URL. + if not thread: + thread = client.thread_by_identifier( + forum_api_key=forum_api_key, + identifier=unicode(comment.content_object), + title=unicode(comment.content_object), + )['thread'] + client.update_thread( + forum_api_key=forum_api_key, + thread_id=thread['id'], + url=url) + + # name and email are optional in contrib.comments but required + # in DISQUS. If they are not set, dummy values will be used + try: + client.create_post( + forum_api_key=forum_api_key, + thread_id=thread['id'], + message=comment.comment.encode('utf-8'), + author_name=comment.userinfo.get('name', + 'nobody').encode('utf-8'), + author_email=comment.userinfo.get('email', + 'nobody@example.org'), + author_url=comment.userinfo.get('url', ''), + created_at=comment.submit_date.strftime('%Y-%m-%dT%H:%M')) + except urllib2.HTTPError, e: + if verbosity == 0: + sys.exit(1) + print e + print dir(e) + print 'HTTP: args: ', e.args + print 'HTTP: code:', e.code + print 'HTTP: errno:', e.errno + print 'HTTP: geturl:', e.geturl() + print 'HTTP: headers:', e.headers + print 'HTTP: msg:', e.msg + print 'HTTP: strerror:', e.strerror + print 'HTTP: url:', e.url + sys.exit(1) + + if state_file is not None: + self._save_state(state_file, comment.pk)