From d229afb7494df3f82800eb2a6a6fab8f9823b952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chris=20Ch=C3=A1vez?= Date: Fri, 17 May 2024 14:21:11 -0500 Subject: [PATCH] fix: Issues when importing a large taxonomy (#189) * feat: Add elpased time to import taxonomy process * Refactor execute() of TagImportPlan() to avoid save logs in every action. This causes slow execution and memory overflow. --- openedx_learning/__init__.py | 2 +- .../core/tagging/import_export/actions.py | 12 ++---- .../core/tagging/import_export/api.py | 38 +++++++++++++++++-- .../core/tagging/import_export/import_plan.py | 8 +++- .../core/tagging/models/import_export.py | 15 +++++--- 5 files changed, 54 insertions(+), 21 deletions(-) diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index bfa3cbd9..ce311436 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.9.3" +__version__ = "0.9.4" diff --git a/openedx_tagging/core/tagging/import_export/actions.py b/openedx_tagging/core/tagging/import_export/actions.py index 70061a1b..255c0e47 100644 --- a/openedx_tagging/core/tagging/import_export/actions.py +++ b/openedx_tagging/core/tagging/import_export/actions.py @@ -244,16 +244,13 @@ def execute(self) -> None: """ Creates a Tag """ - parent = None - if self.tag.parent_id: - parent = self.taxonomy.tag_set.get(external_id=self.tag.parent_id) - taxonomy_tag = Tag( + Tag.objects.create( taxonomy=self.taxonomy, - parent=parent, + parent=self.taxonomy.tag_set.get(external_id=self.tag.parent_id) + if self.tag.parent_id is not None else None, value=self.tag.value, external_id=self.tag.id, ) - taxonomy_tag.save() class UpdateParentTag(ImportAction): @@ -409,8 +406,7 @@ def execute(self) -> None: Delete a tag """ try: - taxonomy_tag = self._get_tag() - taxonomy_tag.delete() + self._get_tag().delete() except Tag.DoesNotExist: pass # The tag may be already cascade deleted if the parent tag was deleted diff --git a/openedx_tagging/core/tagging/import_export/api.py b/openedx_tagging/core/tagging/import_export/api.py index 94d628c5..6d4e9bf4 100644 --- a/openedx_tagging/core/tagging/import_export/api.py +++ b/openedx_tagging/core/tagging/import_export/api.py @@ -44,6 +44,7 @@ """ from __future__ import annotations +import time from typing import BinaryIO from django.utils.translation import gettext as _ @@ -77,6 +78,7 @@ def import_tags( Set `plan_only` to True to only generate the actions and not execute them. """ + global_start_time = time.time() _import_validations(taxonomy) # Checks that exists only one task import in progress at a time per taxonomy @@ -92,7 +94,10 @@ def import_tags( task = TagImportTask.create(taxonomy) try: + # Start of parsing + # Get the parser and parse the file + start_time = time.time() task.log_parser_start() parser = get_parser(parser_format) tags, errors = parser.parse_import(file) @@ -102,23 +107,48 @@ def import_tags( task.handle_parser_errors(errors) return False, task, None - task.log_parser_end() + end_time = time.time() + elapsed_time = end_time - start_time + elapsed_time = round(elapsed_time, 5) + task.log_parser_end(elapsed_time) + + # End of parsing + + # Start of generate actions + + start_time = time.time() - # Generate actions task.log_start_planning() tag_import_plan = TagImportPlan(taxonomy) tag_import_plan.generate_actions(tags, replace) - task.log_plan(tag_import_plan) + + end_time = time.time() + elapsed_time = end_time - start_time + elapsed_time = round(elapsed_time, 5) + task.log_plan(tag_import_plan, elapsed_time) + + # End of generate actions if tag_import_plan.errors: task.handle_plan_errors() return False, task, tag_import_plan if not plan_only: + # Start of execute + start_time = time.time() task.log_start_execute() tag_import_plan.execute(task) + end_time = time.time() + elapsed_time = end_time - start_time + elapsed_time = round(elapsed_time, 5) + task.log_end_execute(elapsed_time) + # End of execute + + global_end_time = time.time() + global_elapsed_time = global_end_time - global_start_time + global_elapsed_time = round(global_elapsed_time, 5) - task.end_success() + task.end_success(global_elapsed_time) return True, task, tag_import_plan except Exception as exception: diff --git a/openedx_tagging/core/tagging/import_export/import_plan.py b/openedx_tagging/core/tagging/import_export/import_plan.py index 7d0a1d10..6502c2c1 100644 --- a/openedx_tagging/core/tagging/import_export/import_plan.py +++ b/openedx_tagging/core/tagging/import_export/import_plan.py @@ -207,8 +207,12 @@ def execute(self, task: TagImportTask | None = None): if self.errors: return for action in self.actions: + # Avoid to save each log because it is slow and costs a lot in memory + # It is necessary to save at the end. if task: - task.add_log(f"#{action.index}: {str(action)} [Started]") + task.add_log(f"#{action.index}: {str(action)} [Started]", save=False) action.execute() if task: - task.add_log("Success") + task.add_log("Success", save=False) + if task: + task.save() diff --git a/openedx_tagging/core/tagging/models/import_export.py b/openedx_tagging/core/tagging/models/import_export.py index 6c3390c0..1c682cc0 100644 --- a/openedx_tagging/core/tagging/models/import_export.py +++ b/openedx_tagging/core/tagging/models/import_export.py @@ -90,11 +90,11 @@ def log_parser_start(self): """ self.add_log(_("Starting to load data from file")) - def log_parser_end(self): + def log_parser_end(self, elapsed_time): """ Logs the parser finished event. """ - self.add_log(_("Load data finished")) + self.add_log(_("Load data finished. Time elapsed: ") + str(elapsed_time) + _(" seconds")) def handle_parser_errors(self, errors): """ @@ -113,11 +113,11 @@ def log_start_planning(self): self.status = TagImportTaskState.PLANNING.value self.save() - def log_plan(self, plan): + def log_plan(self, plan, elapsed_time): """ Logs the task plan. """ - self.add_log(_("Plan finished")) + self.add_log(_("Plan finished. Time elapsed: ") + str(elapsed_time) + _(" seconds")) plan_str = plan.plan() self.log += f"\n{plan_str}\n" self.save() @@ -138,10 +138,13 @@ def log_start_execute(self): self.status = TagImportTaskState.EXECUTING.value self.save() - def end_success(self): + def log_end_execute(self, elapsed_time): + self.add_log(_("Execute actions finished. Time elapsed: ") + str(elapsed_time) + _(" seconds")) + + def end_success(self, elapsed_time): """ Completes task execution with a log message, and moves the task status to SUCCESS. """ - self.add_log(_("Execution finished"), save=False) + self.add_log(_("Execution finished. Total time elapsed: ") + str(elapsed_time) + _("seconds"), save=False) self.status = TagImportTaskState.SUCCESS.value self.save()