feat: add features to support new content libraries

This commit adds a number of features needed to accomodate content libraries. These are all collected together because this work was done while iteratively making more and more of libraries functional, and it was difficult to properly separate out later. This bumps the version to 0.5.0. New publishing API functions: * Learning Packages * get_learning_package * get_learning_package_by_key, * update_learning_package * Publishable Entities * get_publishable_entity and * get_publishable_entity_by_key * Draft/Publishing * get_last_publish * get_all_drafts * get_entities_with_unpublished_changes * get_entities_with_unpublished_deletes * get_published_version * set_draft_version * soft_delete_draft * reset_drafts_to_published New Components API functions: * create_next_version * get_component_by_key * component_exists_by_key * get_components PublishableEntityMixin improvements: * New property methods: latest, has_unpublished_changes * Existing methods were improved to use model-cached values. Encoding fixes: * collations for multiple fields were changed to be case insensitive in order to better support simple text searches. Model optimizations: * created the WithRelationsManager to more easily define querysets that involve adding select_related fields. This was used to elininate a number of n+1 queries that came up during libraries development. * PublishableEntityVersion.version_num was reduced to a 4-byte PositiveIntegerField instead of using the 8-byte default for openedx_learning apps (it's assumed we will never make more than 2 billion versions of a particular PublishableEntity). This was done to help reduce the size of indexes. Typing: * type annotations were added to many functions and test functions.
openedx · Jan 31, 2024 · 2ec2918 · 2ec2918
1 parent 61a5902
commit 2ec2918
Show file tree

Hide file tree

Showing 20 changed files with 1,315 additions and 156 deletions.
diff --git a/.gitignore b/.gitignore
@@ -69,3 +69,6 @@ venv/
 
 # Media files (for uploads)
 media/
+
+# Media files generated during test runs
+test_media/
diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py
@@ -13,7 +13,7 @@
 has really happened between what's currently stored/published for a particular
 item and the new value we want to set? For RawContent that's easy, because we
 have actual hashes of the data. But it's not clear how that would work for
-something like an ComponentVersion. We'd have to have some kind of mechanism where every 
+something like an ComponentVersion. We'd have to have some kind of mechanism where every
 pp that wants to attach data gets to answer the question of "has anything
 changed?" in order to decide if we really make a new ComponentVersion or not.
 """
@@ -35,7 +35,7 @@
 SUPPORTED_TYPES = ["problem", "video", "html"]
 logger = logging.getLogger(__name__)
 
- 
+
 class Command(BaseCommand):
     help = "Load sample Component data from course export"
 
@@ -95,7 +95,7 @@ def load_course_data(self, learning_package_key):
                 self.import_block_type(block_type, now) #, publish_log_entry)
 
             publishing_api.publish_all_drafts(
-                self.learning_package.id, 
+                self.learning_package.id,
                 message="Initial Import from load_components script"
             )
 
@@ -117,7 +117,7 @@ def create_content(self, static_local_path, now, component_version):
             return  # Might as well bail if we can't find the file.
 
         raw_content, _created = contents_api.get_or_create_raw_content(
-            learning_package_id=self.learning_package.id,
+            self.learning_package.id,
             data_bytes=data_bytes,
             mime_type=mime_type,
             created=now,
@@ -153,10 +153,10 @@ def import_block_type(self, block_type, now): # , publish_log_entry):
                 logger.error(f"Parse error for {xml_file_path}: {err}")
                 components_skipped += 1
                 continue
-            
+
             display_name = block_root.attrib.get("display_name", "")
             _component, component_version = components_api.create_component_and_version(
-                learning_package_id=self.learning_package.id,
+                self.learning_package.id,
                 namespace=namespace,
                 type=block_type,
                 local_key=local_key,
@@ -168,7 +168,7 @@ def import_block_type(self, block_type, now): # , publish_log_entry):
             # Create the RawContent entry for the raw data...
             data_bytes = xml_file_path.read_bytes()
             text_content, _created = contents_api.get_or_create_text_content_from_bytes(
-                learning_package_id=self.learning_package.id,
+                self.learning_package.id,
                 data_bytes=data_bytes,
                 mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
                 created=now,

diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.4.4"
+__version__ = "0.5.0"
diff --git a/openedx_learning/core/components/api.py b/openedx_learning/core/components/api.py
@@ -15,27 +15,28 @@
 from datetime import datetime
 from pathlib import Path
 
-from django.db.models import Q
+from django.db.models import Q, QuerySet
 from django.db.transaction import atomic
 
-from ..publishing.api import create_publishable_entity, create_publishable_entity_version
+from ..publishing import api as publishing_api
 from .models import Component, ComponentVersion, ComponentVersionRawContent
 
 
 def create_component(
     learning_package_id: int,
+    /,
     namespace: str,
     type: str,  # pylint: disable=redefined-builtin
     local_key: str,
     created: datetime,
     created_by: int | None,
-):
+) -> Component:
     """
     Create a new Component (an entity like a Problem or Video)
     """
     key = f"{namespace}:{type}@{local_key}"
     with atomic():
-        publishable_entity = create_publishable_entity(
+        publishable_entity = publishing_api.create_publishable_entity(
             learning_package_id, key, created, created_by
         )
         component = Component.objects.create(
@@ -50,6 +51,7 @@ def create_component(
 
 def create_component_version(
     component_pk: int,
+    /,
     version_num: int,
     title: str,
     created: datetime,
@@ -59,8 +61,8 @@ def create_component_version(
     Create a new ComponentVersion
     """
     with atomic():
-        publishable_entity_version = create_publishable_entity_version(
-            entity_id=component_pk,
+        publishable_entity_version = publishing_api.create_publishable_entity_version(
+            component_pk,
             version_num=version_num,
             title=title,
             created=created,
@@ -73,15 +75,100 @@ def create_component_version(
     return component_version
 
 
+def create_next_version(
+    component_pk: int,
+    /,
+    title: str,
+    content_to_replace: dict[str, int | None],
+    created: datetime,
+    created_by: int | None = None,
+) -> ComponentVersion:
+    """
+    Create a new ComponentVersion based on the most recent version.
+
+    A very common pattern for making a new ComponentVersion is going to be "make
+    it just like the last version, except changing these one or two things".
+    Before calling this, you should create any new contents via the contents
+    API, since ``content_to_replace`` needs RawContent IDs for the values.
+
+    The ``content_to_replace`` dict is a mapping of strings representing the
+    local path/key for a file, to ``RawContent.id`` values. Using a `None` for
+    a value in this dict means to delete that key in the next version.
+
+    It is okay to mark entries for deletion that don't exist. For instance, if a
+    version has ``a.txt`` and ``b.txt``, sending a ``content_to_replace`` value
+    of ``{"a.txt": None, "c.txt": None}`` will remove ``a.txt`` from the next
+    version, leave ``b.txt`` alone, and will not error–even though there is no
+    ``c.txt`` in the previous version. This is to make it a little more
+    convenient to remove paths (e.g. due to deprecation) without having to
+    always check for its existence first.
+
+    TODO: Have to add learning_downloadable info to this when it comes time to
+          support static asset download.
+    """
+    # This needs to grab the highest version_num for this Publishable Entity.
+    # This will often be the Draft version, but not always. For instance, if
+    # an entity was soft-deleted, the draft would be None, but the version_num
+    # should pick up from the last edited version. Likewise, a Draft might get
+    # reverted to an earlier version, but we want the latest version_num when
+    # creating the next version.
+    component = Component.objects.get(pk=component_pk)
+    last_version = component.versioning.latest
+    if last_version is None:
+        next_version_num = 1
+    else:
+        next_version_num = last_version.version_num + 1
+
+    with atomic():
+        publishable_entity_version = publishing_api.create_publishable_entity_version(
+            component_pk,
+            version_num=next_version_num,
+            title=title,
+            created=created,
+            created_by=created_by,
+        )
+        component_version = ComponentVersion.objects.create(
+            publishable_entity_version=publishable_entity_version,
+            component_id=component_pk,
+        )
+        # First copy the new stuff over...
+        for key, raw_content_pk in content_to_replace.items():
+            # If the raw_content_pk is None, it means we want to remove the
+            # content represented by our key from the next version. Otherwise,
+            # we add our key->raw_content_pk mapping to the next version.
+            if raw_content_pk is not None:
+                ComponentVersionRawContent.objects.create(
+                    raw_content_id=raw_content_pk,
+                    component_version=component_version,
+                    key=key,
+                    learner_downloadable=False,
+                )
+        # Now copy any old associations that existed, as long as they aren't
+        # in conflict with the new stuff or marked for deletion.
+        last_version_content_mapping = ComponentVersionRawContent.objects \
+                                                                 .filter(component_version=last_version)
+        for cvrc in last_version_content_mapping:
+            if cvrc.key not in content_to_replace:
+                ComponentVersionRawContent.objects.create(
+                    raw_content_id=cvrc.raw_content_id,
+                    component_version=component_version,
+                    key=cvrc.key,
+                    learner_downloadable=cvrc.learner_downloadable,
+                )
+
+        return component_version
+
+
 def create_component_and_version(
     learning_package_id: int,
+    /,
     namespace: str,
     type: str,  # pylint: disable=redefined-builtin
     local_key: str,
     title: str,
     created: datetime,
-    created_by: int | None,
-):
+    created_by: int | None = None,
+) -> tuple[Component, ComponentVersion]:
     """
     Create a Component and associated ComponentVersion atomically
     """
@@ -99,8 +186,98 @@ def create_component_and_version(
         return (component, component_version)
 
 
-def get_component_by_pk(component_pk: int) -> Component:
-    return Component.objects.get(pk=component_pk)
+def get_component(component_pk: int, /) -> Component:
+    """
+    Get Component by its primary key.
+
+    This is the same as the PublishableEntity's ID primary key.
+    """
+    return Component.with_publishing_relations.get(pk=component_pk)
+
+
+def get_component_by_key(
+    learning_package_id: int,
+    /,
+    namespace: str,
+    type: str,  # pylint: disable=redefined-builtin
+    local_key: str,
+) -> Component:
+    """
+    Get a Component by its unique (namespace, type, local_key) tuple.
+    """
+    return Component.with_publishing_relations \
+                    .get(
+                        learning_package_id=learning_package_id,
+                        namespace=namespace,
+                        type=type,
+                        local_key=local_key,
+                    )
+
+
+def component_exists_by_key(
+    learning_package_id: int,
+    /,
+    namespace: str,
+    type: str,  # pylint: disable=redefined-builtin
+    local_key: str
+) -> bool:
+    """
+    Return True/False for whether a Component exists.
+
+    Note that a Component still exists even if it's been soft-deleted (there's
+    no current Draft version for it), or if it's been unpublished.
+    """
+    try:
+        _component = Component.objects.only('pk').get(
+            learning_package_id=learning_package_id,
+            namespace=namespace,
+            type=type,
+            local_key=local_key,
+        )
+        return True
+    except Component.DoesNotExist:
+        return False
+
+
+def get_components(
+    learning_package_id: int,
+    /,
+    draft: bool | None = None,
+    published: bool | None = None,
+    namespace: str | None = None,
+    types: list[str] | None = None,
+    draft_title: str | None = None,
+    published_title: str | None = None,
+) -> QuerySet[Component]:
+    """
+    Fetch a QuerySet of Components for a LearningPackage using various filters.
+
+    This method will pre-load all the relations that we need in order to get
+    info from the Component's draft and published versions, since we'll be
+    referencing these a lot.
+    """
+    qset = Component.with_publishing_relations \
+                    .filter(learning_package_id=learning_package_id) \
+                    .order_by('pk')
+
+    if draft is not None:
+        qset = qset.filter(publishable_entity__draft__version__isnull=not draft)
+    if published is not None:
+        qset = qset.filter(publishable_entity__published__version__isnull=not published)
+    if namespace is not None:
+        qset = qset.filter(namespace=namespace)
+    if types is not None:
+        qset = qset.filter(type__in=types)
+    if draft_title is not None:
+        qset = qset.filter(
+            publishable_entity__draft__version__title__icontains=draft_title
+        )
+    if published_title is not None:
+        qset = qset.filter(
+            publishable_entity__published__version__title__icontains=published_title
+        )
+
+    return qset
 
 
 def get_component_version_content(
@@ -115,22 +292,26 @@ def get_component_version_content(
     Can raise a django.core.exceptions.ObjectDoesNotExist error if there is no
     matching ComponentVersionRawContent.
     """
-    return ComponentVersionRawContent.objects.select_related(
-        "raw_content",
-        "raw_content__media_type",
-        "component_version",
-        "component_version__component",
-        "component_version__component__learning_package",
-    ).get(
+    queries = (
         Q(component_version__component__learning_package__key=learning_package_key)
         & Q(component_version__component__publishable_entity__key=component_key)
         & Q(component_version__publishable_entity_version__version_num=version_num)
         & Q(key=key)
     )
+    return ComponentVersionRawContent.objects \
+                                     .select_related(
+                                         "raw_content",
+                                         "raw_content__media_type",
+                                         "raw_content__textcontent",
+                                         "component_version",
+                                         "component_version__component",
+                                         "component_version__component__learning_package",
+                                     ).get(queries)
 
 
 def add_content_to_component_version(
-    component_version: ComponentVersion,
+    component_version_id: int,
+    /,
     raw_content_id: int,
     key: str,
     learner_downloadable=False,
@@ -139,7 +320,7 @@ def add_content_to_component_version(
     Add a RawContent to the given ComponentVersion
     """
     cvrc, _created = ComponentVersionRawContent.objects.get_or_create(
-        component_version=component_version,
+        component_version_id=component_version_id,
         raw_content_id=raw_content_id,
         key=key,
         learner_downloadable=learner_downloadable,

diff --git a/openedx_learning/core/components/migrations/0001_initial.py b/openedx_learning/core/components/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.23 on 2023-12-04 00:41
+# Generated by Django 3.2.23 on 2024-01-22 00:38
 
 import uuid
 
@@ -13,8 +13,8 @@ class Migration(migrations.Migration):
     initial = True
 
     dependencies = [
-        ('oel_publishing', '0002_alter_fk_on_delete'),
         ('oel_contents', '0001_initial'),
+        ('oel_publishing', '0001_initial'),
     ]
 
     operations = [