openedx · ChrisChV · Sep 13, 2024 · Aug 14, 2024 · Aug 22, 2024 · Aug 23, 2024
diff --git a/docs/hooks/events.rst b/docs/hooks/events.rst
@@ -233,17 +233,29 @@ Content Authoring Events
      - 2023-07-20
 
    * - `LIBRARY_BLOCK_CREATED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L167>`_
-     - org.openedx.content_authoring.content_library.created.v1
+     - org.openedx.content_authoring.library_block.created.v1
      - 2023-07-20
 
    * - `LIBRARY_BLOCK_UPDATED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L178>`_
-     - org.openedx.content_authoring.content_library.updated.v1
+     - org.openedx.content_authoring.library_block.updated.v1
      - 2023-07-20
 
    * - `LIBRARY_BLOCK_DELETED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L189>`_
-     - org.openedx.content_authoring.content_library.deleted.v1
+     - org.openedx.content_authoring.library_block.deleted.v1
      - 2023-07-20
 
-   * - `CONTENT_OBJECT_TAGS_CHANGED <https://github.com/openedx/openedx-events/blob/c0eb4ba1a3d7d066d58e5c87920b8ccb0645f769/openedx_events/content_authoring/signals.py#L207>`_
-     - org.openedx.content_authoring.content.object.tags.changed.v1
-     - 2024-03-31
+   * - `LIBRARY_COLLECTION_CREATED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L219>`_
+     - org.openedx.content_authoring.content_library.collection.created.v1
+     - 2024-08-23
+
+   * - `LIBRARY_COLLECTION_UPDATED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L230>`_
+     - org.openedx.content_authoring.content_library.collection.updated.v1
+     - 2024-08-23
+
+   * - `LIBRARY_COLLECTION_DELETED <https://github.com/openedx/openedx-events/blob/main/openedx_events/content_authoring/signals.py#L241>`_
+     - org.openedx.content_authoring.content_library.collection.deleted.v1
+     - 2024-08-23
+
+   * - `CONTENT_OBJECT_ASSOCIATIONS_CHANGED <https://github.com/openedx/openedx-events/blob/eb17e03f075b272ad8a29e8435d6a514f8884131/openedx_events/content_authoring/signals.py#L205-L214>`_
+     - org.openedx.content_authoring.content.object.associations.changed.v1
+     - 2024-09-06
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
@@ -34,6 +34,7 @@
     searchable_doc_for_course_block,
     searchable_doc_for_collection,
     searchable_doc_for_library_block,
+    searchable_doc_collections,
     searchable_doc_tags,
 )
 
@@ -296,16 +297,12 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
     status_cb("Counting courses...")
     num_courses = CourseOverview.objects.count()
 
-    # Get the list of collections
-    status_cb("Counting collections...")
-    num_collections = authoring_api.get_collections().count()
-
     # Some counters so we can track our progress as indexing progresses:
-    num_contexts = num_courses + num_libraries + num_collections
+    num_contexts = num_courses + num_libraries
     num_contexts_done = 0  # How many courses/libraries we've indexed
     num_blocks_done = 0  # How many individual components/XBlocks we've indexed
 
-    status_cb(f"Found {num_courses} courses, {num_libraries} libraries and {num_collections} collections.")
+    status_cb(f"Found {num_courses} courses, {num_libraries} libraries.")
     with _using_temp_index(status_cb) as temp_index_name:
         ############## Configure the index ##############
 
@@ -326,6 +323,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
             Fields.tags + "." + Fields.tags_level1,
             Fields.tags + "." + Fields.tags_level2,
             Fields.tags + "." + Fields.tags_level3,
+            Fields.collections,
             Fields.type,
             Fields.access_id,
             Fields.last_published,
@@ -339,6 +337,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
             Fields.content,
             Fields.tags,
             Fields.description,
+            Fields.collections,
             # If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
             # are searchable only if at least one document in the index has a value. If we didn't list them here and,
             # say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
@@ -379,6 +378,7 @@ def index_library(lib_key: str) -> list:
                     doc = {}
                     doc.update(searchable_doc_for_library_block(metadata))
                     doc.update(searchable_doc_tags(metadata.usage_key))
+                    doc.update(searchable_doc_collections(metadata.usage_key))
                     docs.append(doc)
                 except Exception as err:  # pylint: disable=broad-except
                     status_cb(f"Error indexing library component {component}: {err}")
@@ -390,10 +390,43 @@ def index_library(lib_key: str) -> list:
                     status_cb(f"Error indexing library {lib_key}: {err}")
             return docs
 
+        ############## Collections ##############
+        def index_collection_batch(batch, num_done) -> int:
+            """
+            Build the search documents for one page of collections and add them to the temporary index.
+
+            Every collection in ``batch`` bumps the counter, whether or not its document could be built.
+            Returns the updated ``num_done``.
+            """
+            collection_docs = []
+            for collection in batch:
+                try:
+                    collection_doc = searchable_doc_for_collection(collection)
+                    # TODO: once collections are tagged, also merge in their tags:
+                    # collection_doc.update(searchable_doc_tags(collection.id))
+                except Exception as err:  # pylint: disable=broad-except
+                    status_cb(f"Error indexing collection {collection}: {err}")
+                else:
+                    collection_docs.append(collection_doc)
+                num_done += 1
+
+            # Nothing could be built for this page, so there is nothing to send to the index.
+            if not collection_docs:
+                return num_done
+
+            try:
+                # One request for the whole page (usually faster than adding documents one at a time):
+                _wait_for_meili_task(client.index(temp_index_name).add_documents(collection_docs))
+            except (TypeError, KeyError, MeilisearchError) as err:
+                # NOTE: ``p`` is not a parameter of this helper; it is read from the enclosing page loop.
+                status_cb(f"Error indexing collection batch {p}: {err}")
+            return num_done
+
         for lib_key in lib_keys:
-            status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}")
+            status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing blocks in library {lib_key}")
             lib_docs = index_library(lib_key)
             num_blocks_done += len(lib_docs)
+
+            # To reduce memory usage on large instances, split up the Collections into pages of 100 collections:
+            library = lib_api.get_library(lib_key)
+            collections = authoring_api.get_collections(library.learning_package.id, enabled=True)
+            num_collections = collections.count()
+            num_collections_done = 0
+            status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}")
+            paginator = Paginator(collections, 100)
+            for p in paginator.page_range:
+                num_collections_done = index_collection_batch(paginator.page(p).object_list, num_collections_done)
+            status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
+
             num_contexts_done += 1
 
         ############## Courses ##############
@@ -430,39 +463,6 @@ def add_with_children(block):
                 num_contexts_done += 1
                 num_blocks_done += len(course_docs)
 
-        ############## Collections ##############
-        status_cb("Indexing collections...")
-
-        def index_collection_batch(batch, num_contexts_done) -> int:
-            docs = []
-            for collection in batch:
-                status_cb(
-                    f"{num_contexts_done + 1}/{num_contexts}. "
-                    f"Now indexing collection {collection.title} ({collection.id})"
-                )
-                try:
-                    doc = searchable_doc_for_collection(collection)
-                    # Uncomment below line once collections are tagged.
-                    # doc.update(searchable_doc_tags(collection.id))
-                    docs.append(doc)
-                except Exception as err:  # pylint: disable=broad-except
-                    status_cb(f"Error indexing collection {collection}: {err}")
-                finally:
-                    num_contexts_done += 1
-
-            if docs:
-                try:
-                    # Add docs in batch of 100 at once (usually faster than adding one at a time):
-                    _wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
-                except (TypeError, KeyError, MeilisearchError) as err:
-                    status_cb(f"Error indexing collection batch {p}: {err}")
-            return num_contexts_done
-
-        # To reduce memory usage on large instances, split up the Collections into pages of 100 collections:
-        paginator = Paginator(authoring_api.get_collections(enabled=True), 100)
-        for p in paginator.page_range:
-            num_contexts_done = index_collection_batch(paginator.page(p).object_list, num_contexts_done)
-
     status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses, collections and libraries.")
 
 
@@ -575,6 +575,15 @@ def upsert_block_tags_index_docs(usage_key: UsageKey):
     _update_index_docs([doc])
 
 
+def upsert_block_collections_index_docs(usage_key: UsageKey):
+    """
+    Updates the collections data in documents for the given Course/Library block
+
+    The collections field is always part of the document that is sent: it defaults to an
+    empty list and is overwritten with the block's collection keys when there are any.
+    """
+    doc = {
+        Fields.id: meili_id_from_opaque_key(usage_key),
+        # searchable_doc_collections() leaves the collections field out entirely when the block is in
+        # no collection. Without this default, a block removed from its last collection would send a
+        # doc holding only the id, and the old value would stay in the index.
+        # NOTE(review): this assumes _update_index_docs() does a partial update - confirm.
+        Fields.collections: [],
+    }
+    doc.update(searchable_doc_collections(usage_key))
+    _update_index_docs([doc])
+
+
 def _get_user_orgs(request: Request) -> list[str]:
     """
     Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole.

diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -7,7 +7,9 @@
 from hashlib import blake2b
 
 from django.utils.text import slugify
+from django.core.exceptions import ObjectDoesNotExist
 from opaque_keys.edx.keys import LearningContextKey, UsageKey
+from openedx_learning.api import authoring as authoring_api
 
 from openedx.core.djangoapps.content.search.models import SearchAccess
 from openedx.core.djangoapps.content_libraries import api as lib_api
@@ -52,6 +54,8 @@ class Fields:
     tags_level1 = "level1"
     tags_level2 = "level2"
     tags_level3 = "level3"
+    # List of collection.key strings this object belongs to.
+    collections = "collections"
     # The "content" field is a dictionary of arbitrary data, depending on the block_type.
     # It comes from each XBlock's index_dictionary() method (if present) plus some processing.
     # Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
@@ -223,6 +227,35 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
     return {Fields.tags: result}
 
 
+def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
+    """
+    Build the collections part of the index doc for the given content object.
+
+    The object is resolved with lib_api.get_component_from_usage_key(), and the keys of the
+    collections containing that component are returned.
+
+    e.g. for something in Collections "COL_A" and "COL_B", this would return:
+        {
+            "collections": ["COL_A", "COL_B"],
+        }
+
+    Returns an empty dict if the object is not in any collections, or if no component
+    could be found for it (a warning is logged in that case).
+    """
+    collection_keys = []
+    try:
+        component = lib_api.get_component_from_usage_key(object_id)
+        # Only the collection keys are needed for the index, not the full Collection rows.
+        collection_keys = authoring_api.get_entity_collections(
+            component.learning_package_id,
+            component.key,
+        ).values_list("key", flat=True)
+    except ObjectDoesNotExist:
+        log.warning(f"No component found for {object_id}")
+
+    # Leave the field out entirely when there is nothing to report.
+    if not collection_keys:
+        return {}
+
+    return {Fields.collections: list(collection_keys)}
+
+
 def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict:
     """
     Generate a dictionary document suitable for ingestion into a search engine
@@ -265,6 +298,19 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict:
     return doc
 
 
+def searchable_doc_collections(usage_key: UsageKey) -> dict:
+    """
+    Build a partial document for a search engine like Meilisearch or Elasticsearch
+    that carries only the collections data of the given content object.
+
+    The document always holds the object's index ID; whatever
+    _collections_for_content_object() returns for the object is merged on top of it.
+    """
+    return {
+        Fields.id: meili_id_from_opaque_key(usage_key),
+        **_collections_for_content_object(usage_key),
+    }
+
+
 def searchable_doc_for_course_block(block) -> dict:
     """
     Generate a dictionary document suitable for ingestion into a search engine
@@ -288,7 +334,7 @@ def searchable_doc_for_collection(collection) -> dict:
     found using faceted search.
     """
     doc = {
-        Fields.id: collection.id,
+        Fields.id: collection.key,
         Fields.type: DocType.collection,
         Fields.display_name: collection.title,
         Fields.description: collection.description,

diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py
@@ -6,7 +6,14 @@
 
 from django.db.models.signals import post_delete
 from django.dispatch import receiver
-from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData
+from openedx_events.content_authoring.data import (
+    ContentLibraryData,
+    ContentObjectChangedData,
+    LibraryBlockData,
+    XBlockData,
+)
+from opaque_keys import InvalidKeyError
+from opaque_keys.edx.keys import UsageKey
 from openedx_events.content_authoring.signals import (
     CONTENT_LIBRARY_DELETED,
     CONTENT_LIBRARY_UPDATED,
@@ -16,20 +23,19 @@
     XBLOCK_CREATED,
     XBLOCK_DELETED,
     XBLOCK_UPDATED,
-    CONTENT_OBJECT_TAGS_CHANGED,
+    CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
 )
-from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string
 
 from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
 from openedx.core.djangoapps.content.search.models import SearchAccess
 
-from .api import only_if_meilisearch_enabled, upsert_block_tags_index_docs
+from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs
 from .tasks import (
     delete_library_block_index_doc,
     delete_xblock_index_doc,
     update_content_library_index_docs,
     upsert_library_block_index_doc,
-    upsert_xblock_index_doc
+    upsert_xblock_index_doc,
 )
 
 log = logging.getLogger(__name__)
@@ -145,22 +151,27 @@ def content_library_updated_handler(**kwargs) -> None:
     update_content_library_index_docs.apply(args=[str(content_library_data.library_key)])
 
 
-@receiver(CONTENT_OBJECT_TAGS_CHANGED)
+@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
 @only_if_meilisearch_enabled
-def content_object_tags_changed_handler(**kwargs) -> None:
+def content_object_associations_changed_handler(**kwargs) -> None:
     """
-    Update the tags data in the index for the Content Object
+    Update the collections/tags data in the index for the Content Object
+
+    Reads the event payload from the "content_object" keyword argument. The event is logged and
+    ignored when the payload is missing, is not a ContentObjectChangedData, or its object_id is
+    not a valid usage key.
+
+    An empty ``changes`` list is treated as "anything may have changed", so both the tags and the
+    collections data are refreshed in that case.
     """
-    content_object_tags = kwargs.get("content_object", None)
-    if not content_object_tags or not isinstance(content_object_tags, ContentObjectData):
+    content_object = kwargs.get("content_object", None)
+    if not content_object or not isinstance(content_object, ContentObjectChangedData):
         log.error("Received null or incorrect data for event")
         return
 
     try:
         # Check if valid if course or library block
-        get_content_key_from_string(content_object_tags.object_id)
-    except ValueError:
+        usage_key = UsageKey.from_string(str(content_object.object_id))
+    except InvalidKeyError:
         log.error("Received invalid content object id")
         return
 
-    upsert_block_tags_index_docs(content_object_tags.object_id)
+    # This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
+    # So we allow a potential double "upsert" here.
+    # Two independent checks (not if/elif), so that neither update is skipped when both are listed.
+    if not content_object.changes or "tags" in content_object.changes:
+        upsert_block_tags_index_docs(usage_key)
+    if not content_object.changes or "collections" in content_object.changes:
+        upsert_block_collections_index_docs(usage_key)