-
Notifications
You must be signed in to change notification settings - Fork 3.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Store content object collections in search index [FC-0062] #35469
Changes from 37 commits
ed30ce2
3d64ea4
931f688
b2b38cb
a16b398
be39d03
22fa791
0bf0f78
45cf886
366a7e9
504aa4f
a2da207
aa3c1e3
6ae83ba
abb1eb1
0eb52cf
6f39767
bcc5cbb
0e32451
3c6617e
0c29552
e20ee24
9619341
1764e87
ae7926f
007f80a
197733b
436822e
d517d80
d312ff3
9a94316
66b2aa8
715527c
d5aeff8
e6b469d
4bc7c22
bd44874
bfd548b
0037530
8a380a7
eb78583
5bdcc9e
a81ea9a
360ec35
ab25e2c
2c6c8cf
79f83d7
8d9f738
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,9 @@ | |
from hashlib import blake2b | ||
|
||
from django.utils.text import slugify | ||
from django.core.exceptions import ObjectDoesNotExist | ||
from opaque_keys.edx.keys import LearningContextKey, UsageKey | ||
from openedx_learning.api import authoring as authoring_api | ||
|
||
from openedx.core.djangoapps.content.search.models import SearchAccess | ||
from openedx.core.djangoapps.content_libraries import api as lib_api | ||
|
@@ -52,6 +54,8 @@ class Fields: | |
tags_level1 = "level1" | ||
tags_level2 = "level2" | ||
tags_level3 = "level3" | ||
# List of collection.key strings this object belongs to. | ||
collections = "collections" | ||
# The "content" field is a dictionary of arbitrary data, depending on the block_type. | ||
# It comes from each XBlock's index_dictionary() method (if present) plus some processing. | ||
# Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on. | ||
|
@@ -223,6 +227,35 @@ def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict: | |
return {Fields.tags: result} | ||
|
||
|
||
def _collections_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
    """
    Given an XBlock, course, library, etc., get the collections for its index doc.

    e.g. for something in Collections "COL_A" and "COL_B", this would return:
        {
            "collections": ["COL_A", "COL_B"],
        }

    If the object is not in any collections, or no component can be found for
    the given key, this returns:
        {
            "collections": [],
        }
    """
    # Gather the collections associated with this object
    collections = []
    try:
        component = lib_api.get_component_from_usage_key(object_id)
        collections = authoring_api.get_entity_collections(
            component.learning_package_id,
            component.key,
        ).values_list("key", flat=True)
    except ObjectDoesNotExist:
        log.warning(f"No component found for {object_id}")

    # Always include the field, even when it is empty. Review feedback on this
    # change: if the component is not in any collections, we must return an
    # empty list to ensure the index is updated with this information
    # (otherwise an object removed from all its collections would keep its
    # previous "collections" value in the search index).
    return {Fields.collections: list(collections)}
|
||
|
||
def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetadata) -> dict: | ||
""" | ||
Generate a dictionary document suitable for ingestion into a search engine | ||
|
@@ -265,6 +298,19 @@ def searchable_doc_tags(usage_key: UsageKey) -> dict: | |
return doc | ||
|
||
|
||
def searchable_doc_collections(usage_key: UsageKey) -> dict:
    """
    Build the search-engine document (e.g. for Meilisearch or Elasticsearch)
    that carries the collections data for the given content object.

    The document holds the object's index ID plus whatever fields
    _collections_for_content_object() returns for this usage key.
    """
    doc_id = meili_id_from_opaque_key(usage_key)
    collections_fields = _collections_for_content_object(usage_key)
    return {Fields.id: doc_id, **collections_fields}
|
||
|
||
def searchable_doc_for_course_block(block) -> dict: | ||
""" | ||
Generate a dictionary document suitable for ingestion into a search engine | ||
|
@@ -288,7 +334,7 @@ def searchable_doc_for_collection(collection) -> dict: | |
found using faceted search. | ||
""" | ||
doc = { | ||
Fields.id: collection.id, | ||
Fields.id: collection.key, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we are better with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. #35321 is merged, so we can update this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need the collection.key in order to call the REST APIs though -- I've re-added it in the internal PR: open-craft@5bdcc9e There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for catching this @pomegranited! |
||
Fields.type: DocType.collection, | ||
Fields.display_name: collection.title, | ||
Fields.description: collection.description, | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -6,7 +6,14 @@ | |||||||||||||||||
|
||||||||||||||||||
from django.db.models.signals import post_delete | ||||||||||||||||||
from django.dispatch import receiver | ||||||||||||||||||
from openedx_events.content_authoring.data import ContentLibraryData, ContentObjectData, LibraryBlockData, XBlockData | ||||||||||||||||||
from openedx_events.content_authoring.data import ( | ||||||||||||||||||
ContentLibraryData, | ||||||||||||||||||
ContentObjectChangedData, | ||||||||||||||||||
LibraryBlockData, | ||||||||||||||||||
XBlockData, | ||||||||||||||||||
) | ||||||||||||||||||
from opaque_keys import InvalidKeyError | ||||||||||||||||||
from opaque_keys.edx.keys import UsageKey | ||||||||||||||||||
from openedx_events.content_authoring.signals import ( | ||||||||||||||||||
CONTENT_LIBRARY_DELETED, | ||||||||||||||||||
CONTENT_LIBRARY_UPDATED, | ||||||||||||||||||
|
@@ -16,20 +23,19 @@ | |||||||||||||||||
XBLOCK_CREATED, | ||||||||||||||||||
XBLOCK_DELETED, | ||||||||||||||||||
XBLOCK_UPDATED, | ||||||||||||||||||
CONTENT_OBJECT_TAGS_CHANGED, | ||||||||||||||||||
CONTENT_OBJECT_ASSOCIATIONS_CHANGED, | ||||||||||||||||||
) | ||||||||||||||||||
from openedx.core.djangoapps.content_tagging.utils import get_content_key_from_string | ||||||||||||||||||
|
||||||||||||||||||
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview | ||||||||||||||||||
from openedx.core.djangoapps.content.search.models import SearchAccess | ||||||||||||||||||
|
||||||||||||||||||
from .api import only_if_meilisearch_enabled, upsert_block_tags_index_docs | ||||||||||||||||||
from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs | ||||||||||||||||||
from .tasks import ( | ||||||||||||||||||
delete_library_block_index_doc, | ||||||||||||||||||
delete_xblock_index_doc, | ||||||||||||||||||
update_content_library_index_docs, | ||||||||||||||||||
upsert_library_block_index_doc, | ||||||||||||||||||
upsert_xblock_index_doc | ||||||||||||||||||
upsert_xblock_index_doc, | ||||||||||||||||||
) | ||||||||||||||||||
|
||||||||||||||||||
log = logging.getLogger(__name__) | ||||||||||||||||||
|
@@ -145,22 +151,27 @@ def content_library_updated_handler(**kwargs) -> None: | |||||||||||||||||
update_content_library_index_docs.apply(args=[str(content_library_data.library_key)]) | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
@only_if_meilisearch_enabled
def content_object_associations_changed_handler(**kwargs) -> None:
    """
    Update the collections/tags data in the index for the Content Object

    Expects a "content_object" keyword argument holding a ContentObjectChangedData.
    Logs an error and returns without touching the index if that argument is
    missing or of the wrong type, or if its object_id is not a valid usage key.
    """
    content_object = kwargs.get("content_object", None)
    if not content_object or not isinstance(content_object, ContentObjectChangedData):
        log.error("Received null or incorrect data for event")
        return

    try:
        # Only objects identified by a valid usage key are handled here.
        usage_key = UsageKey.from_string(str(content_object.object_id))
    except InvalidKeyError:
        log.error("Received invalid content object id")
        return

    # This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
    # So we allow a potential double "upsert" here: the two checks are independent (not if/elif), so
    # neither update is skipped when both kinds of association changed.
    # NOTE(review): an empty "changes" list is treated as "everything changed", per the
    # openedx-events ContentObjectChangedData documentation -- confirm against the installed version.
    changes = content_object.changes
    if not changes or "tags" in changes:
        upsert_block_tags_index_docs(usage_key)
    if not changes or "collections" in changes:
        upsert_block_collections_index_docs(usage_key)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: If we have empty
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See 360ec35 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@pomegranited Sorry for the late request (I overlooked this before), but what about changing the structure here?
That way, we can use the `display_name` as a searchable attribute and the `slug`/`key` for a dev action (like a redirect).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure @rpenido , but I decided to store the collections data like we store the tags data, e.g.
cf a81ea9a