Skip to content

Commit

Permalink
fix: address bradens comments
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielVZ96 committed Nov 28, 2024
1 parent de1d918 commit 73957b6
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 72 deletions.
190 changes: 118 additions & 72 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,65 @@

EXCLUDED_XBLOCK_TYPES = ['course', 'course_info']

INDEX_DISTINCT_ATTRIBUTE = "usage_key"
INDEX_FILTRABLE_ATTRIBUTES = [
# Get specific block/collection using combination of block_id and context_key
Fields.block_id,
Fields.block_type,
Fields.context_key,
Fields.usage_key,
Fields.org,
Fields.tags,
Fields.tags + "." + Fields.tags_taxonomy,
Fields.tags + "." + Fields.tags_level0,
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.type,
Fields.access_id,
Fields.last_published,
Fields.content + "." + Fields.problem_types,
]
INDEX_SEARCHABLE_ATTRIBUTES = [
# Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
Fields.display_name,
Fields.block_id,
Fields.content,
Fields.description,
Fields.tags,
Fields.collections,
# If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
# are searchable only if at least one document in the index has a value. If we didn't list them here and,
# say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
# these sub-fields: "Attribute `tags.level3` is not searchable."
Fields.tags + "." + Fields.tags_taxonomy,
Fields.tags + "." + Fields.tags_level0,
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.published + "." + Fields.display_name,
Fields.published + "." + Fields.published_description,
]
INDEX_SORTABLE_ATTRIBUTES = [
Fields.display_name,
Fields.created,
Fields.modified,
Fields.last_published,
]
INDEX_RANKING_RULES = [
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
]


@contextmanager
def _index_rebuild_lock() -> Generator[str, None, None]:
Expand Down Expand Up @@ -217,6 +276,18 @@ def _using_temp_index(status_cb: Callable[[str], None] | None = None) -> Generat
_wait_for_meili_task(client.delete_index(temp_index_name))


def _index_is_empty(index_name: str) -> bool:
"""
Check if an index is empty
Args:
index_name (str): The name of the index to check
"""
client = _get_meilisearch_client()
index = client.get_index(index_name)
return index.get_stats().number_of_documents == 0


def _configure_index(index_name):
"""
Configure the index. The following index settings are best changed on an empty index.
Expand All @@ -228,78 +299,17 @@ def _configure_index(index_name):
client = _get_meilisearch_client()

# Mark usage_key as unique (it's not the primary key for the index, but nevertheless must be unique):
client.index(index_name).update_distinct_attribute(Fields.usage_key)
client.index(index_name).update_distinct_attribute(INDEX_DISTINCT_ATTRIBUTE)
# Mark which attributes can be used for filtering/faceted search:
client.index(index_name).update_filterable_attributes(
[
# Get specific block/collection using combination of block_id and context_key
Fields.block_id,
Fields.block_type,
Fields.context_key,
Fields.usage_key,
Fields.org,
Fields.tags,
Fields.tags + "." + Fields.tags_taxonomy,
Fields.tags + "." + Fields.tags_level0,
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.type,
Fields.access_id,
Fields.last_published,
Fields.content + "." + Fields.problem_types,
]
)
client.index(index_name).update_filterable_attributes(INDEX_FILTRABLE_ATTRIBUTES)
# Mark which attributes are used for keyword search, in order of importance:
client.index(index_name).update_searchable_attributes(
[
# Keyword search does _not_ search the course name, course ID, breadcrumbs, block type, or other fields.
Fields.display_name,
Fields.block_id,
Fields.content,
Fields.description,
Fields.tags,
Fields.collections,
# If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
# are searchable only if at least one document in the index has a value. If we didn't list them here and,
# say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
# these sub-fields: "Attribute `tags.level3` is not searchable."
Fields.tags + "." + Fields.tags_taxonomy,
Fields.tags + "." + Fields.tags_level0,
Fields.tags + "." + Fields.tags_level1,
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
Fields.collections + "." + Fields.collections_display_name,
Fields.collections + "." + Fields.collections_key,
Fields.published + "." + Fields.display_name,
Fields.published + "." + Fields.published_description,
]
)
client.index(index_name).update_searchable_attributes(INDEX_SEARCHABLE_ATTRIBUTES)
# Mark which attributes can be used for sorting search results:
client.index(index_name).update_sortable_attributes(
[
Fields.display_name,
Fields.created,
Fields.modified,
Fields.last_published,
]
)
client.index(index_name).update_sortable_attributes(INDEX_SORTABLE_ATTRIBUTES)

# Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
# cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
client.index(index_name).update_ranking_rules(
[
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
]
)
client.index(index_name).update_ranking_rules(INDEX_RANKING_RULES)


def _recurse_children(block, fn, status_cb: Callable[[str], None] | None = None) -> None:
Expand Down Expand Up @@ -378,6 +388,32 @@ def reset_index(status_cb: Callable[[str], None] | None = None) -> None:
status_cb("Index reset complete.")


def _is_index_configured(index_name: str) -> bool:
"""
Check if an index is completely configured
Args:
index_name (str): The name of the index to check
"""
client = _get_meilisearch_client()
index = client.get_index(index_name)
settings = index.get_settings()
for k, v in (
("distinctAttribute", INDEX_DISTINCT_ATTRIBUTE),
("filterableAttributes", INDEX_FILTRABLE_ATTRIBUTES),
("searchableAttributes", INDEX_SEARCHABLE_ATTRIBUTES),
("sortableAttributes", INDEX_SORTABLE_ATTRIBUTES),
("rankingRules", INDEX_RANKING_RULES),
):
setting = settings.get(k, [])
if isinstance(v, list):
v = set(v)
setting = set(setting)
if setting != v:
return False
return True


def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable[[str], None] | None = None) -> None:
"""
Initialize the Meilisearch index, creating it and configuring it if it doesn't exist
Expand All @@ -388,10 +424,19 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
warn_cb = log.warning

if _index_exists(STUDIO_INDEX_NAME):
warn_cb(
"A rebuild of the index is required. Please run ./manage.py cms reindex_studio"
" --experimental [--incremental]"
)
if _index_is_empty(STUDIO_INDEX_NAME):
warn_cb(
"The studio search index is empty. Please run ./manage.py cms reindex_studio"
" --experimental [--incremental]"
)
return
if not _is_index_configured(STUDIO_INDEX_NAME):
warn_cb(
"A rebuild of the index is required. Please run ./manage.py cms reindex_studio"
" --experimental [--incremental]"
)
return
status_cb("Index already exists and is configured.")
return

reset_index(status_cb)
Expand Down Expand Up @@ -424,8 +469,9 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
num_courses = CourseOverview.objects.count()

# Some counters so we can track our progress as indexing progresses:
num_contexts = num_courses + num_libraries
num_contexts_done = 0 # How many courses/libraries we've indexed
num_libs_skipped = len(keys_indexed)
num_contexts = num_courses + num_libraries + num_libs_skipped
num_contexts_done = 0 + num_libs_skipped # How many courses/libraries we've indexed
num_blocks_done = 0 # How many individual components/XBlocks we've indexed

status_cb(f"Found {num_courses} courses, {num_libraries} libraries.")
Expand Down
8 changes: 8 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,14 @@ def test_reset_meilisearch_index(self, mock_meilisearch):

@override_settings(MEILISEARCH_ENABLED=True)
def test_init_meilisearch_index(self, mock_meilisearch):
# Test index already exists
api.init_index()
mock_meilisearch.return_value.swap_indexes.assert_not_called()
mock_meilisearch.return_value.create_index.assert_not_called()
mock_meilisearch.return_value.delete_index.assert_not_called()

# Test index already exists and has no documents
mock_meilisearch.return_value.get_stats.return_value = 0
api.init_index()
mock_meilisearch.return_value.swap_indexes.assert_not_called()
mock_meilisearch.return_value.create_index.assert_not_called()
Expand Down

0 comments on commit 73957b6

Please sign in to comment.