From d18ae34cba287047588929cff679fb7446efa168 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 10 Oct 2024 09:41:58 -0400 Subject: [PATCH 1/2] feat: new view & api calls to serve content library assets This commit adds a new view to serve static assets for content libraries, along with Content Library API calls to add, delete, and get metadata about these assets. These assets come from Learning Core and should ONLY BE ACCESSED FROM STUDIO. Users must have read access to the library in order to see an asset in that library. This also re-implments video transcript support for content libraries and re-enables some previously disabled tests around it. --- cms/envs/test.py | 10 ++ .../core/djangoapps/content_libraries/api.py | 126 ++++++++++++++++-- .../tests/test_content_libraries.py | 20 ++- .../tests/test_static_assets.py | 92 +++++++++++-- .../core/djangoapps/content_libraries/urls.py | 5 + .../djangoapps/content_libraries/views.py | 80 ++++++++++- xmodule/video_block/transcripts_utils.py | 73 +++++++++- 7 files changed, 373 insertions(+), 33 deletions(-) diff --git a/cms/envs/test.py b/cms/envs/test.py index 38b7c7817149..49db50608858 100644 --- a/cms/envs/test.py +++ b/cms/envs/test.py @@ -333,3 +333,13 @@ "SECRET": "***", "URL": "***", } + +############## openedx-learning (Learning Core) config ############## +OPENEDX_LEARNING = { + 'MEDIA': { + 'BACKEND': 'django.core.files.storage.InMemoryStorage', + 'OPTIONS': { + 'location': MEDIA_ROOT + "_private" + } + } +} diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index f30d5f047747..4d09eb4549b7 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -56,6 +56,7 @@ import base64 import hashlib import logging +import mimetypes import attr import requests @@ -68,6 +69,7 @@ from django.db.models import Q, QuerySet from django.utils.translation import gettext as _ from edx_rest_api_client.client import OAuthAPIClient +from django.urls import reverse from lxml import etree from opaque_keys.edx.keys import BlockTypeKey, UsageKey, UsageKeyV2 from opaque_keys.edx.locator import ( @@ -96,7 +98,11 @@ from xblock.core import XBlock from xblock.exceptions import XBlockNotFoundError -from openedx.core.djangoapps.xblock.api import get_component_from_usage_key, xblock_type_display_name +from openedx.core.djangoapps.xblock.api import ( + get_component_from_usage_key, + get_xblock_app_config, + xblock_type_display_name, +) from openedx.core.lib.xblock_serializer.api import serialize_modulestore_block_for_learning_core from xmodule.modulestore.django import modulestore @@ -1018,18 +1024,48 @@ def get_library_block_static_asset_files(usage_key) -> list[LibraryXBlockStaticF Returns a list of LibraryXBlockStaticFile objects, sorted by path. - TODO: This is not yet implemented for Learning Core backed libraries. TODO: Should this be in the general XBlock API rather than the libraries API? """ - return [] + component = get_component_from_usage_key(usage_key) + component_version = component.versioning.draft + + # If there is no Draft version, then this was soft-deleted + if component_version is None: + return [] + + # cvc = the ComponentVersionContent through table + cvc_set = ( + component_version + .componentversioncontent_set + .filter(content__has_file=True) + .order_by('key') + .select_related('content') + ) + + site_root_url = get_xblock_app_config().get_site_root_url() + + return [ + LibraryXBlockStaticFile( + path=cvc.key, + size=cvc.content.size, + url=site_root_url + reverse( + 'content_libraries:library-assets', + kwargs={ + 'component_version_uuid': component_version.uuid, + 'asset_path': cvc.key, + } + ), + ) + for cvc in cvc_set + ] -def add_library_block_static_asset_file(usage_key, file_name, file_content) -> LibraryXBlockStaticFile: +def add_library_block_static_asset_file(usage_key, file_path, file_content, user=None) -> LibraryXBlockStaticFile: """ Upload a static asset file into the library, to be associated with the specified XBlock. Will silently overwrite an existing file of the same name. - file_name should be a name like "doc.pdf". It may optionally contain slashes + file_path should be a name like "doc.pdf". It may optionally contain slashes like 'en/doc.pdf' file_content should be a binary string. @@ -1041,10 +1077,67 @@ def add_library_block_static_asset_file(usage_key, file_name, file_content) -> L video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1") add_library_block_static_asset_file(video_block, "subtitles-en.srt", subtitles.encode('utf-8')) """ - raise NotImplementedError("Static assets not yet implemented for Learning Core") + # File path validations copied over from v1 library logic. This can't really + # hurt us inside our system because we never use these paths in an actual + # file system–they're just string keys that point to hash-named data files + # in a common library (learning package) level directory. But it might + # become a security issue during import/export serialization. + if file_path != file_path.strip().strip('/'): + raise InvalidNameError("file_path cannot start/end with / or whitespace.") + if '//' in file_path or '..' in file_path: + raise InvalidNameError("Invalid sequence (// or ..) in file_path.") + + component = get_component_from_usage_key(usage_key) + + media_type_str, _encoding = mimetypes.guess_type(file_path) + # We use "application/octet-stream" as a generic fallback media type, per + # RFC 2046: https://datatracker.ietf.org/doc/html/rfc2046 + # TODO: This probably makes sense to push down to openedx-learning? + media_type_str = media_type_str or "application/octet-stream" + + now = datetime.now(tz=timezone.utc) + + with transaction.atomic(): + media_type = authoring_api.get_or_create_media_type(media_type_str) + content = authoring_api.get_or_create_file_content( + component.publishable_entity.learning_package.id, + media_type.id, + data=file_content, + created=now, + ) + component_version = authoring_api.create_next_component_version( + component.pk, + content_to_replace={file_path: content.id}, + created=now, + created_by=user.id if user else None, + ) + transaction.on_commit( + lambda: LIBRARY_BLOCK_UPDATED.send_event( + library_block=LibraryBlockData( + library_key=usage_key.context_key, + usage_key=usage_key, + ) + ) + ) + # Now figure out the URL for the newly created asset... + site_root_url = get_xblock_app_config().get_site_root_url() + local_path = reverse( + 'content_libraries:library-assets', + kwargs={ + 'component_version_uuid': component_version.uuid, + 'asset_path': file_path, + } + ) + + return LibraryXBlockStaticFile( + path=file_path, + url=site_root_url + local_path, + size=content.size, + ) -def delete_library_block_static_asset_file(usage_key, file_name): + +def delete_library_block_static_asset_file(usage_key, file_path, user=None): """ Delete a static asset file from the library. @@ -1054,7 +1147,24 @@ def delete_library_block_static_asset_file(usage_key, file_name): video_block = UsageKey.from_string("lb:VideoTeam:python-intro:video:1") delete_library_block_static_asset_file(video_block, "subtitles-en.srt") """ - raise NotImplementedError("Static assets not yet implemented for Learning Core") + component = get_component_from_usage_key(usage_key) + now = datetime.now(tz=timezone.utc) + + with transaction.atomic(): + component_version = authoring_api.create_next_component_version( + component.pk, + content_to_replace={file_path: None}, + created=now, + created_by=user.id if user else None, + ) + transaction.on_commit( + lambda: LIBRARY_BLOCK_UPDATED.send_event( + library_block=LibraryBlockData( + library_key=usage_key.context_key, + usage_key=usage_key, + ) + ) + ) def get_allowed_block_types(library_key): # pylint: disable=unused-argument diff --git a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py index 56e258f8a1f1..03b32e08ad36 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_content_libraries.py @@ -661,13 +661,13 @@ def test_library_permissions(self): # pylint: disable=too-many-statements self._get_library_block_olx(block3_key, expect_response=403) self._get_library_block_fields(block3_key, expect_response=403) self._get_library_block_assets(block3_key, expect_response=403) - self._get_library_block_asset(block3_key, file_name="whatever.png", expect_response=403) + self._get_library_block_asset(block3_key, file_name="static/whatever.png", expect_response=403) # Nor can they preview the block: self._render_block_view(block3_key, view_name="student_view", expect_response=403) # But if we grant allow_public_read, then they can: with self.as_user(admin): self._update_library(lib_id, allow_public_read=True) - # self._set_library_block_asset(block3_key, "whatever.png", b"data") + self._set_library_block_asset(block3_key, "static/whatever.png", b"data") with self.as_user(random_user): self._get_library_block_olx(block3_key) self._render_block_view(block3_key, view_name="student_view") @@ -680,7 +680,7 @@ def test_library_permissions(self): # pylint: disable=too-many-statements with self.as_user(user): self._set_library_block_olx(block3_key, "", expect_response=403) self._set_library_block_fields(block3_key, {"data": "", "metadata": {}}, expect_response=403) - # self._set_library_block_asset(block3_key, "test.txt", b"data", expect_response=403) + self._set_library_block_asset(block3_key, "static/test.txt", b"data", expect_response=403) self._delete_library_block(block3_key, expect_response=403) self._commit_library_changes(lib_id, expect_response=403) self._revert_library_changes(lib_id, expect_response=403) @@ -690,9 +690,9 @@ def test_library_permissions(self): # pylint: disable=too-many-statements olx = self._get_library_block_olx(block3_key) self._set_library_block_olx(block3_key, olx) self._set_library_block_fields(block3_key, {"data": olx, "metadata": {}}) - # self._get_library_block_assets(block3_key) - # self._set_library_block_asset(block3_key, "test.txt", b"data") - # self._get_library_block_asset(block3_key, file_name="test.txt") + self._get_library_block_assets(block3_key) + self._set_library_block_asset(block3_key, "static/test.txt", b"data") + self._get_library_block_asset(block3_key, file_name="static/test.txt") self._delete_library_block(block3_key) self._commit_library_changes(lib_id) self._revert_library_changes(lib_id) # This is a no-op after the commit, but should still have 200 response @@ -915,7 +915,6 @@ def test_library_block_olx_update_event(self): event_receiver.call_args.kwargs ) - @skip("We still need to re-implement static asset handling.") def test_library_block_add_asset_update_event(self): """ Check that LIBRARY_BLOCK_CREATED event is sent when a static asset is @@ -934,7 +933,7 @@ def test_library_block_add_asset_update_event(self): block = self._add_block_to_library(lib_id, "unit", "u1") block_id = block["id"] - self._set_library_block_asset(block_id, "test.txt", b"data") + self._set_library_block_asset(block_id, "static/test.txt", b"data") usage_key = LibraryUsageLocatorV2( lib_key=library_key, @@ -955,7 +954,6 @@ def test_library_block_add_asset_update_event(self): event_receiver.call_args.kwargs ) - @skip("We still need to re-implement static asset handling.") def test_library_block_del_asset_update_event(self): """ Check that LIBRARY_BLOCK_CREATED event is sent when a static asset is @@ -974,9 +972,9 @@ def test_library_block_del_asset_update_event(self): block = self._add_block_to_library(lib_id, "unit", "u1") block_id = block["id"] - self._set_library_block_asset(block_id, "test.txt", b"data") + self._set_library_block_asset(block_id, "static/test.txt", b"data") - self._delete_library_block_asset(block_id, 'text.txt') + self._delete_library_block_asset(block_id, 'static/text.txt') usage_key = LibraryUsageLocatorV2( lib_key=library_key, diff --git a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py index 92ff4c1767d0..a5f69f94b174 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py @@ -1,11 +1,16 @@ """ Tests for static asset files in Learning-Core-based Content Libraries """ -from unittest import skip +from uuid import UUID +from opaque_keys.edx.keys import UsageKey + +from common.djangoapps.student.tests.factories import UserFactory from openedx.core.djangoapps.content_libraries.tests.base import ( ContentLibrariesRestApiTest, ) +from openedx.core.djangoapps.xblock.api import get_component_from_usage_key +from openedx.core.djangolib.testing.utils import skip_unless_cms # Binary data representing an SVG image file SVG_DATA = """ @@ -23,15 +28,10 @@ """ -@skip("Assets are being reimplemented in Learning Core. Disable until that's ready.") +@skip_unless_cms class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest): """ Tests for static asset files in Learning-Core-based Content Libraries - - WARNING: every test should have a unique library slug, because even though - the django/mysql database gets reset for each test case, the lookup between - library slug and bundle UUID does not because it's assumed to be immutable - and cached forever. """ def test_asset_filenames(self): @@ -79,7 +79,7 @@ def test_video_transcripts(self): /> """) # Upload the transcript file - self._set_library_block_asset(block_id, "3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA) + self._set_library_block_asset(block_id, "static/3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA) transcript_handler_url = self._get_block_handler_url(block_id, "transcript") @@ -108,3 +108,79 @@ def check_download(): self._commit_library_changes(library["id"]) check_sjson() check_download() + + +@skip_unless_cms +class ContentLibrariesComponentVersionAssetTest(ContentLibrariesRestApiTest): + """ + Tests for the view that actually delivers the Library asset in Studio. + """ + + def setUp(self): + super().setUp() + + library = self._create_library(slug="asset-lib2", title="Static Assets Test Library") + block = self._add_block_to_library(library["id"], "html", "html1") + self._set_library_block_asset(block["id"], "static/test.svg", SVG_DATA) + usage_key = UsageKey.from_string(block["id"]) + self.component = get_component_from_usage_key(usage_key) + self.draft_component_version = self.component.versioning.draft + + def test_good_responses(self): + get_response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert get_response.status_code == 200 + content = b''.join(chunk for chunk in get_response.streaming_content) + assert content == SVG_DATA + + good_head_response = self.client.head( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert good_head_response.headers == get_response.headers + + def test_missing(self): + """Test asset requests that should 404.""" + # Non-existent version... + wrong_version_uuid = UUID('11111111-1111-1111-1111-111111111111') + response = self.client.get( + f"/library_assets/{wrong_version_uuid}/static/test.svg" + ) + assert response.status_code == 404 + + # Non-existent file... + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/missing.svg" + ) + assert response.status_code == 404 + + # File-like ComponenVersionContent entry that isn't an actually + # downloadable file... + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/block.xml" + ) + assert response.status_code == 404 + + def test_anonymous_user(self): + """Anonymous users shouldn't get access to library assets.""" + self.client.logout() + response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert response.status_code == 403 + + def test_unauthorized_user(self): + """User who is not a Content Library staff should not have access.""" + self.client.logout() + student = UserFactory.create( + username="student", + email="student@example.com", + password="student-pass", + is_staff=False, + is_superuser=False, + ) + self.client.login(username="student", password="student-pass") + get_response = self.client.get( + f"/library_assets/{self.draft_component_version.uuid}/static/test.svg" + ) + assert get_response.status_code == 403 diff --git a/openedx/core/djangoapps/content_libraries/urls.py b/openedx/core/djangoapps/content_libraries/urls.py index e77c1b34d277..b9dc05fabc84 100644 --- a/openedx/core/djangoapps/content_libraries/urls.py +++ b/openedx/core/djangoapps/content_libraries/urls.py @@ -75,4 +75,9 @@ path('pub/jwks/', views.LtiToolJwksView.as_view(), name='lti-pub-jwks'), ])), ])), + path( + 'library_assets//', + views.component_version_asset, + name='library-assets', + ), ] diff --git a/openedx/core/djangoapps/content_libraries/views.py b/openedx/core/djangoapps/content_libraries/views.py index 6e50559f38f6..a20ba06b23d7 100644 --- a/openedx/core/djangoapps/content_libraries/views.py +++ b/openedx/core/djangoapps/content_libraries/views.py @@ -71,14 +71,16 @@ from django.conf import settings from django.contrib.auth import authenticate, get_user_model, login from django.contrib.auth.models import Group +from django.core.exceptions import ObjectDoesNotExist from django.db.transaction import atomic, non_atomic_requests -from django.http import Http404, HttpResponseBadRequest, JsonResponse +from django.http import Http404, HttpResponse, HttpResponseBadRequest, JsonResponse, StreamingHttpResponse from django.shortcuts import get_object_or_404 from django.urls import reverse from django.utils.decorators import method_decorator from django.utils.translation import gettext as _ from django.views.decorators.clickjacking import xframe_options_exempt from django.views.decorators.csrf import csrf_exempt +from django.views.decorators.http import require_safe from django.views.generic.base import TemplateResponseMixin, View from pylti1p3.contrib.django import DjangoCacheDataStorage, DjangoDbToolConf, DjangoMessageLaunch, DjangoOIDCLogin from pylti1p3.exception import LtiException, OIDCException @@ -86,6 +88,7 @@ import edx_api_doc_tools as apidocs from opaque_keys import InvalidKeyError from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 +from openedx_learning.api import authoring from organizations.api import ensure_organization from organizations.exceptions import InvalidOrganizationException from organizations.models import Organization @@ -792,7 +795,7 @@ def put(self, request, usage_key_str, file_path): raise ValidationError("File too big") file_content = file_wrapper.read() try: - result = api.add_library_block_static_asset_file(usage_key, file_path, file_content) + result = api.add_library_block_static_asset_file(usage_key, file_path, file_content, request.user) except ValueError: raise ValidationError("Invalid file path") # lint-amnesty, pylint: disable=raise-missing-from return Response(LibraryXBlockStaticFileSerializer(result).data) @@ -807,7 +810,7 @@ def delete(self, request, usage_key_str, file_path): usage_key.lib_key, request.user, permissions.CAN_EDIT_THIS_CONTENT_LIBRARY, ) try: - api.delete_library_block_static_asset_file(usage_key, file_path) + api.delete_library_block_static_asset_file(usage_key, file_path, request.user) except ValueError: raise ValidationError("Invalid file path") # lint-amnesty, pylint: disable=raise-missing-from return Response(status=status.HTTP_204_NO_CONTENT) @@ -1143,3 +1146,74 @@ def get(self, request): Return the JWKS. """ return JsonResponse(self.lti_tool_config.get_jwks(), safe=False) + + +@require_safe +def component_version_asset(request, component_version_uuid, asset_path): + """ + Serves static assets associated with particular Component versions. + + Important notes: + * This is meant for Studio/authoring use ONLY. It requires read access to + the content library. + * It uses the UUID because that's easier to parse than the key field (which + could be part of an OpaqueKey, but could also be almost anything else). + * This is not very performant, and we still want to use the X-Accel-Redirect + method for serving LMS traffic in the longer term (and probably Studio + eventually). + """ + try: + component_version = authoring.get_component_version_by_uuid( + component_version_uuid + ) + except ObjectDoesNotExist as exc: + raise Http404() from exc + + # Permissions check... + learning_package = component_version.component.learning_package + library_key = LibraryLocatorV2.from_string(learning_package.key) + api.require_permission_for_library_key( + library_key, request.user, permissions.CAN_VIEW_THIS_CONTENT_LIBRARY, + ) + + # We already have logic for getting the correct content and generating the + # proper headers in Learning Core, but the response generated here is an + # X-Accel-Redirect and lacks the actual content. We eventually want to use + # this response in conjunction with a media reverse proxy (Caddy or Nginx), + # but in the short term we're just going to remove the redirect and stream + # the content directly. + redirect_response = authoring.get_redirect_response_for_component_asset( + component_version_uuid, + asset_path, + public=False, + learner_downloadable_only=False, + ) + + # If there was any error, we return that response because it will have the + # correct headers set and won't have any X-Accel-Redirect header set. + if redirect_response.status_code != 200: + return redirect_response + + # If we got here, we know that the asset exists and it's okay to download. + cv_content = component_version.componentversioncontent_set.get(key=asset_path) + content = cv_content.content + + # Delete the re-direct part of the response headers. We'll copy the rest. + headers = redirect_response.headers + headers.pop('X-Accel-Redirect') + + # We need to set the content size header manually because this is a + # streaming response. It's not included in the redirect headers because it's + # not needed there (the reverse-proxy would have direct access to the file). + headers['Content-Length'] = content.size + + if request.method == "HEAD": + return HttpResponse(headers=headers) + + # Otherwise it's going to be a GET response. We don't support response + # offsets or anything fancy, because we don't expect to run this view at + # scale. + return StreamingHttpResponse( + content.read_file().chunks(), + headers=redirect_response.headers, + ) diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 132b8cff1e14..18ea6152aa80 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -8,17 +8,20 @@ import html import logging import os +import pathlib import re from functools import wraps import requests import simplejson as json from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist from lxml import etree from opaque_keys.edx.keys import UsageKeyV2 from pysrt import SubRipFile, SubRipItem, SubRipTime from pysrt.srtexc import Error +from openedx.core.djangoapps.xblock.api import get_component_from_usage_key from xmodule.contentstore.content import StaticContent from xmodule.contentstore.django import contentstore from xmodule.exceptions import NotFoundError @@ -1041,6 +1044,8 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran """ Get video transcript from Learning Core. + Limitation: This is only going to grab from the Draft version. + HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`: Blockstore expects video transcripts to be placed into the 'static/' @@ -1072,9 +1077,71 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran Returns: tuple containing content, filename, mimetype """ - # TODO: Update to use Learning Core data models once static assets support - # has been added. - raise NotFoundError("No transcript - transcripts not supported yet by learning core components.") + usage_key = video_block.scope_ids.usage_id + + # Validate that the format is something we even support... + if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT): + raise NotFoundError(f'Invalid transcript format `{output_format}`') + + # See if the requested language exists. + transcripts = transcripts_info['transcripts'] + if language not in transcripts: + raise NotFoundError( + f"Video {usage_key} does not have a transcript file defined for the " + f"'{language}' language in its OLX." + ) + + # Grab the underlying Component. There's no version parameter to this call, + # so we're just going to grab the file associated with the latest draft + # version for now. + component = get_component_from_usage_key(usage_key) + component_version = component.versioning.draft + if not component_version: + raise NotFoundError( + f"No transcript for {usage_key} because Component {component.uuid} " + "was soft-deleted." + ) + + file_path = pathlib.Path(f"static/{transcripts[language]}") + if file_path.suffix != '.srt': + # We want to standardize on .srt + raise NotFoundError( + "Video XBlocks in Content Libraries only support storing .srt " + f"transcript files, but we tried to look up {file_path} for {usage_key}" + ) + + # TODO: There should be a Learning Core API call for this: + try: + content = ( + component_version + .componentversioncontent_set + .filter(content__has_file=True) + .select_related('content') + .get(key=file_path) + .content + ) + data = content.read_file().read() + except ObjectDoesNotExist as exc: + raise NotFoundError( + f"No file {file_path} found for {usage_key} " + f"(ComponentVersion {component_version.uuid})" + ) from exc + + # Now convert the transcript data to the requested format: + output_filename = f'{file_path.stem}.{output_format}' + output_transcript = Transcript.convert( + data.decode('utf-8'), + input_format=Transcript.SRT, + output_format=output_format, + ) + if not output_transcript.strip(): + raise NotFoundError( + f"Transcript file {file_path} found for {usage_key} " + f"(ComponentVersion {component_version.uuid}), but it has no " + "content or is malformed." + ) + + return output_transcript, output_filename, Transcript.mime_types[output_format] def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None): From ec599b34895040979a822c63d0721cc8cf7b631b Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 15 Oct 2024 20:06:33 -0400 Subject: [PATCH 2/2] temp: fixups based on review feedback. will squash --- .../djangoapps/content_libraries/views.py | 2 +- xmodule/video_block/transcripts_utils.py | 42 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/openedx/core/djangoapps/content_libraries/views.py b/openedx/core/djangoapps/content_libraries/views.py index a20ba06b23d7..afae04f40394 100644 --- a/openedx/core/djangoapps/content_libraries/views.py +++ b/openedx/core/djangoapps/content_libraries/views.py @@ -1212,7 +1212,7 @@ def component_version_asset(request, component_version_uuid, asset_path): # Otherwise it's going to be a GET response. We don't support response # offsets or anything fancy, because we don't expect to run this view at - # scale. + # LMS-scale. return StreamingHttpResponse( content.read_file().chunks(), headers=redirect_response.headers, diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 18ea6152aa80..866edf596812 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -1042,29 +1042,29 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts def get_transcript_from_learning_core(video_block, language, output_format, transcripts_info): """ - Get video transcript from Learning Core. + Get video transcript from Learning Core (used for Content Libraries) Limitation: This is only going to grab from the Draft version. - HISTORIC INFORMATION FROM WHEN THIS FUNCTION WAS `get_transcript_from_blockstore`: - - Blockstore expects video transcripts to be placed into the 'static/' - subfolder of the XBlock's folder in a Blockstore bundle. For example, if the - video XBlock's definition is in the standard location of - video/video1/definition.xml - Then the .srt files should be placed at e.g. - video/video1/static/video1-en.srt - This is the same place where other public static files are placed for other - XBlocks, such as image files used by HTML blocks. - - Video XBlocks in Blockstore must set the 'transcripts' XBlock field to a - JSON dictionary listing the filename of the transcript for each language: - + Learning Core models a VideoBlock's data in a more generic thing it calls a + Component. Each Component has its own virtual space for file-like data. The + OLX for the VideoBlock itself is stored at the root of that space, as + ``block.xml``. Static assets that are meant to be user-downloadable are + placed in a `static/` directory for that Component, and this is where we + expect to store transcript files. + + So if there is a ``video1-en.srt`` file for a particular VideoBlock, we + expect that to be stored as ``static/video1-en.srt`` in the Component. Any + other downloadable files would be here as well, such as thumbnails. + + Video XBlocks in Blockstore must set the 'transcripts' XBlock field to a + JSON dictionary listing the filename of the transcript for each language: + This method is tested in openedx/core/djangoapps/content_libraries/tests/test_static_assets.py @@ -1077,7 +1077,7 @@ def get_transcript_from_learning_core(video_block, language, output_format, tran Returns: tuple containing content, filename, mimetype """ - usage_key = video_block.scope_ids.usage_id + usage_key = video_block.usage_key # Validate that the format is something we even support... if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):