Skip to content

Commit

Permalink
Merge pull request #12944 from jredrejo/annotate_ordered_channels
Browse files Browse the repository at this point in the history
Annotate channels with some ordered metadata
  • Loading branch information
rtibbles authored Dec 17, 2024
2 parents 8421cf0 + c466ef2 commit 597ec1a
Show file tree
Hide file tree
Showing 8 changed files with 287 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from sqlalchemy import BigInteger
from sqlalchemy import Boolean
from sqlalchemy import CHAR
from sqlalchemy import CheckConstraint
from sqlalchemy import Column
from sqlalchemy import Float
from sqlalchemy import ForeignKey
from sqlalchemy import ForeignKeyConstraint
from sqlalchemy import Index
from sqlalchemy import Integer
from sqlalchemy import String
Expand Down Expand Up @@ -45,6 +47,23 @@ class ContentLocalfile(Base):
class ContentContentnode(Base):
__tablename__ = "content_contentnode"
__table_args__ = (
CheckConstraint("lft >= 0"),
CheckConstraint("tree_id >= 0"),
CheckConstraint("level >= 0"),
CheckConstraint("duration >= 0"),
CheckConstraint("rght >= 0"),
ForeignKeyConstraint(
["lang_id"],
["content_language.id"],
deferrable=True,
initially="DEFERRED",
),
ForeignKeyConstraint(
["parent_id"],
["content_contentnode.id"],
deferrable=True,
initially="DEFERRED",
),
Index(
"content_contentnode_level_channel_id_available_29f0bb18_idx",
"level",
Expand All @@ -69,11 +88,10 @@ class ContentContentnode(Base):
author = Column(String(200), nullable=False)
kind = Column(String(200), nullable=False)
available = Column(Boolean, nullable=False)
lft = Column(Integer, nullable=False, index=True)
rght = Column(Integer, nullable=False, index=True)
lft = Column(Integer, nullable=False)
tree_id = Column(Integer, nullable=False, index=True)
level = Column(Integer, nullable=False, index=True)
lang_id = Column(ForeignKey("content_language.id"), index=True)
level = Column(Integer, nullable=False)
lang_id = Column(String(14), index=True)
license_description = Column(Text)
license_name = Column(String(50))
coach_content = Column(Boolean, nullable=False)
Expand All @@ -94,7 +112,8 @@ class ContentContentnode(Base):
learning_activities_bitmask_0 = Column(BigInteger)
ancestors = Column(Text)
admin_imported = Column(Boolean)
parent_id = Column(ForeignKey("content_contentnode.id"), index=True)
rght = Column(Integer, nullable=False)
parent_id = Column(CHAR(32), index=True)

lang = relationship("ContentLanguage")
parent = relationship("ContentContentnode", remote_side=[id])
Expand All @@ -118,6 +137,13 @@ class ContentAssessmentmetadata(Base):

class ContentChannelmetadata(Base):
__tablename__ = "content_channelmetadata"
__table_args__ = (
CheckConstraint('"order" >= 0'),
ForeignKeyConstraint(
["root_id"],
["content_contentnode.id"],
),
)

id = Column(CHAR(32), primary_key=True)
name = Column(String(200), nullable=False)
Expand All @@ -127,13 +153,15 @@ class ContentChannelmetadata(Base):
thumbnail = Column(Text, nullable=False)
last_updated = Column(String)
min_schema_version = Column(String(50), nullable=False)
root_id = Column(ForeignKey("content_contentnode.id"), nullable=False, index=True)
root_id = Column(CHAR(32), nullable=False, index=True)
published_size = Column(BigInteger)
total_resource_count = Column(Integer)
order = Column(Integer)
public = Column(Boolean)
tagline = Column(String(150))
partial = Column(Boolean)
included_categories = Column(Text)
included_grade_levels = Column(Text)

root = relationship("ContentContentnode")

Expand Down Expand Up @@ -242,12 +270,21 @@ class ContentFile(Base):

class ContentChannelmetadataIncludedLanguages(Base):
__tablename__ = "content_channelmetadata_included_languages"
__table_args__ = (
Index(
"content_channelmetadata_included_languages_channelmetadata_id_language_id_51f20415_uniq",
"channelmetadata_id",
"language_id",
unique=True,
),
)

id = Column(Integer, primary_key=True)
channelmetadata_id = Column(
ForeignKey("content_channelmetadata.id"), nullable=False
ForeignKey("content_channelmetadata.id"), nullable=False, index=True
)
language_id = Column(ForeignKey("content_language.id"), nullable=False)
language_id = Column(ForeignKey("content_language.id"), nullable=False, index=True)
sort_value = Column(Integer, nullable=False)

channelmetadata = relationship("ContentChannelmetadata")
language = relationship("ContentLanguage")
25 changes: 25 additions & 0 deletions kolibri/core/content/management/commands/generate_schema.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import inspect
import io
import json
import os
Expand All @@ -10,6 +11,30 @@
from django.core.management import call_command
from django.core.management.base import BaseCommand
from django.db import connections

# Compatibility layer for Python 3.11+, where inspect.getargspec and the
# inspect.ArgSpec namedtuple were removed from the standard library.
# NOTE(review): presumably required by the sqlacodegen import that follows
# this block -- confirm against the installed sqlacodegen version.
if not hasattr(inspect, "ArgSpec"):
    from collections import namedtuple

    # Recreate ArgSpec as a namedtuple, like the removed stdlib type, so that
    # callers can build it positionally, read its fields by name, and also
    # unpack or compare it as a plain 4-tuple.
    ArgSpec = namedtuple("ArgSpec", "args varargs keywords defaults")

    def getargspec(func):
        """Return an ``ArgSpec`` for *func* built from ``getfullargspec``.

        Only the four legacy fields are kept: ``args``, ``varargs``,
        ``keywords`` (taken from ``varkw``) and ``defaults``.
        """
        spec = inspect.getfullargspec(func)
        return ArgSpec(
            args=spec.args,
            varargs=spec.varargs,
            keywords=spec.varkw,
            defaults=spec.defaults,
        )

    # Patch the stdlib module in place so later imports find the old names.
    inspect.ArgSpec = ArgSpec
    inspect.getargspec = getargspec


from sqlacodegen.codegen import CodeGenerator
from sqlalchemy import create_engine
from sqlalchemy import MetaData
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Generated by Django 3.2.25 on 2024-12-13 17:17
import sortedm2m.fields
import sortedm2m.operations
from django.db import migrations
from django.db import models


class Migration(migrations.Migration):
    """Schema changes for ordered channel metadata.

    Adds the ``included_categories`` and ``included_grade_levels`` text
    fields to ``channelmetadata`` and alters ``included_languages`` to a
    ``SortedManyToManyField``.
    """

    dependencies = [("content", "0038_alter_localfile_extension")]

    operations = [
        # Both new columns share the same nullable, blank-able text definition.
        migrations.AddField(
            model_name="channelmetadata",
            name=field_name,
            field=models.TextField(blank=True, null=True),
        )
        for field_name in ("included_categories", "included_grade_levels")
    ] + [
        sortedm2m.operations.AlterSortedManyToManyField(
            model_name="channelmetadata",
            name="included_languages",
            field=sortedm2m.fields.SortedManyToManyField(
                blank=True,
                related_name="channels",
                to="content.Language",
                verbose_name="languages",
            ),
        ),
    ]
10 changes: 8 additions & 2 deletions kolibri/core/content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from morango.models.fields import UUIDField
from mptt.managers import TreeManager
from mptt.querysets import TreeQuerySet
from sortedm2m.fields import SortedManyToManyField

from .utils import paths
from kolibri.core.auth.models import Facility
Expand Down Expand Up @@ -376,9 +377,14 @@ class ChannelMetadata(base_models.ChannelMetadata):
# precalculated fields during annotation/migration
published_size = models.BigIntegerField(default=0, null=True, blank=True)
total_resource_count = models.IntegerField(default=0, null=True, blank=True)
included_languages = models.ManyToManyField(
"Language", related_name="channels", verbose_name="languages", blank=True
included_languages = SortedManyToManyField(
Language,
related_name="channels",
verbose_name="languages",
blank=True,
)
included_categories = models.TextField(null=True, blank=True)
included_grade_levels = models.TextField(null=True, blank=True)
order = models.PositiveIntegerField(default=0, null=True, blank=True)
public = models.BooleanField(null=True)
# Has only a subset of this channel's metadata been imported?
Expand Down
115 changes: 115 additions & 0 deletions kolibri/core/content/test/test_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from kolibri.core.content.models import LocalFile
from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder
from kolibri.core.content.utils.annotation import calculate_included_languages
from kolibri.core.content.utils.annotation import calculate_ordered_categories
from kolibri.core.content.utils.annotation import calculate_ordered_grade_levels
from kolibri.core.content.utils.annotation import calculate_published_size
from kolibri.core.content.utils.annotation import calculate_total_resource_count
from kolibri.core.content.utils.annotation import mark_local_files_as_available
Expand Down Expand Up @@ -962,6 +964,119 @@ def test_calculate_included_languages(self):
list(self.channel.included_languages.values_list("id", flat=True)), ["en"]
)

def test_calculate_ordered_categories(self):
    """Check the ``included_categories`` value written by the annotation.

    Expects ``None`` before any node carries categories, and
    ``"math,science,history"`` once three nodes carry overlapping
    categories -- both before and after one of them is made unavailable.
    """

    def add_node(title, categories):
        # Every extra node lives in the same channel as the fixture node.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            categories=categories,
            available=True,
        )

    expected = "math,science,history"

    # Nothing is categorised yet.
    calculate_ordered_categories(self.channel)
    self.assertIsNone(self.channel.included_categories)

    # "math" appears on all three nodes, the other categories on one each.
    fixture_node = ContentNode.objects.filter(id=self.node.id)
    fixture_node.update(categories="math,science")
    add_node("test2", "math,history")
    math_only = add_node("test3", "math")

    # Ordering by frequency.
    calculate_ordered_categories(self.channel)
    self.assertEqual(self.channel.included_categories, expected)

    # Same expectation after one node is flagged unavailable.
    math_only.available = False
    math_only.save()
    calculate_ordered_categories(self.channel)
    self.assertEqual(self.channel.included_categories, expected)

def test_calculate_ordered_grade_levels(self):
    """Check the ``included_grade_levels`` value written by the annotation.

    Expects ``None`` before any node carries grade levels, and ``"2,1,3"``
    once three nodes carry overlapping grade levels -- both before and
    after one of them is made unavailable.
    """

    def add_node(title, grade_levels):
        # Every extra node lives in the same channel as the fixture node.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            grade_levels=grade_levels,
            available=True,
        )

    expected = "2,1,3"

    # No grade levels have been set yet.
    calculate_ordered_grade_levels(self.channel)
    self.assertIsNone(self.channel.included_grade_levels)

    # Grade "2" appears on all three nodes, "1" and "3" on one each.
    fixture_node = ContentNode.objects.filter(id=self.node.id)
    fixture_node.update(grade_levels="1,2")
    add_node("test2", "2,3")
    grade_two_only = add_node("test3", "2")

    # Ordering by frequency.
    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, expected)

    # Same expectation after one node is flagged unavailable.
    grade_two_only.available = False
    grade_two_only.save()
    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, expected)

def test_calculate_included_languages_frequency(self):
    """Check which languages end up in ``included_languages``.

    Nodes in "en", "es" (twice) and "fr" are present; all three language
    ids are expected both before and after one "es" node is made
    unavailable.
    """
    # NOTE(review): the ids are compared as sets, so only membership is
    # verified here -- the frequency ordering itself is not asserted.

    def add_node(title, lang_id):
        # Every extra node lives in the same channel as the fixture node.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=self.node.channel_id,
            lang_id=lang_id,
            available=True,
        )

    def included_language_ids():
        return set(self.channel.included_languages.values_list("id", flat=True))

    expected = {"en", "es", "fr"}

    # Additional languages used by the new nodes.
    for code in ("es", "fr"):
        Language.objects.create(id=code, lang_code=code)

    # One "en" node (the fixture), two "es" nodes and one "fr" node.
    self.node.lang_id = "en"
    self.node.save()
    add_node("test2", "es")
    second_spanish = add_node("test3", "es")
    add_node("test4", "fr")

    calculate_included_languages(self.channel)
    self.assertEqual(included_language_ids(), expected)

    # Same expectation after one "es" node is flagged unavailable.
    second_spanish.available = False
    second_spanish.save()
    calculate_included_languages(self.channel)
    self.assertEqual(included_language_ids(), expected)

def test_calculate_total_resources(self):
local_file = LocalFile.objects.create(
id=uuid.uuid4().hex, extension="mp4", available=True, file_size=10
Expand Down
15 changes: 15 additions & 0 deletions kolibri/core/content/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.content.tasks import enqueue_automatic_resource_import_if_needed
from kolibri.core.content.utils.annotation import calculate_included_languages
from kolibri.core.content.utils.annotation import calculate_ordered_categories
from kolibri.core.content.utils.annotation import calculate_ordered_grade_levels
from kolibri.core.content.utils.annotation import set_channel_ancestors
from kolibri.core.content.utils.annotation import set_content_visibility_from_disk
from kolibri.core.content.utils.channel_import import FutureSchemaError
Expand Down Expand Up @@ -343,3 +346,15 @@ def synchronize_content_requests_upgrade():
synchronize_content_requests(dataset_id, transfer_session=None)

enqueue_automatic_resource_import_if_needed()


@version_upgrade(old_version="<0.18.0")
def ordered_metadata_in_channels():
    """
    Recalculate, for every channel, the categories, grade levels and
    included languages so they are ordered by occurrence in the channel
    resources.
    """
    # Applied to each channel in this fixed order.
    annotators = (
        calculate_ordered_categories,
        calculate_ordered_grade_levels,
        calculate_included_languages,
    )
    for channel_metadata in ChannelMetadata.objects.all():
        for annotate in annotators:
            annotate(channel_metadata)
Loading

0 comments on commit 597ec1a

Please sign in to comment.