From 39ce52466ff0bca886d599d5fe43a2260d489f72 Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Tue, 31 Mar 2026 20:37:39 -0400 Subject: [PATCH 01/10] temp: ignore .claude directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ea7591535..828320592 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.claude *.py[cod] __pycache__ .pytest_cache From c0a0fbc9261e2d29d7cbacb92bb2c01b1a5f177a Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Mon, 30 Mar 2026 10:27:55 -0400 Subject: [PATCH 02/10] feat!: Collection.key -> Collection.collection_code Also, standardize internal usage of collection_key to collection_code. This helps clarify that Collection.key is *not* an OpaqueKey, but is rather a local slug, which can be combined with other identifiers to form a fully- qualified LibraryCollectionKey instance. BREAKING CHANGE: Collection.key has been renamed to Collection.collection_code. BREAKING CHANGE: Collection.collection_code now validates that its contents matches '[A-Za-z0-9\-\_\.]+'. This was already effectively true, because LibraryCollectionKey can only be built with slug-like parts, but we now we explicitly raise ValiationError from create_collection. Backup now writes both 'key' and 'collection_code' to collection TOML files, so older software (which only knows 'key') can still read new archives. Restore accepts either field, preferring 'collection_code' and falling back to the legacy 'key'. Co-Authored-By: Claude Sonnet 4.6 Part of: https://github.com/openedx/openedx-core/issues/322 --- .../applets/backup_restore/serializers.py | 16 ++- .../applets/backup_restore/toml.py | 5 +- .../applets/backup_restore/zipper.py | 9 +- .../applets/collections/admin.py | 6 +- .../applets/collections/api.py | 42 +++--- .../applets/collections/models.py | 21 +-- src/openedx_content/applets/components/api.py | 4 +- .../applets/components/models.py | 14 +- .../applets/components/readme.rst | 4 +- src/openedx_content/applets/units/models.py | 1 + ...rename_collection_key_to_code_in_python.py | 28 ++++ ...008_rename_collection_key_to_code_in_db.py | 20 +++ ...09_add_collection_code_regex_validation.py | 23 ++++ src/openedx_django_lib/fields.py | 26 ++++ src/openedx_tagging/models/base.py | 2 +- .../applets/backup_restore/test_backup.py | 15 ++- .../applets/backup_restore/test_restore.py | 49 ++++++- .../applets/collections/test_api.py | 120 +++++++++++------- .../applets/components/test_api.py | 6 +- 19 files changed, 306 insertions(+), 105 deletions(-) create mode 100644 src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py create mode 100644 src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py create mode 100644 src/openedx_content/migrations/0009_add_collection_code_regex_validation.py diff --git a/src/openedx_content/applets/backup_restore/serializers.py b/src/openedx_content/applets/backup_restore/serializers.py index d8f7a5c15..69abbdfe2 100644 --- a/src/openedx_content/applets/backup_restore/serializers.py +++ b/src/openedx_content/applets/backup_restore/serializers.py @@ -156,10 +156,24 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract- Serializer for collections. """ title = serializers.CharField(required=True) - key = serializers.CharField(required=True) + # 'collection_code' is the current field name; 'key' is the old name kept for + # back-compat with archives written before the rename. At least one must be present. + collection_code = serializers.CharField(required=False) + key = serializers.CharField(required=False) description = serializers.CharField(required=True, allow_blank=True) entities = serializers.ListField( child=serializers.CharField(), required=True, allow_empty=True, ) + + def validate(self, attrs): + # Prefer 'collection_code'; fall back to legacy 'key'. Always remove + # both so only the normalised 'collection_code' key reaches the caller. + code = attrs.pop("collection_code", None) + legacy_key = attrs.pop("key", None) + code = code or legacy_key + if not code: + raise serializers.ValidationError("Either 'collection_code' or 'key' is required.") + attrs["collection_code"] = code + return attrs diff --git a/src/openedx_content/applets/backup_restore/toml.py b/src/openedx_content/applets/backup_restore/toml.py index d39861803..4470834c6 100644 --- a/src/openedx_content/applets/backup_restore/toml.py +++ b/src/openedx_content/applets/backup_restore/toml.py @@ -220,7 +220,10 @@ def toml_collection(collection: Collection, entity_keys: list[str]) -> str: collection_table = tomlkit.table() collection_table.add("title", collection.title) - collection_table.add("key", collection.key) + # Write both names so that older software (which reads 'key') stays compatible + # with archives produced after the Collection.key -> Collection.collection_code rename. + collection_table.add("key", collection.collection_code) + collection_table.add("collection_code", collection.collection_code) collection_table.add("description", collection.description) collection_table.add("created", collection.created) collection_table.add("entities", entities_array) diff --git a/src/openedx_content/applets/backup_restore/zipper.py b/src/openedx_content/applets/backup_restore/zipper.py index 5f7b2457e..82d478757 100644 --- a/src/openedx_content/applets/backup_restore/zipper.py +++ b/src/openedx_content/applets/backup_restore/zipper.py @@ -401,7 +401,7 @@ def create_zip(self, path: str) -> None: collections = self.get_collections() for collection in collections: - collection_hash_slug = self.get_entity_toml_filename(collection.key) + collection_hash_slug = self.get_entity_toml_filename(collection.collection_code) collection_toml_file_path = collections_folder / f"{collection_hash_slug}.toml" entity_keys_related = collection.entities.order_by("key").values_list("key", flat=True) self.add_file_to_zip( @@ -720,7 +720,10 @@ def _extract_collections( if entity_key not in self.all_publishable_entities_keys: self.errors.append({ "file": file, - "errors": f"Entity key {entity_key} not found for collection {collection_validated.get('key')}" + "errors": ( + f"Entity key {entity_key} not found for collection " + + collection_validated.get('collection_code') + ) }) results["collections"].append(collection_validated) @@ -779,7 +782,7 @@ def _save_collections(self, learning_package, collections): ) collection = collections_api.add_to_collection( learning_package_id=learning_package.id, - key=collection.key, + collection_code=collection.collection_code, entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities) ) diff --git a/src/openedx_content/applets/collections/admin.py b/src/openedx_content/applets/collections/admin.py index eb0685a27..41db9e082 100644 --- a/src/openedx_content/applets/collections/admin.py +++ b/src/openedx_content/applets/collections/admin.py @@ -13,14 +13,14 @@ class CollectionAdmin(admin.ModelAdmin): Allows users to easily disable/enable (aka soft delete and restore) or bulk delete Collections. """ - readonly_fields = ["key", "learning_package"] + readonly_fields = ["collection_code", "learning_package"] list_filter = ["enabled"] - list_display = ["key", "title", "enabled", "modified"] + list_display = ["collection_code", "title", "enabled", "modified"] fieldsets = [ ( "", { - "fields": ["key", "learning_package"], + "fields": ["collection_code", "learning_package"], } ), ( diff --git a/src/openedx_content/applets/collections/api.py b/src/openedx_content/applets/collections/api.py index b57b3bea8..ef9dae340 100644 --- a/src/openedx_content/applets/collections/api.py +++ b/src/openedx_content/applets/collections/api.py @@ -34,7 +34,7 @@ def create_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, *, title: str, created_by: int | None, @@ -44,35 +44,37 @@ def create_collection( """ Create a new Collection """ - collection = Collection.objects.create( + collection = Collection( learning_package_id=learning_package_id, - key=key, + collection_code=collection_code, title=title, created_by_id=created_by, description=description, enabled=enabled, ) + collection.full_clean() + collection.save() return collection -def get_collection(learning_package_id: LearningPackage.ID, collection_key: str) -> Collection: +def get_collection(learning_package_id: LearningPackage.ID, collection_code: str) -> Collection: """ Get a Collection by ID """ - return Collection.objects.get_by_key(learning_package_id, collection_key) + return Collection.objects.get_by_code(learning_package_id, collection_code) def update_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, *, title: str | None = None, description: str | None = None, ) -> Collection: """ - Update a Collection identified by the learning_package_id + key. + Update a Collection identified by the learning_package_id + collection_code. """ - collection = get_collection(learning_package_id, key) + collection = get_collection(learning_package_id, collection_code) # If no changes were requested, there's nothing to update, so just return # the Collection as-is @@ -90,17 +92,17 @@ def update_collection( def delete_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, *, hard_delete=False, ) -> Collection: """ - Disables or deletes a collection identified by the given learning_package + key. + Disables or deletes a collection identified by the given learning_package + collection_code. By default (hard_delete=False), the collection is "soft deleted", i.e disabled. Soft-deleted collections can be re-enabled using restore_collection. """ - collection = get_collection(learning_package_id, key) + collection = get_collection(learning_package_id, collection_code) if hard_delete: collection.delete() @@ -112,12 +114,12 @@ def delete_collection( def restore_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, ) -> Collection: """ Undo a "soft delete" by re-enabling a Collection. """ - collection = get_collection(learning_package_id, key) + collection = get_collection(learning_package_id, collection_code) collection.enabled = True collection.save() @@ -126,7 +128,7 @@ def restore_collection( def add_to_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, entities_qset: QuerySet[PublishableEntity], created_by: int | None = None, ) -> Collection: @@ -146,10 +148,10 @@ def add_to_collection( if invalid_entity: raise ValidationError( f"Cannot add entity {invalid_entity.id} in learning package {invalid_entity.learning_package_id} " - f"to collection {key} in learning package {learning_package_id}." + f"to collection {collection_code} in learning package {learning_package_id}." ) - collection = get_collection(learning_package_id, key) + collection = get_collection(learning_package_id, collection_code) collection.entities.add( *entities_qset.all(), through_defaults={"created_by_id": created_by}, @@ -162,7 +164,7 @@ def add_to_collection( def remove_from_collection( learning_package_id: LearningPackage.ID, - key: str, + collection_code: str, entities_qset: QuerySet[PublishableEntity], ) -> Collection: """ @@ -174,7 +176,7 @@ def remove_from_collection( Returns the updated Collection. """ - collection = get_collection(learning_package_id, key) + collection = get_collection(learning_package_id, collection_code) collection.entities.remove(*entities_qset.all()) collection.modified = datetime.now(tz=timezone.utc) @@ -198,7 +200,7 @@ def get_entity_collections(learning_package_id: LearningPackage.ID, entity_key: def get_collection_entities( learning_package_id: LearningPackage.ID, - collection_key: str, + collection_code: str, ) -> QuerySet[PublishableEntity]: """ Returns a QuerySet of PublishableEntities in a Collection. @@ -207,7 +209,7 @@ def get_collection_entities( """ return PublishableEntity.objects.filter( learning_package_id=learning_package_id, - collections__key=collection_key, + collections__collection_code=collection_code, ).order_by("pk") diff --git a/src/openedx_content/applets/collections/models.py b/src/openedx_content/applets/collections/models.py index 2f315b247..3258ccc9c 100644 --- a/src/openedx_content/applets/collections/models.py +++ b/src/openedx_content/applets/collections/models.py @@ -70,7 +70,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ -from openedx_django_lib.fields import MultiCollationTextField, case_insensitive_char_field, key_field +from openedx_django_lib.fields import MultiCollationTextField, case_insensitive_char_field, code_field from openedx_django_lib.validators import validate_utc_datetime from ..publishing.models import LearningPackage, PublishableEntity @@ -85,12 +85,12 @@ class CollectionManager(models.Manager): """ Custom manager for Collection class. """ - def get_by_key(self, learning_package_id: int, key: str): + def get_by_code(self, learning_package_id: int, collection_code: str): """ - Get the Collection for the given Learning Package + key. + Get the Collection for the given Learning Package + collection code. """ return self.select_related('learning_package') \ - .get(learning_package_id=learning_package_id, key=key) + .get(learning_package_id=learning_package_id, collection_code=collection_code) class Collection(models.Model): @@ -105,10 +105,11 @@ class Collection(models.Model): learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) # Every collection is uniquely and permanently identified within its learning package - # by a 'key' that is set during creation. Both will appear in the + # by a 'code' that is set during creation. Both will appear in the # collection's opaque key: - # e.g. "lib-collection:lib:key" is the opaque key for a library collection. - key = key_field(db_column='_key') + # e.g. "lib-collection:{org_code}:{library_code}:{collection_code}" + # is the opaque key for a library collection. + collection_code = code_field() title = case_insensitive_char_field( null=False, @@ -170,11 +171,11 @@ class Collection(models.Model): class Meta: verbose_name_plural = "Collections" constraints = [ - # Keys are unique within a given LearningPackage. + # Collection codes are unique within a given LearningPackage. models.UniqueConstraint( fields=[ "learning_package", - "key", + "collection_code", ], name="oel_coll_uniq_lp_key", ), @@ -196,7 +197,7 @@ def __str__(self) -> str: """ User-facing string representation of a Collection. """ - return f"<{self.__class__.__name__}> (lp:{self.learning_package_id} {self.key}:{self.title})" + return f"<{self.__class__.__name__}> (lp:{self.learning_package_id} {self.collection_code}:{self.title})" class CollectionPublishableEntity(models.Model): diff --git a/src/openedx_content/applets/components/api.py b/src/openedx_content/applets/components/api.py index 6127cd3ec..1c3ed3e40 100644 --- a/src/openedx_content/applets/components/api.py +++ b/src/openedx_content/applets/components/api.py @@ -436,7 +436,7 @@ def get_components( # pylint: disable=too-many-positional-arguments def get_collection_components( learning_package_id: LearningPackage.ID, - collection_key: str, + collection_code: str, ) -> QuerySet[Component]: """ Returns a QuerySet of Components relating to the PublishableEntities in a Collection. @@ -445,7 +445,7 @@ def get_collection_components( """ return Component.objects.filter( learning_package_id=learning_package_id, - publishable_entity__collections__key=collection_key, + publishable_entity__collections__collection_code=collection_code, ).order_by('pk') diff --git a/src/openedx_content/applets/components/models.py b/src/openedx_content/applets/components/models.py index 6fbab2ba3..8f4b24171 100644 --- a/src/openedx_content/applets/components/models.py +++ b/src/openedx_content/applets/components/models.py @@ -1,5 +1,5 @@ """ -The model hierarchy is Component -> ComponentVersion -> Media. +The model hierarchy is Component -> ComponentVersion -> Content. A Component is an entity like a Problem or Video. It has enough information to identify the Component and determine what the handler should be (e.g. XBlock @@ -10,7 +10,7 @@ publishing app. Component maps 1:1 to PublishableEntity and ComponentVersion maps 1:1 to PublishableEntityVersion. -Multiple pieces of Media may be associated with a ComponentVersion, through +Multiple pieces of Content may be associated with a ComponentVersion, through the ComponentVersionMedia model. ComponentVersionMedia allows to specify a ComponentVersion-local identifier. We're using this like a file path by convention, but it's possible we might want to have special identifiers later. @@ -97,7 +97,7 @@ class Component(PublishableEntityMixin): Problem), but little beyond that. A Component will have many ComponentVersions over time, and most metadata is - associated with the ComponentVersion model and the Media that + associated with the ComponentVersion model and the Content that ComponentVersions are associated with. A Component belongs to exactly one LearningPackage. @@ -224,7 +224,7 @@ class ComponentVersion(PublishableEntityVersionMixin): """ A particular version of a Component. - This holds the media using a M:M relationship with Media via + This holds the media using a M:M relationship with Content via ComponentVersionMedia. """ @@ -250,18 +250,18 @@ class Meta: class ComponentVersionMedia(models.Model): """ - Determines the Media for a given ComponentVersion. + Determines the Content for a given ComponentVersion. An ComponentVersion may be associated with multiple pieces of binary data. For instance, a Video ComponentVersion might be associated with multiple transcripts in different languages. - When Media is associated with an ComponentVersion, it has some local + When Content is associated with an ComponentVersion, it has some local key that is unique within the the context of that ComponentVersion. This allows the ComponentVersion to do things like store an image file and reference it by a "path" key. - Media is immutable and sharable across multiple ComponentVersions. + Content is immutable and sharable across multiple ComponentVersions. """ component_version = models.ForeignKey(ComponentVersion, on_delete=models.CASCADE) diff --git a/src/openedx_content/applets/components/readme.rst b/src/openedx_content/applets/components/readme.rst index eb97d22f2..b79134bc6 100644 --- a/src/openedx_content/applets/components/readme.rst +++ b/src/openedx_content/applets/components/readme.rst @@ -1,7 +1,7 @@ Components App ============== -The ``components`` app holds the versioned data models for the lowest-level pieces of content that can be stored in Open edX: Components (e.g. XBlocks), as well as the individual pieces of raw data Media that they reference. +The ``components`` app holds the versioned data models for the lowest-level pieces of content that can be stored in Open edX: Components (e.g. XBlocks), as well as the individual pieces of raw data content that they reference. Motivation ---------- @@ -18,6 +18,6 @@ Architecture Guidelines * We're keeping nearly unlimited history, so per-version metadata (i.e. the space/time cost of making a new version) must be kept low. * Do not assume that all Components will be XBlocks. -* Encourage other apps to make models that join to (and add their own metadata to) Component, ComponentVersion, Media, etc. But it should be done in such a way that this app is not aware of them. +* Encourage other apps to make models that join to (and add their own metadata to) Component, ComponentVersion, Content, etc. But it should be done in such a way that this app is not aware of them. * Always preserve the most raw version of the data possible, e.g. OLX, even if XBlocks then extend that with more sophisticated data models. At some point those XBlocks will get deprecated/removed, and we will still want to be able to export the raw data. * Exports should be fast and *not* require the invocation of plugin code. \ No newline at end of file diff --git a/src/openedx_content/applets/units/models.py b/src/openedx_content/applets/units/models.py index 913ea4890..109fb38b9 100644 --- a/src/openedx_content/applets/units/models.py +++ b/src/openedx_content/applets/units/models.py @@ -1,6 +1,7 @@ """ Models that implement units """ +from __future__ import annotations from typing import NewType, cast, override diff --git a/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py b/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py new file mode 100644 index 000000000..604fdedee --- /dev/null +++ b/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py @@ -0,0 +1,28 @@ +# Generated by Django 5.2.12 on 2026-03-24 20:33 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0006_typed_ids'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.RemoveConstraint( + model_name='collection', + name='oel_coll_uniq_lp_key', + ), + migrations.RenameField( + model_name='collection', + old_name='key', + new_name='collection_code', + ), + migrations.AddConstraint( + model_name='collection', + constraint=models.UniqueConstraint(fields=('learning_package', 'collection_code'), name='oel_coll_uniq_lp_key'), + ), + ] diff --git a/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py b/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py new file mode 100644 index 000000000..06e390fb5 --- /dev/null +++ b/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py @@ -0,0 +1,20 @@ +# Generated by Django 5.2.12 on 2026-03-24 20:33 + +from django.db import migrations + +import openedx_django_lib.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0007_rename_collection_key_to_code_in_python'), + ] + + operations = [ + migrations.AlterField( + model_name='collection', + name='collection_code', + field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500), + ), + ] diff --git a/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py b/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py new file mode 100644 index 000000000..bcd2c9cc6 --- /dev/null +++ b/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.12 on 2026-03-24 20:33 + +import re + +import django.core.validators +from django.db import migrations + +import openedx_django_lib.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0008_rename_collection_key_to_code_in_db'), + ] + + operations = [ + migrations.AlterField( + model_name='collection', + name='collection_code', + field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[a-zA-Z0-9\\-\\_\\.]+\\Z'), 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', 'invalid')]), + ), + ] diff --git a/src/openedx_django_lib/fields.py b/src/openedx_django_lib/fields.py index 44bcb2a23..eb431c505 100644 --- a/src/openedx_django_lib/fields.py +++ b/src/openedx_django_lib/fields.py @@ -12,10 +12,13 @@ from __future__ import annotations import hashlib +import re import uuid from typing import Any +from django.core.validators import RegexValidator from django.db import models +from django.utils.translation import gettext_lazy as _ from .collations import MultiCollationMixin # Re-export these fields which are in a separate file so we can use .pyi type stubs: @@ -114,6 +117,29 @@ def immutable_uuid_field() -> models.UUIDField: ) +# Alphanumeric, hyphens, underscores, periods +CODE_REGEX = re.compile(r"^[a-zA-Z0-9\-\_\.]+\Z") + + +def code_field(**kwargs) -> MultiCollationCharField: + """ + Field to hold a 'code', i.e. a slug-like local identifier. + """ + return case_sensitive_char_field( + max_length=255, + blank=False, + validators=[ + RegexValidator( + CODE_REGEX, + # Translators: "letters" means latin letters: a-z and A-Z. + _('Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.'), + "invalid", + ), + ], + **kwargs, + ) + + def key_field(**kwargs) -> MultiCollationCharField: """ Externally created Identifier fields. diff --git a/src/openedx_tagging/models/base.py b/src/openedx_tagging/models/base.py index 000d4d338..7957a45df 100644 --- a/src/openedx_tagging/models/base.py +++ b/src/openedx_tagging/models/base.py @@ -421,7 +421,7 @@ def copy(self, taxonomy: Taxonomy) -> Taxonomy: return self - def get_filtered_tags( # pylint: disable=too-many-positional-arguments + def get_filtered_tags( self, depth: int | None = None, parent_tag_value: str | None = None, diff --git a/tests/openedx_content/applets/backup_restore/test_backup.py b/tests/openedx_content/applets/backup_restore/test_backup.py index efff635de..91cb1bd26 100644 --- a/tests/openedx_content/applets/backup_restore/test_backup.py +++ b/tests/openedx_content/applets/backup_restore/test_backup.py @@ -146,7 +146,7 @@ def setUpTestData(cls): cls.collection = api.create_collection( cls.learning_package.id, - key="COL1", + collection_code="COL1", created_by=cls.user.id, title="Collection 1", description="Description of Collection 1", @@ -154,7 +154,7 @@ def setUpTestData(cls): api.add_to_collection( cls.learning_package.id, - cls.collection.key, + cls.collection.collection_code, components ) @@ -277,6 +277,17 @@ def test_lp_dump_command(self): for file_path, expected_content in expected_files.items(): self.check_toml_file(zip_path, Path(file_path), expected_content) + # Verify that collection TOMLs include both 'key' (legacy) and 'collection_code' + # (new name), so older software can still read archives produced after the rename. + self.check_toml_file( + zip_path, + Path("collections/col1.toml"), + [ + 'key = "COL1"', + 'collection_code = "COL1"', + ] + ) + # Check the output message message = f'{lp_key} written to {file_name}' self.assertIn(message, out.getvalue()) diff --git a/tests/openedx_content/applets/backup_restore/test_restore.py b/tests/openedx_content/applets/backup_restore/test_restore.py index 0116731c4..372d04939 100644 --- a/tests/openedx_content/applets/backup_restore/test_restore.py +++ b/tests/openedx_content/applets/backup_restore/test_restore.py @@ -8,6 +8,7 @@ from django.core.management import call_command from django.test import TestCase +from openedx_content.applets.backup_restore.serializers import CollectionSerializer from openedx_content.applets.backup_restore.zipper import LearningPackageUnzipper, generate_staged_lp_key from openedx_content.applets.collections import api as collections_api from openedx_content.applets.components import api as components_api @@ -183,7 +184,7 @@ def verify_collections(self, lp): assert collections.count() == 1 collection = collections.first() assert collection.title == "Collection test1" - assert collection.key == "collection-test" + assert collection.collection_code == "collection-test" assert collection.description == "" assert collection.created_by is not None assert collection.created_by.username == "lp_user" @@ -354,6 +355,52 @@ def test_success_metadata_using_user_context(self): assert metadata == expected_metadata +class CollectionSerializerTest(TestCase): + """ + Unit tests for CollectionSerializer's back-compat handling of 'key' vs 'collection_code'. + """ + + BASE_DATA = { + "title": "My Collection", + "description": "", + "entities": [], + } + + def _serialize(self, extra): + data = {**self.BASE_DATA, **extra} + s = CollectionSerializer(data=data) + s.is_valid() + return s + + def test_legacy_key_field(self): + """Archives written before the rename use 'key'; restore must accept it.""" + s = self._serialize({"key": "my-col"}) + assert s.is_valid(), s.errors + assert s.validated_data["collection_code"] == "my-col" + assert "key" not in s.validated_data + + def test_new_collection_code_field(self): + """Archives written after the rename use 'collection_code'.""" + s = self._serialize({"collection_code": "my-col"}) + assert s.is_valid(), s.errors + assert s.validated_data["collection_code"] == "my-col" + assert "key" not in s.validated_data + + def test_both_fields_collection_code_wins(self): + """When both fields are present (new archives include both for back-compat), + 'collection_code' takes precedence over 'key'.""" + s = self._serialize({"key": "old-value", "collection_code": "new-value"}) + assert s.is_valid(), s.errors + assert s.validated_data["collection_code"] == "new-value" + assert "key" not in s.validated_data + + def test_neither_field_is_an_error(self): + """Missing both 'key' and 'collection_code' must fail validation.""" + s = self._serialize({}) + assert not s.is_valid() + assert "non_field_errors" in s.errors + + class RestoreUtilitiesTest(TestCase): """Tests for utility functions used in the restore process.""" diff --git a/tests/openedx_content/applets/collections/test_api.py b/tests/openedx_content/applets/collections/test_api.py index f53d9fbb7..42e4f1638 100644 --- a/tests/openedx_content/applets/collections/test_api.py +++ b/tests/openedx_content/applets/collections/test_api.py @@ -64,35 +64,35 @@ def setUpTestData(cls) -> None: super().setUpTestData() cls.collection1 = api.create_collection( cls.learning_package.id, - key="COL1", + collection_code="COL1", created_by=None, title="Collection 1", description="Description of Collection 1", ) cls.collection2 = api.create_collection( cls.learning_package.id, - key="COL2", + collection_code="COL2", created_by=None, title="Collection 2", description="Description of Collection 2", ) cls.collection3 = api.create_collection( cls.learning_package.id, - key="COL3", + collection_code="COL3", created_by=None, title="Collection 3", description="Description of Collection 3", ) cls.another_library_collection = api.create_collection( cls.learning_package_2.id, - key="another_library", + collection_code="another_library", created_by=None, title="Collection 4", description="Description of Collection 4", ) cls.disabled_collection = api.create_collection( cls.learning_package.id, - key="disabled_collection", + collection_code="disabled_collection", created_by=None, title="Disabled Collection", description="Description of Disabled Collection", @@ -123,7 +123,7 @@ def test_get_collection_wrong_learning_package(self): Test getting a collection that doesn't exist in the requested learning package. """ with self.assertRaises(ObjectDoesNotExist): - api.get_collection(self.learning_package.id, self.another_library_collection.key) + api.get_collection(self.learning_package.id, self.another_library_collection.collection_code) def test_get_collections(self): """ @@ -191,14 +191,14 @@ def test_create_collection(self): with freeze_time(created_time): collection = api.create_collection( self.learning_package.id, - key='MYCOL', + collection_code='MYCOL', title="My Collection", created_by=user.id, description="This is my collection", ) assert collection.title == "My Collection" - assert collection.key == "MYCOL" + assert collection.collection_code == "MYCOL" assert collection.description == "This is my collection" assert collection.enabled assert collection.created == created_time @@ -211,15 +211,35 @@ def test_create_collection_without_description(self): """ collection = api.create_collection( self.learning_package.id, - key='MYCOL', + collection_code='MYCOL', created_by=None, title="My Collection", ) assert collection.title == "My Collection" - assert collection.key == "MYCOL" + assert collection.collection_code == "MYCOL" assert collection.description == "" assert collection.enabled + def test_create_collection_invalid_code(self): + """ + collection_code must only contain alphanumerics, hyphens, underscores, and periods. + """ + invalid_codes = [ + "has space", + "has@symbol", + "has/slash", + "has#hash", + ] + for code in invalid_codes: + with self.subTest(code=code): + with self.assertRaises(ValidationError): + api.create_collection( + self.learning_package.id, + collection_code=code, + title="Test", + created_by=None, + ) + class CollectionEntitiesTestCase(CollectionsTestCase): """ @@ -283,16 +303,16 @@ def setUpTestData(cls) -> None: # Add some shared components to the collections cls.collection1 = api.add_to_collection( cls.learning_package.id, - key=cls.collection1.key, - entities_qset=PublishableEntity.objects.filter(pk__in=[ + collection_code=cls.collection1.collection_code, + entities_qset=PublishableEntity.objects.filter(id__in=[ cls.published_component.id, ]), created_by=cls.user.id, ) cls.collection2 = api.add_to_collection( cls.learning_package.id, - key=cls.collection2.key, - entities_qset=PublishableEntity.objects.filter(pk__in=[ + collection_code=cls.collection2.collection_code, + entities_qset=PublishableEntity.objects.filter(id__in=[ cls.published_component.id, cls.draft_component.id, cls.draft_unit.id, @@ -300,8 +320,8 @@ def setUpTestData(cls) -> None: ) cls.disabled_collection = api.add_to_collection( cls.learning_package.id, - key=cls.disabled_collection.key, - entities_qset=PublishableEntity.objects.filter(pk__in=[ + collection_code=cls.disabled_collection.collection_code, + entities_qset=PublishableEntity.objects.filter(id__in=[ cls.published_component.id, ]), ) @@ -335,8 +355,8 @@ def test_add_to_collection(self): with freeze_time(modified_time): self.collection1 = api.add_to_collection( self.learning_package.id, - self.collection1.key, - PublishableEntity.objects.filter(pk__in=[ + self.collection1.collection_code, + PublishableEntity.objects.filter(id__in=[ self.draft_component.id, self.draft_unit.id, ]), @@ -360,8 +380,8 @@ def test_add_to_collection_again(self): with freeze_time(modified_time): self.collection2 = api.add_to_collection( self.learning_package.id, - self.collection2.key, - PublishableEntity.objects.filter(pk__in=[ + self.collection2.collection_code, + PublishableEntity.objects.filter(id__in=[ self.published_component.id, ]), ) @@ -380,8 +400,8 @@ def test_add_to_collection_wrong_learning_package(self): with self.assertRaises(ValidationError): api.add_to_collection( self.learning_package_2.id, - self.another_library_collection.key, - PublishableEntity.objects.filter(pk__in=[ + self.another_library_collection.collection_code, + PublishableEntity.objects.filter(id__in=[ self.published_component.id, ]), ) @@ -396,8 +416,8 @@ def test_remove_from_collection(self): with freeze_time(modified_time): self.collection2 = api.remove_from_collection( self.learning_package.id, - self.collection2.key, - PublishableEntity.objects.filter(pk__in=[ + self.collection2.collection_code, + PublishableEntity.objects.filter(id__in=[ self.published_component.id, self.draft_unit.id, ]), @@ -424,46 +444,46 @@ def test_get_entity_collections(self): def test_get_collection_components(self): assert list(api.get_collection_components( self.learning_package.id, - self.collection1.key, + self.collection1.collection_code, )) == [self.published_component] assert list(api.get_collection_components( self.learning_package.id, - self.collection2.key, + self.collection2.collection_code, )) == [self.published_component, self.draft_component] assert not list(api.get_collection_components( self.learning_package.id, - self.collection3.key, + self.collection3.collection_code, )) assert not list(api.get_collection_components( self.learning_package.id, - self.another_library_collection.key, + self.another_library_collection.collection_code, )) def test_get_collection_containers(self): """ Test using `get_collection_entities()` to get containers """ - def get_collection_containers(learning_package_id: LearningPackage.ID, collection_key: str): + def get_collection_containers(learning_package_id: LearningPackage.ID, collection_code: str): return ( pe.container for pe in - api.get_collection_entities(learning_package_id, collection_key).exclude(container=None) + api.get_collection_entities(learning_package_id, collection_code).exclude(container=None) ) assert not list(get_collection_containers( self.learning_package.id, - self.collection1.key, + self.collection1.collection_code, )) assert list(get_collection_containers( self.learning_package.id, - self.collection2.key, + self.collection2.collection_code, )) == [self.draft_unit.container] assert not list(get_collection_containers( self.learning_package.id, - self.collection3.key, + self.collection3.collection_code, )) assert not list(get_collection_containers( self.learning_package.id, - self.another_library_collection.key, + self.another_library_collection.collection_code, )) @@ -481,7 +501,7 @@ def setUpTestData(cls) -> None: super().setUpTestData() cls.collection = api.create_collection( cls.learning_package.id, - key="MYCOL", + collection_code="MYCOL", title="Collection", created_by=None, description="Description of Collection", @@ -495,7 +515,7 @@ def test_update_collection(self): with freeze_time(modified_time): collection = api.update_collection( self.learning_package.id, - key=self.collection.key, + collection_code=self.collection.collection_code, title="New Title", description="", ) @@ -511,17 +531,19 @@ def test_update_collection_partial(self): """ collection = api.update_collection( self.learning_package.id, - key=self.collection.key, + collection_code=self.collection.collection_code, title="New Title", ) assert collection.title == "New Title" assert collection.description == self.collection.description # unchanged - assert f"{collection}" == f" (lp:{self.learning_package.id} {self.collection.key}:New Title)" + assert f"{collection}" == ( + f" (lp:{self.learning_package.id} {self.collection.collection_code}:New Title)" + ) collection = api.update_collection( self.learning_package.id, - key=self.collection.key, + collection_code=self.collection.collection_code, description="New description", ) @@ -536,7 +558,7 @@ def test_update_collection_empty(self): with freeze_time(modified_time): collection = api.update_collection( self.learning_package.id, - key=self.collection.key, + collection_code=self.collection.collection_code, ) assert collection.title == self.collection.title # unchanged @@ -550,7 +572,7 @@ def test_update_collection_not_found(self): with self.assertRaises(ObjectDoesNotExist): api.update_collection( self.learning_package.id, - key="12345", + collection_code="12345", title="New Title", ) @@ -568,13 +590,13 @@ def test_soft_delete(self): with freeze_time(modified_time): collection = api.delete_collection( self.learning_package.id, - key=self.collection2.key, + collection_code=self.collection2.collection_code, ) # Collection was disabled and still exists in the database assert not collection.enabled assert collection.modified == modified_time - assert collection == api.get_collection(self.learning_package.id, collection.key) + assert collection == api.get_collection(self.learning_package.id, collection.collection_code) # ...and the collection's entities remain intact. assert list(collection.entities.all()) == [ self.draft_unit.publishable_entity, @@ -590,7 +612,7 @@ def test_delete(self): with freeze_time(modified_time): collection = api.delete_collection( self.learning_package.id, - key=self.collection2.key, + collection_code=self.collection2.collection_code, hard_delete=True, ) @@ -598,7 +620,7 @@ def test_delete(self): assert collection.enabled assert not collection.id with self.assertRaises(ObjectDoesNotExist): - api.get_collection(self.learning_package.id, collection.key) + api.get_collection(self.learning_package.id, collection.collection_code) # ...and the entities have been removed from this collection assert list(api.get_entity_collections( self.learning_package.id, @@ -615,20 +637,20 @@ def test_restore(self): """ collection = api.delete_collection( self.learning_package.id, - key=self.collection2.key, + collection_code=self.collection2.collection_code, ) modified_time = datetime(2024, 8, 8, tzinfo=timezone.utc) with freeze_time(modified_time): collection = api.restore_collection( self.learning_package.id, - key=self.collection2.key, + collection_code=self.collection2.collection_code, ) # Collection was enabled and still exists in the database assert collection.enabled assert collection.modified == modified_time - assert collection == api.get_collection(self.learning_package.id, collection.key) + assert collection == api.get_collection(self.learning_package.id, collection.collection_code) # ...and the collection's entities remain intact. assert list(collection.entities.all()) == [ self.draft_unit.publishable_entity, @@ -719,7 +741,7 @@ def test_set_collection_wrong_learning_package(self): ) collection = api.create_collection( learning_package_3.id, - key="MYCOL", + collection_code="MYCOL", title="My Collection", created_by=None, description="Description of Collection", diff --git a/tests/openedx_content/applets/components/test_api.py b/tests/openedx_content/applets/components/test_api.py index cbaa889b2..74f482090 100644 --- a/tests/openedx_content/applets/components/test_api.py +++ b/tests/openedx_content/applets/components/test_api.py @@ -666,21 +666,21 @@ def setUpTestData(cls) -> None: ) cls.collection1 = collection_api.create_collection( cls.learning_package.id, - key="MYCOL1", + collection_code="MYCOL1", title="Collection1", created_by=None, description="Description of Collection 1", ) cls.collection2 = collection_api.create_collection( cls.learning_package.id, - key="MYCOL2", + collection_code="MYCOL2", title="Collection2", created_by=None, description="Description of Collection 2", ) cls.collection3 = collection_api.create_collection( cls.learning_package.id, - key="MYCOL3", + collection_code="MYCOL3", title="Collection3", created_by=None, description="Description of Collection 3", From 200f68bdac368e377dd90bb47f2565db25478046 Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 10:22:07 -0400 Subject: [PATCH 03/10] fix(squash): restore Content->Media docstring renames from 7cf1539 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/openedx_content/applets/components/models.py | 14 +++++++------- src/openedx_content/applets/components/readme.rst | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/openedx_content/applets/components/models.py b/src/openedx_content/applets/components/models.py index 8f4b24171..6fbab2ba3 100644 --- a/src/openedx_content/applets/components/models.py +++ b/src/openedx_content/applets/components/models.py @@ -1,5 +1,5 @@ """ -The model hierarchy is Component -> ComponentVersion -> Content. +The model hierarchy is Component -> ComponentVersion -> Media. A Component is an entity like a Problem or Video. It has enough information to identify the Component and determine what the handler should be (e.g. XBlock @@ -10,7 +10,7 @@ publishing app. Component maps 1:1 to PublishableEntity and ComponentVersion maps 1:1 to PublishableEntityVersion. -Multiple pieces of Content may be associated with a ComponentVersion, through +Multiple pieces of Media may be associated with a ComponentVersion, through the ComponentVersionMedia model. ComponentVersionMedia allows to specify a ComponentVersion-local identifier. We're using this like a file path by convention, but it's possible we might want to have special identifiers later. @@ -97,7 +97,7 @@ class Component(PublishableEntityMixin): Problem), but little beyond that. A Component will have many ComponentVersions over time, and most metadata is - associated with the ComponentVersion model and the Content that + associated with the ComponentVersion model and the Media that ComponentVersions are associated with. A Component belongs to exactly one LearningPackage. @@ -224,7 +224,7 @@ class ComponentVersion(PublishableEntityVersionMixin): """ A particular version of a Component. - This holds the media using a M:M relationship with Content via + This holds the media using a M:M relationship with Media via ComponentVersionMedia. """ @@ -250,18 +250,18 @@ class Meta: class ComponentVersionMedia(models.Model): """ - Determines the Content for a given ComponentVersion. + Determines the Media for a given ComponentVersion. An ComponentVersion may be associated with multiple pieces of binary data. For instance, a Video ComponentVersion might be associated with multiple transcripts in different languages. - When Content is associated with an ComponentVersion, it has some local + When Media is associated with an ComponentVersion, it has some local key that is unique within the the context of that ComponentVersion. This allows the ComponentVersion to do things like store an image file and reference it by a "path" key. - Content is immutable and sharable across multiple ComponentVersions. + Media is immutable and sharable across multiple ComponentVersions. """ component_version = models.ForeignKey(ComponentVersion, on_delete=models.CASCADE) diff --git a/src/openedx_content/applets/components/readme.rst b/src/openedx_content/applets/components/readme.rst index b79134bc6..eb97d22f2 100644 --- a/src/openedx_content/applets/components/readme.rst +++ b/src/openedx_content/applets/components/readme.rst @@ -1,7 +1,7 @@ Components App ============== -The ``components`` app holds the versioned data models for the lowest-level pieces of content that can be stored in Open edX: Components (e.g. XBlocks), as well as the individual pieces of raw data content that they reference. +The ``components`` app holds the versioned data models for the lowest-level pieces of content that can be stored in Open edX: Components (e.g. XBlocks), as well as the individual pieces of raw data Media that they reference. Motivation ---------- @@ -18,6 +18,6 @@ Architecture Guidelines * We're keeping nearly unlimited history, so per-version metadata (i.e. the space/time cost of making a new version) must be kept low. * Do not assume that all Components will be XBlocks. -* Encourage other apps to make models that join to (and add their own metadata to) Component, ComponentVersion, Content, etc. But it should be done in such a way that this app is not aware of them. +* Encourage other apps to make models that join to (and add their own metadata to) Component, ComponentVersion, Media, etc. But it should be done in such a way that this app is not aware of them. * Always preserve the most raw version of the data possible, e.g. OLX, even if XBlocks then extend that with more sophisticated data models. At some point those XBlocks will get deprecated/removed, and we will still want to be able to export the raw data. * Exports should be fast and *not* require the invocation of plugin code. \ No newline at end of file From 1b5b8cd22a042db606ad1df7032b56b6e18ab2cd Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 10:25:43 -0400 Subject: [PATCH 04/10] fix(squash): keep archive format using 'key' for collections, not 'collection_code' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backup_restore archive format stays unchanged — collections are still serialized with "key". A comment notes the divergence from the model field name (collection_code) and that a future v2 format may align them. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../applets/backup_restore/serializers.py | 18 ++----- .../applets/backup_restore/toml.py | 5 +- .../applets/backup_restore/zipper.py | 7 ++- .../applets/backup_restore/test_backup.py | 11 ----- .../applets/backup_restore/test_restore.py | 47 ------------------- 5 files changed, 8 insertions(+), 80 deletions(-) diff --git a/src/openedx_content/applets/backup_restore/serializers.py b/src/openedx_content/applets/backup_restore/serializers.py index 69abbdfe2..f2085e751 100644 --- a/src/openedx_content/applets/backup_restore/serializers.py +++ b/src/openedx_content/applets/backup_restore/serializers.py @@ -156,24 +156,12 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract- Serializer for collections. """ title = serializers.CharField(required=True) - # 'collection_code' is the current field name; 'key' is the old name kept for - # back-compat with archives written before the rename. At least one must be present. - collection_code = serializers.CharField(required=False) - key = serializers.CharField(required=False) + # Note: the model field is now Collection.collection_code, but the archive + # format still uses "key". A future v2 format may align the name. + key = serializers.CharField(required=True) description = serializers.CharField(required=True, allow_blank=True) entities = serializers.ListField( child=serializers.CharField(), required=True, allow_empty=True, ) - - def validate(self, attrs): - # Prefer 'collection_code'; fall back to legacy 'key'. Always remove - # both so only the normalised 'collection_code' key reaches the caller. - code = attrs.pop("collection_code", None) - legacy_key = attrs.pop("key", None) - code = code or legacy_key - if not code: - raise serializers.ValidationError("Either 'collection_code' or 'key' is required.") - attrs["collection_code"] = code - return attrs diff --git a/src/openedx_content/applets/backup_restore/toml.py b/src/openedx_content/applets/backup_restore/toml.py index 4470834c6..520d6b877 100644 --- a/src/openedx_content/applets/backup_restore/toml.py +++ b/src/openedx_content/applets/backup_restore/toml.py @@ -220,10 +220,9 @@ def toml_collection(collection: Collection, entity_keys: list[str]) -> str: collection_table = tomlkit.table() collection_table.add("title", collection.title) - # Write both names so that older software (which reads 'key') stays compatible - # with archives produced after the Collection.key -> Collection.collection_code rename. + # Note: the model field is now Collection.collection_code, but the archive + # format still uses "key". A future v2 format may align the name. collection_table.add("key", collection.collection_code) - collection_table.add("collection_code", collection.collection_code) collection_table.add("description", collection.description) collection_table.add("created", collection.created) collection_table.add("entities", entities_array) diff --git a/src/openedx_content/applets/backup_restore/zipper.py b/src/openedx_content/applets/backup_restore/zipper.py index 82d478757..132171322 100644 --- a/src/openedx_content/applets/backup_restore/zipper.py +++ b/src/openedx_content/applets/backup_restore/zipper.py @@ -720,10 +720,7 @@ def _extract_collections( if entity_key not in self.all_publishable_entities_keys: self.errors.append({ "file": file, - "errors": ( - f"Entity key {entity_key} not found for collection " + - collection_validated.get('collection_code') - ) + "errors": f"Entity key {entity_key} not found for collection {collection_validated.get('key')}" }) results["collections"].append(collection_validated) @@ -777,6 +774,8 @@ def _save_collections(self, learning_package, collections): """Save collections and their entities.""" for valid_collection in collections.get("collections", []): entities = valid_collection.pop("entities", []) + # The archive format uses "key"; the API now expects "collection_code". + valid_collection["collection_code"] = valid_collection.pop("key") collection = collections_api.create_collection( learning_package.id, created_by=self.user_id, **valid_collection ) diff --git a/tests/openedx_content/applets/backup_restore/test_backup.py b/tests/openedx_content/applets/backup_restore/test_backup.py index 91cb1bd26..edafe3f66 100644 --- a/tests/openedx_content/applets/backup_restore/test_backup.py +++ b/tests/openedx_content/applets/backup_restore/test_backup.py @@ -277,17 +277,6 @@ def test_lp_dump_command(self): for file_path, expected_content in expected_files.items(): self.check_toml_file(zip_path, Path(file_path), expected_content) - # Verify that collection TOMLs include both 'key' (legacy) and 'collection_code' - # (new name), so older software can still read archives produced after the rename. - self.check_toml_file( - zip_path, - Path("collections/col1.toml"), - [ - 'key = "COL1"', - 'collection_code = "COL1"', - ] - ) - # Check the output message message = f'{lp_key} written to {file_name}' self.assertIn(message, out.getvalue()) diff --git a/tests/openedx_content/applets/backup_restore/test_restore.py b/tests/openedx_content/applets/backup_restore/test_restore.py index 372d04939..a905427e8 100644 --- a/tests/openedx_content/applets/backup_restore/test_restore.py +++ b/tests/openedx_content/applets/backup_restore/test_restore.py @@ -8,7 +8,6 @@ from django.core.management import call_command from django.test import TestCase -from openedx_content.applets.backup_restore.serializers import CollectionSerializer from openedx_content.applets.backup_restore.zipper import LearningPackageUnzipper, generate_staged_lp_key from openedx_content.applets.collections import api as collections_api from openedx_content.applets.components import api as components_api @@ -355,52 +354,6 @@ def test_success_metadata_using_user_context(self): assert metadata == expected_metadata -class CollectionSerializerTest(TestCase): - """ - Unit tests for CollectionSerializer's back-compat handling of 'key' vs 'collection_code'. - """ - - BASE_DATA = { - "title": "My Collection", - "description": "", - "entities": [], - } - - def _serialize(self, extra): - data = {**self.BASE_DATA, **extra} - s = CollectionSerializer(data=data) - s.is_valid() - return s - - def test_legacy_key_field(self): - """Archives written before the rename use 'key'; restore must accept it.""" - s = self._serialize({"key": "my-col"}) - assert s.is_valid(), s.errors - assert s.validated_data["collection_code"] == "my-col" - assert "key" not in s.validated_data - - def test_new_collection_code_field(self): - """Archives written after the rename use 'collection_code'.""" - s = self._serialize({"collection_code": "my-col"}) - assert s.is_valid(), s.errors - assert s.validated_data["collection_code"] == "my-col" - assert "key" not in s.validated_data - - def test_both_fields_collection_code_wins(self): - """When both fields are present (new archives include both for back-compat), - 'collection_code' takes precedence over 'key'.""" - s = self._serialize({"key": "old-value", "collection_code": "new-value"}) - assert s.is_valid(), s.errors - assert s.validated_data["collection_code"] == "new-value" - assert "key" not in s.validated_data - - def test_neither_field_is_an_error(self): - """Missing both 'key' and 'collection_code' must fail validation.""" - s = self._serialize({}) - assert not s.is_valid() - assert "non_field_errors" in s.errors - - class RestoreUtilitiesTest(TestCase): """Tests for utility functions used in the restore process.""" From 9f08503cda4aa094c8a0aada7850d756161e2ee9 Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 10:37:30 -0400 Subject: [PATCH 05/10] fix(squash): add code_field_check() helper for DB-level regex constraint Adds a code_field_check(field_name, name=...) companion to code_field() that returns a CheckConstraint using Django's Regex lookup. This enforces the code regex at the database level (not just via validators), and Django also runs it as a Python-level validator automatically. Applied to Collection.collection_code as the first usage. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../applets/collections/models.py | 3 +- ...llection_oel_coll_collection_code_regex.py | 20 +++++++++++ src/openedx_django_lib/fields.py | 34 ++++++++++++++++++- 3 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py diff --git a/src/openedx_content/applets/collections/models.py b/src/openedx_content/applets/collections/models.py index 3258ccc9c..a1c0fa4a4 100644 --- a/src/openedx_content/applets/collections/models.py +++ b/src/openedx_content/applets/collections/models.py @@ -70,7 +70,7 @@ from django.db import models from django.utils.translation import gettext_lazy as _ -from openedx_django_lib.fields import MultiCollationTextField, case_insensitive_char_field, code_field +from openedx_django_lib.fields import MultiCollationTextField, case_insensitive_char_field, code_field, code_field_check from openedx_django_lib.validators import validate_utc_datetime from ..publishing.models import LearningPackage, PublishableEntity @@ -179,6 +179,7 @@ class Meta: ], name="oel_coll_uniq_lp_key", ), + code_field_check("collection_code", name="oel_coll_collection_code_regex"), ] indexes = [ models.Index( diff --git a/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py b/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py new file mode 100644 index 000000000..666f6efdc --- /dev/null +++ b/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py @@ -0,0 +1,20 @@ +# Generated by Django 5.2.13 on 2026-04-16 14:37 + +import django.db.models.lookups +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0009_add_collection_code_regex_validation'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddConstraint( + model_name='collection', + constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('collection_code'), '^[a-zA-Z0-9\\-\\_\\.]+\\Z'), name='oel_coll_collection_code_regex', violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.'), + ), + ] diff --git a/src/openedx_django_lib/fields.py b/src/openedx_django_lib/fields.py index eb431c505..4ef8be913 100644 --- a/src/openedx_django_lib/fields.py +++ b/src/openedx_django_lib/fields.py @@ -18,6 +18,7 @@ from django.core.validators import RegexValidator from django.db import models +from django.db.models.lookups import Regex from django.utils.translation import gettext_lazy as _ from .collations import MultiCollationMixin @@ -121,9 +122,17 @@ def immutable_uuid_field() -> models.UUIDField: CODE_REGEX = re.compile(r"^[a-zA-Z0-9\-\_\.]+\Z") +_CODE_VIOLATION_MSG = _( + 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.' +) + + def code_field(**kwargs) -> MultiCollationCharField: """ Field to hold a 'code', i.e. a slug-like local identifier. + + Use together with :func:`code_field_check` to enforce the same regex at + the database level via a ``CheckConstraint``. """ return case_sensitive_char_field( max_length=255, @@ -132,7 +141,7 @@ def code_field(**kwargs) -> MultiCollationCharField: RegexValidator( CODE_REGEX, # Translators: "letters" means latin letters: a-z and A-Z. - _('Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.'), + _CODE_VIOLATION_MSG, "invalid", ), ], @@ -140,6 +149,29 @@ def code_field(**kwargs) -> MultiCollationCharField: ) +def code_field_check(field_name: str, *, name: str) -> models.CheckConstraint: + """ + Return a ``CheckConstraint`` that enforces :data:`CODE_REGEX` at the DB level. + + Django validators (used by :func:`code_field`) are not called on ``.save()`` + or ``.update()``. Adding this constraint ensures the regex is also enforced + by the database itself, and Django will additionally run it as a Python-level + validator automatically. + + Usage:: + + class Meta: + constraints = [ + code_field_check("my_code_field", name="myapp_mymodel_my_code_field_regex"), + ] + """ + return models.CheckConstraint( + condition=Regex(models.F(field_name), CODE_REGEX.pattern), + name=name, + violation_error_message=_CODE_VIOLATION_MSG, + ) + + def key_field(**kwargs) -> MultiCollationCharField: """ Externally created Identifier fields. From 5e472a288f238a46b7889a7d0cbc2eb5d548bae3 Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 11:08:42 -0400 Subject: [PATCH 06/10] fix(squash): squash migrations 0007-0010 into single migration Co-Authored-By: Claude Opus 4.6 (1M context) --- ...rename_collection_key_to_code_in_python.py | 28 ------- ...ename_collection_key_to_collection_code.py | 75 +++++++++++++++++++ ...008_rename_collection_key_to_code_in_db.py | 20 ----- ...09_add_collection_code_regex_validation.py | 23 ------ ...llection_oel_coll_collection_code_regex.py | 20 ----- 5 files changed, 75 insertions(+), 91 deletions(-) delete mode 100644 src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py create mode 100644 src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py delete mode 100644 src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py delete mode 100644 src/openedx_content/migrations/0009_add_collection_code_regex_validation.py delete mode 100644 src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py diff --git a/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py b/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py deleted file mode 100644 index 604fdedee..000000000 --- a/src/openedx_content/migrations/0007_rename_collection_key_to_code_in_python.py +++ /dev/null @@ -1,28 +0,0 @@ -# Generated by Django 5.2.12 on 2026-03-24 20:33 - -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('openedx_content', '0006_typed_ids'), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.RemoveConstraint( - model_name='collection', - name='oel_coll_uniq_lp_key', - ), - migrations.RenameField( - model_name='collection', - old_name='key', - new_name='collection_code', - ), - migrations.AddConstraint( - model_name='collection', - constraint=models.UniqueConstraint(fields=('learning_package', 'collection_code'), name='oel_coll_uniq_lp_key'), - ), - ] diff --git a/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py b/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py new file mode 100644 index 000000000..205b040bd --- /dev/null +++ b/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py @@ -0,0 +1,75 @@ +""" +Rename Collection.key -> Collection.collection_code and change from key_field to code_field. + +Squashed from 0007–0010. +""" +import re + +import django.core.validators +import django.db.models.lookups +from django.conf import settings +from django.db import migrations, models + +import openedx_django_lib.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('openedx_content', '0006_typed_ids'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + # Drop old constraint (references the old field name). + migrations.RemoveConstraint( + model_name='collection', + name='oel_coll_uniq_lp_key', + ), + # Rename the column. + migrations.RenameField( + model_name='collection', + old_name='key', + new_name='collection_code', + ), + # Change from key_field (max_length=500, no validator) to code_field + # (max_length=255, with regex validator). + migrations.AlterField( + model_name='collection', + name='collection_code', + field=openedx_django_lib.fields.MultiCollationCharField( + db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, + max_length=255, + validators=[ + django.core.validators.RegexValidator( + re.compile('^[a-zA-Z0-9\\-\\_\\.]+\\Z'), + 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', + 'invalid', + ), + ], + ), + ), + # Re-add uniqueness constraint with the new field name. + migrations.AddConstraint( + model_name='collection', + constraint=models.UniqueConstraint( + fields=('learning_package', 'collection_code'), + name='oel_coll_uniq_lp_key', + ), + ), + # DB-level regex check constraint. + migrations.AddConstraint( + model_name='collection', + constraint=models.CheckConstraint( + condition=django.db.models.lookups.Regex( + models.F('collection_code'), + '^[a-zA-Z0-9\\-\\_\\.]+\\Z', + ), + name='oel_coll_collection_code_regex', + violation_error_message=( + 'Enter a valid "code name" consisting of letters, numbers,' + ' underscores, hyphens, or periods.' + ), + ), + ), + ] diff --git a/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py b/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py deleted file mode 100644 index 06e390fb5..000000000 --- a/src/openedx_content/migrations/0008_rename_collection_key_to_code_in_db.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 5.2.12 on 2026-03-24 20:33 - -from django.db import migrations - -import openedx_django_lib.fields - - -class Migration(migrations.Migration): - - dependencies = [ - ('openedx_content', '0007_rename_collection_key_to_code_in_python'), - ] - - operations = [ - migrations.AlterField( - model_name='collection', - name='collection_code', - field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500), - ), - ] diff --git a/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py b/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py deleted file mode 100644 index bcd2c9cc6..000000000 --- a/src/openedx_content/migrations/0009_add_collection_code_regex_validation.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 5.2.12 on 2026-03-24 20:33 - -import re - -import django.core.validators -from django.db import migrations - -import openedx_django_lib.fields - - -class Migration(migrations.Migration): - - dependencies = [ - ('openedx_content', '0008_rename_collection_key_to_code_in_db'), - ] - - operations = [ - migrations.AlterField( - model_name='collection', - name='collection_code', - field=openedx_django_lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=255, validators=[django.core.validators.RegexValidator(re.compile('^[a-zA-Z0-9\\-\\_\\.]+\\Z'), 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', 'invalid')]), - ), - ] diff --git a/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py b/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py deleted file mode 100644 index 666f6efdc..000000000 --- a/src/openedx_content/migrations/0010_collection_oel_coll_collection_code_regex.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Django 5.2.13 on 2026-04-16 14:37 - -import django.db.models.lookups -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('openedx_content', '0009_add_collection_code_regex_validation'), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.AddConstraint( - model_name='collection', - constraint=models.CheckConstraint(condition=django.db.models.lookups.Regex(models.F('collection_code'), '^[a-zA-Z0-9\\-\\_\\.]+\\Z'), name='oel_coll_collection_code_regex', violation_error_message='Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.'), - ), - ] From b7b9a4aa91f8b25796296600178beadcc60bdfdf Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 11:20:27 -0400 Subject: [PATCH 07/10] fix(squash): regex style; remove inaccurate migration comment --- .../0007_rename_collection_key_to_collection_code.py | 6 ++---- src/openedx_django_lib/fields.py | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py b/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py index 205b040bd..fad98b2f4 100644 --- a/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py +++ b/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py @@ -1,7 +1,5 @@ """ Rename Collection.key -> Collection.collection_code and change from key_field to code_field. - -Squashed from 0007–0010. """ import re @@ -42,7 +40,7 @@ class Migration(migrations.Migration): max_length=255, validators=[ django.core.validators.RegexValidator( - re.compile('^[a-zA-Z0-9\\-\\_\\.]+\\Z'), + re.compile('^[a-zA-Z0-9_.-]+\\Z'), 'Enter a valid "code name" consisting of letters, numbers, underscores, hyphens, or periods.', 'invalid', ), @@ -63,7 +61,7 @@ class Migration(migrations.Migration): constraint=models.CheckConstraint( condition=django.db.models.lookups.Regex( models.F('collection_code'), - '^[a-zA-Z0-9\\-\\_\\.]+\\Z', + '^[a-zA-Z0-9_.-]+\\Z', ), name='oel_coll_collection_code_regex', violation_error_message=( diff --git a/src/openedx_django_lib/fields.py b/src/openedx_django_lib/fields.py index 4ef8be913..3374ac12b 100644 --- a/src/openedx_django_lib/fields.py +++ b/src/openedx_django_lib/fields.py @@ -119,7 +119,7 @@ def immutable_uuid_field() -> models.UUIDField: # Alphanumeric, hyphens, underscores, periods -CODE_REGEX = re.compile(r"^[a-zA-Z0-9\-\_\.]+\Z") +CODE_REGEX = re.compile(r"^[a-zA-Z0-9_.-]+\Z") _CODE_VIOLATION_MSG = _( From d55764c5880ca46df2c173913b97d5491e6307cc Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 12:49:47 -0400 Subject: [PATCH 08/10] fix(squash): renumber migration 0007->0008 after upstream/main rebase Co-Authored-By: Claude Opus 4.6 (1M context) --- ...code.py => 0008_rename_collection_key_to_collection_code.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/openedx_content/migrations/{0007_rename_collection_key_to_collection_code.py => 0008_rename_collection_key_to_collection_code.py} (97%) diff --git a/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py b/src/openedx_content/migrations/0008_rename_collection_key_to_collection_code.py similarity index 97% rename from src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py rename to src/openedx_content/migrations/0008_rename_collection_key_to_collection_code.py index fad98b2f4..9bdab4915 100644 --- a/src/openedx_content/migrations/0007_rename_collection_key_to_collection_code.py +++ b/src/openedx_content/migrations/0008_rename_collection_key_to_collection_code.py @@ -14,7 +14,7 @@ class Migration(migrations.Migration): dependencies = [ - ('openedx_content', '0006_typed_ids'), + ('openedx_content', '0007_publishlogrecord_direct'), migrations.swappable_dependency(settings.AUTH_USER_MODEL), ] From 97f850bd64224e0f6bc07725da6268ee30ecc3aa Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 19:05:46 -0400 Subject: [PATCH 09/10] fix(squash): move key->collection_code mapping into CollectionSerializer.validate() Co-Authored-By: Claude Opus 4.6 (1M context) --- src/openedx_content/applets/backup_restore/serializers.py | 4 ++++ src/openedx_content/applets/backup_restore/zipper.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/openedx_content/applets/backup_restore/serializers.py b/src/openedx_content/applets/backup_restore/serializers.py index f2085e751..1ed019c22 100644 --- a/src/openedx_content/applets/backup_restore/serializers.py +++ b/src/openedx_content/applets/backup_restore/serializers.py @@ -165,3 +165,7 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract- required=True, allow_empty=True, ) + + def validate(self, attrs): + attrs["collection_code"] = attrs.pop("key") + return attrs diff --git a/src/openedx_content/applets/backup_restore/zipper.py b/src/openedx_content/applets/backup_restore/zipper.py index 132171322..97629c596 100644 --- a/src/openedx_content/applets/backup_restore/zipper.py +++ b/src/openedx_content/applets/backup_restore/zipper.py @@ -774,8 +774,6 @@ def _save_collections(self, learning_package, collections): """Save collections and their entities.""" for valid_collection in collections.get("collections", []): entities = valid_collection.pop("entities", []) - # The archive format uses "key"; the API now expects "collection_code". - valid_collection["collection_code"] = valid_collection.pop("key") collection = collections_api.create_collection( learning_package.id, created_by=self.user_id, **valid_collection ) From 0614fcb80d7d4896d507c03fd91e063cb3ebe760 Mon Sep 17 00:00:00 2001 From: Kyle D McCormick Date: Thu, 16 Apr 2026 19:16:04 -0400 Subject: [PATCH 10/10] fix(squash): use source='collection_code' instead of validate() override Co-Authored-By: Claude Opus 4.6 (1M context) --- .../applets/backup_restore/serializers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/openedx_content/applets/backup_restore/serializers.py b/src/openedx_content/applets/backup_restore/serializers.py index 1ed019c22..549f3280f 100644 --- a/src/openedx_content/applets/backup_restore/serializers.py +++ b/src/openedx_content/applets/backup_restore/serializers.py @@ -156,16 +156,12 @@ class CollectionSerializer(serializers.Serializer): # pylint: disable=abstract- Serializer for collections. """ title = serializers.CharField(required=True) - # Note: the model field is now Collection.collection_code, but the archive - # format still uses "key". A future v2 format may align the name. - key = serializers.CharField(required=True) + # The model field is now Collection.collection_code, but the archive format + # still uses "key". A future v2 format may align the name. + key = serializers.CharField(required=True, source="collection_code") description = serializers.CharField(required=True, allow_blank=True) entities = serializers.ListField( child=serializers.CharField(), required=True, allow_empty=True, ) - - def validate(self, attrs): - attrs["collection_code"] = attrs.pop("key") - return attrs