From f504a6a946830f8bafd9573e7c84e120279a4b65 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 18 Apr 2025 16:12:33 +0200 Subject: [PATCH 1/2] feat: track publishing side-effects Create the PublishSideEffect model as the publishing analog for the DraftSideEffect model, and create these side effects for parent containers when one of the children is published. This allows us to efficiently query when any subtree of content was affected by a publish. --- openedx_learning/__init__.py | 2 +- .../apps/authoring/publishing/api.py | 133 ++++++++++++++++-- .../0009_create_publishsideeffect.py | 86 +++++++++++ .../authoring/publishing/models/__init__.py | 2 +- .../publishing/models/publish_log.py | 46 ++++++ .../apps/authoring/publishing/test_api.py | 124 ++++++++++++++++ 6 files changed, 378 insertions(+), 15 deletions(-) create mode 100644 openedx_learning/apps/authoring/publishing/migrations/0009_create_publishsideeffect.py diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index be9e6c288..bd62127ba 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -2,4 +2,4 @@ Open edX Learning ("Learning Core"). """ -__version__ = "0.26.0" +__version__ = "0.27.0" diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 6ba022ac1..10c1a1329 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -34,6 +34,7 @@ PublishableEntityVersionMixin, PublishLog, PublishLogRecord, + PublishSideEffect, ) from .models.publish_log import Published @@ -357,10 +358,9 @@ def publish_from_drafts( # TODO: this only handles one level deep and would need to be updated to support sections > subsections > units # Get the IDs of the ContainerVersion for any Containers whose drafts are slated to be published. - container_version_ids = ( - Container.objects.filter(publishable_entity__draft__in=draft_qset) - .values_list("publishable_entity__draft__version__containerversion__pk", flat=True) - ) + container_version_ids = draft_qset.filter(entity__container__isnull=False) \ + .values_list("version_id", flat=True) + if container_version_ids: # We are publishing at least one container. Check if it has any child components that aren't already slated # to be published. @@ -414,6 +414,9 @@ def publish_from_drafts( }, ) + # We calculate side-effects as the last step in the process. + _create_container_side_effects_for_publish_log(publish_log) + return publish_log @@ -614,6 +617,89 @@ def _add_to_existing_draft_change_log( return change +def _create_container_side_effects_for_publish_log(publish_log: PublishLog): + """ + Iterate through PublishLog and add the appropriate PublishSideEffects. + + A container is considered to have been "published" if any of its descendents + were published. So publishing a Component implicitly publishes any Units it + was in, which publishes the Subsections those Units were in, etc. If the + Container is not already in the PublishLog (i.e. its own metadata did not + change), then we create a PublishLogRecord where the old_version is equal + to the new_version. We also create a PublishSideEffect for every child + change that affects a container, regardless of whether that container also + changed. + + This is a slightly simplified version of the work we have to do with Drafts, + because we don't have to worry about being called from a bulk context or + whether we're calculating for an entire log or just one log record. + + Note: I first tried to combine this logic with its draft-related counterpart + because of their obvious similaries, but I felt that the resulting code was + overly confusing, and that it was better to just repeat ourselves slightly, + rather than to try to parameterize things like the model classes, and have + ugly if-else blocks to work out the small differences. + """ + # processed_entity_ids holds the entity IDs that we've already calculated + # side-effects for. This is to save us from recalculating side-effects for + # the same ancestor relationships over and over again. So if we're calling + # this function in a loop for all the Components in a Unit, we won't be + # recalculating the Unit's side-effect on its Subsection, and its + # Subsection's side-effect on its Section each time through the loop. + # It also guards against infinite parent-child relationship loops, though + # those aren't *supposed* to be allowed anyhow. + processed_entity_ids: set[int] = set() + for original_change in publish_log.records.all(): + changes_and_containers = [ + (original_change, container) + for container + in get_containers_with_entity( + original_change.entity_id, + ignore_pinned=True, + published=True, + ) + ] + while changes_and_containers: + change, container = changes_and_containers.pop() + + # If the container is not already in the PublishLog, we need to + # add it. Since it's being caused as a PublishSideEffect, we're + # going add it with the old_version == new_version convention, i.e. + # the *only* change is because of one its children. + container_published_version_pk = container.versioning.published.pk + container_change, _created = PublishLogRecord.objects.get_or_create( + publish_log=change.publish_log, + entity_id=container.pk, + defaults={ + 'old_version_id': container_published_version_pk, + 'new_version_id': container_published_version_pk + } + ) + + # Mark that change in the current loop has the side effect of + # changing the parent container. We'll do this regardless of whether + # the container version itself also changed. If a Unit has a + # Component and both the Unit and Component have their versions + # incremented, then the Unit has changed in both ways (the Unit's + # internal metadata as well as the new version of the child + # component). + PublishSideEffect.objects.get_or_create(cause=change, effect=container_change) + processed_entity_ids.add(change.entity_id) + + # Now we find the next layer up of containers. So if the originally + # passed in publishable_entity_id was for a Component, then the + # ``container`` we've been creating the side effect for in this loop + # is the Unit, and ``parents_of_container`` would be any Subsections + # that contain the Unit. + parents_of_container = get_containers_with_entity(container.pk, ignore_pinned=True) + + changes_and_containers.extend( + (container_change, container_parent) + for container_parent in parents_of_container + if container_parent.pk not in processed_entity_ids + ) + + def _create_container_side_effects_for_draft_change_log(change_log: DraftChangeLog): """ Iterate through the whole DraftChangeLog and process side-effects. @@ -685,7 +771,7 @@ def _create_container_side_effects_for_draft_change( # Now we find the next layer up of containers. So if the originally # passed in publishable_entity_id was for a Component, then the # ``container`` we've been creating the side effect for in this loop - # is the Unit, and ``parents_of_container`` would be any Sequences + # is the Unit, and ``parents_of_container`` would be any Subsections # that contain the Unit. parents_of_container = get_containers_with_entity(container.pk, ignore_pinned=True) @@ -1363,6 +1449,7 @@ def get_containers_with_entity( publishable_entity_pk: int, *, ignore_pinned=False, + published=False, ) -> QuerySet[Container]: """ [ 🛑 UNSTABLE ] @@ -1375,16 +1462,36 @@ def get_containers_with_entity( publishable_entity_pk: The ID of the PublishableEntity to search for. ignore_pinned: if true, ignore any pinned references to the entity. """ + relation_model = "published" if published else "draft" if ignore_pinned: - qs = Container.objects.filter( - # Note: these two conditions must be in the same filter() call, or the query won't be correct. - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_version_id=None, # pylint: disable=line-too-long # noqa: E501 - ) + # TODO: Do we need to run distinct() on this? Will fix in https://github.com/openedx/openedx-learning/issues/303 + filter_dict = { + # Note: these two conditions must be in the same filter() call, + # or the query won't be correct. + ( + f"publishable_entity__{relation_model}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk, + ( + f"publishable_entity__{relation_model}__version__" + "containerversion__entity_list__entitylistrow__entity_version_id" + ): None, + } + qs = Container.objects.filter(**filter_dict) else: - qs = Container.objects.filter( - publishable_entity__draft__version__containerversion__entity_list__entitylistrow__entity_id=publishable_entity_pk, # pylint: disable=line-too-long # noqa: E501 - ) + filter_dict = { + ( + f"publishable_entity__{relation_model}__version__" + "containerversion__entity_list__entitylistrow__entity_id" + ): publishable_entity_pk + } + qs = Container.objects.filter(**filter_dict) + # Could alternately do this query in two steps. Not sure which is more efficient; depends on how the DB plans it. + # # Find all the EntityLists that contain the given entity: + # lists = EntityList.objects.filter(entitylistrow__entity_id=publishable_entity_pk).values_list('pk', flat=True) + # qs = Container.objects.filter( + # publishable_entity__draft__version__containerversion__entity_list__in=lists + # ) return qs.order_by("pk").distinct() # Ordering is mostly for consistent test cases. diff --git a/openedx_learning/apps/authoring/publishing/migrations/0009_create_publishsideeffect.py b/openedx_learning/apps/authoring/publishing/migrations/0009_create_publishsideeffect.py new file mode 100644 index 000000000..03b3208b3 --- /dev/null +++ b/openedx_learning/apps/authoring/publishing/migrations/0009_create_publishsideeffect.py @@ -0,0 +1,86 @@ +# Generated by Django 4.2.16 on 2025-04-17 08:47 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("oel_publishing", "0008_alter_draftchangelogrecord_options_and_more"), + ] + + operations = [ + migrations.AlterModelOptions( + name="draftchangelogrecord", + options={ + "verbose_name": "Draft Change Log Record", + "verbose_name_plural": "Draft Change Log Records", + }, + ), + migrations.AlterModelOptions( + name="draftsideeffect", + options={ + "verbose_name": "Draft Side Effect", + "verbose_name_plural": "Draft Side Effects", + }, + ), + migrations.AlterField( + model_name="draftchangelogrecord", + name="draft_change_log", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="records", + to="oel_publishing.draftchangelog", + ), + ), + migrations.AlterField( + model_name="draftsideeffect", + name="effect", + field=models.ForeignKey( + on_delete=django.db.models.deletion.RESTRICT, + related_name="affected_by", + to="oel_publishing.draftchangelogrecord", + ), + ), + migrations.CreateModel( + name="PublishSideEffect", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "cause", + models.ForeignKey( + on_delete=django.db.models.deletion.RESTRICT, + related_name="causes", + to="oel_publishing.publishlogrecord", + ), + ), + ( + "effect", + models.ForeignKey( + on_delete=django.db.models.deletion.RESTRICT, + related_name="affected_by", + to="oel_publishing.publishlogrecord", + ), + ), + ], + options={ + "verbose_name": "Publish Side Effect", + "verbose_name_plural": "Publish Side Effects", + }, + ), + migrations.AddConstraint( + model_name="publishsideeffect", + constraint=models.UniqueConstraint( + fields=("cause", "effect"), name="oel_pub_pse_uniq_c_e" + ), + ), + ] diff --git a/openedx_learning/apps/authoring/publishing/models/__init__.py b/openedx_learning/apps/authoring/publishing/models/__init__.py index 7e13c426a..769774b37 100644 --- a/openedx_learning/apps/authoring/publishing/models/__init__.py +++ b/openedx_learning/apps/authoring/publishing/models/__init__.py @@ -17,7 +17,7 @@ from .draft_log import Draft, DraftChangeLog, DraftChangeLogRecord, DraftSideEffect from .entity_list import EntityList, EntityListRow from .learning_package import LearningPackage -from .publish_log import Published, PublishLog, PublishLogRecord +from .publish_log import Published, PublishLog, PublishLogRecord, PublishSideEffect from .publishable_entity import ( PublishableContentModelRegistry, PublishableEntity, diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index 037f1b8b1..e49a3837b 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -3,6 +3,7 @@ """ from django.conf import settings from django.db import models +from django.utils.translation import gettext_lazy as _ from openedx_learning.lib.fields import case_insensitive_char_field, immutable_uuid_field, manual_date_time_field @@ -148,3 +149,48 @@ class Published(models.Model): class Meta: verbose_name = "Published Entity" verbose_name_plural = "Published Entities" + + +class PublishSideEffect(models.Model): + """ + Model to track when a change in one Published entity affects others. + + Our first use case for this is that changes involving child components are + thought to affect parent Units, even if the parent's version doesn't change. + + Side-effects are recorded in a collapsed form that only captures one level. + So if Components C1 and C2 are both published and they are part of Unit U1, + which is in turn a part of Subsection SS1, then the PublishSideEffect + entries are:: + + (C1, U1) + (C2, U1) + (U1, SS1) + + We do not keep entries for (C1, SS1) or (C2, SS1). This is to make the model + simpler, so we don't have to differentiate between direct side-effects and + transitive side-effects in the model. + .. no_pii: + """ + cause = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='causes', + ) + effect = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, + related_name='affected_by', + ) + + class Meta: + constraints = [ + # Duplicate entries for cause & effect are just redundant. This is + # here to guard against weird bugs that might introduce this state. + models.UniqueConstraint( + fields=["cause", "effect"], + name="oel_pub_pse_uniq_c_e", + ) + ] + verbose_name = _("Publish Side Effect") + verbose_name_plural = _("Publish Side Effects") diff --git a/tests/openedx_learning/apps/authoring/publishing/test_api.py b/tests/openedx_learning/apps/authoring/publishing/test_api.py index b9e45ec91..618ab0a0e 100644 --- a/tests/openedx_learning/apps/authoring/publishing/test_api.py +++ b/tests/openedx_learning/apps/authoring/publishing/test_api.py @@ -19,6 +19,7 @@ DraftSideEffect, LearningPackage, PublishableEntity, + PublishLog, ) from openedx_learning.lib.test_utils import TestCase @@ -832,6 +833,108 @@ def test_multiple_draft_changes_all_cancel_out(self) -> None: assert DraftChangeLog.objects.all().count() == 1 +class PublishLogTestCase(TestCase): + """ + Test basic operations with PublishLogs and PublishSideEffects. + """ + now: datetime + learning_package_1: LearningPackage + learning_package_2: LearningPackage + + @classmethod + def setUpTestData(cls) -> None: + cls.now = datetime(2024, 1, 28, 16, 45, 30, tzinfo=timezone.utc) + cls.learning_package_1 = publishing_api.create_learning_package( + "my_package_key_1", + "PublishLog Testing LearningPackage 🔥 1", + created=cls.now, + ) + cls.learning_package_2 = publishing_api.create_learning_package( + "my_package_key_2", + "PublishLog Testing LearningPackage 🔥 2", + created=cls.now, + ) + + def test_simple_publish_log(self) -> None: + """ + Simplest test that multiple writes make it into one PublishLog. + """ + with publishing_api.bulk_draft_changes_for(self.learning_package_1.id): + entity1 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity", + created=self.now, + created_by=None, + ) + entity1_v1 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=1, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + entity2 = publishing_api.create_publishable_entity( + self.learning_package_1.id, + "my_entity2", + created=self.now, + created_by=None, + ) + entity2_v1 = publishing_api.create_publishable_entity_version( + entity2.id, + version_num=1, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + + # Check simplest publish of two things... + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert PublishLog.objects.all().count() == 1 + assert publish_log.records.all().count() == 2 + + record_1 = publish_log.records.get(entity=entity1) + assert record_1 is not None + assert record_1.old_version is None + assert record_1.new_version == entity1_v1 + + record_2 = publish_log.records.get(entity=entity2) + assert record_2 is not None + assert record_2.old_version is None + assert record_2.new_version == entity2_v1 + + # Check that an empty publish still creates a PublishLog, just one with + # no records. This may be useful for triggering tasks that trigger off + # of publishing events, though I'm not confident about that at the + # moment. + publish_log = publishing_api.publish_all_drafts(self.learning_package_1.id) + assert publish_log.records.count() == 0 + + # Check that we can publish a subset... + entity1_v2 = publishing_api.create_publishable_entity_version( + entity1.id, + version_num=2, + title="An Entity 🌴", + created=self.now, + created_by=None, + ) + publishing_api.create_publishable_entity_version( + entity2.id, + version_num=2, + title="An Entity 🌴 2", + created=self.now, + created_by=None, + ) + publish_e1_log = publishing_api.publish_from_drafts( + self.learning_package_1.id, + Draft.objects.filter(pk=entity1.pk), + ) + assert publish_e1_log.records.count() == 1 + e1_pub_record = publish_e1_log.records.get(entity=entity1) + assert e1_pub_record is not None + assert e1_pub_record.old_version == entity1_v1 + assert e1_pub_record.new_version == entity1_v2 + + class ContainerTestCase(TestCase): """ Test basic operations with Drafts. @@ -1075,3 +1178,24 @@ def test_multiple_layers_of_containers(self): assert unit_change.affected_by.first().cause == component_change assert subsection_change.affected_by.count() == 1 assert subsection_change.affected_by.first().cause == unit_change + + publish_log = publishing_api.publish_all_drafts(self.learning_package.id) + assert publish_log.records.count() == 3 + + publishing_api.create_publishable_entity_version( + component.pk, version_num=3, title="Component v2", created=self.now, created_by=None, + ) + publish_log = publishing_api.publish_from_drafts( + self.learning_package.id, + Draft.objects.filter(entity_id=component.pk), + ) + assert publish_log.records.count() == 3 + component_publish = publish_log.records.get(entity=component) + unit_publish = publish_log.records.get(entity=unit.publishable_entity) + subsection_publish = publish_log.records.get(entity=subsection.publishable_entity) + + assert not component_publish.affected_by.exists() + assert unit_publish.affected_by.count() == 1 + assert unit_publish.affected_by.first().cause == component_publish + assert subsection_publish.affected_by.count() == 1 + assert subsection_publish.affected_by.first().cause == unit_publish From d69988a81cdcd9047965f381f775f1618a0edd61 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 15 May 2025 19:35:08 -0400 Subject: [PATCH 2/2] fixup!: based on review feedback --- openedx_learning/apps/authoring/publishing/api.py | 8 +------- .../apps/authoring/publishing/models/publish_log.py | 9 +++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 10c1a1329..a60950670 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -1464,7 +1464,6 @@ def get_containers_with_entity( """ relation_model = "published" if published else "draft" if ignore_pinned: - # TODO: Do we need to run distinct() on this? Will fix in https://github.com/openedx/openedx-learning/issues/303 filter_dict = { # Note: these two conditions must be in the same filter() call, # or the query won't be correct. @@ -1486,12 +1485,7 @@ def get_containers_with_entity( ): publishable_entity_pk } qs = Container.objects.filter(**filter_dict) - # Could alternately do this query in two steps. Not sure which is more efficient; depends on how the DB plans it. - # # Find all the EntityLists that contain the given entity: - # lists = EntityList.objects.filter(entitylistrow__entity_id=publishable_entity_pk).values_list('pk', flat=True) - # qs = Container.objects.filter( - # publishable_entity__draft__version__containerversion__entity_list__in=lists - # ) + return qs.order_by("pk").distinct() # Ordering is mostly for consistent test cases. diff --git a/openedx_learning/apps/authoring/publishing/models/publish_log.py b/openedx_learning/apps/authoring/publishing/models/publish_log.py index e49a3837b..5a244b1b0 100644 --- a/openedx_learning/apps/authoring/publishing/models/publish_log.py +++ b/openedx_learning/apps/authoring/publishing/models/publish_log.py @@ -62,6 +62,15 @@ class PublishLogRecord(models.Model): To revert a publish, we would make a new publish that swaps ``old_version`` and ``new_version`` field values. + + If the old_version and new_version of a PublishLogRecord match, it means + that the definition of the entity itself did not change (i.e. no new + PublishableEntityVersion was created), but something else was published that + had the side-effect of changing the published state of this entity. For + instance, if a Unit has unpinned references to its child Components (which + it almost always will), then publishing one of those Components will alter + the published state of the Unit, even if the UnitVersion does not change. In + that case, we still consider the Unit to have been "published". """ publish_log = models.ForeignKey(