-
Notifications
You must be signed in to change notification settings - Fork 286
Add management command for auditing missing sources #5711
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
bjester
merged 3 commits into
learningequality:hotfixes
from
bjester:incompletely-synced
Feb 18, 2026
+277
−2
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
164 changes: 164 additions & 0 deletions
164
contentcuration/contentcuration/management/commands/fix_missing_import_sources.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| import csv | ||
| import logging | ||
| import time | ||
|
|
||
| from django.core.management.base import BaseCommand | ||
| from django.db.models import Exists | ||
| from django.db.models import FilteredRelation | ||
| from django.db.models import OuterRef | ||
| from django.db.models import Q | ||
| from django.db.models.expressions import F | ||
| from django_cte import With | ||
|
|
||
| from contentcuration.models import Channel | ||
| from contentcuration.models import ContentNode | ||
|
|
||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class Command(BaseCommand):
    """
    Audits nodes that have imported content from public channels and whether the imported content
    has a missing source node.

    For every private, non-deleted channel that contains at least one node imported from a
    public channel, each imported node whose source is missing is written as one row to
    ``fix_missing_import_sources.csv`` in the current working directory.

    TODO: this does not yet FIX them
    """

    help = (
        "Audits nodes imported from public channels and reports those whose "
        "source node is missing."
    )

    def handle(self, *args, **options):
        """
        Run the audit over all private destination channels and write the CSV report.

        Logs the number of affected nodes and channels, and the elapsed time in seconds.
        """
        # Monotonic clock: an elapsed-time measurement must not be affected by
        # wall-clock adjustments while the (potentially long) audit runs.
        start = time.monotonic()

        public_cte = self.get_public_cte()

        # preliminary filter on channels to those private and non-deleted, which have content
        # lft=1 is always true for root nodes, so rght>2 means it actually has children
        private_channels_cte = With(
            Channel.objects.filter(
                public=False,
                deleted=False,
            )
            .annotate(
                non_empty_main_tree=FilteredRelation(
                    "main_tree", condition=Q(main_tree__rght__gt=2)
                ),
            )
            .annotate(
                tree_id=F("non_empty_main_tree__tree_id"),
            )
            .values("id", "name", "tree_id"),
            name="dest_channel_cte",
        )

        # reduce the list of private channels to those that have an imported node
        # from a public channel
        destination_channels = (
            private_channels_cte.queryset()
            .with_cte(public_cte)
            .with_cte(private_channels_cte)
            .filter(
                Exists(
                    public_cte.join(
                        ContentNode.objects.filter(
                            tree_id=OuterRef("tree_id"),
                        ),
                        original_channel_id=public_cte.col.id,
                    )
                )
            )
            .values("id", "name", "tree_id")
            .order_by("id")
        )

        logger.info("=== Iterating over private destination channels. ===")
        channel_count = 0
        total_node_count = 0

        # NOTE: no explicit encoding is passed, so the platform default text
        # encoding is used for the report file.
        with open("fix_missing_import_sources.csv", "w", newline="") as csv_file:
            csv_writer = csv.DictWriter(
                csv_file,
                fieldnames=[
                    "channel_id",
                    "channel_name",
                    "contentnode_id",
                    "contentnode_title",
                    "public_channel_id",
                    "public_channel_name",
                    "public_channel_deleted",
                ],
            )
            csv_writer.writeheader()

            # .iterator() streams channels instead of caching the whole result set
            for channel in destination_channels.iterator():
                node_count = self.handle_channel(csv_writer, channel)

                # only channels that actually have affected nodes are counted
                if node_count > 0:
                    total_node_count += node_count
                    channel_count += 1

        logger.info("=== Done iterating over private destination channels. ===")
        logger.info(
            "Found %s nodes across %s channels.", total_node_count, channel_count
        )
        logger.info("Finished in %s", time.monotonic() - start)

    def get_public_cte(self) -> With:
        """
        Build a CTE named ``public_cte`` over all public channels.

        :return: a ``With`` exposing ``id``, ``name``, ``deleted`` and the main tree's ``tree_id``
        """
        # This CTE gets all public channels with their main tree info
        return With(
            Channel.objects.filter(public=True)
            .annotate(
                tree_id=F("main_tree__tree_id"),
            )
            .values("id", "name", "deleted", "tree_id"),
            name="public_cte",
        )

    def handle_channel(self, csv_writer: csv.DictWriter, channel: dict) -> int:
        """
        Write one CSV row per node in the channel whose import source is missing.

        A node is reported when the public channel it was imported from is marked deleted,
        or when no node in that public channel's tree has a ``node_id`` equal to the
        node's ``original_source_node_id``.

        :param csv_writer: writer that receives one row dict per affected node
        :param channel: dict with the destination channel's ``id``, ``name`` and ``tree_id``
        :return: the number of rows written for this channel
        """
        # a fresh CTE object is built for each per-channel query
        public_cte = self.get_public_cte()

        channel_id = channel["id"]
        channel_name = channel["name"]
        tree_id = channel["tree_id"]

        missing_source_nodes = (
            public_cte.join(
                ContentNode.objects.filter(tree_id=tree_id),
                original_channel_id=public_cte.col.id,
            )
            .with_cte(public_cte)
            .annotate(
                public_channel_id=public_cte.col.id,
                public_channel_name=public_cte.col.name,
                public_channel_deleted=public_cte.col.deleted,
            )
            .filter(
                Q(public_channel_deleted=True)
                | ~Exists(
                    ContentNode.objects.filter(
                        tree_id=public_cte.col.tree_id,
                        node_id=OuterRef("original_source_node_id"),
                    )
                )
            )
            .values(
                "public_channel_id",
                "public_channel_name",
                "public_channel_deleted",
                contentnode_id=F("id"),
                contentnode_title=F("title"),
            )
        )

        # Stream the rows once and count them while writing. This avoids running the
        # query a second time just for COUNT(*), and guarantees that the returned count
        # matches the rows actually written.
        # TODO: this will be replaced with logic to correct the missing source nodes
        node_count = 0
        for node_dict in missing_source_nodes.iterator():
            # build a new dict per row so the writer never receives a shared,
            # later-mutated object
            csv_writer.writerow(
                {
                    "channel_id": channel_id,
                    "channel_name": channel_name,
                    **node_dict,
                }
            )
            node_count += 1

        if node_count > 0:
            logger.info(
                "%s:%s\t%s node(s) with missing source nodes.",
                channel_id,
                channel_name,
                node_count,
            )

        return node_count
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,6 +47,7 @@ | |
| from django.utils import timezone | ||
| from django.utils.translation import gettext as _ | ||
| from django_cte import CTEManager | ||
| from django_cte import CTEQuerySet | ||
| from django_cte import With | ||
| from le_utils import proquint | ||
| from le_utils.constants import content_kinds | ||
|
|
@@ -837,7 +838,7 @@ def exists(self, *filters): | |
| return Exists(self.queryset().filter(*filters).values("user_id")) | ||
|
|
||
|
|
||
| class ChannelModelQuerySet(models.QuerySet): | ||
| class ChannelModelQuerySet(CTEQuerySet): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. praise: Good approach — inheriting from `CTEQuerySet` |
||
| def create(self, **kwargs): | ||
| """ | ||
| Create a new object with the given kwargs, saving it to the database | ||
|
|
@@ -863,6 +864,12 @@ def update_or_create(self, defaults=None, **kwargs): | |
| return super().update_or_create(defaults, **kwargs) | ||
|
|
||
|
|
||
class ChannelModelManager(models.Manager.from_queryset(ChannelModelQuerySet)):
    """
    Custom Channel models manager with CTE support.

    Adds no behaviour of its own: it is the manager class generated from
    ``ChannelModelQuerySet``.
    """
|
|
||
|
|
||
| class Channel(models.Model): | ||
| """ Permissions come from association with organizations """ | ||
|
|
||
|
|
@@ -994,7 +1001,7 @@ class Channel(models.Model): | |
| ] | ||
| ) | ||
|
|
||
| objects = ChannelModelQuerySet.as_manager() | ||
| objects = ChannelModelManager() | ||
|
|
||
| @classmethod | ||
| def get_editable(cls, user, channel_id): | ||
|
|
||
Empty file.
Empty file.
100 changes: 100 additions & 0 deletions
100
contentcuration/contentcuration/tests/management/commands/test_fix_missing_import_sources.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| from unittest.mock import mock_open | ||
| from unittest.mock import patch | ||
|
|
||
| from django.core.management import call_command | ||
|
|
||
| from contentcuration.tests import testdata | ||
| from contentcuration.tests.base import StudioTestCase | ||
|
|
||
|
|
||
class CommandTestCase(StudioTestCase):
    """Tests for the ``fix_missing_import_sources`` management command."""

    # Column order the command is expected to pass to csv.DictWriter
    EXPECTED_FIELDNAMES = [
        "channel_id",
        "channel_name",
        "contentnode_id",
        "contentnode_title",
        "public_channel_id",
        "public_channel_name",
        "public_channel_deleted",
    ]

    def setUp(self):
        # Replace open() inside the command module so no file is written to disk
        file_patch = patch(
            "contentcuration.management.commands.fix_missing_import_sources.open",
            mock_open(),
        )
        self.mock_open = file_patch.start()
        self.addCleanup(file_patch.stop)
        self.mock_file = self.mock_open.return_value
        self.mock_file.__enter__.return_value = self.mock_file

        # Replace the DictWriter class so written rows can be inspected
        writer_patch = patch(
            "contentcuration.management.commands.fix_missing_import_sources.csv.DictWriter"
        )
        self.mock_csv_writer = writer_patch.start()
        self.addCleanup(writer_patch.stop)
        self.mock_csv_writer_instance = self.mock_csv_writer.return_value

        # Source channel, made public
        self.public_channel = testdata.channel("Public Channel")
        self.public_channel.public = True
        self.public_channel.save()

        # Destination channel that receives the imported copy
        self.private_channel = testdata.channel("Private Channel")

        # see tree.json for this file
        public_descendants = self.public_channel.main_tree.get_descendants()
        self.original_node = public_descendants.filter(
            node_id="00000000000000000000000000000003"
        ).first()
        self.copied_node = self.original_node.copy_to(
            target=self.private_channel.main_tree
        )

    def _expected_row(self, public_channel_deleted):
        """Return the CSV row expected for the copied node."""
        return {
            "channel_id": self.private_channel.id,
            "channel_name": self.private_channel.name,
            "contentnode_id": self.copied_node.id,
            "contentnode_title": self.copied_node.title,
            "public_channel_id": self.public_channel.id,
            "public_channel_name": self.public_channel.name,
            "public_channel_deleted": public_channel_deleted,
        }

    def test_handle__opens_csv_file(self):
        call_command("fix_missing_import_sources")

        self.mock_open.assert_called_once_with(
            "fix_missing_import_sources.csv", "w", newline=""
        )
        self.mock_csv_writer.assert_called_once_with(
            self.mock_file,
            fieldnames=self.EXPECTED_FIELDNAMES,
        )

        # The header is always written; nothing is missing, so no data rows
        writer = self.mock_csv_writer_instance
        writer.writeheader.assert_called_once()
        writer.writerow.assert_not_called()

    def test_handle__finds_missing(self):
        # Removing the source node leaves the copy without a source
        self.original_node.delete()
        call_command("fix_missing_import_sources")

        expected = self._expected_row(False)
        self.mock_csv_writer_instance.writerow.assert_called_once_with(expected)

    def test_handle__finds_for_deleted_channel(self):
        # Marking the public channel deleted should also report the copy
        self.public_channel.deleted = True
        self.public_channel.save(actor_id=testdata.user().id)
        call_command("fix_missing_import_sources")

        expected = self._expected_row(True)
        self.mock_csv_writer_instance.writerow.assert_called_once_with(expected)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.