Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@ var/

# Ignore editor / IDE related data
.vscode/
.gemini/

# IntelliJ IDE, except project config
.idea/
/*.iml
.junie/
.aiassistant/
.aiignore
# ignore future updates to run configuration
.run/devserver.run.xml

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import csv
import logging
import time

from django.core.management.base import BaseCommand
from django.db.models import Exists
from django.db.models import FilteredRelation
from django.db.models import OuterRef
from django.db.models import Q
from django.db.models.expressions import F
from django_cte import With

from contentcuration.models import Channel
from contentcuration.models import ContentNode


logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Audits nodes that have imported content from public channels and whether the imported content
    has a missing source node.

    One CSV row is written per affected node to ``fix_missing_import_sources.csv`` (a path
    relative to the current working directory).

    TODO: this does not yet FIX them
    """

    def handle(self, *args, **options):
        """
        Find private channels with nodes imported from public channels and audit each of them.

        Writes the CSV report and logs the number of affected nodes and channels, as well as the
        elapsed time.
        """
        # monotonic clock: elapsed time must not be affected by wall-clock adjustments
        start = time.monotonic()

        public_cte = self.get_public_cte()

        # preliminary filter on channels to those private and non-deleted, which have content
        # lft=1 is always true for root nodes, so rght>2 means it actually has children
        private_channels_cte = With(
            Channel.objects.filter(
                public=False,
                deleted=False,
            )
            .annotate(
                non_empty_main_tree=FilteredRelation(
                    "main_tree", condition=Q(main_tree__rght__gt=2)
                ),
            )
            .annotate(
                tree_id=F("non_empty_main_tree__tree_id"),
            )
            .values("id", "name", "tree_id"),
            name="dest_channel_cte",
        )

        # reduce the list of private channels to those that have an imported node
        # from a public channel
        destination_channels = (
            private_channels_cte.queryset()
            .with_cte(public_cte)
            .with_cte(private_channels_cte)
            .filter(
                Exists(
                    public_cte.join(
                        ContentNode.objects.filter(
                            tree_id=OuterRef("tree_id"),
                        ),
                        original_channel_id=public_cte.col.id,
                    )
                )
            )
            .values("id", "name", "tree_id")
            .order_by("id")
        )

        logger.info("=== Iterating over private destination channels. ===")
        channel_count = 0
        total_node_count = 0

        with open("fix_missing_import_sources.csv", "w", newline="") as csv_file:
            csv_writer = csv.DictWriter(
                csv_file,
                fieldnames=[
                    "channel_id",
                    "channel_name",
                    "contentnode_id",
                    "contentnode_title",
                    "public_channel_id",
                    "public_channel_name",
                    "public_channel_deleted",
                ],
            )
            csv_writer.writeheader()

            for channel in destination_channels.iterator():
                node_count = self.handle_channel(csv_writer, channel)

                # only channels that actually have affected nodes are tallied
                if node_count > 0:
                    total_node_count += node_count
                    channel_count += 1

        elapsed = time.monotonic() - start
        logger.info("=== Done iterating over private destination channels. ===")
        logger.info(f"Found {total_node_count} nodes across {channel_count} channels.")
        logger.info(f"Finished in {elapsed}")

    def get_public_cte(self) -> With:
        """
        Build the CTE of public channels.

        :return: A ``With`` named "public_cte" exposing each public channel's id, name, deleted
            flag, and the tree_id of its main tree
        """
        # This CTE gets all public channels with their main tree info
        return With(
            Channel.objects.filter(public=True)
            .annotate(
                tree_id=F("main_tree__tree_id"),
            )
            .values("id", "name", "deleted", "tree_id"),
            name="public_cte",
        )

    def handle_channel(self, csv_writer: csv.DictWriter, channel: dict) -> int:
        """
        Write a CSV row for every node in the channel whose public import source is missing.

        A node is reported when the public channel it was imported from is marked deleted, or
        when no node in that channel's main tree matches the node's original_source_node_id.

        :param csv_writer: The writer receiving one row per affected node
        :param channel: A dict with the destination channel's "id", "name" and "tree_id"
        :return: The number of rows written for this channel
        """
        public_cte = self.get_public_cte()
        channel_id = channel["id"]
        channel_name = channel["name"]
        tree_id = channel["tree_id"]

        missing_source_nodes = (
            public_cte.join(
                ContentNode.objects.filter(tree_id=tree_id),
                original_channel_id=public_cte.col.id,
            )
            .with_cte(public_cte)
            .annotate(
                public_channel_id=public_cte.col.id,
                public_channel_name=public_cte.col.name,
                public_channel_deleted=public_cte.col.deleted,
            )
            .filter(
                Q(public_channel_deleted=True)
                | ~Exists(
                    ContentNode.objects.filter(
                        tree_id=public_cte.col.tree_id,
                        node_id=OuterRef("original_source_node_id"),
                    )
                )
            )
            .values(
                "public_channel_id",
                "public_channel_name",
                "public_channel_deleted",
                contentnode_id=F("id"),
                contentnode_title=F("title"),
            )
        )

        # Stream the result once and count rows as they are written. A separate .count()
        # would execute this query a second time, and its result could disagree with the
        # rows actually written if data changes between the two queries.
        # TODO: this will be replaced with logic to correct the missing source nodes
        node_count = 0
        for node_dict in missing_source_nodes.iterator():
            csv_writer.writerow(
                {
                    "channel_id": channel_id,
                    "channel_name": channel_name,
                    **node_dict,
                }
            )
            node_count += 1

        if node_count > 0:
            logger.info(
                f"{channel_id}:{channel_name}\t{node_count} node(s) with missing source nodes."
            )

        return node_count
11 changes: 9 additions & 2 deletions contentcuration/contentcuration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from django.utils import timezone
from django.utils.translation import gettext as _
from django_cte import CTEManager
from django_cte import CTEQuerySet
from django_cte import With
from le_utils import proquint
from le_utils.constants import content_kinds
Expand Down Expand Up @@ -837,7 +838,7 @@ def exists(self, *filters):
return Exists(self.queryset().filter(*filters).values("user_id"))


class ChannelModelQuerySet(models.QuerySet):
class ChannelModelQuerySet(CTEQuerySet):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: Good approach — inheriting from CTEQuerySet instead of models.QuerySet keeps the custom create/update_or_create methods intact while adding CTE support. The ChannelModelManager via from_queryset is the correct Django pattern for this.

def create(self, **kwargs):
"""
Create a new object with the given kwargs, saving it to the database
Expand All @@ -863,6 +864,12 @@ def update_or_create(self, defaults=None, **kwargs):
return super().update_or_create(defaults, **kwargs)


class ChannelModelManager(models.Manager.from_queryset(ChannelModelQuerySet)):
    """
    Custom Channel models manager with CTE support.

    The manager class is generated from ChannelModelQuerySet via ``from_queryset``, so it adds
    no behavior of its own.
    """


class Channel(models.Model):
""" Permissions come from association with organizations """

Expand Down Expand Up @@ -994,7 +1001,7 @@ class Channel(models.Model):
]
)

objects = ChannelModelQuerySet.as_manager()
objects = ChannelModelManager()

@classmethod
def get_editable(cls, user, channel_id):
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from unittest.mock import mock_open
from unittest.mock import patch

from django.core.management import call_command

from contentcuration.tests import testdata
from contentcuration.tests.base import StudioTestCase


class CommandTestCase(StudioTestCase):
    """Test suite for the fix_missing_import_sources management command"""

    # Column names the command is expected to hand to csv.DictWriter, in order
    EXPECTED_FIELDNAMES = [
        "channel_id",
        "channel_name",
        "contentnode_id",
        "contentnode_title",
        "public_channel_id",
        "public_channel_name",
        "public_channel_deleted",
    ]

    def setUp(self):
        # Intercept the command module's `open` so no file is written during tests
        patched_open = patch(
            "contentcuration.management.commands.fix_missing_import_sources.open",
            mock_open(),
        )
        self.mock_open = patched_open.start()
        self.addCleanup(patched_open.stop)
        self.mock_file = self.mock_open.return_value
        self.mock_file.__enter__.return_value = self.mock_file

        # Intercept the CSV writer so written rows can be asserted on directly
        patched_writer = patch(
            "contentcuration.management.commands.fix_missing_import_sources.csv.DictWriter"
        )
        self.mock_csv_writer = patched_writer.start()
        self.addCleanup(patched_writer.stop)
        self.mock_csv_writer_instance = self.mock_csv_writer.return_value

        self.public_channel = testdata.channel("Public Channel")
        self.public_channel.public = True
        self.public_channel.save()

        self.private_channel = testdata.channel("Private Channel")

        # see tree.json for the node with this node_id
        public_descendants = self.public_channel.main_tree.get_descendants()
        self.original_node = public_descendants.filter(
            node_id="00000000000000000000000000000003"
        ).first()

        # import the public node into the private channel's tree
        private_root = self.private_channel.main_tree
        self.copied_node = self.original_node.copy_to(target=private_root)

    def _expected_row(self, public_channel_deleted):
        """Build the CSV row expected for the copied node in the private channel."""
        return {
            "channel_id": self.private_channel.id,
            "channel_name": self.private_channel.name,
            "contentnode_id": self.copied_node.id,
            "contentnode_title": self.copied_node.title,
            "public_channel_id": self.public_channel.id,
            "public_channel_name": self.public_channel.name,
            "public_channel_deleted": public_channel_deleted,
        }

    def test_handle__opens_csv_file(self):
        call_command("fix_missing_import_sources")

        self.mock_open.assert_called_once_with(
            "fix_missing_import_sources.csv", "w", newline=""
        )

        self.mock_csv_writer.assert_called_once_with(
            self.mock_file,
            fieldnames=list(self.EXPECTED_FIELDNAMES),
        )

        # the header is always written; no rows, since the source node still exists
        writer = self.mock_csv_writer_instance
        writer.writeheader.assert_called_once()
        writer.writerow.assert_not_called()

    def test_handle__finds_missing(self):
        self.original_node.delete()
        call_command("fix_missing_import_sources")

        expected = self._expected_row(False)
        self.mock_csv_writer_instance.writerow.assert_called_once_with(expected)

    def test_handle__finds_for_deleted_channel(self):
        self.public_channel.deleted = True
        self.public_channel.save(actor_id=testdata.user().id)
        call_command("fix_missing_import_sources")

        expected = self._expected_row(True)
        self.mock_csv_writer_instance.writerow.assert_called_once_with(expected)