diff --git a/Makefile b/Makefile index 051053bab3..619fcee41e 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ migrate: # 4) Remove the management command from this `deploy-migrate` recipe # 5) Repeat! deploy-migrate: - echo "Nothing to do here!" + python contentcuration/manage.py rectify_incorrect_contentnode_source_fields contentnodegc: python contentcuration/manage.py garbage_collect diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py new file mode 100644 index 0000000000..a563fb712a --- /dev/null +++ b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py @@ -0,0 +1,182 @@ +# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS +import datetime +import uuid + +from django.core.management import call_command +from django.utils import timezone +from le_utils.constants import content_kinds + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import License +from contentcuration.tests import testdata +from contentcuration.tests.base import StudioAPITestCase +from contentcuration.utils.publish import publish_channel + + +class TestRectifyMigrationCommand(StudioAPITestCase): + + @classmethod + def setUpClass(cls): + super(TestRectifyMigrationCommand, cls).setUpClass() + + def tearDown(self): + super(TestRectifyMigrationCommand, self).tearDown() + + def setUp(self): + super(TestRectifyMigrationCommand, self).setUp() + self.original_channel = testdata.channel() + self.license_original = License.objects.all()[0] + self.original_contentnode = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="Original Node", + parent=self.original_channel.main_tree, + license=self.license_original, + original_channel_id=None, + source_channel_id=None, + author="old author" + ) + self.user = testdata.user() + self.original_channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def create_base_channel_and_contentnode(self, source_contentnode, source_channel): + base_channel = testdata.channel() + base_channel.public = True + base_channel.save() + license_changed = License.objects.all()[2] + base_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=base_channel.main_tree, + kind_id=content_kinds.VIDEO, + original_channel_id=self.original_channel.id, + original_source_node_id=self.original_contentnode.node_id, + source_channel_id=source_channel.id, + source_node_id=source_contentnode.node_id, + author="source author", + license=license_changed, + ) + return base_node, base_channel + + def create_source_channel_and_contentnode(self): + source_channel = testdata.channel() + source_channel.public = True + source_channel.save() + license_changed = License.objects.all()[1] + source_node = ContentNode.objects.create( + id=uuid.uuid4().hex, + title="base contentnode", + parent=source_channel.main_tree, + kind_id=content_kinds.VIDEO, + license=license_changed, + original_channel_id=self.original_channel.id, + source_channel_id=self.original_channel.id, + source_node_id=self.original_contentnode.node_id, + original_source_node_id=self.original_contentnode.node_id, + author="source author", + ) + + return source_node, source_channel + + def run_migrations(self): + call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) + + def test_two_node_case(self): + base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + # main_tree node still has changed=true even after the publish + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) + node.changed = False + # This should probably again change the changed=true but suprisingly it doesnot + # Meaning the changed boolean doesnot change for the main_tree no matter what we do + # through ContentNode model methods like save. + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_implicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.aggregator = "Nami" + source_node.save() + # Implicit case + base_node.author = source_node.author + base_node.license = source_node.license + base_node.aggregator = source_node.aggregator + base_node.save() + + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.license, self.original_contentnode.license) + self.assertEqual(updated_source_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) + + def test_three_node_case_explicit(self): + source_node, source_channel = self.create_source_channel_and_contentnode() + base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) + source_node.provider = "luffy" + base_node.provider = "zoro" + base_node.save() + source_node.save() + publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) + + for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): + print(node.id) + print(node.id == base_channel.main_tree.id) + print("checking if the node is complete or not ", node.complete) + node.changed = False + node.save() + + ContentNode.objects.filter(pk=base_node.pk).update( + modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) + ) + + ContentNode.objects.filter(pk=source_node.pk).update( + modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) + ) + + self.run_migrations() + updated_base_node = ContentNode.objects.get(pk=base_node.pk) + updated_source_node = ContentNode.objects.get(pk=source_node.pk) + self.assertEqual(updated_base_node.license, self.original_contentnode.license) + self.assertEqual(updated_base_node.author, self.original_contentnode.author) + self.assertEqual(updated_base_node.provider, self.original_contentnode.provider) + self.assertEqual(updated_source_node.license, self.original_contentnode.license) + self.assertEqual(updated_source_node.author, self.original_contentnode.author) + self.assertEqual(updated_source_node.provider, self.original_contentnode.provider) + self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py new file mode 100644 index 0000000000..2df2aad4bb --- /dev/null +++ b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py @@ -0,0 +1,161 @@ +import datetime +import logging + +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import F +from django.db.models import OuterRef +from django.db.models import Q +from django.db.models import Value +from django.db.models.functions import Coalesce +from django.utils import timezone +from django_cte import With + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import User +from contentcuration.utils.publish import publish_channel + +logger = logging.getLogger(__file__) + + +class Command(BaseCommand): + + def add_arguments(self, parser): + + parser.add_argument( + '--is_test', + action='store_true', + help="Indicate if the command is running in a test environment.", + ) + + parser.add_argument( + '--user_id', + type=int, + help="User ID for the operation", + required=True + ) + + def handle(self, *args, **options): + # Filter Date : July 9, 2023 + # Link https://github.com/learningequality/studio/pull/4189 + # The PR date for the frontend change is July 10, 2023 + # we would set the filter day one day back just to be sure + + is_test = options['is_test'] + user_id = options['user_id'] + + if not is_test: + user_id = User.objects.get(email='channeladmin@learningequality.org').pk + + filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) + main_trees_cte = With( + ( + Channel.objects.filter( + main_tree__isnull=False + ) + .annotate(channel_id=F("id")) + .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) + ), + name="main_trees", + ) + + nodes = main_trees_cte.join( + ContentNode.objects.all(), + tree_id=main_trees_cte.col.tree_id, + ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) + + original_source_nodes = ( + nodes.with_cte(main_trees_cte) + .filter( + node_id=OuterRef("original_source_node_id"), + ) + .exclude( + tree_id=OuterRef("tree_id"), + ) + .annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + ) + # We filter out according to last_modified date before this PR + # https://github.com/learningequality/studio/pull/4189 + # As we want to lossen up the public=True Filter and open the + # migration for all the nodes even if they are not published + diff = ( + nodes.with_cte(main_trees_cte).filter( + deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes + source_node_id__isnull=False, + original_source_node_id__isnull=False, + modified__lt=filter_date + ) + ).annotate( + coalesced_provider=Coalesce("provider", Value("")), + coalesced_author=Coalesce("author", Value("")), + coalesced_aggregator=Coalesce("aggregator", Value("")), + coalesced_license_id=Coalesce("license_id", -1), + ) + diff_combined = diff.annotate( + original_source_node_f_changed=Exists( + original_source_nodes.filter( + ~Q(coalesced_provider=OuterRef("coalesced_provider")) + | ~Q(coalesced_author=OuterRef("coalesced_author")) + | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) + | ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) + ) + ) + ).filter(original_source_node_f_changed=True) + + final_nodes = diff_combined.values( + "id", + "channel_id", + "original_channel_id", + "original_source_node_id", + "coalesced_provider", + "coalesced_author", + "coalesced_aggregator", + "coalesced_license_id", + "original_source_node_f_changed", + ).order_by() + + channel_ids_to_republish = set() + + for item in final_nodes: + base_node = ContentNode.objects.get(pk=item["id"]) + + original_source_channel_id = item["original_channel_id"] + original_source_node_id = item["original_source_node_id"] + tree_id = ( + Channel.objects.filter(pk=original_source_channel_id) + .values_list("main_tree__tree_id", flat=True) + .get() + ) + original_source_node = ContentNode.objects.filter( + tree_id=tree_id, node_id=original_source_node_id + ) + + base_channel = Channel.objects.get(pk=item['channel_id']) + + to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) + + if original_source_channel_id is not None and original_source_node.exists(): + # original source node exists and its source fields dont match + # update the base node + if base_node.author != original_source_node[0].author: + base_node.author = original_source_node[0].author + if base_node.provider != original_source_node[0].provider: + base_node.provider = original_source_node[0].provider + if base_node.aggregator != original_source_node[0].aggregator: + base_node.aggregator = original_source_node[0].aggregator + if base_node.license != original_source_node[0].license: + base_node.license = original_source_node[0].license + base_node.save() + + if to_be_republished and base_channel.last_published is not None: + channel_ids_to_republish.add(base_channel.id) + + # we would repbulish the channel + for channel_id in channel_ids_to_republish: + publish_channel(user_id, channel_id)