diff --git a/.run/devserver.run.xml b/.run/devserver.run.xml
index 1c94ee6402..55b6546404 100644
--- a/.run/devserver.run.xml
+++ b/.run/devserver.run.xml
@@ -13,7 +13,7 @@
-
+
diff --git a/Makefile b/Makefile
index 002d337323..0e9f16d0d8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,13 @@
-# standalone install method
-DOCKER_COMPOSE = docker-compose
+SHELL := /bin/bash
-# support new plugin installation for docker-compose
-ifeq (, $(shell which docker-compose))
+# new plugin installation method for docker compose
DOCKER_COMPOSE = docker compose
+
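+# WEBPACK_MODE=hot selects the hot-reload webpack build (build:dev:hot); otherwise the regular dev build is used.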
+WEBPACK_CMD = $(if $(filter $(WEBPACK_MODE),hot),pnpm run build:dev:hot, pnpm run build:dev)
+
+# support fallback to old docker-compose
+ifeq (, $(shell $(DOCKER_COMPOSE) version 2>/dev/null))
+ DOCKER_COMPOSE = docker-compose
endif
###############################################################
@@ -133,9 +137,6 @@ dummyusers:
cd contentcuration/ && python manage.py loaddata contentcuration/fixtures/admin_user.json
cd contentcuration/ && python manage.py loaddata contentcuration/fixtures/admin_user_token.json
-hascaptions:
- python contentcuration/manage.py set_orm_based_has_captions
-
BRANCH_NAME := $(shell git rev-parse --abbrev-ref HEAD | sed 's/[^a-zA-Z0-9_-]/-/g')
export COMPOSE_PROJECT_NAME=studio_$(BRANCH_NAME)
@@ -149,9 +150,31 @@ destroy-and-recreate-database: purge-postgres setup
devceleryworkers:
$(MAKE) -e DJANGO_SETTINGS_MODULE=contentcuration.dev_settings prodceleryworkers
-run-services:
+devrun-django:
+ python contentcuration/manage.py runserver --settings=contentcuration.dev_settings 0.0.0.0:8081
+
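+# Start nginx via docker compose, then run the Django dev server and webpack in
+# parallel; nginx is stopped again when the run is interrupted or errors out.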
+devrun-server:
+ set -ex; \
+ function _on_interrupt() { $(DOCKER_COMPOSE) stop studio-nginx; }; \
+ trap _on_interrupt SIGINT SIGTERM ERR; \
+ $(DOCKER_COMPOSE) up -d studio-nginx; \
+ $(MAKE) -j 2 devrun-django devrun-webpack
+
+devrun-server-hot:
+ $(MAKE) -e devrun-server WEBPACK_MODE=hot
+
+devrun-services:
$(MAKE) -j 2 dcservicesup devceleryworkers
+devrun-setup:
+ python contentcuration/manage.py setup --settings=contentcuration.dev_settings
+
+devrun-shell:
+ python contentcuration/manage.py shell --settings=contentcuration.dev_settings
+
+devrun-webpack:
+ $(WEBPACK_CMD)
+
.docker/minio:
mkdir -p $@
@@ -172,7 +195,7 @@ dcbuild:
dcup: .docker/minio .docker/postgres
# run all services except for cloudprober
- $(DOCKER_COMPOSE) up studio-app celery-worker
+ $(DOCKER_COMPOSE) up studio-nginx studio-app
dcup-cloudprober: .docker/minio .docker/postgres
# run all services including cloudprober
@@ -200,8 +223,8 @@ dctest: .docker/minio .docker/postgres
dcservicesup: .docker/minio .docker/postgres
# launch all studio's dependent services using docker-compose
- $(DOCKER_COMPOSE) -f docker-compose.yml -f docker-compose.alt.yml up minio postgres redis
+ $(DOCKER_COMPOSE) up minio postgres redis
dcservicesdown:
# stop services that were started using dcservicesup
- $(DOCKER_COMPOSE) -f docker-compose.yml -f docker-compose.alt.yml down
+ $(DOCKER_COMPOSE) down
diff --git a/contentcuration/contentcuration/management/commands/restore_channel.py b/contentcuration/contentcuration/management/commands/restore_channel.py
index efaeb3ee7c..16b3976228 100644
--- a/contentcuration/contentcuration/management/commands/restore_channel.py
+++ b/contentcuration/contentcuration/management/commands/restore_channel.py
@@ -2,28 +2,65 @@
from django.core.management.base import BaseCommand
-from contentcuration.utils.import_tools import import_channel
+from contentcuration.utils.import_tools import ImportManager
-logger = logging.getLogger('command')
+logger = logging.getLogger("command")
class Command(BaseCommand):
+ """
+ This command is used to restore a channel from another Studio instance. This is for
+ development purposes only and should not be used in production.
+ """
def add_arguments(self, parser):
# ID of channel to read data from
- parser.add_argument('source_id', type=str)
+ parser.add_argument("source_id", type=str)
# ID of channel to write data to (can be same as source channel)
- parser.add_argument('--target', help='restore channel db to TARGET CHANNEL ID')
- parser.add_argument('--download-url', help='where to download db from')
- parser.add_argument('--editor', help='add user as editor to channel')
+ parser.add_argument(
+ "--target",
+ help="A different channel ID for which to restore the channel. If not provided, the source channel ID will be used.",
+ )
+ parser.add_argument(
+ "--source-url",
+ default="http://localhost:8080",
+ help="Studio instance from which to download the channel DB or content files",
+ )
+ parser.add_argument("--token", help="API token for the Studio instance")
+ parser.add_argument(
+ "--editor",
+ default="a@a.com",
+ help="Add user as editor to channel with provided email address",
+ )
+ parser.add_argument(
+ "--download-content",
+ action="store_true",
+ default=False,
+ help="Whether to download content files",
+ )
+ parser.add_argument(
+ "--public",
+ action="store_true",
+ default=False,
+ help="Whether to make the channel public",
+ )
+ parser.add_argument(
+ "--publish",
+ action="store_true",
+ default=False,
+ help="Whether to publish the channel after restoration",
+ )
def handle(self, *args, **options):
- # Set up variables for restoration process
- logger.info("\n\n********** STARTING CHANNEL RESTORATION **********")
- source_id = options['source_id']
- target_id = options.get('target') or source_id
- download_url = options.get('download_url')
- editor = options.get('editor')
-
- import_channel(source_id, target_id, download_url, editor, logger=logger)
+ manager = ImportManager(
+ options["source_url"],
+ options["source_id"],
+ target_id=options.get("target"),
+ editor=options.get("editor"),
+ public=options.get("public"),
+ publish=options.get("publish"),
+ token=options.get("token"),
+ download_content=options.get("download_content"),
+ )
+ manager.run()
diff --git a/contentcuration/contentcuration/management/commands/set_content_mimetypes.py b/contentcuration/contentcuration/management/commands/set_content_mimetypes.py
index 732d64f8d6..3d408b783e 100755
--- a/contentcuration/contentcuration/management/commands/set_content_mimetypes.py
+++ b/contentcuration/contentcuration/management/commands/set_content_mimetypes.py
@@ -14,7 +14,7 @@
from django.core.files.storage import default_storage
from django.core.management.base import BaseCommand
-from contentcuration.utils.storage_common import determine_content_type
+from contentcuration.utils.storage.common import determine_content_type
class Command(BaseCommand):
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index d10f3ac2b0..bc68573b5e 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -2,7 +2,6 @@
import json
import logging
import os
-import urllib.parse
import uuid
from datetime import datetime
@@ -581,44 +580,9 @@ def generate_storage_url(filename, request=None, *args):
path = generate_object_storage_name(os.path.splitext(filename)[0], filename)
- # There are three scenarios where Studio might be run as:
- #
- # 1. In normal kubernetes, nginx will proxy for us. We'll know we're in kubernetes when the
- # environment variable RUN_MODE=k8s
- #
- # 2. In Docker Compose and bare metal runserver, we'll be running in runserver, and minio
- # will be exposed in port 9000 in the host's localhost network.
-
- # Note (aron): returning the true storage URL (e.g. https://storage.googleapis.com/storage/a.mp4)
- # isn't too important, because we have CDN in front of our servers, so it should be cached.
- # But change the logic here in case there is a potential for bandwidth and latency improvement.
-
- # Detect our current state first
- run_mode = os.getenv("RUN_MODE")
-
- # if we're running inside k8s, then just serve the normal /content/{storage,databases} URL,
- # and let nginx handle proper proxying.
- if run_mode == "k8s":
- url = "/content/{path}".format(
- path=path,
- )
-
- # if we're in docker-compose or in baremetal, just return the object storage URL as localhost:9000
- elif run_mode == "docker-compose" or run_mode is None:
- # generate the minio storage URL, so we can get the GET parameters that give everyone
- # access even if they don't need to log in
- params = urllib.parse.urlparse(default_storage.url(path)).query
- host = "localhost"
- port = 9000 # hardcoded to the default minio IP address
- url = "http://{host}:{port}/{bucket}/{path}?{params}".format(
- host=host,
- port=port,
- bucket=settings.AWS_S3_BUCKET_NAME,
- path=path,
- params=params,
- )
-
- return url
+ return "/content/{path}".format(
+ path=path,
+ )
class FileOnDiskStorage(FileSystemStorage):
diff --git a/contentcuration/contentcuration/production_settings.py b/contentcuration/contentcuration/production_settings.py
index 969ff4dca4..0412178d98 100644
--- a/contentcuration/contentcuration/production_settings.py
+++ b/contentcuration/contentcuration/production_settings.py
@@ -10,7 +10,7 @@
MEDIA_ROOT = base_settings.STORAGE_ROOT
-DEFAULT_FILE_STORAGE = 'contentcuration.utils.gcs_storage.CompositeGCS'
+DEFAULT_FILE_STORAGE = 'contentcuration.utils.storage.gcs.CompositeGCS'
SESSION_ENGINE = "django.contrib.sessions.backends.db"
# email settings
diff --git a/contentcuration/contentcuration/sandbox_settings.py b/contentcuration/contentcuration/sandbox_settings.py
index 61e00a465f..912fed7244 100644
--- a/contentcuration/contentcuration/sandbox_settings.py
+++ b/contentcuration/contentcuration/sandbox_settings.py
@@ -3,7 +3,7 @@
DEBUG = True
-DEFAULT_FILE_STORAGE = "contentcuration.utils.gcs_storage.CompositeGCS"
+DEFAULT_FILE_STORAGE = "contentcuration.utils.storage.gcs.CompositeGCS"
LANGUAGES += (("ar", gettext("Arabic")),) # noqa
diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index a911055032..be6a53d20e 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -361,7 +361,7 @@ def gettext(s):
ORPHAN_DATE_CLEAN_UP_THRESHOLD = TWO_WEEKS_AGO
# CLOUD STORAGE SETTINGS
-DEFAULT_FILE_STORAGE = 'django_s3_storage.storage.S3Storage'
+DEFAULT_FILE_STORAGE = 'contentcuration.utils.storage.dev.CompositeStorage'
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID') or 'development'
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY') or 'development'
AWS_S3_BUCKET_NAME = os.getenv('AWS_BUCKET_NAME') or 'content'
diff --git a/contentcuration/contentcuration/tests/test_restore_channel.py b/contentcuration/contentcuration/tests/test_restore_channel.py
deleted file mode 100644
index a4d1e13a39..0000000000
--- a/contentcuration/contentcuration/tests/test_restore_channel.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# -*- coding: utf-8 -*-
-import datetime
-import json
-import uuid
-from io import BytesIO
-
-from django.core.files.storage import default_storage
-from django.template.loader import render_to_string
-from django.utils.translation import activate
-from django.utils.translation import deactivate
-from le_utils.constants import exercises
-from mixer.backend.django import mixer
-from mock import MagicMock
-from mock import patch
-
-from .base import StudioTestCase
-from contentcuration.models import AssessmentItem
-from contentcuration.models import generate_object_storage_name
-from contentcuration.utils.import_tools import create_channel
-from contentcuration.utils.import_tools import generate_assessment_item
-from contentcuration.utils.import_tools import process_content
-
-
-thumbnail_path = "/content/thumbnail.png"
-ASSESSMENT_DATA = {
- 'input-question-test': {
- 'template': 'perseus/input_question.json',
- 'type': exercises.INPUT_QUESTION,
- 'question': "Input question",
- 'question_images': [{"name": "test.jpg", "width": 12.71, "height": 12.12}],
- 'hints': [{'hint': 'Hint 1'}],
- 'answers': [
- {'answer': '1', 'correct': True, 'images': []},
- {'answer': '2', 'correct': True, 'images': []}
- ],
- 'order': 0
- },
- 'multiple-selection-test': {
- 'template': 'perseus/multiple_selection.json',
- 'type': exercises.MULTIPLE_SELECTION,
- 'question': "Multiple selection question",
- 'question_images': [],
- 'hints': [],
- 'answers': [
- {'answer': 'A', 'correct': True, 'images': []},
- {'answer': 'B', 'correct': True, 'images': []},
- {'answer': 'C', 'correct': False, 'images': []},
- ],
- 'multiple_select': True,
- 'order': 1,
- 'randomize': False
- },
- 'single-selection-test': {
- 'template': 'perseus/multiple_selection.json',
- 'type': exercises.SINGLE_SELECTION,
- 'question': "Single select question",
- 'question_images': [],
- 'hints': [{'hint': 'Hint test'}],
- 'answers': [
- {'answer': 'Correct answer', 'correct': True, 'images': []},
- {'answer': 'Incorrect answer', 'correct': False, 'images': []},
- ],
- 'multiple_select': False,
- 'order': 2,
- 'randomize': True
- },
- 'perseus-question-test': {
- 'template': 'perseus/perseus_question.json',
- 'type': exercises.PERSEUS_QUESTION,
- 'order': 3,
- 'raw_data': '{}'
- }
-}
-
-
-class ChannelRestoreUtilityFunctionTestCase(StudioTestCase):
- @patch("contentcuration.utils.import_tools.write_to_thumbnail_file", return_value=thumbnail_path)
- def setUp(self, thumb_mock):
- self.id = uuid.uuid4().hex
- self.name = "test name"
- self.description = "test description"
- self.thumbnail_encoding = "base64 string"
- self.root_pk = uuid.uuid4()
- self.version = 7
- self.last_updated = datetime.datetime.now()
- self.cursor_mock = MagicMock()
- self.cursor_mock.execute.return_value.fetchone.return_value = (
- self.id,
- self.name,
- self.description,
- self.thumbnail_encoding,
- self.root_pk,
- self.version,
- self.last_updated,
- )
- self.channel, _ = create_channel(self.cursor_mock, self.id, self.admin_user)
-
- def test_restore_channel_id(self):
- self.assertEqual(self.channel.id, self.id)
-
- def test_restore_channel_name(self):
- self.assertEqual(self.channel.name, self.name)
-
- def test_restore_channel_description(self):
- self.assertEqual(self.channel.description, self.description)
-
- def test_restore_channel_thumbnail(self):
- self.assertEqual(self.channel.thumbnail, thumbnail_path)
-
- def test_restore_channel_thumbnail_encoding(self):
- self.assertEqual(self.channel.thumbnail_encoding["base64"], self.thumbnail_encoding)
-
- def test_restore_channel_version(self):
- self.assertEqual(self.channel.version, self.version)
-
-
-class PerseusRestoreTestCase(StudioTestCase):
- def setUp(self):
- super(PerseusRestoreTestCase, self).setUp()
- image_path = generate_object_storage_name('test', 'test.png')
- default_storage.save(image_path, BytesIO(b'test'))
-
- def test_process_content(self):
- tests = [
- {
- "content": 'test 1',
- "output": 'test 1',
- 'images': {}
- },
- {
- "content": 'test 2 ',
- "output": 'test 2 ',
- 'images': {}
- },
- {
- "content": 'test 3 ',
- "output": 'test 3 ',
- 'images': {
- '${☣ LOCALPATH}/images/test.png': {
- 'width': 50,
- 'height': 50
- }
- }
- },
- {
- "content": 'test 4  ',
- "output": 'test 4  ',
- 'images': {}
- },
- {
- "content": 'test 5 $\\sqrt{36}+\\frac{1}{2}$ ',
- "output": 'test 5 $$\\sqrt{36}+\\frac{1}{2}$$',
- 'images': {}
- },
- {
- "content": 'test 6 $\\frac{1}{2}$ $\\frac{3}{2}$',
- "output": 'test 6 $$\\frac{1}{2}$$ $$\\frac{3}{2}$$',
- 'images': {}
- }
- ]
- for test in tests:
- result = process_content(test, mixer.blend(AssessmentItem))
- self.assertEqual(result, test['output'])
-
- def test_generate_assessment_item(self):
- # Run in Spanish to ensure we are properly creating JSON with non-localized numbers
- activate("es-es")
- for assessment_id, data in list(ASSESSMENT_DATA.items()):
- assessment_data = json.loads(render_to_string(data['template'], data).encode('utf-8', "ignore"))
- assessment_item = generate_assessment_item(assessment_id, data['order'], data['type'], assessment_data)
- self.assertEqual(assessment_item.type, data['type'])
- self.assertEqual(assessment_item.question, data.get('question', ''))
- self.assertEqual(assessment_item.randomize, bool(data.get('randomize')))
- self.assertEqual(assessment_item.raw_data, data.get('raw_data', ''))
- for hint in json.loads(assessment_item.hints):
- self.assertTrue(any(h for h in data['hints'] if h['hint'] == hint['hint']))
- for answer in json.loads(assessment_item.answers):
- self.assertTrue(any(a for a in data['answers'] if a['answer'] == str(answer['answer']) and a['correct'] == answer['correct']))
- deactivate()
diff --git a/contentcuration/contentcuration/tests/utils/test_cloud_storage.py b/contentcuration/contentcuration/tests/utils/test_cloud_storage.py
deleted file mode 100644
index 5d84fd9f10..0000000000
--- a/contentcuration/contentcuration/tests/utils/test_cloud_storage.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from django.test import TestCase
-
-from contentcuration.utils.cloud_storage import CloudStorage
-
-
-class CloudStorageTestCase(TestCase):
- def test_backend_initialization(self):
- cloud_storage_instance = CloudStorage()
- self.assertIsNotNone(cloud_storage_instance)
- self.assertIsInstance(cloud_storage_instance, CloudStorage)
diff --git a/contentcuration/contentcuration/tests/test_gcs_storage.py b/contentcuration/contentcuration/tests/utils/test_gcs_storage.py
similarity index 95%
rename from contentcuration/contentcuration/tests/test_gcs_storage.py
rename to contentcuration/contentcuration/tests/utils/test_gcs_storage.py
index 165877f9ac..07feb0bdb8 100755
--- a/contentcuration/contentcuration/tests/test_gcs_storage.py
+++ b/contentcuration/contentcuration/tests/utils/test_gcs_storage.py
@@ -8,8 +8,8 @@
from google.cloud.storage.blob import Blob
from mixer.main import mixer
-from contentcuration.utils.gcs_storage import CompositeGCS
-from contentcuration.utils.gcs_storage import GoogleCloudStorage
+from contentcuration.utils.storage.gcs import CompositeGCS
+from contentcuration.utils.storage.gcs import GoogleCloudStorage
class GoogleCloudStorageSaveTestCase(TestCase):
@@ -70,8 +70,8 @@ def test_uploads_cache_control_private_if_content_database(self):
self.storage.save(filename, self.content, blob_object=self.blob_obj)
assert "private" in self.blob_obj.cache_control
- @mock.patch("contentcuration.utils.gcs_storage.BytesIO")
- @mock.patch("contentcuration.utils.gcs_storage.GoogleCloudStorage._is_file_empty", return_value=False)
+ @mock.patch("contentcuration.utils.storage.gcs.BytesIO")
+ @mock.patch("contentcuration.utils.storage.gcs.GoogleCloudStorage._is_file_empty", return_value=False)
def test_gzip_if_content_database(self, bytesio_mock, file_empty_mock):
"""
Check that if we're uploading a gzipped content database and
@@ -147,8 +147,8 @@ def setUp(self):
self.mock_anon_bucket = bucket_cls(self.mock_anon_client, "bucket")
self.mock_anon_client.get_bucket.return_value = self.mock_anon_bucket
- with mock.patch("contentcuration.utils.gcs_storage._create_default_client", return_value=self.mock_default_client), \
- mock.patch("contentcuration.utils.gcs_storage.Client.create_anonymous_client", return_value=self.mock_anon_client):
+ with mock.patch("contentcuration.utils.storage.gcs._create_default_client", return_value=self.mock_default_client), \
+ mock.patch("contentcuration.utils.storage.gcs.Client.create_anonymous_client", return_value=self.mock_anon_client):
self.storage = CompositeGCS()
def test_get_writeable_backend(self):
@@ -177,7 +177,7 @@ def test_open(self):
self.assertIsInstance(f, File)
self.mock_default_bucket.get_blob.assert_called_with("blob")
- @mock.patch("contentcuration.utils.gcs_storage.Blob")
+ @mock.patch("contentcuration.utils.storage.gcs.Blob")
def test_save(self, mock_blob):
self.storage.save("blob", BytesIO(b"content"))
blob = mock_blob.return_value
diff --git a/contentcuration/contentcuration/tests/test_storage_common.py b/contentcuration/contentcuration/tests/utils/test_storage.py
similarity index 82%
rename from contentcuration/contentcuration/tests/test_storage_common.py
rename to contentcuration/contentcuration/tests/utils/test_storage.py
index 29ad9f59c9..b4c0e0db20 100644
--- a/contentcuration/contentcuration/tests/test_storage_common.py
+++ b/contentcuration/contentcuration/tests/utils/test_storage.py
@@ -7,16 +7,15 @@
import requests
from django.core.files.storage import FileSystemStorage
from django.test import TestCase
-from django_s3_storage.storage import S3Storage
from mock import MagicMock
-from .base import StudioTestCase
+from ..base import StudioTestCase
from contentcuration.models import generate_object_storage_name
-from contentcuration.utils.storage_common import _get_gcs_presigned_put_url
-from contentcuration.utils.storage_common import determine_content_type
-from contentcuration.utils.storage_common import get_presigned_upload_url
-from contentcuration.utils.storage_common import UnknownStorageBackendError
-# The modules we'll test
+from contentcuration.utils.storage.common import determine_content_type
+from contentcuration.utils.storage.common import get_presigned_upload_url
+from contentcuration.utils.storage.common import UnknownStorageBackendError
+from contentcuration.utils.storage.dev import Storage as DevStorage
+from contentcuration.utils.storage.gcs import GoogleCloudStorage
class MimeTypesTestCase(TestCase):
@@ -77,7 +76,10 @@ def test_raises_error(self):
"""
with pytest.raises(UnknownStorageBackendError):
get_presigned_upload_url(
- "nice", "err", 5, 0, storage=self.STORAGE,
+ "nice",
+ "err",
+ 5,
+ storage=self.STORAGE,
)
@@ -90,7 +92,9 @@ class GoogleCloudStoragePresignedURLUnitTestCase(TestCase):
"""
def setUp(self):
+ super().setUp()
self.client = MagicMock()
+ self.storage = GoogleCloudStorage(self.client, "fake")
self.generate_signed_url_method = (
self.client.get_bucket.return_value.blob.return_value.generate_signed_url
)
@@ -102,19 +106,15 @@ def test_that_generate_signed_url_is_called(self):
"""
Check that we even call blob.generate_signed_url in the first place.
"""
- bucket = "fake"
- _get_gcs_presigned_put_url(self.client, bucket, "/object.jpg", "aBc", 0, 0)
+ get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage)
self.generate_signed_url_method.assert_called_once()
def test_that_we_return_a_string(self):
"""
Check that _get_gcs_presigned_put_url returns a string.
"""
- bucket = "fake"
- ret = _get_gcs_presigned_put_url(
- self.client, bucket, "/object.jpg", "aBc", 0, 0
- )
- assert isinstance(ret, str)
+ ret = get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage)
+ assert isinstance(ret["uploadURL"], str)
def test_generate_signed_url_called_with_required_arguments(self):
"""
@@ -132,11 +132,9 @@ def test_generate_signed_url_called_with_required_arguments(self):
bucket_name = "fake"
filepath = "object.jpg"
lifetime = 20 # seconds
- mimetype = "doesntmatter"
+ mimetype = "image/jpeg"
- _get_gcs_presigned_put_url(
- self.client, bucket_name, filepath, content_md5, lifetime, mimetype
- )
+ get_presigned_upload_url(filepath, content_md5, lifetime, storage=self.storage)
# assert that we're creating the right object
self.client.get_bucket.assert_called_once_with(bucket_name)
@@ -148,8 +146,8 @@ def test_generate_signed_url_called_with_required_arguments(self):
self.generate_signed_url_method.assert_called_once_with(
method=method,
content_md5=content_md5,
- expiration=lifetime_timedelta,
content_type=mimetype,
+ expiration=lifetime_timedelta,
)
@@ -158,11 +156,9 @@ class S3StoragePresignedURLUnitTestCase(StudioTestCase):
Test cases for generating presigned URLs for S3 storage, i.e. Minio.
"""
- STORAGE = S3Storage()
-
def setUp(self):
- self.client = MagicMock()
super().setUp()
+ self.storage = DevStorage()
def test_returns_string_if_inputs_are_valid(self):
"""
@@ -171,9 +167,7 @@ def test_returns_string_if_inputs_are_valid(self):
"""
# use a real connection here as a sanity check
- ret = get_presigned_upload_url(
- "a/b/abc.jpg", "aBc", 10, 1, storage=self.STORAGE, client=None
- )
+ ret = get_presigned_upload_url("a/b/abc.jpg", "aBc", 10, storage=self.storage)
url = ret["uploadURL"]
assert isinstance(url, str)
@@ -187,12 +181,14 @@ def test_can_upload_file_to_presigned_url(self):
# S3 expects a base64-encoded MD5 checksum
md5 = hashlib.md5(file_contents)
md5_checksum = md5.hexdigest()
- md5_checksum_base64 = codecs.encode(codecs.decode(md5_checksum, "hex"), "base64").decode()
+ md5_checksum_base64 = codecs.encode(
+ codecs.decode(md5_checksum, "hex"), "base64"
+ ).decode()
filename = "blahfile.jpg"
filepath = generate_object_storage_name(md5_checksum, filename)
- ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000, len(file_contents))
+ ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000)
url = ret["uploadURL"]
content_type = ret["mimetype"]
@@ -201,6 +197,6 @@ def test_can_upload_file_to_presigned_url(self):
data=file,
headers={
"Content-Type": content_type,
- }
+ },
)
resp.raise_for_status()
diff --git a/contentcuration/contentcuration/utils/cloud_storage.py b/contentcuration/contentcuration/utils/cloud_storage.py
deleted file mode 100644
index bf60b51bb3..0000000000
--- a/contentcuration/contentcuration/utils/cloud_storage.py
+++ /dev/null
@@ -1,40 +0,0 @@
-from automation.utils.appnexus.base import Backend
-from automation.utils.appnexus.base import BackendFactory
-from automation.utils.appnexus.base import BackendRequest
-from automation.utils.appnexus.base import BackendResponse
-
-
-class CloudStorageBackendRequest(BackendRequest):
- pass
-
-
-class CloudStorageRequest(CloudStorageBackendRequest):
- def __init__(self) -> None:
- super().__init__()
-
-
-class CloudStorageBackendResponse(BackendResponse):
- pass
-
-
-class CloudStorageResponse(CloudStorageBackendResponse):
- def __init__(self) -> None:
- pass
-
-
-class CloudStorageBackendFactory(BackendFactory):
- def create_backend(self) -> Backend:
- return super().create_backend()
-
-
-class CloudStorage(Backend):
-
- def connect(self) -> None:
- return super().connect()
-
- def make_request(self, request) -> CloudStorageResponse:
- return super().make_request(request)
-
- @classmethod
- def _create_instance(cls) -> 'CloudStorage':
- return cls()
diff --git a/contentcuration/contentcuration/utils/files.py b/contentcuration/contentcuration/utils/files.py
index a5d8361e8c..74c53f8ba9 100644
--- a/contentcuration/contentcuration/utils/files.py
+++ b/contentcuration/contentcuration/utils/files.py
@@ -79,12 +79,13 @@ def duplicate_file(file_object, node=None, assessment_item=None, preset_id=None,
return file_copy
-def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH):
+def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH, input_buffer=None):
"""
Generates a base64 encoding for a thumbnail
Args:
filename (str): thumbnail to generate encoding from (must be in storage already)
dimension (int, optional): desired width of thumbnail. Defaults to 400.
+ input_buffer (BytesIO, optional): buffer to read from. Defaults to None.
Returns base64 encoding of resized thumbnail
"""
@@ -97,17 +98,17 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH):
# make sure the aspect ratio between width and height is 16:9
thumbnail_size = [dimension, round(dimension / 1.77)]
try:
- if not filename.startswith(settings.STATIC_ROOT):
- filename = generate_object_storage_name(checksum, filename)
- inbuffer = default_storage.open(filename, 'rb')
-
- else:
- inbuffer = open(filename, 'rb')
-
- if not inbuffer:
+ if not input_buffer:
+ if not filename.startswith(settings.STATIC_ROOT):
+ filename = generate_object_storage_name(checksum, filename)
+ input_buffer = default_storage.open(filename, 'rb')
+ else:
+ input_buffer = open(filename, 'rb')
+
+ if not input_buffer:
raise AssertionError
- with Image.open(inbuffer) as image:
+ with Image.open(input_buffer) as image:
image_format = image.format
# Note: Image.thumbnail ensures that the image will fit in the
@@ -122,7 +123,7 @@ def get_thumbnail_encoding(filename, dimension=THUMBNAIL_WIDTH):
finally:
# Try to close the inbuffer if it has been created
try:
- inbuffer.close()
+ input_buffer.close()
except UnboundLocalError:
pass
outbuffer.close()
diff --git a/contentcuration/contentcuration/utils/import_tools.py b/contentcuration/contentcuration/utils/import_tools.py
index e662b75fc4..57d6d6e76a 100644
--- a/contentcuration/contentcuration/utils/import_tools.py
+++ b/contentcuration/contentcuration/utils/import_tools.py
@@ -4,498 +4,794 @@
import logging
import os
import re
-import shutil
import sqlite3
import sys
import tempfile
-import zipfile
+from functools import cached_property
from io import BytesIO
import requests
-from django.conf import settings
+import tqdm
from django.core.files.storage import default_storage
+from django.core.management import call_command
from django.db import transaction
+from kolibri_content.router import get_active_content_database
+from kolibri_content.router import using_content_database
+from le_utils.constants import completion_criteria
from le_utils.constants import content_kinds
from le_utils.constants import exercises
from le_utils.constants import format_presets
+from le_utils.constants import mastery_criteria
from le_utils.constants import roles
+from le_utils.constants.labels import learning_activities
from contentcuration import models
from contentcuration.api import write_raw_content_to_storage
from contentcuration.utils.files import create_file_from_contents
+from contentcuration.utils.files import get_thumbnail_encoding
from contentcuration.utils.files import write_base64_to_file
from contentcuration.utils.garbage_collect import get_deleted_chefs_root
-
-
-CHANNEL_TABLE = 'content_channelmetadata'
-NODE_TABLE = 'content_contentnode'
-ASSESSMENTMETADATA_TABLE = 'content_assessmentmetadata'
-FILE_TABLE = 'content_file'
-TAG_TABLE = 'content_contenttag'
-NODE_TAG_TABLE = 'content_contentnode_tags'
-LICENSE_TABLE = 'content_license'
+from contentcuration.utils.publish import publish_channel
+from contentcuration.utils.storage.base import CompositeStorage
+from contentcuration.viewsets.assessmentitem import exercise_image_filename_regex
+
+CHANNEL_TABLE = "content_channelmetadata"
+NODE_TABLE = "content_contentnode"
+ASSESSMENTMETADATA_TABLE = "content_assessmentmetadata"
+FILE_TABLE = "content_file"
+TAG_TABLE = "content_contenttag"
+NODE_TAG_TABLE = "content_contentnode_tags"
+LICENSE_TABLE = "content_license"
NODE_COUNT = 0
FILE_COUNT = 0
TAG_COUNT = 0
ANSWER_FIELD_MAP = {
- exercises.SINGLE_SELECTION: 'radio 1',
- exercises.MULTIPLE_SELECTION: 'radio 1',
- exercises.INPUT_QUESTION: 'numeric-input 1',
+ exercises.SINGLE_SELECTION: "radio 1",
+ exercises.MULTIPLE_SELECTION: "radio 1",
+ exercises.INPUT_QUESTION: "numeric-input 1",
}
log = logging.getLogger(__name__)
-def import_channel(source_id, target_id=None, download_url=None, editor=None, logger=None):
- """
- Import a channel from another Studio instance. This can be used to
- copy online Studio channels into local machines for development,
- testing, faster editing, or other purposes.
-
- :param source_id: The UUID of the channel to import from the source Studio instance.
- :param target_id: The UUID of the channel on the local instance. Defaults to source_id.
- :param download_url: The URL of the Studio instance to import from.
- :param editor: The email address of the user you wish to add as an editor, if any.
-
- """
-
- global log
- if logger:
- log = logger
- else:
- log = logging.getLogger(__name__)
-
- # Set up variables for the import process
- log.info("\n\n********** STARTING CHANNEL IMPORT **********")
- start = datetime.datetime.now()
- target_id = target_id or source_id
-
- # Test connection to database
- log.info("Connecting to database for channel {}...".format(source_id))
-
- tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False)
- conn = None
- try:
- if download_url:
- response = requests.get('{}/content/databases/{}.sqlite3'.format(download_url, source_id))
- for chunk in response:
- tempf.write(chunk)
- else:
- filepath = "/".join([settings.DB_ROOT, "{}.sqlite3".format(source_id)])
- # Check if database exists
- if not default_storage.exists(filepath):
- raise IOError("The object requested does not exist.")
- with default_storage.open(filepath) as fobj:
- shutil.copyfileobj(fobj, tempf)
-
- tempf.close()
- conn = sqlite3.connect(tempf.name)
- cursor = conn.cursor()
-
- # Start by creating channel
- log.info("Creating channel...")
- editor = models.User.objects.get(email=editor)
- channel, root_pk = create_channel(conn, target_id, editor)
- channel.editors.add(editor)
- channel.save()
+class ImportClient(requests.Session):
+ def __init__(self, base_url, api_token=None):
+ super(ImportClient, self).__init__()
+ self.base_url = base_url
+ self.api_token = api_token
+ self.headers.update(
+ {
+ "User-Agent": f"restore_channel/kolibri-studio/dev python-requests/{requests.__version__}",
+ }
+ )
- # Create root node
- root = models.ContentNode.objects.create(
- node_id=root_pk,
- title=channel.name,
- kind_id=content_kinds.TOPIC,
- original_channel_id=target_id,
- source_channel_id=target_id,
+ def __getattr__(self, name):
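+        # Dynamically expose "<method>_with_token" helpers (e.g. get_with_token) that
+        # call the matching requests.Session method with an Authorization header.
+        #
+        # Example (hypothetical usage):
+        #     client = ImportClient("https://studio.example.org", api_token="abc123")
+        #     response = client.get_with_token("/api/some/endpoint")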
+ if name.endswith("_with_token"):
+ if not self.api_token:
+ raise ValueError("API token is required for this method.")
+
+ target_method = getattr(
+ super(ImportClient, self), name.replace("_with_token", "")
+ )
+ token_headers = {
+ "Authorization": f"Token {self.api_token}",
+ }
+ return lambda url, *args, **kwargs: target_method(
+ url, *args, headers=token_headers, **kwargs
+ )
+ raise AttributeError(
+ f"'{self.__class__.__name__}' object has no attribute '{name}'"
)
- # Create nodes mapping to channel
- log.info(" Creating nodes...")
- with transaction.atomic():
- create_nodes(cursor, target_id, root, download_url=download_url)
- # TODO: Handle prerequisites
-
- # Delete the previous tree if it exists
- old_previous = channel.previous_tree
- if old_previous:
- old_previous.parent = get_deleted_chefs_root()
- old_previous.title = "Old previous tree for channel {}".format(channel.pk)
- old_previous.save()
-
- # Save tree to target tree
- channel.previous_tree = channel.main_tree
- channel.main_tree = root
- channel.save()
- finally:
- conn and conn.close()
- tempf.close()
- os.unlink(tempf.name)
-
- # Print stats
- log.info("\n\nChannel has been imported (time: {ms})\n".format(ms=datetime.datetime.now() - start))
- log.info("\n\n********** IMPORT COMPLETE **********\n\n")
-
-
-def create_channel(cursor, target_id, editor):
- """ create_channel: Create channel at target id
- Args:
- cursor (sqlite3.Connection): connection to export database
- target_id (str): channel_id to write to
- Returns: channel model created and id of root node
- """
- id, name, description, thumbnail, root_pk, version, last_updated = cursor.execute(
- 'SELECT id, name, description, thumbnail, root_pk, version, last_updated FROM {table}'
- .format(table=CHANNEL_TABLE)).fetchone()
- channel, is_new = models.Channel.objects.get_or_create(pk=target_id, actor_id=editor.id)
- channel.name = name
- channel.description = description
- channel.thumbnail = write_to_thumbnail_file(thumbnail)
- channel.thumbnail_encoding = {'base64': thumbnail, 'points': [], 'zoom': 0}
- channel.version = version
- channel.save()
- log.info("\tCreated channel {} with name {}".format(target_id, name))
- return channel, root_pk
+ def request(self, method, url, *args, **kwargs):
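+        # Resolve relative paths against the source Studio base URL before sending.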
+ url = f"{self.base_url}{url}"
+ return super(ImportClient, self).request(method, url, *args, **kwargs)
def write_to_thumbnail_file(raw_thumbnail):
- """ write_to_thumbnail_file: Convert base64 thumbnail to file
- Args:
- raw_thumbnail (str): base64 encoded thumbnail
- Returns: thumbnail filename
+ """write_to_thumbnail_file: Convert base64 thumbnail to file
+ Args:
+ raw_thumbnail (str): base64 encoded thumbnail
+ Returns: thumbnail filename
"""
- if raw_thumbnail and isinstance(raw_thumbnail, str) and raw_thumbnail != "" and 'static' not in raw_thumbnail:
+ if (
+ raw_thumbnail
+ and isinstance(raw_thumbnail, str)
+ and raw_thumbnail != ""
+ and "static" not in raw_thumbnail
+ ):
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tempf:
try:
tempf.close()
write_base64_to_file(raw_thumbnail, tempf.name)
- with open(tempf.name, 'rb') as tf:
- fobj = create_file_from_contents(tf.read(), ext="png", preset_id=format_presets.CHANNEL_THUMBNAIL)
+ with open(tempf.name, "rb") as tf:
+ fobj = create_file_from_contents(
+ tf.read(), ext="png", preset_id=format_presets.CHANNEL_THUMBNAIL
+ )
return str(fobj)
finally:
tempf.close()
os.unlink(tempf.name)
-def create_nodes(cursor, target_id, parent, indent=1, download_url=None):
- """ create_channel: Create channel at target id
- Args:
- cursor (sqlite3.Connection): connection to export database
- target_id (str): channel_id to write to
- parent (models.ContentNode): node's parent
- indent (int): How far to indent print statements
- Returns: newly created node
+def convert_metadata_to_dict(metadata):
"""
- # Read database rows that match parent
- parent_query = "parent_id=\'{}\'".format(parent.node_id)
-
- sql_command = 'SELECT id, title, content_id, description, sort_order, '\
- 'license_owner, author, license_id, kind, coach_content, lang_id FROM {table} WHERE {query} ORDER BY sort_order;'\
- .format(table=NODE_TABLE, query=parent_query)
- query = cursor.execute(sql_command).fetchall()
-
- # Parse through rows and create models
- for id, title, content_id, description, sort_order, license_owner, author, license_id, kind, coach_content, lang_id in query:
- log.info("{indent} {id} ({title} - {kind})...".format(indent=" |" * indent, id=id, title=title, kind=kind))
-
- # Determine role
- role = roles.LEARNER
- if coach_content:
- role = roles.COACH
-
- # Determine extra_fields
- assessment_query = "SELECT mastery_model, randomize FROM {table} WHERE contentnode_id='{node}'".format(table=ASSESSMENTMETADATA_TABLE, node=id)
- result = cursor.execute(assessment_query).fetchone()
- extra_fields = result[0] if result else {}
- if isinstance(extra_fields, str):
- extra_fields = json.loads(extra_fields)
- if result:
- extra_fields.update({"randomize": result[1]})
-
- # Determine license
- license = retrieve_license(cursor, license_id)
- license_description = license[1] if license else ""
- license = license[0] if license else None
-
- # TODO: Determine thumbnail encoding
-
- # Create new node model
- node = models.ContentNode.objects.create(
- node_id=id,
- original_source_node_id=id,
- source_node_id=id,
- title=title,
- content_id=content_id,
- description=description,
- sort_order=sort_order,
- copyright_holder=license_owner,
- author=author,
- license=license,
- license_description=license_description,
- language_id=lang_id,
- role_visibility=role,
- extra_fields=extra_fields,
- kind_id=kind,
- parent=parent,
- original_channel_id=target_id,
- source_channel_id=target_id,
- )
+ Convert metadata from a string to a dictionary.
- # Handle foreign key references (children, files, tags)
- if kind == content_kinds.TOPIC:
- create_nodes(cursor, target_id, node, indent=indent + 1, download_url=download_url)
- elif kind == content_kinds.EXERCISE:
- create_assessment_items(cursor, node, indent=indent + 1, download_url=download_url)
- create_files(cursor, node, indent=indent + 1, download_url=download_url)
- create_tags(cursor, node, target_id, indent=indent + 1)
-
- return node
+ :param metadata: The metadata string to convert.
+ :return: A dictionary representation of the metadata.
+ """
+ if isinstance(metadata, str):
+ metadata_split = metadata.split(",")
+ return {metadata_key: True for metadata_key in metadata_split}
+ return metadata
-def retrieve_license(cursor, license_id):
- """ retrieve_license_name: Get license based on id from exported db
- Args:
- cursor (sqlite3.Connection): connection to export database
- license_id (str): id of license on exported db
- Returns: license model matching the name and the associated license description
+def convert_learning_activities_to_dict(content_kind, metadata):
"""
- # Handle no license being assigned
- if license_id is None or license_id == "":
- return None
+ Convert learning activities from a string to a dictionary.
- # Return license that matches name
- name, description = cursor.execute(
- 'SELECT license_name, license_description FROM {table} WHERE id={id}'.format(table=LICENSE_TABLE, id=license_id)
- ).fetchone()
- return models.License.objects.get(license_name=name), description
-
-
-def download_file(filename, download_url=None, contentnode=None, assessment_item=None, preset=None, file_size=None, lang_id=None):
- checksum, extension = os.path.splitext(filename)
- extension = extension.lstrip('.')
- filepath = models.generate_object_storage_name(checksum, filename)
-
- # Download file if it hasn't already been downloaded
- if download_url and not default_storage.exists(filepath):
- buffer = BytesIO()
- response = requests.get('{}/content/storage/{}/{}/{}'.format(download_url, filename[0], filename[1], filename))
- for chunk in response:
- buffer.write(chunk)
-
- checksum, _, filepath = write_raw_content_to_storage(buffer.getvalue(), ext=extension)
- buffer.close()
-
- # Save values to new file object
- file_obj = models.File(
- file_format_id=extension,
- file_size=file_size or default_storage.size(filepath),
- contentnode=contentnode,
- assessment_item=assessment_item,
- language_id=lang_id,
- preset_id=preset or "",
- )
- file_obj.file_on_disk.name = filepath
- file_obj.save()
-
-
-def create_files(cursor, contentnode, indent=0, download_url=None):
- """ create_files: Get license
- Args:
- cursor (sqlite3.Connection): connection to export database
- contentnode (models.ContentNode): node file references
- indent (int): How far to indent print statements
- Returns: None
+ :param content_kind: The content kind of the learning activities.
+ :param metadata: The learning activities string to convert.
+ :return: A dictionary representation of the learning activities.
"""
- # Parse database for files referencing content node and make file models
- sql_command = 'SELECT checksum, extension, file_size, contentnode_id, '\
- 'lang_id, preset FROM {table} WHERE contentnode_id=\'{id}\';'\
- .format(table=FILE_TABLE, id=contentnode.node_id)
+ metadata = convert_metadata_to_dict(metadata)
+ if isinstance(metadata, dict):
+ return metadata
+
+ if content_kind == content_kinds.EXERCISE:
+ return {learning_activities.PRACTICE: True}
+ elif content_kind in [content_kinds.HTML5, content_kinds.H5P]:
+ return {learning_activities.EXPLORE: True}
+ elif content_kind == content_kinds.AUDIO:
+ return {learning_activities.LISTEN: True}
+ elif content_kind == content_kinds.VIDEO:
+ return {learning_activities.WATCH: True}
+ elif content_kind == content_kinds.DOCUMENT:
+ return {learning_activities.READ: True}
+ elif content_kind == content_kinds.SLIDESHOW:
+ return {learning_activities.READ: True}
+ elif content_kind == content_kinds.TOPIC:
+ return None
+ return {learning_activities.EXPLORE: True}
- query = cursor.execute(sql_command).fetchall()
- for checksum, extension, file_size, contentnode_id, lang_id, preset in query:
- filename = "{}.{}".format(checksum, extension)
- log.info("{indent} * FILE {filename}...".format(indent=" |" * indent, filename=filename))
- try:
- download_file(filename, download_url=download_url, contentnode=contentnode, preset=preset, file_size=file_size, lang_id=lang_id)
-
- except IOError as e:
- log.warning("\b FAILED (check logs for more details)")
- sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(filename, os.strerror(e.errno)))
- continue
-
-
-def create_tags(cursor, contentnode, target_id, indent=0):
- """ create_tags: Create tags associated with node
- Args:
- cursor (sqlite3.Connection): connection to export database
- contentnode (models.ContentNode): node file references
- target_id (str): channel_id to write to
- indent (int): How far to indent print statements
- Returns: None
+class ImportManager(object):
"""
- # Parse database for files referencing content node and make file models
- sql_command = 'SELECT ct.id, ct.tag_name FROM {cnttable} cnt '\
- 'JOIN {cttable} ct ON cnt.contenttag_id = ct.id ' \
- 'WHERE cnt.contentnode_id=\'{id}\';'\
- .format(
- cnttable=NODE_TAG_TABLE,
- cttable=TAG_TABLE,
- id=contentnode.node_id,
- )
- query = cursor.execute(sql_command).fetchall()
-
- # Build up list of tags
- tag_list = []
- for id, tag_name in query:
- log.info("{indent} ** TAG {tag}...".format(indent=" |" * indent, tag=tag_name))
- # Save values to new or existing tag object
- tag_obj, is_new = models.ContentTag.objects.get_or_create(
- pk=id,
- tag_name=tag_name,
- channel_id=target_id,
- )
- tag_list.append(tag_obj)
-
- # Save tags to node
- contentnode.tags.set(tag_list)
- contentnode.save()
-
-
-def create_assessment_items(cursor, contentnode, indent=0, download_url=None):
- """ create_assessment_items: Generate assessment items based on perseus zip
- Args:
- cursor (sqlite3.Connection): connection to export database
- contentnode (models.ContentNode): node assessment items reference
- indent (int): How far to indent print statements
- download_url (str): Domain to download files from
- Returns: None
+    Import a channel from another Studio instance. This can be used to copy online Studio
+    channels to a local machine for development, testing, faster editing, or other purposes.
"""
- # Parse database for files referencing content node and make file models
- sql_command = 'SELECT checksum, extension '\
- 'preset FROM {table} WHERE contentnode_id=\'{id}\' AND preset=\'exercise\';'\
- .format(table=FILE_TABLE, id=contentnode.node_id)
-
- query = cursor.execute(sql_command).fetchall()
- for checksum, extension in query:
- filename = "{}.{}".format(checksum, extension)
- log.info("{indent} * EXERCISE {filename}...".format(indent=" |" * indent, filename=filename))
-
+ def __init__(
+ self,
+ source_url,
+ source_id,
+ target_id=None,
+ editor=None,
+ public=False,
+ publish=False,
+ token=None,
+ download_content=True,
+ logger=None,
+ ):
+ self.source_id = source_id
+ self.target_id = target_id or source_id
+ self.source_url = source_url
+ self.editor = editor
+ self.public = public
+ self.publish = publish
+ self.token = token
+ self.download_content = download_content
+ self.logger = logger or logging.getLogger(__name__)
+ self.client = ImportClient(source_url, api_token=token)
+ self.storage = (
+ default_storage._get_writeable_backend()
+ if isinstance(default_storage, CompositeStorage)
+ else default_storage
+ )
+ self.conn = None
+ self.cursor = None
+ self.progress = None
+
+ @cached_property
+ def editor_user(self):
+ """
+ Get the User object for the editor email address.
+
+ :return: The User object for the editor.
+ """
+ return models.User.objects.get(email=self.editor) if self.editor else None
+
+ def run(self):
+ """
+ Run the import restoration process.
+ """
+ self.logger.info("********** STARTING CHANNEL RESTORATION **********")
+ # Set up variables for the import process
+ start = datetime.datetime.now()
+
+ if not self.token:
+ self.logger.warning(
+ "No API token provided. This may result in limited functionality."
+ )
+
+ # Test connection to the database
+ self.logger.info(f"Connecting to database for channel {self.source_id}...")
+
+ tempf = tempfile.NamedTemporaryFile(suffix=".sqlite3", delete=False)
try:
- # Store the downloaded zip into temporary storage
- tempf = tempfile.NamedTemporaryFile(suffix='.{}'.format(extension), delete=False)
- response = requests.get('{}/content/storage/{}/{}/{}'.format(download_url, filename[0], filename[1], filename))
+ response = self.client.get(f"/content/databases/{self.source_id}.sqlite3")
for chunk in response:
tempf.write(chunk)
+
tempf.close()
- extract_assessment_items(tempf.name, contentnode, download_url=download_url)
- except IOError as e:
- log.warning("\b FAILED (check logs for more details)")
- sys.stderr.write("Restoration Process Error: Failed to save file object {}: {}".format(filename, os.strerror(e.errno)))
- continue
+
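+            # Run the Kolibri content-app migrations against the downloaded DB so that
+            # older exports gain any columns the queries below expect.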
+ with using_content_database(tempf.name):
+ call_command(
+ "migrate",
+ "content",
+ database=get_active_content_database(),
+                    interactive=False,
+ )
+
+ self.conn = sqlite3.connect(tempf.name)
+ self.cursor = self.conn.cursor()
+
+ # Start by creating the channel
+ self.logger.info("Creating channel...")
+ channel, root_pk = self._create_channel()
+ channel.editors.add(self.editor_user)
+ channel.save()
+
+ # Create the root node
+ root = models.ContentNode.objects.create(
+ node_id=root_pk,
+ title=channel.name,
+ kind_id=content_kinds.TOPIC,
+ original_channel_id=self.target_id,
+ source_channel_id=self.target_id,
+ complete=True,
+ )
+
+ self.logger.info("Creating nodes...")
+ total_nodes = self.cursor.execute(
+ f"SELECT COUNT(*) FROM {NODE_TABLE}"
+ ).fetchone()[0]
+ node_progress = tqdm.tqdm(
+ total=total_nodes, desc="Restoring nodes", unit="node"
+ )
+
+ # Create nodes mapping to channel
+ with transaction.atomic():
+ self._create_nodes(root, node_progress)
+ node_progress.close()
+ self.logger.info("Creating assessment items...")
+ exercise_nodes = models.ContentNode.objects.filter(
+ kind_id=content_kinds.EXERCISE, tree_id=root.tree_id
+ )
+ exercise_progress = tqdm.tqdm(
+ total=exercise_nodes.count(),
+ desc="Restoring assessments",
+ unit="node",
+ )
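+            # Restore assessment items in batches of 20 exercise nodes at a time.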
+ chunk = []
+ for node in exercise_nodes.iterator(chunk_size=20):
+ chunk.append(node)
+ if len(chunk) >= 20:
+ self._create_assessment_items(chunk)
+ exercise_progress.update(len(chunk))
+ chunk = []
+ if chunk:
+ self._create_assessment_items(chunk)
+ exercise_progress.update(len(chunk))
+ exercise_progress.close()
+ # TODO: Handle prerequisites
+
+ # Delete the previous tree if it exists
+ old_previous = channel.previous_tree
+ if old_previous:
+ old_previous.parent = get_deleted_chefs_root()
+ old_previous.title = f"Old previous tree for channel {channel.pk}"
+ old_previous.save()
+
+            # Make the new tree the channel's main tree, preserving the old one as previous_tree
+ channel.previous_tree = channel.main_tree
+ channel.main_tree = root
+ channel.save()
finally:
+ self.conn and self.conn.close()
+ tempf.close()
os.unlink(tempf.name)
+ # Publish the channel if requested
+ if self.publish:
+ self.logger.info("Publishing channel...")
+ publish_channel(self.editor_user.id, channel.id)
-def extract_assessment_items(filepath, contentnode, download_url=None):
- """ extract_assessment_items: Create and save assessment items to content node
- Args:
- filepath (str): Where perseus zip is stored
- contentnode (models.ContentNode): node assessment items reference
- download_url (str): Domain to download files from
- Returns: None
- """
-
- try:
- tempdir = tempfile.mkdtemp()
- with zipfile.ZipFile(filepath, 'r') as zipf:
- zipf.extractall(tempdir)
- os.chdir(tempdir)
-
- with open('exercise.json', 'rb') as fobj:
- data = json.load(fobj)
-
- for index, assessment_id in enumerate(data['all_assessment_items']):
- with open('{}.json'.format(assessment_id), 'rb') as fobj:
- assessment_item = generate_assessment_item(
- assessment_id,
- index,
- data['assessment_mapping'][assessment_id],
- json.load(fobj),
- download_url=download_url
+ # Print stats
+ self.logger.info(
+ f"Channel has been imported (time: {datetime.datetime.now() - start})"
+ )
+ self.logger.info("********** IMPORT COMPLETE **********")
+
+ def _create_channel(self):
+ """
+ Create the channel at target id
+ """
+ (
+ id,
+ name,
+ description,
+ thumbnail,
+ root_pk,
+ version,
+ last_updated,
+ schema_version,
+ ) = self.cursor.execute(
+ f"""
+ SELECT
+ id, name, description, thumbnail, root_pk, version, last_updated,
+ min_schema_version
+ FROM {CHANNEL_TABLE}
+ """
+ ).fetchone()
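+        # Use the most common node language in the export as the channel language.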
+        lang_id, _ = self.cursor.execute(
+            f"""
+            SELECT lang_id, COUNT(id) AS node_by_lang_count
+            FROM {NODE_TABLE}
+            GROUP BY lang_id
+            ORDER BY node_by_lang_count DESC
+            """
+        ).fetchone()
+ channel, is_new = models.Channel.objects.get_or_create(
+ pk=self.target_id, actor_id=self.editor_user.id
+ )
+ channel.name = name
+ channel.description = description
+ channel.language_id = lang_id
+ channel.thumbnail = write_to_thumbnail_file(thumbnail)
+ channel.thumbnail_encoding = {"base64": thumbnail, "points": [], "zoom": 0}
+ channel.version = version
+ channel.public = self.public
+ channel.save()
+ self.logger.info(f"Created channel {self.target_id} with name {name}")
+ return channel, root_pk
+
+ def _create_nodes(self, parent, progress):
+ """
+ Create node(s) for a channel with target id
+
+ :param parent: node's parent
+ :param progress: progress bar for node creation
+ """
+ sql_command = f"""
+ SELECT
+ id, title, content_id, description, sort_order, license_owner, author, license_id,
+ kind, coach_content, lang_id, grade_levels, resource_types, learning_activities,
+ accessibility_labels, categories, learner_needs, duration, options
+ FROM {NODE_TABLE}
+ WHERE parent_id = ?
+ ORDER BY sort_order;
+ """
+ query = self.cursor.execute(
+ sql_command, (getattr(parent, "node_id", parent),)
+ ).fetchall()
+
+ # Parse through rows and create models
+ for (
+ id,
+ title,
+ content_id,
+ description,
+ sort_order,
+ license_owner,
+ author,
+ license_id,
+ kind,
+ coach_content,
+ lang_id,
+ grade_levels,
+ resource_types,
+ learning_activities_,
+ accessibility_labels,
+ categories,
+ learner_needs,
+ duration,
+ options,
+ ) in query:
+ # Determine role
+ role = roles.LEARNER
+ if coach_content:
+ role = roles.COACH
+
+ # Determine license
+ license_result = self._retrieve_license(license_id)
+ license_description = license_result[1] if license_result else ""
+ license_result = license_result[0] if license_result else None
+
+ # Create the new node model
+ node = models.ContentNode.objects.create(
+ node_id=id,
+ original_source_node_id=id,
+ source_node_id=id,
+ title=title,
+ content_id=content_id,
+ description=description,
+ sort_order=sort_order,
+ copyright_holder=license_owner,
+ author=author,
+ license=license_result,
+ license_description=license_description,
+ language_id=lang_id,
+ role_visibility=role,
+ extra_fields=self._prepare_node_extra_fields(id, kind, options),
+ kind_id=kind,
+ parent=parent,
+ original_channel_id=self.target_id,
+ source_channel_id=self.target_id,
+ grade_levels=convert_metadata_to_dict(grade_levels),
+ resource_types=convert_metadata_to_dict(resource_types),
+ learning_activities=convert_learning_activities_to_dict(
+ kind, learning_activities_
+ ),
+ accessibility_labels=convert_metadata_to_dict(accessibility_labels),
+ categories=convert_metadata_to_dict(categories),
+ learner_needs=convert_metadata_to_dict(learner_needs),
+ )
+
+ # Handle foreign key references (children, files, tags)
+ if kind == content_kinds.TOPIC:
+ self._create_nodes(node, progress)
+ self._create_files(node)
+ self._create_tags(node)
+
+            # Assessment items are created after the full tree exists, and exercise
+            # nodes are marked complete at that point, so only handle other kinds here.
+ if kind != content_kinds.EXERCISE:
+ errors = node.mark_complete()
+ if errors:
+ self.logger.warning(f"Node {node.node_id} has errors: {errors}")
+ node.save()
+ progress.update(1)
+
+ def _prepare_node_extra_fields(self, node_id, kind, options):
+ """
+ Prepare extra fields for the node based on the kind and options. For exercises, it
+ retrieves the additional info from the assessment metadata.
+
+ :param node_id: the node ID
+ :param kind: the content kind
+ :param options: the options JSON string
+ :return: a dictionary of extra fields
+ """
+ extra_fields = {
+ "options": json.loads(options) if options else {},
+ }
+ completion_criteria_ = extra_fields["options"].get("completion_criteria", {})
+
+ # don't fill anything in if there is no completion_criteria, otherwise validation will fail
+ if completion_criteria_ and "learner_managed" not in completion_criteria_:
+ completion_criteria_.update(learner_managed=False)
+
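+        # Exercises store mastery settings in the assessment metadata table; fold them
+        # into the node's completion_criteria options.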
+ if kind == content_kinds.EXERCISE:
+ randomize_sql = f"""
+ SELECT randomize, mastery_model
+ FROM {ASSESSMENTMETADATA_TABLE}
+ WHERE contentnode_id = ?
+ """
+ randomize, mastery_criteria_ = self.cursor.execute(
+ randomize_sql, (node_id,)
+ ).fetchone()
+ extra_fields["randomize"] = bool(randomize) if randomize else False
+ if mastery_criteria_:
+ mastery_criteria_ = json.loads(mastery_criteria_)
+ mastery_criteria_.update(mastery_model=mastery_criteria_.pop("type"))
+ completion_criteria_.update(
+ {
+ "model": completion_criteria.MASTERY,
+ "threshold": mastery_criteria_,
+ }
)
- contentnode.assessment_items.add(assessment_item)
- finally:
- shutil.rmtree(tempdir)
-
-
-def generate_assessment_item(assessment_id, order, assessment_type, assessment_data, download_url=None):
- """ generate_assessment_item: Generates a new assessment item
- Args:
- assessment_id (str): AssessmentItem.assessment_id value
- order (Number): AssessmentItem.order value
- assessment_type (str): AssessmentItem.type value
- assessment_data (dict): Extracted data from perseus file
- download_url (str): Domain to download files from
- Returns: models.AssessmentItem
- """
- assessment_item = models.AssessmentItem.objects.create(
- assessment_id=assessment_id,
- type=assessment_type,
- order=order
- )
- if assessment_type == exercises.PERSEUS_QUESTION:
- assessment_item.raw_data = json.dumps(assessment_data)
- else:
- # Parse questions
- assessment_data['question']['content'] = '\n\n'.join(assessment_data['question']['content'].split('\n\n')[:-1])
- assessment_item.question = process_content(assessment_data['question'], assessment_item, download_url=download_url)
-
- # Parse answers
- answer_data = assessment_data['question']['widgets'][ANSWER_FIELD_MAP[assessment_type]]['options']
- if assessment_type == exercises.INPUT_QUESTION:
- assessment_item.answers = json.dumps([
- {'answer': answer['value'], 'correct': True} for answer in answer_data['answers']
- ])
- else:
- assessment_item.answers = json.dumps([
- {'answer': process_content(answer, assessment_item, download_url=download_url), 'correct': answer['correct']}
- for answer in answer_data['choices']
- ])
- assessment_item.randomize = answer_data['randomize']
-
- # Parse hints
- assessment_item.hints = json.dumps([
- {'hint': process_content(hint, assessment_item, download_url=download_url)}
- for hint in assessment_data['hints']
- ])
-
- assessment_item.save()
- return assessment_item
-
-
-def process_content(data, assessment_item, download_url=None):
- """ process_content: Parses perseus text for special formatting (e.g. formulas, images)
- Args:
- data (dict): Perseus data to parse (e.g. parsing 'question' field)
- download_url (str): Domain to download files from
- assessment_item (models.AssessmentItem): assessment item to save images to
- Returns: models.AssessmentItem
- """
- data['content'] = data['content'].replace(' ', '') # Remove unrecognized non unicode characters
- # Process formulas
- for match in re.finditer(r'(\$[^\$☣]+\$)', data['content']):
- data['content'] = data['content'].replace(match.group(0), '${}$'.format(match.group(0)))
- # Process images
+ if completion_criteria_.get("model") == completion_criteria.MASTERY:
+ mastery_model = completion_criteria_.get("threshold", {}).get(
+ "mastery_model"
+ )
+ if mastery_model in [
+ mastery_criteria.DO_ALL,
+ mastery_criteria.NUM_CORRECT_IN_A_ROW_2,
+ mastery_criteria.NUM_CORRECT_IN_A_ROW_3,
+ mastery_criteria.NUM_CORRECT_IN_A_ROW_5,
+ mastery_criteria.NUM_CORRECT_IN_A_ROW_10,
+ ]:
+ # remove m,n values
+ completion_criteria_["threshold"] = {
+ "mastery_model": mastery_model,
+ }
+
+ extra_fields["options"].update(completion_criteria=completion_criteria_)
+ return extra_fields
+
+ def _retrieve_license(self, license_id):
+ """
+        Get the license matching an id from the exported db
+
+        :param license_id: id of the license in the exported db
+        :return: a tuple of the matching license model and its license description, or
+            None if no license is assigned
+        :rtype: tuple or None
+ """
+ # Handle no license being assigned
+ if license_id is None or license_id == "":
+ return None
+
+ # Return license that matches name
+ name, description = self.cursor.execute(
+ f"""
+ SELECT license_name, license_description
+ FROM {LICENSE_TABLE}
+ WHERE id = ?
+ """,
+ (license_id,),
+ ).fetchone()
+ return models.License.objects.get(license_name=name), description
+
+ def _create_files(self, contentnode):
+ """
+ Create and possibly download node files
+
+        :param contentnode: the content node whose file references should be created
+ """
+ # Parse database for files referencing content node and make file models
+ sql_command = f"""
+ SELECT checksum, extension, file_size, contentnode_id, lang_id, preset, thumbnail
+ FROM {FILE_TABLE}
+ WHERE contentnode_id = ?;
+ """
+ query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall()
+
+ for (
+ checksum,
+ extension,
+ file_size,
+ contentnode_id,
+ lang_id,
+ preset,
+ is_thumbnail,
+ ) in query:
+ filename = "{}.{}".format(checksum, extension)
- for match in re.finditer(r'!\[[^\]]*\]\((\$(\{☣ LOCALPATH\}\/images)\/([^\.]+\.[^\)]+))\)', data['content']):
- data['content'] = data['content'].replace(match.group(2), exercises.CONTENT_STORAGE_PLACEHOLDER)
- image_data = data['images'].get(match.group(1))
- if image_data and image_data.get('width'):
- data['content'] = data['content'].replace(match.group(3), '{} ={}x{}'.format(match.group(3), image_data['width'], image_data['height']))
+ try:
+ self._download_file(
+ filename,
+ contentnode=contentnode,
+ preset=preset,
+ file_size=file_size,
+ lang_id=lang_id,
+ is_thumbnail=is_thumbnail,
+ )
+ except IOError as e:
+ self.logger.warning(f"FAILED to download '{filename}': {str(e)}")
+ if e.errno:
+ sys.stderr.write(
+ f"Restoration Process Error: Failed to save file object {filename}: {os.strerror(e.errno)}"
+ )
+ continue
+
+ def _download_file(
+ self,
+ filename,
+ contentnode=None,
+ assessment_item=None,
+ preset=None,
+ file_size=None,
+ lang_id=None,
+ is_thumbnail=False,
+ ):
+ """
+ Create and possibly download a file from source instance and save to local storage
+
+ :param filename: the name of the file to download
+ :param contentnode: the associated content node
+ :param assessment_item: the associated assessment item
+ :param preset: the format preset for the file
+ :param file_size: the known size of the file
+ :param lang_id: the language ID of the file
+ :param is_thumbnail: whether the file is a thumbnail
+ """
+ checksum, extension = os.path.splitext(filename)
+ extension = extension.lstrip(".")
+ filepath = models.generate_object_storage_name(checksum, filename)
+
+ file_url = f"/content/storage/{filename[0]}/{filename[1]}/{filename}"
+ file_exists = False
+
+ # If the file already exists, get the size from the storage
+ if self.storage.exists(filepath):
+ file_size = file_size or self.storage.size(filepath)
+ file_exists = True
+        # otherwise download it, either because we were instructed to or because it is a node thumbnail
+ elif self.download_content or (is_thumbnail and contentnode):
+ buffer = BytesIO()
+ response = self.client.get(file_url)
+ for chunk in response:
+ buffer.write(chunk)
+
+ if is_thumbnail and contentnode:
+ # If the file is a thumbnail, save it to the content node
+ contentnode.thumbnail_encoding = json.dumps(
+ {
+ "base64": get_thumbnail_encoding(filename, input_buffer=buffer),
+ "points": [],
+ "zoom": 0,
+ }
+ )
+ else:
+ checksum, _, filepath = write_raw_content_to_storage(
+ buffer.getvalue(), ext=extension
+ )
+ buffer.close()
+ file_exists = True
+ # otherwise, if file size is not known, get it from the response headers
+ elif not file_size:
+ response = self.client.head(file_url)
+ file_size = int(response.headers.get("Content-Length", 0))
+
+ # Save values to a new file object
+ file_obj = models.File(
+ file_format_id=extension,
+ file_size=file_size,
+ contentnode=contentnode,
+ assessment_item=assessment_item,
+ language_id=lang_id,
+ preset_id=preset or "",
+ checksum=checksum,
+ )
+ file_obj.file_on_disk.name = filepath
+ # set_by_file_on_disk: skip unless the file has been downloaded
+ file_obj.save(set_by_file_on_disk=file_exists)
+
+ def _create_tags(self, contentnode):
+ """
+ Create tags associated with node
+
+        :param contentnode: the content node whose tags should be created
+ """
+        # Parse database for tags referencing the content node and make tag models
+ sql_command = f"""
+ SELECT ct.tag_name
+ FROM {NODE_TAG_TABLE} cnt
+ JOIN {TAG_TABLE} ct ON cnt.contenttag_id = ct.id
+ WHERE cnt.contentnode_id = ?;
+ """
+        query = self.cursor.execute(sql_command, (contentnode.node_id,)).fetchall()
+        # fetchall() returns rows as 1-tuples, so unpack the tag names
+        tag_names = [tag_name for (tag_name,) in query]
+
+        models.ContentTag.objects.bulk_create(
+            [
+                models.ContentTag(
+                    tag_name=tag_name,
+                    channel_id=self.target_id,
+                )
+                for tag_name in tag_names
+ ],
+ ignore_conflicts=True,
+ )
- # Save files to db
- download_file(match.group(3), assessment_item=assessment_item, preset=format_presets.EXERCISE, download_url=download_url)
+ # Save tags to node
+ contentnode.tags.set(
+ models.ContentTag.objects.filter(
+                tag_name__in=tag_names, channel_id=self.target_id
+ )
+ )
+ contentnode.save()
+
+ def _create_assessment_items(self, nodes):
+ """
+ Generate assessment items based on API data
+
+ :param nodes: nodes to lookup assessment items
+ """
+        # Note: node_id values identify nodes in the exported db, while the remote API
+        # refers to content nodes by their primary key ("id")
+ node_ids = [node.node_id for node in nodes]
+
+ if not self.token:
+ self.logger.warning(
+ f"Skipping assessment items for node(s) {','. join(node_ids)}"
+ )
+ return
+
+ # first obtain the remote nodes' IDs with the node ID and channel ID
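+        # e.g. for node_ids ["aaa", "bbb"] the filter below expands to
+        # "_node_id_channel_id___in=aaa,<source_id>,bbb,<source_id>"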
+ node_channel_ids = f",{self.source_id},".join(node_ids)
+ nodes_response = self.client.get_with_token(
+ f"/api/contentnode?_node_id_channel_id___in={node_channel_ids},{self.source_id}"
+ )
+ if nodes_response.status_code != 200:
+ self.logger.warning(
+ f"Failed to obtain assessment items for node(s) {','. join(node_ids)}"
+ )
+ return
+
+ nodes_data = nodes_response.json()
+ remote_node_pks = [n["id"] for n in nodes_data] if nodes_data else None
+
+ if not remote_node_pks:
+ self.logger.warning(
+ f"No content node found for node(s) {','. join(node_ids)}"
+ )
+ return
+
+ # Get the content node's assessment items
+ assessment_response = self.client.get_with_token(
+ f"/api/assessmentitem?contentnode__in={','.join(remote_node_pks)}"
+ )
+ if assessment_response.status_code != 200:
+ self.logger.warning(
+ f"Failed to obtain assessment items for node(s) {','. join(node_ids)}"
+ )
+ return
+
+ assessment_items = assessment_response.json()
+ if not assessment_items:
+ self.logger.warning(
+ f"No assessment items found for node(s) {','. join(node_ids)}"
+ )
+ return
+
+ remote_node_pk_map = (
+ {n["node_id"]: n["id"] for n in nodes_data} if nodes_data else {}
+ )
- return data['content']
+ for local_node in nodes:
+ remote_contentnode_id = remote_node_pk_map.get(local_node.node_id)
+ reduced_assessment_items = [
+ item
+ for item in assessment_items
+ if item["contentnode"] == remote_contentnode_id
+ ]
+
+ if not reduced_assessment_items:
+ self.logger.warning(
+ f"No assessment items found for node {local_node.node_id}"
+ )
+ continue
+
+ for item in reduced_assessment_items:
+ assessment_item = models.AssessmentItem.objects.create(
+ assessment_id=item["assessment_id"],
+ type=item["type"],
+ order=item["order"],
+ question=item["question"],
+ answers=item["answers"],
+ hints=item["hints"],
+ raw_data=item["raw_data"],
+ source_url=item["source_url"],
+ randomize=item.get("randomize", False),
+ )
+ self._process_assessment_images(assessment_item)
+ local_node.assessment_items.add(assessment_item)
+ errors = local_node.mark_complete()
+ if errors:
+ self.logger.warning(f"Node {local_node.node_id} has errors: {errors}")
+ local_node.save()
+
+ def _process_assessment_images(self, assessment_item):
+ """
+ Process images in assessment items and save them to the database.
+
+ :param assessment_item: The assessment item to process.
+ """
+ if not self.download_content:
+ # Skip if not downloading content
+ return
+
+ for content in [
+ assessment_item.question,
+ assessment_item.answers,
+ assessment_item.hints,
+ ]:
+ for match in re.finditer(exercise_image_filename_regex, content):
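+                # match.group(3) is expected to be the image filename
+                # (i.e. "<checksum>.<ext>"), which is what _download_file expects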
+ # Save files to db
+ self._download_file(
+ match.group(3),
+ assessment_item=assessment_item,
+ preset=format_presets.EXERCISE,
+ )
diff --git a/contentcuration/contentcuration/utils/storage/__init__.py b/contentcuration/contentcuration/utils/storage/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contentcuration/contentcuration/utils/storage/base.py b/contentcuration/contentcuration/utils/storage/base.py
new file mode 100644
index 0000000000..a78e54153f
--- /dev/null
+++ b/contentcuration/contentcuration/utils/storage/base.py
@@ -0,0 +1,97 @@
+from django.core.files.storage import Storage as BaseStorage
+
+
+class Storage(BaseStorage):
+    def writeable(self):
+        """
+        Whether this backend can be written to
+
+        :rtype: bool
+        """
+        return True
+
+    def get_client(self):
+        """
+        Return the underlying client for this backend, if it has one
+
+        :rtype: object
+        """
+        return None
+
+ def get_presigned_put_url(
+ self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"
+ ):
+ """
+ Creates a pre-signed URL for uploading files.
+
+ :param filepath: A string representing the destination file path inside the bucket
+ :param md5sum: A MD5 checksum of the file to be uploaded
+ :param lifetime_sec: The lifetime of the URL in seconds
+ :param mimetype: The content type of the file to be uploaded
+ :return: A pre-signed URL for uploading the file
+ """
+ raise NotImplementedError("Subclasses must implement this method")
+
+
+class CompositeStorage(Storage):
+ def __init__(self):
+ self.backends = []
+
+ def _get_writeable_backend(self):
+ """
+ :rtype: Storage
+ """
+ for backend in self.backends:
+ if backend.writeable:
+ return backend
+ raise AssertionError("No writeable backend found")
+
+ def _get_readable_backend(self, name):
+ """
+ :rtype: Storage
+ """
+ for backend in self.backends:
+ if backend.exists(name):
+ return backend
+ raise FileNotFoundError("{} not found".format(name))
+
+ def get_client(self):
+ return self._get_writeable_backend().get_client()
+
+ def open(self, name, mode="rb"):
+ return self._get_readable_backend(name).open(name, mode)
+
+ def save(self, name, content, max_length=None):
+ return self._get_writeable_backend().save(name, content, max_length=max_length)
+
+ def delete(self, name):
+ self._get_writeable_backend().delete(name)
+
+ def exists(self, name):
+ try:
+ self._get_readable_backend(name)
+ return True
+ except FileNotFoundError:
+ return False
+
+ def listdir(self, path):
+ # This method was not implemented on GoogleCloudStorage to begin with
+ raise NotImplementedError("listdir is not implemented for CompositeStorage")
+
+ def size(self, name):
+ return self._get_readable_backend(name).size(name)
+
+ def url(self, name):
+ return self._get_readable_backend(name).url(name)
+
+ def get_accessed_time(self, name):
+ return self._get_readable_backend(name).get_accessed_time(name)
+
+ def get_created_time(self, name):
+ return self._get_readable_backend(name).get_created_time(name)
+
+ def get_modified_time(self, name):
+ return self._get_readable_backend(name).get_modified_time(name)
+
+ def get_presigned_put_url(
+ self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"
+ ):
+ return self._get_writeable_backend().get_presigned_put_url(
+ filepath, md5sum, lifetime_sec, mimetype=mimetype
+ )
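+
+
+# Illustrative usage (a sketch; PrimaryStorage and ArchiveStorage are hypothetical
+# Storage subclasses, not part of this module):
+#
+#   storage = CompositeStorage()
+#   storage.backends.append(PrimaryStorage())   # writeable backend, takes all writes
+#   storage.backends.append(ArchiveStorage())   # read-only fallback for existing files
+#   storage.save(name, content)  # goes to the first writeable backend
+#   storage.open(name)           # served by the first backend where name exists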
diff --git a/contentcuration/contentcuration/utils/storage/common.py b/contentcuration/contentcuration/utils/storage/common.py
new file mode 100644
index 0000000000..f48fd0da2d
--- /dev/null
+++ b/contentcuration/contentcuration/utils/storage/common.py
@@ -0,0 +1,66 @@
+import mimetypes
+import os
+
+from django.core.files.storage import default_storage
+
+from .base import CompositeStorage
+from .base import Storage
+
+
+# Do this to ensure that we infer mimetypes for files properly, specifically
+# zip file and epub files.
+# to add additional files add them to the mime.types file
+mimetypes.init([os.path.join(os.path.dirname(__file__), "mime.types")])
+
+
+class UnknownStorageBackendError(Exception):
+ pass
+
+
+def determine_content_type(filename):
+ """
+    Guess and return the mimetype of a file based on its filename.
+
+    Returns "application/octet-stream" if the type can't be guessed.
+ """
+
+ typ, _ = mimetypes.guess_type(filename)
+
+ if not typ:
+ return "application/octet-stream"
+ return typ
+
+
+def get_presigned_upload_url(
+ filepath, md5sum_b64, lifetime_sec, storage=default_storage
+):
+ """
+ Return a presigned URL that can modify the given filepath through a PUT
+ request. Performing a PUT request on the returned URL changes the object's
+ contents with the contents of your PUT request.
+
+ :param: filepath: the file path inside the bucket, to the file.
+ :param: md5sum_b64: the base64 encoded md5 hash of the file. The holder of the URL will
+ have to set a Content-MD5 HTTP header matching this md5sum once it
+ initiates the download.
+ :param: lifetime_sec: the lifetime of the generated upload url, in seconds.
+
+ :returns: a dictionary containing 2 keys:
+ mimetype: the mimetype that will be required to send as part of the file upload's mimetype header
+ uploadURL: the URL to upload the file to.
+
+ :raises: :class:`UnknownStorageBackendError`: If the storage backend is not S3 or GCS.
+ """
+ mimetype = determine_content_type(filepath)
+
+ if isinstance(storage, (Storage, CompositeStorage)):
+ upload_url = storage.get_presigned_put_url(
+ filepath, md5sum_b64, lifetime_sec, mimetype=mimetype
+ )
+ else:
+ raise UnknownStorageBackendError(
+ "Please ensure your storage backend is either Google Cloud Storage or S3 Storage!"
+ )
+
+ return {"mimetype": mimetype, "uploadURL": upload_url}
diff --git a/contentcuration/contentcuration/utils/storage/dev.py b/contentcuration/contentcuration/utils/storage/dev.py
new file mode 100644
index 0000000000..7e77a6e305
--- /dev/null
+++ b/contentcuration/contentcuration/utils/storage/dev.py
@@ -0,0 +1,53 @@
+from django.conf import settings
+from django_s3_storage.storage import S3Storage
+from google.cloud.storage import Client
+
+from contentcuration.utils.storage.base import CompositeStorage as BaseCompositeStorage
+from contentcuration.utils.storage.base import Storage as BaseStorage
+from contentcuration.utils.storage.gcs import GoogleCloudStorage
+
+
+class Storage(S3Storage, BaseStorage):
+ def get_client(self):
+ """
+ :rtype: botocore.client.BaseClient
+ """
+ return self.s3_connection
+
+ def get_presigned_put_url(self, filepath, md5sum, lifetime_sec, mimetype=None):
+ """
+ Creates a pre-signed URL for development storage backends
+
+ Note that since our production object storage backend is GCS, we do not enforce or require
+ any Content-MD5 value.
+
+ :param: filepath: the file path inside the bucket that the user can PUT their object.
+ :param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT.
+ This is ignored for this function and added solely to maintain API compatibility with other
+ private presigned URL functions.
+ :param: lifetime_sec: how long before the presigned URL expires, in seconds.
+ :param: mimetype: the content type of the file to be uploaded
+ :return: A pre-signed URL for uploading the file
+ """
+ # S3's PUT Object parameters:
+ # https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
+ method = "put_object"
+ fields = {
+ "Bucket": settings.AWS_S3_BUCKET_NAME,
+ "Key": filepath,
+ }
+
+ return self.get_client().generate_presigned_url(
+ ClientMethod=method,
+ Params=fields,
+ ExpiresIn=lifetime_sec,
+ )
+
+
+class CompositeStorage(BaseCompositeStorage):
+ def __init__(self):
+ super(CompositeStorage, self).__init__()
+ self.backends.append(Storage())
+ self.backends.append(
+ GoogleCloudStorage(Client.create_anonymous_client(), "studio-content")
+ )
diff --git a/contentcuration/contentcuration/utils/gcs_storage.py b/contentcuration/contentcuration/utils/storage/gcs.py
similarity index 76%
rename from contentcuration/contentcuration/utils/gcs_storage.py
rename to contentcuration/contentcuration/utils/storage/gcs.py
index 9ec21a3886..38e0347d2d 100644
--- a/contentcuration/contentcuration/utils/gcs_storage.py
+++ b/contentcuration/contentcuration/utils/storage/gcs.py
@@ -1,16 +1,19 @@
import logging
import tempfile
+from datetime import timedelta
from gzip import GzipFile
from io import BytesIO
import backoff
from django.conf import settings
from django.core.files import File
-from django.core.files.storage import Storage
from google.cloud.exceptions import InternalServerError
from google.cloud.storage import Client
from google.cloud.storage.blob import Blob
+from contentcuration.utils.storage.base import CompositeStorage
+from contentcuration.utils.storage.base import Storage
+
OLD_STUDIO_STORAGE_PREFIX = "/contentworkshop_content/"
CONTENT_DATABASES_MAX_AGE = 5 # seconds
@@ -18,7 +21,9 @@
MAX_RETRY_TIME = 60 # seconds
-def _create_default_client(service_account_credentials_path=settings.GCS_STORAGE_SERVICE_ACCOUNT_KEY_PATH):
+def _create_default_client(
+ service_account_credentials_path=settings.GCS_STORAGE_SERVICE_ACCOUNT_KEY_PATH,
+):
if service_account_credentials_path:
return Client.from_service_account_json(service_account_credentials_path)
return Client()
@@ -120,7 +125,8 @@ def save(self, name, fobj, max_length=None, blob_object=None):
# determine the current file's mimetype based on the name
# import determine_content_type lazily in here, so we don't get into an infinite loop with circular dependencies
- from contentcuration.utils.storage_common import determine_content_type
+ from contentcuration.utils.storage.common import determine_content_type
+
content_type = determine_content_type(name)
# force the current file to be at file location 0, to
@@ -132,7 +138,8 @@ def save(self, name, fobj, max_length=None, blob_object=None):
return name
blob.upload_from_file(
- fobj, content_type=content_type,
+ fobj,
+ content_type=content_type,
)
# Close StringIO object and discard memory buffer if created
@@ -211,67 +218,46 @@ def _is_file_empty(fobj):
fobj.seek(current_location)
return len(byt) == 0
-
-class CompositeGCS(Storage):
- def __init__(self):
- self.backends = []
- self.backends.append(GoogleCloudStorage(_create_default_client(), settings.AWS_S3_BUCKET_NAME))
- # Only add the studio-content bucket (the production bucket) if we're not in production
- if settings.SITE_ID != settings.PRODUCTION_SITE_ID:
- self.backends.append(GoogleCloudStorage(Client.create_anonymous_client(), "studio-content"))
-
- def _get_writeable_backend(self):
+ def get_presigned_put_url(
+ self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"
+ ):
"""
- :rtype: GoogleCloudStorage
- """
- for backend in self.backends:
- if backend.writeable:
- return backend
- raise AssertionError("No writeable backend found")
+ Creates a pre-signed URL for GCS.
- def _get_readable_backend(self, name):
- """
- :rtype: GoogleCloudStorage
+ :param filepath: A string representing the destination file path inside the bucket
+ :param md5sum: A MD5 checksum of the file to be uploaded
+ :param lifetime_sec: The lifetime of the URL in seconds
+ :param mimetype: The content type of the file to be uploaded
+ :return: A pre-signed URL for uploading the file
"""
- for backend in self.backends:
- if backend.exists(name):
- return backend
- raise FileNotFoundError("{} not found".format(name))
-
- def get_client(self):
- return self._get_writeable_backend().get_client()
-
- def open(self, name, mode='rb'):
- return self._get_readable_backend(name).open(name, mode)
-
- def save(self, name, content, max_length=None):
- return self._get_writeable_backend().save(name, content, max_length=max_length)
-
- def delete(self, name):
- self._get_writeable_backend().delete(name)
-
- def exists(self, name):
- try:
- self._get_readable_backend(name)
- return True
- except FileNotFoundError:
- return False
-
- def listdir(self, path):
- # This method was not implemented on GoogleCloudStorage to begin with
- raise NotImplementedError("listdir is not implemented for CompositeGCS")
-
- def size(self, name):
- return self._get_readable_backend(name).size(name)
-
- def url(self, name):
- return self._get_readable_backend(name).url(name)
-
- def get_accessed_time(self, name):
- return self._get_readable_backend(name).get_accessed_time(name)
+ blob_obj = self.bucket.blob(filepath)
+
+ # ensure the md5sum doesn't have any whitespace, including newlines.
+ # We should do the same whitespace stripping as well on any client that actually
+ # uses the returned presigned url.
+ md5sum_stripped = md5sum.strip()
+
+ # convert the lifetime to a timedelta, so gcloud library will interpret the lifetime
+ # as the seconds from right now. If we use an absolute integer, it's the number of seconds
+ # from unix time
+ lifetime_timedelta = timedelta(seconds=lifetime_sec)
+
+ return blob_obj.generate_signed_url(
+ method="PUT",
+ content_md5=md5sum_stripped,
+ content_type=mimetype,
+ expiration=lifetime_timedelta,
+ )
- def get_created_time(self, name):
- return self._get_readable_backend(name).get_created_time(name)
- def get_modified_time(self, name):
- return self._get_readable_backend(name).get_modified_time(name)
+class CompositeGCS(CompositeStorage):
+ def __init__(self):
+ super(CompositeGCS, self).__init__()
+ self.backends.append(
+ GoogleCloudStorage(_create_default_client(), settings.AWS_S3_BUCKET_NAME)
+ )
+ # Only add the studio-content bucket (the production bucket) if we're not in production
+ if settings.SITE_ID != settings.PRODUCTION_SITE_ID:
+ self.backends.append(
+ GoogleCloudStorage(Client.create_anonymous_client(), "studio-content")
+ )
diff --git a/contentcuration/contentcuration/utils/storage_common.py b/contentcuration/contentcuration/utils/storage_common.py
deleted file mode 100644
index f2ba6e3188..0000000000
--- a/contentcuration/contentcuration/utils/storage_common.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import mimetypes
-import os
-from datetime import timedelta
-
-from django.conf import settings
-from django.core.files.storage import default_storage
-from django_s3_storage.storage import S3Storage
-
-from .gcs_storage import CompositeGCS
-from .gcs_storage import GoogleCloudStorage
-
-
-# Do this to ensure that we infer mimetypes for files properly, specifically
-# zip file and epub files.
-# to add additional files add them to the mime.types file
-mimetypes.init([os.path.join(os.path.dirname(__file__), "mime.types")])
-
-
-class UnknownStorageBackendError(Exception):
- pass
-
-
-def determine_content_type(filename):
- """
- Guesses the content type of a filename. Returns the mimetype of a file.
-
- Returns "application/octet-stream" if the type can't be guessed.
- Raises an AssertionError if filename is not a string.
- """
-
- typ, _ = mimetypes.guess_type(filename)
-
- if not typ:
- return "application/octet-stream"
- return typ
-
-
-def get_presigned_upload_url(
- filepath, md5sum_b64, lifetime_sec, content_length, storage=default_storage, client=None
-):
- """Return a presigned URL that can modify the given filepath through a PUT
- request. Performing a PUT request on the returned URL changes the object's
- contents with the contents of your PUT request.
-
- :param: filepath: the file path inside the bucket, to the file.
- :param: md5sum_b64: the base64 encoded md5 hash of the file. The holder of the URL will
- have to set a Content-MD5 HTTP header matching this md5sum once it
- initiates the download.
- :param: lifetime_sec: the lifetime of the generated upload url, in seconds.
- :param: content_length: the size of the content, in bytes.
- :param: client: the storage client that will be used to gennerate the presigned URL.
- This must have an API that's similar to either the GCS client or the boto3 client.
-
- :returns: a dictionary containing 2 keys:
- mimetype: the mimetype that will be required to send as part of the file upload's mimetype header
- uploadURL: the URL to upload the file to.
-
- :raises: :class:`UnknownStorageBackendError`: If the storage backend is not S3 or GCS.
- """
-
- # Aron: note that content_length is not used right now because
- # both storage types are having difficulties enforcing it.
-
- mimetype = determine_content_type(filepath)
- if isinstance(storage, (GoogleCloudStorage, CompositeGCS)):
- client = client or storage.get_client()
- bucket = settings.AWS_S3_BUCKET_NAME
- upload_url = _get_gcs_presigned_put_url(client, bucket, filepath, md5sum_b64, lifetime_sec, mimetype=mimetype)
- elif isinstance(storage, S3Storage):
- bucket = settings.AWS_S3_BUCKET_NAME
- client = client or storage.s3_connection
- upload_url = _get_s3_presigned_put_url(client, bucket, filepath, md5sum_b64, lifetime_sec)
- else:
- raise UnknownStorageBackendError(
- "Please ensure your storage backend is either Google Cloud Storage or S3 Storage!"
- )
-
- return {
- "mimetype": mimetype,
- "uploadURL": upload_url
- }
-
-
-def _get_gcs_presigned_put_url(gcs_client, bucket, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"):
- bucket_obj = gcs_client.get_bucket(bucket)
- blob_obj = bucket_obj.blob(filepath)
-
- # ensure the md5sum doesn't have any whitespace, including newlines.
- # We should do the same whitespace stripping as well on any client that actually
- # uses the returned presigned url.
- md5sum_stripped = md5sum.strip()
-
- # convert the lifetime to a timedelta, so gcloud library will interpret the lifetime
- # as the seconds from right now. If we use an absolute integer, it's the number of seconds
- # from unix time
- lifetime_timedelta = timedelta(seconds=lifetime_sec)
-
- url = blob_obj.generate_signed_url(
- method="PUT",
- content_md5=md5sum_stripped,
- content_type=mimetype,
- expiration=lifetime_timedelta,
- )
-
- return url
-
-
-def _get_s3_presigned_put_url(s3_client, bucket, filepath, md5sum, lifetime_sec):
- """
- Creates a pre-signed URL for S3-like backends, e.g. Minio.
-
- Note that since our production object storage backend is GCS, we do not enforce or require
- any Content-MD5 value.
-
- :param: s3_client: an initialized S3 client. We will use this to create the presigned PUT url.
- :param: bucket: the bucket where the user can PUT their object.
- :param: filepath: the file path inside the bucket that the user can PUT their object.
- :param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT.
- This is ignored for this function and added solely to maintain API compatibility with other
- private presigned URL functions.
- :param: lifetime_sec: how long before the presigned URL expires, in seconds.
- """
- # S3's PUT Object parameters:
- # https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
- method = "put_object"
- fields = {
- "Bucket": bucket,
- "Key": filepath,
- }
-
- response = s3_client.generate_presigned_url(
- ClientMethod=method,
- Params=fields,
- ExpiresIn=lifetime_sec,
- )
-
- return response
diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py
index f73f0557f2..35ba9157f5 100644
--- a/contentcuration/contentcuration/viewsets/file.py
+++ b/contentcuration/contentcuration/viewsets/file.py
@@ -18,7 +18,7 @@
from contentcuration.models import generate_storage_url
from contentcuration.utils.cache import ResourceSizeCache
from contentcuration.utils.sentry import report_exception
-from contentcuration.utils.storage_common import get_presigned_upload_url
+from contentcuration.utils.storage.common import get_presigned_upload_url
from contentcuration.utils.user import calculate_user_storage
from contentcuration.viewsets.base import BulkDeleteMixin
from contentcuration.viewsets.base import BulkListSerializer
@@ -236,7 +236,7 @@ def upload_url(self, request):
codecs.decode(checksum, "hex"), "base64"
).decode()
retval = get_presigned_upload_url(
- filepath, checksum_base64, 600, content_length=size
+ filepath, checksum_base64, 600
)
file = File(
diff --git a/docker-compose.yml b/docker-compose.yml
index e1b1e35c6f..ae83fbb645 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,3 @@
-version: '3.4'
-
x-studio-environment:
&studio-environment
MPLBACKEND: ps
@@ -14,7 +12,8 @@ x-studio-environment:
CELERY_BROKER_ENDPOINT: redis
CELERY_RESULT_BACKEND_ENDPOINT: redis
CELERY_REDIS_PASSWORD: ""
- PROBER_STUDIO_BASE_URL: http://studio-app:8080/{path}
+ PROBER_STUDIO_BASE_URL: http://studio-app:8081/{path}
+ WEBPACK_DEV_HOST: 0.0.0.0
x-studio-worker:
&studio-worker
@@ -36,10 +35,7 @@ services:
build:
context: .
dockerfile: k8s/images/nginx/Dockerfile
- ports:
- - "8081:8080"
- depends_on:
- - studio-app
+ network_mode: host
environment: *studio-environment
studio-app:
@@ -47,7 +43,7 @@ services:
entrypoint: python docker/entrypoint.py
command: pnpm run devserver
ports:
- - "8080:8080"
+ - "8081:8081"
- "4000:4000"
celery-worker:
@@ -71,6 +67,8 @@ services:
build:
context: ./docker
dockerfile: Dockerfile.postgres.dev
+ ports:
+ - "5432:5432"
environment:
PGDATA: /var/lib/postgresql/data/pgdata
POSTGRES_USER: learningequality
@@ -82,6 +80,8 @@ services:
redis:
image: redis:6.0.9
+ ports:
+ - "6379:6379"
cloudprober:
<<: *studio-worker
diff --git a/docs/_index.md b/docs/_index.md
index c3e67006ba..03a5e49aea 100644
--- a/docs/_index.md
+++ b/docs/_index.md
@@ -2,12 +2,11 @@
## Local development guides
-- [Local development instructions: With Docker (recommended)](./local_dev_docker.md)
-- [Local development instructions: Run everything on your host machine](./local_dev_host.md)
+- [Local development instructions](./local_dev.md)
- [Local development tools](./dev_tools.md)
- [Running tests](./running_tests.md)
- [Adding or updating dependencies](./dependencies.md)
-- [Preparing a pull request](../pull_requests.md)
+- [Preparing a pull request](./pull_requests.md)
## Additional development tools
diff --git a/docs/host_services_setup.md b/docs/host_services_setup.md
index 451c3b3c95..528c5e4442 100644
--- a/docs/host_services_setup.md
+++ b/docs/host_services_setup.md
@@ -1,16 +1,10 @@
-# Local development instructions: run everything on your host machine
+# Supplemental instructions for host services
-This guide will walk through setting up Kolibri Studio for local development, where you'll run Studio's Python apps and all of Studio's services on your host machine, without the need for docker.
+This guide is a supplement to Kolibri Studio's [local development instructions](./local_dev.md) and provides additional notes and instructions for setting up Kolibri Studio's services manually.
## Prerequisites
-For detailed instructions on installing and configuring Volta, pyenv, and pyenv-virtualenv, please see the [Prerequisites](./local_dev_host.md#prerequisites) section in our Local Development with host guide.
## Install system dependencies and services
-Studio requires some background services to be running:
-
-* Minio - a local S3 storage emulation
-* PostgreSQL - a relational database
-* Redis - a fast key/value store useful for caching
### Ubuntu or Debian
```bash
@@ -39,14 +33,7 @@ brew link --force imagemagick@6
## Set up the database
-Make sure postgres is running:
-
-```bash
-service postgresql start
-# alternatively: pg_ctl -D /usr/local/var/postgresql@16 start
-```
-
-Start the client with:
+Once you've started postgres, access the postgres client with:
```bash
sudo su postgres # switch to the postgres account
@@ -70,26 +57,3 @@ Press Ctrl+D to exit the `psql` client. Finally
```bash
exit # leave the postgres account
```
-
-## Build your python virtual environment
-For complete instructions on installing Python 3.10.13, creating and activating the virtual environment, and installing Studio’s Python dependencies, please refer to the [Build Your Python Virtual Environment](./local_dev_host.md#build-your-python-virtual-environment) section in our Local Development with host guide.
-
-### A note about `psycopg2`
-The packages `postgresql-16`, `postgresql-contrib`, and `postgresql-server-dev-all` are required to build `psycopg2` python driver.
-
-### A note about dependencies on Apple Silicon M1+
-If you run into an error with `pip install` related to the `grcpio` package, it is because it currently [does not support M1 with the version for `grcpio` Studio uses](https://github.com/grpc/grpc/issues/25082). In order to fix it, you will need to add the following environmental variables before running `pip install`:
-```bash
-export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-export GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
-export CFLAGS="-I/opt/homebrew/opt/openssl/include"
-export LDFLAGS="-L/opt/homebrew/opt/openssl/lib"
-```
-
-## Install frontend dependencies
-For guidance on installing Node 18.X, pnpm, and all required frontend dependencies, running the services, initializing Studio, and running the development server , please refer to the [Install Frontend Dependencies](./local_dev_host.md#install-frontend-dependencies) section in our Local Development with host guide.
-
-Either of the above commands will take a few minutes to build the frontend. When it's done, you can sign in with the account created by the `pnpm run devsetup` command:
-- url: `http://localhost:8080/accounts/login/`
-- username: `a@a.com`
-- password: `a`
diff --git a/docs/local_dev.md b/docs/local_dev.md
new file mode 100644
index 0000000000..520270bc61
--- /dev/null
+++ b/docs/local_dev.md
@@ -0,0 +1,157 @@
+# Local development instructions
+
+The following guide describes the preferred method of running Kolibri Studio locally for development. With this setup, all of Studio's services run in Docker containers, while the Python and webpack development servers run on your host.
+
+**Note:** If you are developing on Windows or would rather take a manual approach to installation, please see the supplemental documentation in the [WSL setup guide](./local_dev_wsl.md) and [host services setup guide](./host_services_setup.md).
+
+## Prerequisites
+The following tools are required to run Studio locally:
+- [git](https://git-scm.com/)
+- [Docker Engine](https://docs.docker.com/engine/install/) (Community Edition)
+- [Docker Compose](https://docs.docker.com/compose/install/)
+- [volta](https://docs.volta.sh/guide/getting-started)
+- [pyenv](https://kolibri-dev.readthedocs.io/en/develop/howtos/installing_pyenv.html) and [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv#installation)
+
+## Build your python virtual environment
+Once you've cloned the repository, you can set up a python virtual environment using `pyenv` and `pyenv-virtualenv`. This is the recommended way to manage your python versions and virtual environments.
+
+To determine the version of Python that Studio requires, check the `runtime.txt` file:
+```bash
+$ cat runtime.txt
+# This is the required version of Python to run Studio currently.
+# This is determined by the default Python 3 version that is installed
+# inside Ubuntu Bionic, which is used to build images for Studio.
+# We encode it here so that it can be picked up by Github's dependabot
+# to manage automated package upgrades.
+python-3.10.13
+```
+
+So to install python 3.10.13 through `pyenv` and set up a virtual environment:
+```bash
+pyenv install 3.10.13
+pyenv virtualenv 3.10.13 studio-py3.10
+pyenv activate studio-py3.10
+```
+
+Now you may install Studio's Python dependencies:
+```bash
+pip install -r requirements.txt -r requirements-dev.txt
+```
+
+To deactivate the virtual environment, when you're finished developing on Studio for the time being:
+```bash
+pyenv deactivate
+```
+
+### A note about `psycopg2`
+The packages `postgresql-16`, `postgresql-contrib`, and `postgresql-server-dev-all` may be required to build the `psycopg2` python driver.
+
+### A note about dependencies on Apple Silicon M1+
+If you run into an error with `pip install` related to the `grcpio` package, it is because it currently [does not support M1 with the version for `grcpio` Studio uses](https://github.com/grpc/grpc/issues/25082). In order to fix it, you will need to add the following environmental variables before running `pip install`:
+```bash
+export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
+export GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
+export CFLAGS="-I/opt/homebrew/opt/openssl/include"
+export LDFLAGS="-L/opt/homebrew/opt/openssl/lib"
+```
+
+## Install frontend dependencies
+The project requires `Node 18.X` as the runtime and `pnpm` as the package manager. You can use [`Volta`](https://docs.volta.sh/guide/getting-started) to manage the Node.js version and, with a few extra steps, to install pnpm.
+
+### Volta installation of pnpm
+If you have Volta installed, you can use it to install pnpm:
+```bash
+export VOLTA_FEATURE_PNPM=1
+volta install pnpm
+```
+
+### Corepack installation of pnpm
+If you have Corepack installed, you can use it to install pnpm:
+```bash
+corepack enable
+corepack install
+```
+
+### Completing the installation
+Once `pnpm` is installed, you can install all the dependencies by running:
+```bash
+pnpm install
+```
+
+## Install and run services
+
+Studio requires some background services to be running:
+
+* Minio - a local storage emulation
+* PostgreSQL (postgres) - a relational database
+* Redis - a fast key/value store useful for caching
+* Celery - the task manager and executor, which relies on the Studio codebase
+
+Before starting the services, make sure nothing else on your machine is using the same ports. For example, if you have a local postgres server running, stop it before starting the docker-based services.
+
+Generally speaking, you'll want to open a separate terminal/terminal-tab to run the services. With Docker and Docker Compose installed, running the above services is as easy as:
+```bash
+make devrun-services
+```
+
+The above command may take longer the first time it's run. It also starts the `celery` workers (see the 'Running the celery service' section below). You may use the following commands to start the services and the celery workers separately:
+
+```bash
+make dcservicesup
+make devceleryworkers
+```
+
+To confirm that docker-based services are running, you should see three or more containers when executing `docker ps`. For example:
+
+```bash
+> docker ps
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+e09c5c203b93 redis:6.0.9 "docker-entrypoint.s…" 51 seconds ago Up 49 seconds 0.0.0.0:6379->6379/tcp studio_vue-refactor_redis_1
+6164371efb6b minio/minio "minio server /data" 51 seconds ago Up 49 seconds 0.0.0.0:9000->9000/tcp studio_vue-refactor_minio_1
+c86bbfa3a59e postgres:12.10 "docker-entrypoint.s…" 51 seconds ago Up 49 seconds 0.0.0.0:5432->5432/tcp studio_vue-refactor_postgres_1
+```
+
+To stop the services, press Ctrl + C in the terminal where you ran `make devrun-services` (or `dcservicesup`). Once you've done that, you may run the following command to remove the docker containers (they will be recreated when you run `devrun-services` or `dcservicesup` again):
+```bash
+make dcservicesdown
+```
+
+Lastly, the volumes used by minio and postgres are not removed when you run `dcservicesdown`. If you want to remove them, you can run the following command:
+```bash
+make dcclean
+```
+
+## Initializing Studio
+With the services running, in a separate terminal/terminal-tab, we can now initialize the database for Studio development purposes. The command below will initialize the database tables, import constants, enable the required postgres extensions, and create a Studio user account for development:
+```bash
+make devrun-setup
+```
+
+## Running the development server
+With the services running and the database initialized, we can start the dev server in a separate terminal/terminal-tab:
+```bash
+make devrun-server-hot # with Vue hot module reloading
+# or
+make devrun-server # without hot module reloading
+```
+
+### Running within docker
+If you want to run the development server within docker, you can use the following command:
+```bash
+make dcup
+```
+
+Either of the above commands will take a few moments to build the frontend. When it finishes, you can sign in with the account created by the `make devrun-setup` command:
+- url: `http://localhost:8080/accounts/login/`
+- username: `a@a.com`
+- password: `a`
+
+## Running the celery service
+Studio uses `celery` to execute asynchronous tasks, which are integral to Studio's channel editing architecture. Unlike the Django devserver, the celery service does not automatically reload when Python files change, so it's often preferable to run it separately. If you are developing changes against a task or the celery configuration, use `make dcservicesup` to run only the docker-based services and restart the celery workers yourself when needed.
+
+In a separate terminal/terminal-tab, run the following to start the service and press Ctrl + C to stop it:
+```bash
+make devceleryworkers
+```
+
+Stop and restart the above to reload your changes.
diff --git a/docs/local_dev_docker.md b/docs/local_dev_docker.md
deleted file mode 100644
index ccd3cad729..0000000000
--- a/docs/local_dev_docker.md
+++ /dev/null
@@ -1,93 +0,0 @@
-# Local development instructions: With Docker (recommended)
-
-The following guide utilizes docker and docker-compose to run select services required for Studio to function. It's our recommended setup. However, if you would rather install these services on your host, please follow the [host-setup guide](./local_dev_host.md).
-
-**Note:** If you are developing on Windows, it is recommended to use WSL (Windows Subsystem for Linux). Please follow the [WSL setup guide](./local_dev_wsl.md) for detailed instructions.
-
-## Prerequisites
-For detailed instructions on installing and configuring Volta, pyenv, and pyenv-virtualenv, please see the [Prerequisites](./local_dev_host.md#prerequisites) section in our Local Development with host guide.
-
-## Build your python virtual environment
-For complete instructions on installing Python 3.10.13, creating and activating the virtual environment, and installing Studio’s Python dependencies, please refer to the [Build Your Python Virtual Environment](./local_dev_host.md#build-your-python-virtual-environment) section in our Local Development with host guide.
-
-
-### A note about dependencies on Apple Silicon M1+
-If you run into an error with `pip install` related to the `grcpio` package, it is because it currently [does not support M1 with the version for `grcpio` Studio uses](https://github.com/grpc/grpc/issues/25082). In order to fix it, you will need to add the following environmental variables before running `pip install`:
-```bash
-export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-export GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
-export CFLAGS="-I/opt/homebrew/opt/openssl/include"
-export LDFLAGS="-L/opt/homebrew/opt/openssl/lib"
-```
-
-## Install frontend dependencies
-The project requires `Node 18.X` as the runtime and `pnpm` as the package manager. We make use of [`Volta`](https://docs.volta.sh/guide/getting-started) to manage the same automatically. Please make sure you have volta installed and your shell configured to use volta. You can then install all the dependencies by running:
-```bash
-corepack use pnpm # or `volta install pnpm`
-pnpm install
-```
-
-## Install and run services
-
-Studio requires some background services to be running:
-
-* Minio - a local S3 storage emulation
-* PostgreSQL (postgres) - a relational database
-* Redis - a fast key/value store useful for caching
-* Celery - the task manager and executor, which relies on the Studio codebase
-
-Generally speaking, you'll want to open a separate terminal/terminal-tab to run the services. With docker and docker-compose installed, running the above services is as easy as:
-```bash
-make run-services
-```
-
-The above command may take longer the first time it's run. It includes starting the `celery` workers, and the other dependent services through docker, which can be done separately with the following two commands:
-
-```bash
-make dcservicesup
-make devceleryworkers
-```
-
-To confirm that docker-based services are running, you should see three containers when executing `docker ps`. For example:
-
-```bash
-> docker ps
-CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
-e09c5c203b93 redis:6.0.9 "docker-entrypoint.s…" 51 seconds ago Up 49 seconds 0.0.0.0:6379->6379/tcp studio_vue-refactor_redis_1
-6164371efb6b minio/minio "minio server /data" 51 seconds ago Up 49 seconds 0.0.0.0:9000->9000/tcp studio_vue-refactor_minio_1
-c86bbfa3a59e postgres:12.10 "docker-entrypoint.s…" 51 seconds ago Up 49 seconds 0.0.0.0:5432->5432/tcp studio_vue-refactor_postgres_1
-```
-
-To stop the services, press Ctrl + C in the terminal where you ran `make run-services` (or `dcservicesup`). Once you've done that, you may run the following command to remove the docker containers (they will be recreated when you run `run-services` or `dcservicesup` again):
-```bash
-make dcservicesdown
-```
-
-## Initializing Studio
-With the services running, in a separate terminal/terminal-tab, we can now initialize the database for Studio development purposes. The command below will initialize the database tables, import constants, enable required postgres extensions and a studio user account for development:
-```bash
-pnpm run devsetup
-```
-
-## Running the development server
-With the services running, in a separate terminal/terminal-tab, and the database initialized, we can start the dev server:
-```bash
-pnpm run devserver:hot # with Vue hot module reloading
-# or
-pnpm run devserver # without hot module reloading
-```
-
-Either of the above commands will take a few moments to build the frontend. When it finishes, you can sign in with the account created by the `pnpm run devsetup` command:
-- url: `http://localhost:8080/accounts/login/`
-- username: `a@a.com`
-- password: `a`
-
-## Running the celery service
-Studio uses `celery` for executing asynchronous tasks, which are integral to Studio's channel editing architecture. The celery service does not reload when there are Python changes like the Django devserver does, so it's often preferred to run it separately. If you are developing changes against a task or the celery configuration, you'll need to use `make dcservicesup` to run only the docker-based services.
-
-In a separate terminal/terminal-tab, run the following to start the service and press Ctrl + C to stop it:
-```bash
-make devceleryworkers
-```
-
-Stop and restart the above to reload your changes.
diff --git a/docs/local_dev_host.md b/docs/local_dev_host.md
deleted file mode 100644
index 17e6243761..0000000000
--- a/docs/local_dev_host.md
+++ /dev/null
@@ -1,146 +0,0 @@
-# Local development instructions: Run everything on your host machine
-
-This guide will walk through setting up Kolibri Studio for local development, where you'll run Studio's Python apps and all of Studio's services on your host machine, without the need for docker.
-
-**Note:** If you are developing on Windows, it is recommended to use WSL (Windows Subsystem for Linux). Please follow the [WSL setup guide](./local_dev_wsl.md) for detailed instructions.
-
-## Prerequisites
-- [volta](https://docs.volta.sh/guide/getting-started)
-- [pyenv](https://kolibri-dev.readthedocs.io/en/develop/howtos/installing_pyenv.html) and [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv#installation)
-
-## Install system dependencies and services
-Studio requires some background services to be running:
-
-* Minio - a local S3 storage emulation
-* PostgreSQL - a relational database
-* Redis - a fast key/value store useful for caching
-
-### Ubuntu or Debian
-```bash
-# Install packages
-sudo apt-get install -y python-tk \
- postgresql-server-dev-all postgresql-contrib postgresql-client postgresql-16 \
- ffmpeg libmagickwand-dev redis-server wkhtmltopdf
-
-# Install minio
-wget https://dl.minio.io/server/minio/release/linux-amd64/minio -O bin/minio
-sudo chmod +x bin/minio
-```
-
-### Mac OS
-```bash
-brew install postgresql@16 redis ffmpeg imagemagick@6 gs
-# note, this version of minio may not be compatible with Studio
-brew install minio/stable/minio
-brew link --force postgresql@16
-brew link --force imagemagick@6
-```
-
-### Windows
-
-Windows is no longer supported due to incompatibilities with some required packages.
-
-## Set up the database
-
-Make sure postgres is running:
-
-```bash
-service postgresql start
-# alternatively: pg_ctl -D /usr/local/var/postgresql@16 start
-```
-
-Start the client with:
-
-```bash
-sudo su postgres # switch to the postgres account
-psql # mac: psql postgres
-```
-
-Create a database user with username `learningequality` and password `kolibri`:
-
-```sql
-CREATE USER learningequality with NOSUPERUSER INHERIT NOCREATEROLE CREATEDB LOGIN NOREPLICATION NOBYPASSRLS PASSWORD 'kolibri';
- ```
-
-Create a database called `kolibri-studio`:
-
-```sql
-CREATE DATABASE "kolibri-studio" WITH TEMPLATE = template0 ENCODING = "UTF8" OWNER = "learningequality";
-```
-
-Press Ctrl+D to exit the `psql` client. Finally
-
-```bash
-exit # leave the postgres account
-```
-
-## Build your python virtual environment
-To determine what version of Python studio needs, you can check the `runtime.txt` file:
-```bash
-$ cat runtime.txt
-# This is the required version of Python to run Studio currently.
-# This is determined by the default Python 3 version that is installed
-# inside Ubuntu Bionic, which is used to build images for Studio.
-# We encode it here so that it can be picked up by Github's dependabot
-# to manage automated package upgrades.
-python-3.10.13
-```
-So to install python 3.10.13 through `pyenv` and set up a virtual environment:
-```bash
-pyenv install 3.10.13
-pyenv virtualenv 3.10.13 studio-py3.10
-pyenv activate studio-py3.10
-```
-Now you may install Studio's Python dependencies:
-```bash
-pip install -r requirements.txt -r requirements-dev.txt
-```
-To deactivate the virtual environment, when you're finished developing on Studio for the time being:
-```bash
-pyenv deactivate
-```
-
-### A note about `psycopg2`
-The packages `postgresql-16`, `postgresql-contrib`, and `postgresql-server-dev-all` are required to build `psycopg2` python driver.
-
-### A note about dependencies on Apple Silicon M1+
-If you run into an error with `pip install` related to the `grcpio` package, it is because it currently [does not support M1 with the version for `grcpio` Studio uses](https://github.com/grpc/grpc/issues/25082). In order to fix it, you will need to add the following environmental variables before running `pip install`:
-```bash
-export GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1
-export GRPC_PYTHON_BUILD_SYSTEM_ZLIB=1
-export CFLAGS="-I/opt/homebrew/opt/openssl/include"
-export LDFLAGS="-L/opt/homebrew/opt/openssl/lib"
-```
-
-## Install frontend dependencies
-The project requires `Node 18.X` as the runtime and `pnpm` as the package manager. We make use of [`Volta`](https://docs.volta.sh/guide/getting-started) to manage the same automatically. Please make sure you have volta installed and your shell configured to use volta. You can then install all the dependencies by running:
-```bash
-corepack use pnpm # or `volta install pnpm`
-pnpm install
-```
-
-## Run the services
-
-Having installed all the necessary services, initialized your python virtual environment, and installed `pnpm`, you're now ready to start the services. Generally speaking, you'll want to open a separate terminal/terminal-tab to run the services. The following will ensure all services are started, in addition to starting the celery workers service:
-```bash
-pnpm run services
-```
-
-## Initializing Studio
-With the services running, in a separate terminal/terminal-tab, we can now initialize the database for Studio development purposes. The command below will initialize the database, in addition to adding a user account for development:
-```bash
-pnpm run devsetup
-```
-
-## Running the development server
-With the services running, in a separate terminal/terminal-tab, and the database initialized, we can start the dev server:
-```bash
-pnpm run devserver:hot # with Vue hot module reloading
-# or
-pnpm run devserver # without hot module reloading
-```
-
-Either of the above commands will take a few minutes to build the frontend. When it's done, you can sign in with the account created by the `pnpm run devsetup` command:
-- url: `http://localhost:8080/accounts/login/`
-- username: `a@a.com`
-- password: `a`
diff --git a/docs/local_dev_wsl.md b/docs/local_dev_wsl.md
index e01e4ad75b..6c12c50b2c 100644
--- a/docs/local_dev_wsl.md
+++ b/docs/local_dev_wsl.md
@@ -1,23 +1,9 @@
-# Kolibri Studio Local Setup Guide Using WSL
+# Supplemental instructions for WSL
-This guide will walk you through setting up Kolibri Studio for local development using Windows Subsystem for Linux (WSL). We will cover everything from installing WSL and a Linux distribution to configuring your environment and running the necessary commands.
+This guide is a supplement to Kolibri Studio's [local development instructions](./local_dev.md), specifically for using Windows Subsystem for Linux (WSL).
-## Table of Contents
-
- 1. [Install WSL and Ubuntu](#install-wsl-and-ubuntu)
- 2. [Update and Upgrade Packages](#update-and-upgrade-packages)
- 3. [Install Prerequisites](#install-prerequisites)
- 4. [Install System Dependencies and Services](#install-system-dependencies-and-services)
- 5. [Set Up the Database](#set-up-the-database)
- 6. [Configure .bashrc](#configure-bashrc)
- 7. [Set Up Python Virtual Environment](#set-up-python-virtual-environment)
- 8. [Install Frontend Dependencies](#install-frontend-dependencies)
- 9. [Run the Services](#run-the-services)
-10. [Initialize Studio](#initialize-studio)
-11. [Run the Development Server](#run-the-development-server)
-12. [Access Kolibri Studio](#access-kolibri-studio)
-
-## Install WSL and Ubuntu
+## Preparing WSL and Ubuntu
+Before setting up Studio, you will need to prepare your WSL environment. This includes installing WSL and Ubuntu (if you haven't already), and setting up your Linux username and password.
1. **Enable WSL**: Open PowerShell as Administrator and run the following command to enable WSL:
@@ -35,7 +21,7 @@ This guide will walk you through setting up Kolibri Studio for local development
wsl --set-default-version 2
```
-## Update and Upgrade Packages
+### Update and Upgrade Packages
Open your WSL terminal and update the package lists:
@@ -44,92 +30,7 @@ sudo apt update
sudo apt upgrade -y
```
-Fork the repo and clone it by running the following command:
-
-```sh
-git clone https://github.com/$USERNAME/studio.git
-```
-
-Replace `$USERNAME` with your GitHub username.
-
-## Install Prerequisites
-For detailed instructions on installing and configuring Volta, pyenv, and pyenv-virtualenv, please see the [Prerequisites](./local_dev_host.md#prerequisites) section in our Local Development with host guide.
-
-## Install System Dependencies and Services
-
-Studio requires some background services to be running:
-
-* Minio - a local S3 storage emulation
-* PostgreSQL - a relational database
-* Redis - a fast key/value store useful for caching
-
-Install the necessary system dependencies and services:
-
-```sh
-sudo apt-get update
-sudo apt-get install -y python-tk postgresql-server-dev-all postgresql-contrib postgresql-client postgresql-12 ffmpeg libmagickwand-dev redis-server wkhtmltopdf
-```
-
-### Install MinIO
-
-Download and install MinIO:
-
-```sh
-mkdir -p ~/bin
-wget https://dl.minio.io/server/minio/release/linux-amd64/minio -O ~/bin/minio
-chmod +x ~/bin/minio
-```
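
A quick way to confirm the binary works is to start it by hand with Studio's development credentials (the same values used by the `minio` script that this diff removes from `package.json`). This is an optional sanity check, not a required step:

```sh
# Start MinIO manually with Studio's development credentials,
# storing data in ~/.minio_data/ (press Ctrl+C to stop it).
MINIO_ACCESS_KEY=development MINIO_SECRET_KEY=development ~/bin/minio server ~/.minio_data/
```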
-
-## Set Up the Database
-
-### Start PostgreSQL Service
-
-Make sure PostgreSQL is running:
-
-```sh
-sudo service postgresql start
-```
-
-### Create Database and User
-
-Switch to the `postgres` user and start the PostgreSQL client:
-
-```sh
-sudo su postgres
-psql
-```
-
-Run the following SQL commands:
-
-```sql
-CREATE USER learningequality WITH NOSUPERUSER INHERIT NOCREATEROLE CREATEDB LOGIN NOREPLICATION NOBYPASSRLS PASSWORD 'kolibri';
-CREATE DATABASE "kolibri-studio" WITH TEMPLATE = template0 ENCODING = "UTF8" OWNER = "learningequality";
-```
-
-Exit the PostgreSQL client and return to your own user:
-
-```sh
-\q
-exit
-```
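
Back in your own shell, you can optionally confirm that the new role and database are usable. This is a minimal check, and it assumes PostgreSQL accepts password-authenticated TCP connections on localhost:

```sh
# Should print a single row containing "1" if the role, password, and database were created correctly.
PGPASSWORD=kolibri psql -h 127.0.0.1 -U learningequality -d kolibri-studio -c 'SELECT 1;'
```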
-
-## Set Up Python Virtual Environment
-For complete instructions on installing Python 3.10.13, creating and activating the virtual environment, and installing Studio’s Python dependencies, please refer to the [Build Your Python Virtual Environment](./local_dev_host.md#build-your-python-virtual-environment) section in our Local Development with host guide.
-
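For orientation only, a typical flow looks roughly like the sketch below. It assumes pyenv and pyenv-virtualenv are already installed, that you are inside the cloned `studio` directory, and that the requirements files are `requirements.txt` and `requirements-dev.txt`; the virtualenv name is arbitrary. The linked guide remains the authoritative reference.

```sh
# Rough sketch of the Python environment setup (illustrative, not authoritative).
pyenv install 3.10.13                    # the Python version Studio targets
pyenv virtualenv 3.10.13 studio-py3.10   # create a virtualenv for Studio (name is arbitrary)
pyenv activate studio-py3.10             # activate it
pip install -r requirements.txt -r requirements-dev.txt  # install Studio's Python dependencies
```
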
-### Note about psycopg2
-
-The packages `postgresql-12`, `postgresql-contrib`, and `postgresql-server-dev-all` are required to build the `psycopg2` Python driver.
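
If the `psycopg2` build still fails, a quick diagnostic is to check that `pg_config` (shipped by the `postgresql-server-dev-*` packages and used during the build) is available; this assumes the packages above installed cleanly:

```sh
# If this prints a version, the PostgreSQL headers psycopg2 needs are in place.
pg_config --version
```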
-
-## Install Frontend Dependencies
-
-For guidance on installing Node 18.X, pnpm, and all required frontend dependencies, as well as running the services, initializing Studio, and running the development server, please refer to the [Install Frontend Dependencies](./local_dev_host.md#install-frontend-dependencies) section in our Local Development with host guide.
-
-## Access Kolibri Studio
-
-Once the development server has finished building the frontend (this can take a few minutes), you can sign in with the account created by the `pnpm run devsetup` command:
-
-* URL: http://localhost:8080/accounts/login/
-* Username: `a@a.com`
-* Password: `a`
+You're now ready to fork the Kolibri Studio repository and set up your development environment using the [local development instructions](./local_dev.md).
## Contributing to the Codebase with Visual Studio Code and WSL
@@ -176,17 +77,17 @@ Now that you have your project open in VS Code, you can run the same commands yo
3. **Run the Services**:
```sh
- pnpm run services
+ make devrun-services
```
4. **Initialize the Studio**:
```sh
- pnpm run devsetup
+ make devrun-setup
```
5. **Start the Development Server**:
```sh
- pnpm run devserver:hot
+ make devrun-server
```
By following these steps, you can set up a productive development environment in VS Code with WSL and start contributing to the Kolibri Studio codebase.
diff --git a/package.json b/package.json
index 1de555a706..d13e294188 100644
--- a/package.json
+++ b/package.json
@@ -13,24 +13,12 @@
"combineprofiles": "node ./node_modules/kolibri-tools/lib/combineStringProfiles.js ./contentcuration/locale/en/LC_MESSAGES/profiles/",
"transfercontext": "kolibri-tools i18n-transfer-context --namespace studio --searchPath contentcuration/contentcuration/frontend; pnpm lint-all:fix",
"build": "webpack --env prod --config webpack.config.js",
- "postgres": "pg_ctl -D /usr/local/var/postgresql@9.6 start || true",
- "redis": "redis-server /usr/local/etc/redis.conf || true",
- "devsetup": "python contentcuration/manage.py setup --settings=contentcuration.dev_settings",
- "devsetup:clean": "python contentcuration/manage.py setup --clean-data-state --settings=contentcuration.dev_settings",
- "services": "npm-run-all -c --parallel --silent celery minio redis postgres",
"test": "jest --config jest_config/jest.conf.js",
"build:dev": "webpack serve --env dev --config webpack.config.js --progress",
"build:dev:hot": "pnpm run build:dev --hot --env hot",
"test-jest:dev": "pnpm run test-jest --watch",
"test-jest": "pnpm run test",
"test-jest:debug": "node --inspect node_modules/.bin/jest --runInBand --watch",
- "minio": "MINIO_API_CORS_ALLOW_ORIGIN='http://localhost:8080,http://127.0.0.1:8080' MINIO_ACCESS_KEY=development MINIO_SECRET_KEY=development minio server ~/.minio_data/ || true",
- "runserver": "cd contentcuration && python manage.py runserver --settings=contentcuration.dev_settings 0.0.0.0:8080",
- "devserver": "npm-run-all --parallel build:dev runserver",
- "devserver:hot": "npm-run-all --parallel build:dev:hot runserver",
- "devserver-hot": "pnpm run devserver:hot",
- "devshell": "cd contentcuration && python manage.py shell --settings=contentcuration.dev_settings",
- "celery": "(cd contentcuration && DJANGO_SETTINGS_MODULE=contentcuration.dev_settings celery -A contentcuration worker --without-mingle --without-gossip -c 1 -l info) || true",
"storybook": "start-storybook",
"storybook:debug": "start-storybook --debug-webpack",
"storybook:build": "build-storybook",
diff --git a/requirements-dev.in b/requirements-dev.in
index a21653e2dd..e22ba298fb 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -9,3 +9,4 @@ pre-commit==4.2.0
nodeenv
pip-tools==7.4.1
drf-yasg==1.21.10
+tqdm
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ad0b5b5815..ef1d0adfdd 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -109,6 +109,8 @@ tomli==1.2.3
# build
# pip-tools
# pytest
+tqdm==4.67.1
+ # via -r requirements-dev.in
uritemplate==3.0.1
# via drf-yasg
virtualenv==20.26.6
diff --git a/webpack.config.js b/webpack.config.js
index d07dfcf483..685f62800d 100644
--- a/webpack.config.js
+++ b/webpack.config.js
@@ -16,7 +16,12 @@ const WebpackRTLPlugin = require('kolibri-tools/lib/webpackRtlPlugin');
const { InjectManifest } = require('workbox-webpack-plugin');
-// Function to detect if running in WSL
+const DEFAULT_WEBPACK_DEV_HOST = '127.0.0.1';
+
+/**
+ * Function to detect if running in WSL
+ * @return {boolean}
+ */
function isWSL() {
try {
const version = fs.readFileSync('/proc/version', 'utf8');
@@ -26,14 +31,24 @@ function isWSL() {
}
}
-// Function to get WSL IP address
-function getWSLIP() {
+/**
+ * Get the host for the webpack dev server.
+ * @return {string}
+ */
+function getWebpackDevHost() {
+ if (process.env.WEBPACK_DEV_HOST) {
+ return process.env.WEBPACK_DEV_HOST;
+ }
+
+ if (!isWSL()) {
+ return DEFAULT_WEBPACK_DEV_HOST;
+ }
+
try {
- const ip = execSync('hostname -I').toString().trim().split(' ')[0];
- return ip;
+ return execSync('hostname -I').toString().trim().split(' ')[0];
} catch (err) {
console.warn('Failed to get WSL IP address:', err);
- return '127.0.0.1';
+ return DEFAULT_WEBPACK_DEV_HOST;
}
}
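
With this refactor, the dev-server host can be overridden explicitly instead of relying solely on WSL auto-detection. As a usage sketch (the IP below is only a placeholder), the override could look like:

```bash
# The value is picked up by getWebpackDevHost() and flows into both devServerHost and devPublicPath.
WEBPACK_DEV_HOST=192.168.1.50 pnpm run build:dev
```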
@@ -60,11 +75,8 @@ module.exports = (env = {}) => {
const pnpmNodeModules = path.join(rootDir, 'node_modules', '.pnpm', 'node_modules');
// Determine the appropriate dev server host and public path based on environment
- const isWSLEnvironment = isWSL();
- const devServerHost = isWSLEnvironment ? '0.0.0.0' : '127.0.0.1';
- const devPublicPath = isWSLEnvironment ?
- `http://${getWSLIP()}:4000/dist/` :
- 'http://127.0.0.1:4000/dist/';
+ const devServerHost = getWebpackDevHost();
+ const devPublicPath = `http://${devServerHost}:4000/dist/`;
const workboxPlugin = new InjectManifest({
swSrc: path.resolve(srcDir, 'serviceWorker/index.js'),