Skip to content

Commit 70b73c4

Browse files
authored
backpopulate edx files by learning_resource_id (#2174)
1 parent fab0c32 commit 70b73c4

File tree

6 files changed: +153 additions, -29 deletions

learning_resources/management/commands/backpopulate_mit_edx_files.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,25 @@ def add_arguments(self, parser):
2828
help="Overwrite any existing records",
2929
)
3030

31+
parser.add_argument(
32+
"--resource-ids",
33+
dest="learning_resource_ids",
34+
required=False,
35+
help="If set, backpopulate only the learning resources with these ids",
36+
)
37+
3138
def handle(self, *args, **options): # noqa: ARG002
3239
"""Run Populate MIT edX course run files"""
3340
chunk_size = options["chunk_size"]
41+
resource_ids = (
42+
options["learning_resource_ids"].split(",")
43+
if options["learning_resource_ids"]
44+
else None
45+
)
3446
task = import_all_mit_edx_files.delay(
35-
chunk_size=chunk_size, overwrite=options["force_overwrite"]
47+
chunk_size=chunk_size,
48+
overwrite=options["force_overwrite"],
49+
learning_resource_ids=resource_ids,
3650
)
3751
self.stdout.write(f"Started task {task} to get MIT edX course run file data")
3852
self.stdout.write("Waiting on task...")

learning_resources/management/commands/backpopulate_mitxonline_files.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,25 @@ def add_arguments(self, parser):
2828
help="Overwrite any existing records",
2929
)
3030

31+
parser.add_argument(
32+
"--resource-ids",
33+
dest="learning_resource_ids",
34+
required=False,
35+
help="If set, backpopulate only the learning resources with these ids",
36+
)
37+
3138
def handle(self, *args, **options): # noqa: ARG002
3239
"""Run Populate MITX Online course run files"""
3340
chunk_size = options["chunk_size"]
41+
resource_ids = (
42+
options["learning_resource_ids"].split(",")
43+
if options["learning_resource_ids"]
44+
else None
45+
)
3446
task = import_all_mitxonline_files.delay(
35-
chunk_size=chunk_size, overwrite=options["force_overwrite"]
47+
chunk_size=chunk_size,
48+
overwrite=options["force_overwrite"],
49+
learning_resource_ids=resource_ids,
3650
)
3751
self.stdout.write(
3852
f"Started task {task} to get MITX Online course run file data"

learning_resources/management/commands/backpopulate_oll_files.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,28 @@ def add_arguments(self, parser):
2828
help="Overwrite any existing records",
2929
)
3030

31+
parser.add_argument(
32+
"--resource-ids",
33+
dest="learning_resource_ids",
34+
required=False,
35+
help="If set, backpopulate only the learning resources with these ids",
36+
)
37+
3138
def handle(self, *args, **options): # noqa: ARG002
3239
"""Run Populate OLL course run files"""
3340
if not settings.OLL_LEARNING_COURSE_BUCKET_NAME:
3441
self.stderr.write("OLL contentfile settings not configured, skipping")
3542
return
3643
chunk_size = options["chunk_size"]
44+
resource_ids = (
45+
options["learning_resource_ids"].split(",")
46+
if options["learning_resource_ids"]
47+
else None
48+
)
3749
task = import_all_oll_files.delay(
38-
chunk_size=chunk_size, overwrite=options["force_overwrite"]
50+
chunk_size=chunk_size,
51+
overwrite=options["force_overwrite"],
52+
learning_resource_ids=resource_ids,
3953
)
4054
self.stdout.write(f"Started task {task} to get OLL course run file data")
4155
self.stdout.write("Waiting on task...")

learning_resources/management/commands/backpopulate_xpro_files.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,25 @@ def add_arguments(self, parser):
2727
action="store_true",
2828
help="Overwrite any existing records",
2929
)
30+
parser.add_argument(
31+
"--resource-ids",
32+
dest="learning_resource_ids",
33+
required=False,
34+
help="If set, backpopulate only the learning resources with these ids",
35+
)
3036

3137
def handle(self, *args, **options): # noqa: ARG002
3238
"""Run Populate xpro course run files"""
3339
chunk_size = options["chunk_size"]
40+
resource_ids = (
41+
options["learning_resource_ids"].split(",")
42+
if options["learning_resource_ids"]
43+
else None
44+
)
3445
task = import_all_xpro_files.delay(
35-
chunk_size=chunk_size, overwrite=options["force_overwrite"]
46+
chunk_size=chunk_size,
47+
overwrite=options["force_overwrite"],
48+
learning_resource_ids=resource_ids,
3649
)
3750
self.stdout.write(f"Started task {task} to get xpro course run file data")
3851
self.stdout.write("Waiting on task...")

learning_resources/tasks.py

Lines changed: 42 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -141,13 +141,15 @@ def get_content_files(
141141
clear_search_cache()
142142

143143

144-
def get_content_tasks(
144+
def get_content_tasks( # noqa: PLR0913
145145
etl_source: str,
146146
*,
147147
chunk_size: int | None = None,
148148
s3_prefix: str | None = None,
149149
override_base_prefix: bool = False,
150150
overwrite: bool = False,
151+
learning_resource_ids: list[int] | None = None,
152+
# Updated parameter
151153
) -> celery.group:
152154
"""
153155
Return a list of grouped celery tasks for indexing edx content
@@ -159,39 +161,54 @@ def get_content_tasks(
159161
archive_keys = get_most_recent_course_archives(
160162
etl_source, s3_prefix=s3_prefix, override_base_prefix=override_base_prefix
161163
)
164+
165+
if learning_resource_ids:
166+
learning_resources = LearningResource.objects.filter(
167+
id__in=learning_resource_ids, etl_source=etl_source
168+
).values_list("id", flat=True)
169+
else:
170+
learning_resources = (
171+
LearningResource.objects.filter(
172+
published=True, course__isnull=False, etl_source=etl_source
173+
)
174+
.exclude(readable_id__in=blocklisted_ids)
175+
.order_by("-id")
176+
.values_list("id", flat=True)
177+
)
178+
162179
return celery.group(
163180
[
164181
get_content_files.si(
165182
ids, etl_source, archive_keys, s3_prefix=s3_prefix, overwrite=overwrite
166183
)
167184
for ids in chunks(
168-
LearningResource.objects.filter(
169-
published=True, course__isnull=False, etl_source=etl_source
170-
)
171-
.exclude(readable_id__in=blocklisted_ids)
172-
.order_by("-id")
173-
.values_list("id", flat=True),
185+
learning_resources,
174186
chunk_size=chunk_size,
175187
)
176188
]
177189
)
178190

179191

180192
@app.task(bind=True)
181-
def import_all_mit_edx_files(self, *, chunk_size=None, overwrite=False):
193+
def import_all_mit_edx_files(
194+
self, *, chunk_size=None, overwrite=False, learning_resource_ids=None
195+
):
182196
"""Ingest MIT edX files from an S3 bucket"""
183197
return self.replace(
184198
get_content_tasks(
185199
ETLSource.mit_edx.name,
186200
chunk_size=chunk_size,
187201
s3_prefix=settings.EDX_LEARNING_COURSE_BUCKET_PREFIX,
188202
overwrite=overwrite,
203+
learning_resource_ids=learning_resource_ids,
189204
)
190205
)
191206

192207

193208
@app.task(bind=True)
194-
def import_all_oll_files(self, *, chunk_size=None, overwrite=False):
209+
def import_all_oll_files(
210+
self, *, chunk_size=None, overwrite=False, learning_resource_ids=None
211+
):
195212
"""Ingest MIT edX files from an S3 bucket"""
196213
return self.replace(
197214
get_content_tasks(
@@ -200,27 +217,39 @@ def import_all_oll_files(self, *, chunk_size=None, overwrite=False):
200217
s3_prefix=settings.OLL_LEARNING_COURSE_BUCKET_PREFIX,
201218
override_base_prefix=True,
202219
overwrite=overwrite,
220+
learning_resource_ids=learning_resource_ids,
203221
)
204222
)
205223

206224

207225
@app.task(bind=True)
208-
def import_all_mitxonline_files(self, *, chunk_size=None, overwrite=False):
226+
def import_all_mitxonline_files(
227+
self, *, chunk_size=None, overwrite=False, learning_resource_ids=None
228+
):
209229
"""Ingest MITx Online files from an S3 bucket"""
230+
210231
return self.replace(
211232
get_content_tasks(
212-
ETLSource.mitxonline.name, chunk_size=chunk_size, overwrite=overwrite
233+
ETLSource.mitxonline.name,
234+
chunk_size=chunk_size,
235+
overwrite=overwrite,
236+
learning_resource_ids=learning_resource_ids,
213237
)
214238
)
215239

216240

217241
@app.task(bind=True)
218-
def import_all_xpro_files(self, *, chunk_size=None, overwrite=False):
242+
def import_all_xpro_files(
243+
self, *, chunk_size=None, overwrite=False, learning_resource_ids=None
244+
):
219245
"""Ingest xPRO OLX files from an S3 bucket"""
220246

221247
return self.replace(
222248
get_content_tasks(
223-
ETLSource.xpro.name, chunk_size=chunk_size, overwrite=overwrite
249+
ETLSource.xpro.name,
250+
chunk_size=chunk_size,
251+
overwrite=overwrite,
252+
learning_resource_ids=learning_resource_ids,
224253
)
225254
)
226255

learning_resources/tasks_test.py

Lines changed: 52 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -145,12 +145,15 @@ def test_import_all_mit_edx_files(settings, mocker, mocked_celery, mock_blocklis
145145
"learning_resources.tasks.get_content_tasks", autospec=True
146146
)
147147
with pytest.raises(mocked_celery.replace_exception_class):
148-
tasks.import_all_mit_edx_files.delay(chunk_size=4, overwrite=False)
148+
tasks.import_all_mit_edx_files.delay(
149+
chunk_size=4, overwrite=False, learning_resource_ids=[1]
150+
)
149151
get_content_tasks_mock.assert_called_once_with(
150152
ETLSource.mit_edx.name,
151153
chunk_size=4,
152154
s3_prefix="simeon-mitx-course-tarballs",
153155
overwrite=False,
156+
learning_resource_ids=[1],
154157
)
155158

156159

@@ -163,9 +166,14 @@ def test_import_all_mitxonline_files(settings, mocker, mocked_celery, mock_block
163166
)
164167

165168
with pytest.raises(mocked_celery.replace_exception_class):
166-
tasks.import_all_mitxonline_files.delay(chunk_size=3, overwrite=True)
169+
tasks.import_all_mitxonline_files.delay(
170+
chunk_size=3, overwrite=True, learning_resource_ids=None
171+
)
167172
get_content_tasks_mock.assert_called_once_with(
168-
PlatformType.mitxonline.name, chunk_size=3, overwrite=True
173+
PlatformType.mitxonline.name,
174+
chunk_size=3,
175+
overwrite=True,
176+
learning_resource_ids=None,
169177
)
170178

171179

@@ -177,9 +185,9 @@ def test_import_all_xpro_files(settings, mocker, mocked_celery, mock_blocklist):
177185
"learning_resources.tasks.get_content_tasks", autospec=True
178186
)
179187
with pytest.raises(mocked_celery.replace_exception_class):
180-
tasks.import_all_xpro_files.delay(chunk_size=3)
188+
tasks.import_all_xpro_files.delay(chunk_size=3, learning_resource_ids=[1])
181189
get_content_tasks_mock.assert_called_once_with(
182-
PlatformType.xpro.name, chunk_size=3, overwrite=False
190+
PlatformType.xpro.name, chunk_size=3, overwrite=False, learning_resource_ids=[1]
183191
)
184192

185193

@@ -198,11 +206,19 @@ def test_import_all_oll_files(settings, mocker, mocked_celery, mock_blocklist):
198206
s3_prefix="open-learning-library/courses",
199207
override_base_prefix=True,
200208
overwrite=False,
209+
learning_resource_ids=None,
201210
)
202211

203212

204213
@mock_aws
205-
def test_get_content_tasks(settings, mocker, mocked_celery, mock_xpro_learning_bucket):
214+
@pytest.mark.parametrize("with_learning_resource_ids", [True, False])
215+
def test_get_content_tasks(
216+
settings,
217+
mocker,
218+
mocked_celery,
219+
mock_xpro_learning_bucket,
220+
with_learning_resource_ids,
221+
):
206222
"""Test that get_content_tasks calls get_content_files with the correct args"""
207223
mock_get_content_files = mocker.patch(
208224
"learning_resources.tasks.get_content_files.si"
@@ -216,9 +232,23 @@ def test_get_content_tasks(settings, mocker, mocked_celery, mock_xpro_learning_b
216232
settings.LEARNING_COURSE_ITERATOR_CHUNK_SIZE = 2
217233
etl_source = ETLSource.xpro.name
218234
platform = PlatformType.xpro.name
219-
factories.CourseFactory.create_batch(3, etl_source=etl_source, platform=platform)
235+
courses = factories.CourseFactory.create_batch(
236+
3, etl_source=etl_source, platform=platform
237+
)
238+
if with_learning_resource_ids:
239+
learning_resource_ids = [
240+
courses[0].learning_resource_id,
241+
courses[1].learning_resource_id,
242+
]
243+
else:
244+
learning_resource_ids = None
220245
s3_prefix = "course-prefix"
221-
tasks.get_content_tasks(etl_source, s3_prefix=s3_prefix, overwrite=True)
246+
tasks.get_content_tasks(
247+
etl_source,
248+
s3_prefix=s3_prefix,
249+
overwrite=True,
250+
learning_resource_ids=learning_resource_ids,
251+
)
222252
assert mocked_celery.group.call_count == 1
223253
assert (
224254
models.LearningResource.objects.filter(
@@ -230,10 +260,20 @@ def test_get_content_tasks(settings, mocker, mocked_celery, mock_xpro_learning_b
230260
.order_by("id")
231261
.values_list("id", flat=True)
232262
).count() == 3
233-
assert mock_get_content_files.call_count == 2
234-
mock_get_content_files.assert_any_call(
235-
ANY, etl_source, ["foo.tar.gz"], s3_prefix=s3_prefix, overwrite=True
236-
)
263+
if with_learning_resource_ids:
264+
assert mock_get_content_files.call_count == 1
265+
mock_get_content_files.assert_any_call(
266+
[learning_resource_ids[0], learning_resource_ids[1]],
267+
etl_source,
268+
["foo.tar.gz"],
269+
s3_prefix=s3_prefix,
270+
overwrite=True,
271+
)
272+
else:
273+
assert mock_get_content_files.call_count == 2
274+
mock_get_content_files.assert_any_call(
275+
ANY, etl_source, ["foo.tar.gz"], s3_prefix=s3_prefix, overwrite=True
276+
)
237277

238278

239279
def test_get_content_files(mocker, mock_xpro_learning_bucket):

0 commit comments

Comments (0)