Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions learning_resources/etl/loaders_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,35 @@ def test_load_course( # noqa: PLR0913, PLR0912, PLR0915
assert getattr(result, key) == value, f"Property {key} should equal {value}"


def test_load_course_content_tags(mock_upsert_tasks):
    """Verify that load_course stores a course's content_tags as resource_tags"""
    expected_tags = ["Program as Course"]
    readable_id = "program-v1:MITx+test"
    mitxonline_platform = LearningResourcePlatformFactory.create(
        code=PlatformType.mitxonline.name
    )
    run_start = now_in_utc()
    # The single run reuses the resource's readable id as its run id.
    single_run = {
        "run_id": readable_id,
        "start_date": run_start,
        "end_date": run_start + timedelta(days=30),
    }
    course_data = {
        "readable_id": readable_id,
        "platform": mitxonline_platform.code,
        "etl_source": ETLSource.mitxonline.name,
        "resource_type": LearningResourceType.course.name,
        "title": "Test Program as Course",
        "image": {"url": "https://www.test.edu/image.jpg"},
        "description": "description",
        "url": "https://test.edu",
        "published": True,
        "content_tags": list(expected_tags),
        "runs": [single_run],
    }

    loaded = load_course(course_data, [], [], config=CourseLoaderConfig(prune=True))

    saved_tag_names = list(loaded.resource_tags.values_list("name", flat=True))
    assert saved_tag_names == expected_tags


def test_load_course_bad_platform(mocker):
"""A bad platform should log an exception and not create the course"""
mock_log = mocker.patch("learning_resources.etl.loaders.log.exception")
Expand Down
128 changes: 127 additions & 1 deletion learning_resources/etl/mitxonline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
transform_price,
transform_topics,
)
from main import features
from main.utils import clean_data, now_in_utc

# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -354,6 +355,113 @@ def _transform_course(course):
}


def transform_program_as_course(program: dict) -> dict:
    """
    Build a course-shaped resource dict from a MITx Online program.

    Intended for programs with display_mode="course", which are ingested as
    LearningResource objects with resource_type=course. The program itself
    supplies the data for the single run attached to the resulting course.

    Args:
        program (dict): program data from MITx Online API

    Returns:
        dict: normalized course data
    """
    # Skip member courses whose title matches the exclusion pattern; the
    # transformed courses are only used here to derive the combined pace.
    eligible_courses = []
    for fetched in _fetch_courses_by_ids(program.get("courses", [])):
        if re.search(EXCLUDE_REGEX, fetched["title"], re.IGNORECASE):
            continue
        eligible_courses.append(fetched)
    member_courses = transform_courses(eligible_courses)

    pace_values = set()
    for member in member_courses:
        pace_values.update(member["pace"])
    pace = sorted(pace_values)

    # Values shared by the run and the course-level payload.
    readable_id = program["readable_id"]
    title = program["title"]
    availability = program.get("availability")
    has_page = bool(parse_page_attribute(program, "page_url"))
    page_url = parse_page_attribute(program, "page_url", is_url=True)
    description = clean_data(parse_page_attribute(program, "description"))

    # A run is current only while the program has a page url.
    if has_page:
        run_status = RunStatus.current.value
    else:
        run_status = RunStatus.archived.value

    program_run = {
        "run_id": readable_id,
        "enrollment_start": _parse_datetime(program.get("enrollment_start")),
        "enrollment_end": _parse_datetime(program.get("enrollment_end")),
        # Fall back to the enrollment start when no start date is given.
        "start_date": _parse_datetime(
            program.get("start_date") or program.get("enrollment_start")
        ),
        "end_date": _parse_datetime(program.get("end_date")),
        "title": title,
        "published": has_page,
        "url": page_url,
        "image": _transform_image(program),
        "description": description,
        "prices": parse_prices(
            program,
            program.get("enrollment_modes", []),
            fully_enrollable=True,
        ),
        "status": run_status,
        "enrollment_modes": program.get("enrollment_modes", []),
        "availability": availability,
        "format": [Format.asynchronous.name],
        "pace": pace,
        "duration": program.get("duration") or "",
        "min_weeks": program.get("min_weeks"),
        "max_weeks": program.get("max_weeks"),
        "time_commitment": program.get("time_commitment") or "",
        "min_weekly_hours": parse_string_to_int(program.get("min_weekly_hours")),
        "max_weekly_hours": parse_string_to_int(program.get("max_weekly_hours")),
    }
    runs = [program_run]

    # Certification is computed before strip_enrollment_modes processes the runs.
    has_certification = parse_certification(OFFERED_BY["code"], runs)
    strip_enrollment_modes(runs)

    if has_certification:
        certification_type = parse_certificate_type(
            program.get("certificate_type", CertificationType.none.name)
        )
    else:
        certification_type = CertificationType.none.name

    # Published requires both a page url and a truthy "live" page attribute.
    is_published = bool(has_page and parse_page_attribute(program, "live"))

    return {
        "readable_id": readable_id,
        "platform": PlatformType.mitxonline.name,
        "etl_source": ETLSource.mitxonline.name,
        "resource_type": LearningResourceType.course.name,
        "title": title,
        "offered_by": OFFERED_BY,
        "topics": transform_topics(program.get("topics", []), OFFERED_BY["code"]),
        "departments": parse_departments(program.get("departments", [])),
        "runs": runs,
        "force_ingest": False,
        "content_tags": ["Program as Course"],
        "course": {
            "course_numbers": generate_course_numbers_json(
                readable_id, is_ocw=False
            ),
        },
        "published": is_published,
        "professional": False,
        "certification": has_certification,
        "certification_type": certification_type,
        "image": _transform_image(program),
        "url": page_url,
        "description": description,
        "availability": availability,
        "format": [Format.asynchronous.name],
        "pace": pace,
    }


def transform_programs_as_courses(programs: list[dict]) -> list[dict]:
    """
    Convert each MITx Online program in a list into a course-shaped dict.

    Args:
        programs (list of dict): programs data (already filtered to those
            that should be ingested as courses)

    Returns:
        list of dict: normalized course data, in the same order as the input
    """
    course_dicts = []
    for program_data in programs:
        course_dicts.append(transform_program_as_course(program_data))
    return course_dicts


def transform_courses(courses):
"""
Transforms a list of courses into our normalized data structure
Expand Down Expand Up @@ -469,9 +577,27 @@ def transform_programs(programs: list[dict]) -> list[dict]:
"published": bool(
parse_page_attribute(program, "page_url")
and parse_page_attribute(program, "live")
), # a program is only considered published if it has a page url
),
"format": [Format.asynchronous.name],
"pace": pace,
"runs": [run],
"courses": courses,
}


def is_program_course(program: dict) -> bool:
    """
    Decide whether a MITx Online program should be ingested as a course.

    A program qualifies only when its display_mode is "course" and the
    "program-to-course" feature flag is enabled. The flag is consulted only
    for programs whose display_mode is "course".

    Args:
        program (dict): program data from MITx Online API

    Returns:
        bool: True if the program should be ingested as a course, False otherwise
    """
    displays_as_course = program.get("display_mode") == "course"
    if not displays_as_course:
        return displays_as_course
    return features.is_enabled("program-to-course", default=False)
Loading
Loading