-
Notifications
You must be signed in to change notification settings - Fork 2
test(tes,wes): add comprehensive unit tests #17
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
de59af2
879a84a
ddaa0de
75ee3a5
29f774d
1300eb3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,52 +1,43 @@ | ||
| from pydantic import ValidationError | ||
| from .abstract_converter import AbstractConverter | ||
| from .utils import convert_to_iso8601 | ||
| from ..models import TESData | ||
| from ..validators import validate_wrroc_tes | ||
|
|
||
| class TESConverter(AbstractConverter): | ||
|
|
||
| def convert_to_wrroc(self, tes_data): | ||
| # Validate and extract data with defaults | ||
| id = tes_data.get("id", "") | ||
| name = tes_data.get("name", "") | ||
| description = tes_data.get("description", "") | ||
| executors = tes_data.get("executors", [{}]) | ||
| inputs = tes_data.get("inputs", []) | ||
| outputs = tes_data.get("outputs", []) | ||
| creation_time = tes_data.get("creation_time", "") | ||
| end_time = tes_data.get("logs", [{}])[0].get("end_time", "") # Corrected to fetch from logs | ||
| def convert_to_wrroc(self, tes_data: dict) -> dict: | ||
| try: | ||
| validated_tes_data = TESData(**tes_data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid TES data: {e}") | ||
|
|
||
| # Convert to WRROC | ||
| wrroc_data = { | ||
| "@id": id, | ||
| "name": name, | ||
| "description": description, | ||
| "instrument": executors[0].get("image", None) if executors else None, | ||
| "object": [{"@id": input.get("url", ""), "name": input.get("path", "")} for input in inputs], | ||
| "result": [{"@id": output.get("url", ""), "name": output.get("path", "")} for output in outputs], | ||
| "startTime": convert_to_iso8601(creation_time), | ||
| "endTime": convert_to_iso8601(end_time), | ||
| "@id": validated_tes_data.id, | ||
| "name": validated_tes_data.name, | ||
| "description": validated_tes_data.description, | ||
| "instrument": validated_tes_data.executors[0].image if validated_tes_data.executors else None, | ||
| "object": [{"@id": input.url, "name": input.path} for input in validated_tes_data.inputs], | ||
| "result": [{"@id": output.url, "name": output.path} for output in validated_tes_data.outputs], | ||
| "startTime": convert_to_iso8601(validated_tes_data.creation_time), | ||
| "endTime": convert_to_iso8601(validated_tes_data.logs[0].end_time) if validated_tes_data.logs else None, | ||
| } | ||
| return wrroc_data | ||
|
|
||
| def convert_from_wrroc(self, wrroc_data): | ||
| # Validate and extract data with defaults | ||
| id = wrroc_data.get("@id", "") | ||
| name = wrroc_data.get("name", "") | ||
| description = wrroc_data.get("description", "") | ||
| instrument = wrroc_data.get("instrument", "") | ||
| object_data = wrroc_data.get("object", []) | ||
| result_data = wrroc_data.get("result", []) | ||
| start_time = wrroc_data.get("startTime", "") | ||
| end_time = wrroc_data.get("endTime", "") | ||
| def convert_from_wrroc(self, data: dict) -> dict: | ||
| try: | ||
| data_validated = validate_wrroc_tes(data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WRROC data: {e}") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In such cases, it is good practice to chain exceptions, i.e., instead of except Exception as exc:
raise Exception(f"My own text: {exc}")do except Exception as exc:
raise Exception("My own text") from excPlease address this in a future PR for all these cases (but not in this PR).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will address exception chaining in all relevant cases in a future PR to follow best practices as you suggested.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay. Please create an issue for that and then close this conversation. |
||
|
|
||
| # Convert from WRROC to TES | ||
| tes_data = { | ||
| "id": id, | ||
| "name": name, | ||
| "description": description, | ||
| "executors": [{"image": instrument}], | ||
| "inputs": [{"url": obj.get("@id", ""), "path": obj.get("name", "")} for obj in object_data], | ||
| "outputs": [{"url": res.get("@id", ""), "path": res.get("name", "")} for res in result_data], | ||
| "creation_time": start_time, | ||
| "logs": [{"end_time": end_time}], # Added to logs | ||
| "id": data_validated.id, | ||
| "name": data_validated.name, | ||
| "description": data_validated.description, | ||
| "executors": [{"image": data_validated.instrument}], | ||
| "inputs": [{"url": obj.id, "path": obj.name} for obj in data_validated.object], | ||
| "outputs": [{"url": res.id, "path": res.name} for res in data_validated.result], | ||
| "creation_time": data_validated.startTime, | ||
| "logs": [{"end_time": data_validated.endTime}], | ||
| } | ||
| return tes_data | ||
| return tes_data | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,46 +1,43 @@ | ||
| from pydantic import ValidationError | ||
| from .abstract_converter import AbstractConverter | ||
| from ..models import WESData | ||
| from .utils import convert_to_iso8601 | ||
| from ..validators import validate_wrroc_wes | ||
|
|
||
| class WESConverter(AbstractConverter): | ||
|
|
||
| def convert_to_wrroc(self, wes_data): | ||
| # Validate and extract data with defaults | ||
| run_id = wes_data.get("run_id", "") | ||
| name = wes_data.get("run_log", {}).get("name", "") | ||
| state = wes_data.get("state", "") | ||
| start_time = wes_data.get("run_log", {}).get("start_time", "") | ||
| end_time = wes_data.get("run_log", {}).get("end_time", "") | ||
| outputs = wes_data.get("outputs", {}) | ||
| def convert_to_wrroc(self, wes_data: dict) -> dict: | ||
| try: | ||
| wes_model = WESData(**wes_data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WES data: {e}") | ||
| outputs = wes_model.outputs | ||
|
|
||
| # Convert to WRROC | ||
| wrroc_data = { | ||
| "@id": run_id, | ||
| "name": name, | ||
| "status": state, | ||
| "startTime": convert_to_iso8601(start_time), | ||
| "endTime": convert_to_iso8601(end_time), | ||
| "result": [{"@id": output.get("location", ""), "name": output.get("name", "")} for output in outputs], | ||
| "@id": wes_model.run_id, | ||
| "name": wes_model.run_log.name, | ||
| "status": wes_model.state, | ||
| "startTime": convert_to_iso8601(wes_model.run_log.start_time), | ||
| "endTime": convert_to_iso8601(wes_model.run_log.end_time), | ||
| "result": [{"@id": output.location, "name": output.name} for output in outputs], | ||
| } | ||
| return wrroc_data | ||
|
|
||
| def convert_from_wrroc(self, wrroc_data): | ||
| # Validate and extract data with defaults | ||
| run_id = wrroc_data.get("@id", "") | ||
| name = wrroc_data.get("name", "") | ||
| start_time = wrroc_data.get("startTime", "") | ||
| end_time = wrroc_data.get("endTime", "") | ||
| state = wrroc_data.get("status", "") | ||
| result_data = wrroc_data.get("result", []) | ||
|
|
||
| # Convert from WRROC to WES | ||
|
|
||
| def convert_from_wrroc(self, data: dict) -> dict: | ||
| try: | ||
| data_validated = validate_wrroc_wes(data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WRROC data: {e}") | ||
|
|
||
| wes_data = { | ||
| "run_id": run_id, | ||
| "run_id": data_validated.id, | ||
| "run_log": { | ||
| "name": name, | ||
| "start_time": start_time, | ||
| "end_time": end_time, | ||
| "name": data_validated.name, | ||
| "start_time": data_validated.startTime, | ||
| "end_time": data_validated.endTime, | ||
| }, | ||
| "state": state, | ||
| "outputs": [{"location": res.get("@id", ""), "name": res.get("name", "")} for res in result_data], | ||
| "state": data_validated.status, | ||
| "outputs": [{"location": res.id, "name": res.name} for res in data_validated.result], | ||
| } | ||
| return wes_data | ||
| return wes_data |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When running the tool with WRROC data as input, you probably want to first validate that the input is indeed a valid WRROC entity. For that, you could use a model In a next step, you then want to validate that the WRROC data has all the fields required for the requested conversion. This code is specific to the converter, i.e., you may need different data for the conversion to TES than you need for the conversion to WES. For that you could define models
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will implement an external WRROC validator function to ensure WRROC data validity and avoid code repetition. Additionally, I will define separate models for WRROCDataTES and WRROCDataWES for specific validation in the respective converters.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please double check your
I think the models and the corresponding validators (including unit tests etc.) should be a single PR. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,105 @@ | ||
| from pydantic import BaseModel | ||
| from typing import Optional | ||
|
|
||
| class Executor(BaseModel): | ||
| image: str | ||
| command: list[str] | ||
|
|
||
| class TESInputs(BaseModel): | ||
| url: str | ||
| path: str | ||
|
|
||
| class TESOutputs(BaseModel): | ||
| url: str | ||
| path: str | ||
|
|
||
| class TESLogs(BaseModel): | ||
| end_time: Optional[str] = None | ||
|
|
||
| class TESData(BaseModel): | ||
| id: str | ||
| name: str | ||
| description: Optional[str] = "" | ||
| executors: list[Executor] | ||
| inputs: list[TESInputs] | ||
| outputs: list[TESOutputs] | ||
| creation_time: str | ||
| logs: list[TESLogs] | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
|
||
| class WESRunLog(BaseModel): | ||
| name: Optional[str] = None | ||
| start_time: Optional[str] = None | ||
| end_time: Optional[str] = None | ||
| cmd: Optional[list[str]] = None | ||
| stdout: Optional[str] = None | ||
| stderr: Optional[str] = None | ||
| exit_code: Optional[int] = None | ||
|
|
||
| class WESOutputs(BaseModel): | ||
| location: str | ||
| name: str | ||
|
|
||
| class WESRequest(BaseModel): | ||
| workflow_params: dict[str, str] | ||
| workflow_type: str | ||
| workflow_type_version: str | ||
| tags: Optional[dict[str, str]] = None | ||
|
|
||
| class WESData(BaseModel): | ||
| run_id: str | ||
| request: WESRequest | ||
| state: str | ||
| run_log: WESRunLog | ||
| task_logs: Optional[list[WESRunLog]] = None | ||
| outputs: list[WESOutputs] | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
|
||
| class WRROCInputs(BaseModel): | ||
| id: str | ||
| name: str | ||
|
|
||
| class WRROCOutputs(BaseModel): | ||
| id: str | ||
| name: str | ||
|
|
||
| class WRROCData(BaseModel): | ||
| id: str | ||
| name: str | ||
| description: Optional[str] = "" | ||
| instrument: Optional[str] = None | ||
| object: list[WRROCInputs] | ||
| result: list[WRROCOutputs] | ||
| startTime: Optional[str] = None | ||
| endTime: Optional[str] = None | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
|
||
| class WRROCDataTES(BaseModel): | ||
| id: str | ||
| name: str | ||
| description: Optional[str] = "" | ||
| instrument: Optional[str] = None | ||
| object: list[WRROCInputs] | ||
| result: list[WRROCOutputs] | ||
| startTime: Optional[str] = None | ||
| endTime: Optional[str] = None | ||
|
|
||
| class Config: | ||
| extra = "forbid" | ||
|
|
||
| class WRROCDataWES(BaseModel): | ||
| id: str | ||
| name: str | ||
| status: str | ||
| result: list[WRROCOutputs] | ||
| startTime: Optional[str] = None | ||
| endTime: Optional[str] = None | ||
|
|
||
| class Config: | ||
| extra = "forbid" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| from pydantic import ValidationError | ||
| from .models import WRROCData, WRROCDataTES, WRROCDataWES | ||
|
|
||
| def validate_wrroc(data: dict) -> WRROCData: | ||
| """ | ||
| Validate that the input data is a valid WRROC entity. | ||
|
|
||
| Args: | ||
| data (dict): The input data to validate. | ||
|
|
||
| Returns: | ||
| WRROCData: The validated WRROC data. | ||
|
|
||
| Raises: | ||
| ValueError: If the data is not valid WRROC data. | ||
| """ | ||
| try: | ||
| return WRROCData(**data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WRROC data: {e}") | ||
|
Comment on lines
+4
to
+20
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't currently seem to be used. If you want (or need) to include code to filter out additional properties not needed by WES/TES, you also need to put that code somewhere. Two options:
If you do need to filter out additional properties, please try to abstract and write a function for that where you somehow pass the properties that you want to keep.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An additional note about the additional properties: I really don't think we need to filter those out - they don't hurt us. We should only validate what we need to validate, and that is that everything we need is available and of the correct/expected type.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please work on the models first (see comments below and above), then rewrite your validators to account for what is written in those comments (especially the one below). But as I said, please do so in a clean, fresh PR that only changes/adds the 3 mentioned files. Make sure the code adheres to all good standards (docstrings, typing, error chaining etc), but only for the new code - the old code will be changed later. |
||
|
|
||
| def validate_wrroc_tes(data: dict) -> WRROCDataTES: | ||
| """ | ||
| Validate that the input data is a valid WRROC entity for TES. | ||
|
|
||
| Args: | ||
| data (dict): The input data to validate. | ||
|
|
||
| Returns: | ||
| WRROCDataTES: The validated WRROC data for TES. | ||
|
|
||
| Raises: | ||
| ValueError: If the data is not valid WRROC data for TES. | ||
| """ | ||
| try: | ||
| return WRROCDataTES(**data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WRROC data: {e}") | ||
|
|
||
| def validate_wrroc_wes(data: dict) -> WRROCDataWES: | ||
| """ | ||
| Validate that the input data is a valid WRROC entity for WES. | ||
|
|
||
| Args: | ||
| data (dict): The input data to validate. | ||
|
|
||
| Returns: | ||
| WRROCDataWES: The validated WRROC data for WES. | ||
|
|
||
| Raises: | ||
| ValueError: If the data is not valid WRROC data for WES. | ||
| """ | ||
| try: | ||
| return WRROCDataWES(**data) | ||
| except ValidationError as e: | ||
| raise ValueError(f"Invalid WRROC data: {e}") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't it be expressive enough to call the arg just
datain all cases, instead oftes_data,wes_dataandwrroc_data? The input "type" is already included in the function names.Also, please add the following in your next PR for ALL methods/functions (should have already been there, I didn't pay attention during review):
See our contributor guide for details.
Please do not add it in this PR, otherwise it will take forever to review and merge it. But make sure it is the very next PR, as it is very important to us to have well documented and typed code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the feedback. I will rename the arguments to data in all cases for consistency. I will also ensure that all methods/functions include type hints and Google-style docstrings in the next PR as per the contributor guide. I understand the importance of well-documented and typed code.