Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# SPDX-FileCopyrightText: 2025 Jonas Huber <https://github.com/jh-RLI> © Reiner Lemoine Institut
# SPDX-FileCopyrightText: 2025 Jonas Huber <https://github.com/jh-RLI> © Reiner Lemoine Institut
# SPDX-FileCopyrightText: 2025 Jonas Huber <https://github.com/jh-RLI> © Reiner Lemoine Institut # noqa: E501
#
# SPDX-License-Identifier: AGPL-3.0-or-later

Expand All @@ -10,7 +9,7 @@
from rest_framework import serializers

from dataedit.helper import get_readable_table_name
from dataedit.models import Table
from dataedit.models import Dataset, Table
from modelview.models import Energyframework, Energymodel
from oeplatform.settings import URL

Expand Down Expand Up @@ -164,3 +163,38 @@ def validate_dataset(self, value):
raise serializers.ValidationError("Dataset names must be unique.")

return value


class DatasetReadSerializer(serializers.ModelSerializer):
    """Serialize a ``Dataset`` for responses.

    Exposes the dataset's ``uuid``, ``name``, ``metadata`` and ``created_at``.
    """

    class Meta:
        model = Dataset
        fields = [
            "uuid",
            "name",
            "metadata",
            "created_at",
        ]


class DatasetCreateSerializer(serializers.Serializer):
    """Validate the input used to describe a dataset.

    ``name``, ``title`` and ``description`` are mandatory strings; ``at_id``
    is an optional URL.
    """

    # Mandatory descriptive fields.
    name = serializers.CharField()
    title = serializers.CharField()
    description = serializers.CharField()

    # Optional identifier; must be a valid URL when supplied.
    at_id = serializers.URLField(required=False)


class DatasetAssignTablesSerializer(serializers.Serializer):
    """Validate a non-empty list of table references.

    Every entry of ``tables`` is a mapping of string values and has to carry
    both a ``schema`` and a ``name`` key.
    """

    tables = serializers.ListField(
        min_length=1,
        child=serializers.DictField(child=serializers.CharField()),
    )

    def validate_tables(self, value):
        """Reject the list when any entry lacks ``schema`` or ``name``.

        Returns the unchanged ``value`` when all entries are complete and
        raises ``serializers.ValidationError`` otherwise.
        """
        required_keys = {"schema", "name"}
        if any(not required_keys.issubset(entry) for entry in value):
            raise serializers.ValidationError(
                "Each table must have 'schema' and 'name'."
            )
        return value


class DatasetResourceSerializer(serializers.ModelSerializer):
    """Serialize a ``Table`` as a resource entry.

    The related ``schema`` is rendered through ``StringRelatedField`` instead
    of the default related-field representation.
    """

    schema = serializers.StringRelatedField()

    class Meta:
        model = Table
        fields = [
            "id",
            "schema",
            "name",
            "oemetadata",
            "human_readable_name",
        ]
25 changes: 25 additions & 0 deletions api/services/dataset_creation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2025 Jonas Huber <https://github.com/jh-RLI> © Reiner Lemoine Institut # noqa: E501
#
# SPDX-License-Identifier: AGPL-3.0-or-later

from copy import deepcopy
from typing import Any

from oemetadata.v2.v20.example import OEMETADATA_V20_EXAMPLE
from oemetadata.v2.v20.template import OEMETADATA_V20_TEMPLATE


def assemble_dataset_metadata(
    validated_data: dict[str, Any], oemetadata: dict = OEMETADATA_V20_TEMPLATE
) -> dict[str, Any]:
    """Build dataset-level metadata from validated input.

    Args:
        validated_data: Mapping that must contain ``name``, ``title`` and
            ``description``; ``at_id`` is optional.
        oemetadata: Metadata mapping used as the starting point. It is
            deep-copied first, so the passed object (including the default
            template) is never modified.

    Returns:
        A new metadata dict whose ``@context`` comes from the v2.0 example,
        whose ``resources`` list is empty, and whose ``@id``, ``name``,
        ``title`` and ``description`` come from ``validated_data``. ``@id``
        is ``None`` when ``at_id`` was not supplied.

    Raises:
        KeyError: If ``name``, ``title`` or ``description`` is missing from
            ``validated_data``.
    """
    assembled = deepcopy(oemetadata)
    assembled.update(
        {
            # The context value is taken from the example as-is (not copied).
            "@context": OEMETADATA_V20_EXAMPLE["@context"],
            # Start without any resources.
            "resources": [],
            "@id": validated_data.get("at_id"),
            "name": validated_data["name"],
            "title": validated_data["title"],
            "description": validated_data["description"],
        }
    )
    return assembled
176 changes: 176 additions & 0 deletions api/tests/test_datasets_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# SPDX-FileCopyrightText: 2025 Jonas Huber <https://github.com/jh-RLI> © Reiner Lemoine Institut # noqa: E501
#
# SPDX-License-Identifier: AGPL-3.0-or-later

from copy import deepcopy

from oemetadata.latest.template import OEMETADATA_LATEST_TEMPLATE
from rest_framework import status
from rest_framework.test import APITestCase

from dataedit.models import Dataset, Schema, Table


class DatasetAPITests(APITestCase):
    """Exercise the dataset collection, assign-tables and resources endpoints."""

    DATASETS_URL = "/api/v0/datasets/"

    def setUpDatasetMetadata(self, dataset_name: str):
        """Return a template copy named ``dataset_name`` with no resources."""
        dataset_metadata = deepcopy(OEMETADATA_LATEST_TEMPLATE)
        dataset_metadata.update({"name": dataset_name, "resources": []})
        return dataset_metadata

    def setUpResourceMetadata(self, table_name: str):
        """Return a template copy whose first resource is named ``table_name``."""
        resource_metadata = deepcopy(OEMETADATA_LATEST_TEMPLATE)
        first_resource = resource_metadata["resources"][0]
        first_resource["name"] = table_name
        return resource_metadata

    def test_create_dataset(self):
        """POSTing name, title and description answers 201 with metadata."""
        body = {
            "name": "test_dataset",
            "title": "Test Dataset",
            "description": "This is a test dataset",
        }
        result = self.client.post(self.DATASETS_URL, body, format="json")

        self.assertEqual(result.status_code, status.HTTP_201_CREATED)
        self.assertIn("metadata", result.data)
        self.assertIn("resources", result.data["metadata"])
        self.assertEqual(result.data["metadata"]["name"], "test_dataset")

    def test_list_datasets(self):
        """The collection endpoint returns every stored dataset."""
        for dataset_name in ("ds1", "ds2"):
            Dataset.objects.create(
                name=dataset_name,
                metadata=self.setUpDatasetMetadata(dataset_name),
            )

        result = self.client.get(self.DATASETS_URL)

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        self.assertEqual(len(result.data), 2)

    def test_assign_tables_to_dataset(self):
        """Assigning two existing tables links them and adds two resources."""
        schema_name = "test_schema"
        table_names = ("t1", "t2")

        schema = Schema.objects.create(name=schema_name)
        for table_name in table_names:
            Table.objects.create(
                name=table_name,
                schema=schema,
                oemetadata=self.setUpResourceMetadata(table_name),
            )
        dataset = Dataset.objects.create(
            name="test_dataset", metadata={"name": "test_dataset"}
        )

        body = {
            "dataset_name": "test_dataset",
            "tables": [
                {"schema": schema_name, "name": table_name}
                for table_name in table_names
            ],
        }
        result = self.client.post(
            "/api/v0/datasets/test_dataset/assign-tables/", body, format="json"
        )

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        dataset.refresh_from_db()
        self.assertEqual(dataset.tables.count(), 2)
        self.assertEqual(len(dataset.metadata["resources"]), 2)

    def test_list_resources_for_dataset(self):
        """The resources endpoint lists the tables attached to a dataset."""
        schema = Schema.objects.create(name="test_schema")
        table = Table.objects.create(
            name="t1",
            schema=schema,
            oemetadata=self.setUpResourceMetadata("t1"),
        )
        dataset = Dataset.objects.create(
            name="test_dataset",
            metadata=self.setUpDatasetMetadata("test_dataset"),
        )
        dataset.tables.add(table)
        dataset.update_resources_from_tables()

        resources_url = f"/api/v0/datasets/{dataset.name}/resources/"
        result = self.client.get(resources_url)

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        self.assertEqual(len(result.data), 1)
        self.assertEqual(result.data[0]["name"], "t1")

    def test_assign_missing_table(self):
        """Unknown tables are reported under ``missing`` with a 200 response."""
        Dataset.objects.create(
            name="ds_missing",
            metadata=self.setUpDatasetMetadata("ds_missing"),
        )

        body = {
            "dataset_name": "ds_missing",
            "tables": [{"schema": "nonexistent", "name": "missing"}],
        }
        result = self.client.post(
            "/api/v0/datasets/ds_missing/assign-tables/", body, format="json"
        )

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        self.assertIn("missing", result.data)
        self.assertEqual(len(result.data["missing"]), 1)

    def test_list_resources_dataset_not_found(self):
        """Requesting resources of an unknown dataset answers 404."""
        result = self.client.get("/api/v0/datasets/nonexistent/resources/")
        self.assertEqual(result.status_code, status.HTTP_404_NOT_FOUND)


class DatasetManagerAPITests(APITestCase):
    """Exercise GET, PUT and DELETE on the single-dataset endpoint."""

    MISSING_DATASET_URL = "/api/v0/datasets/nonexistent_dataset/"

    def setUp(self):
        """Store one dataset and remember the URL of its detail endpoint."""
        initial_metadata = {
            "name": "test_dataset",
            "title": "Test Title",
            "description": "Test Description",
            "resources": [],
        }
        self.dataset = Dataset.objects.create(
            name="test_dataset", metadata=initial_metadata
        )
        self.detail_url = f"/api/v0/datasets/{self.dataset.name}/"

    def test_get_dataset(self):
        """GET on an existing dataset answers 200 with its name."""
        result = self.client.get(self.detail_url)

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        self.assertEqual(result.data["name"], "test_dataset")

    def test_update_dataset(self):
        """PUT rewrites title, description and ``@id`` in the stored metadata."""
        new_title = "Updated Title"
        new_description = "Updated Description"
        new_at_id = "https://example.org/dataset/test_dataset"
        body = {
            "name": "test_dataset",  # must match existing name
            "title": new_title,
            "description": new_description,
            "at_id": new_at_id,
        }

        result = self.client.put(self.detail_url, body, format="json")

        self.assertEqual(result.status_code, status.HTTP_200_OK)
        self.dataset.refresh_from_db()
        stored = self.dataset.metadata
        self.assertEqual(stored["title"], new_title)
        self.assertEqual(stored["description"], new_description)
        self.assertEqual(stored["@id"], new_at_id)

    def test_delete_dataset(self):
        """DELETE answers 204 and removes the dataset row."""
        result = self.client.delete(self.detail_url)

        self.assertEqual(result.status_code, status.HTTP_204_NO_CONTENT)
        self.assertFalse(Dataset.objects.filter(name="test_dataset").exists())

    def test_get_nonexistent_dataset(self):
        """GET on an unknown dataset answers 404."""
        result = self.client.get(self.MISSING_DATASET_URL)
        self.assertEqual(result.status_code, status.HTTP_404_NOT_FOUND)

    def test_put_nonexistent_dataset(self):
        """PUT on an unknown dataset answers 404."""
        body = {
            "name": "nonexistent_dataset",
            "title": "Does Not Exist",
            "description": "Should return 404",
        }
        result = self.client.put(self.MISSING_DATASET_URL, body, format="json")
        self.assertEqual(result.status_code, status.HTTP_404_NOT_FOUND)

    def test_delete_nonexistent_dataset(self):
        """DELETE on an unknown dataset answers 404."""
        result = self.client.delete(self.MISSING_DATASET_URL)
        self.assertEqual(result.status_code, status.HTTP_404_NOT_FOUND)
20 changes: 20 additions & 0 deletions api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,26 @@
views.ManageOekgScenarioDatasets.as_view(),
name="add-scenario-datasets",
),
path(
"v0/datasets/",
views.DatasetsListCreate.as_view(),
name="dataset-list-create",
),
path(
"v0/datasets/<str:dataset_name>/assign-tables/",
views.AssignDatasetTables.as_view(),
name="dataset-assign-tables",
),
path(
"v0/datasets/<str:dataset_name>/",
views.DatasetManager.as_view(),
name="dataset",
),
path(
"v0/datasets/<str:dataset_name>/resources/",
views.DatasetsListResources.as_view(),
name="dataset-resources",
),
]


Expand Down
Loading
Loading