From 39d6648ac30c2a973087dd2cb01aaec1bceae99f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 11 Mar 2025 17:51:18 -0400 Subject: [PATCH 01/22] big work on schemas --- pepdbagent/_version.py | 2 +- pepdbagent/db_utils.py | 173 +++++-- pepdbagent/models.py | 62 ++- pepdbagent/modules/project.py | 21 +- pepdbagent/modules/schema.py | 937 ++++++++++++++++++---------------- tests/test_schema.py | 222 ++------ tests/utils.py | 11 +- 7 files changed, 750 insertions(+), 678 deletions(-) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index fee46bd..ea370a8 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.11.1" +__version__ = "0.12.0" diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 45d7391..7d7a297 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -86,12 +86,11 @@ class Projects(Base): last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( default=deliver_update_date, # onupdate=deliver_update_date, # This field should not be updated, while we are adding project to favorites ) - pep_schema: Mapped[Optional[str]] schema_id: Mapped[Optional[int]] = mapped_column( - ForeignKey("schemas.id", ondelete="SET NULL"), nullable=True + ForeignKey("schema_versions.id", ondelete="SET NULL"), nullable=True ) - schema_mapping: Mapped["Schemas"] = relationship("Schemas", lazy="joined") + schema_mapping: Mapped["SchemaVersions"] = relationship("SchemaVersions", lazy="joined") pop: Mapped[Optional[bool]] = mapped_column(default=False) samples_mapping: Mapped[List["Samples"]] = relationship( @@ -302,68 +301,160 @@ class HistorySamples(Base): ) -class Schemas(Base): +class SchemaRecords(Base): + __tablename__ = "schema_records" - __tablename__ = "schemas" - - id: Mapped[int] = mapped_column(primary_key=True, index=True) + id: Mapped[int] = mapped_column(primary_key=True) namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) - name: 
Mapped[str] = mapped_column(nullable=False, index=True) - description: Mapped[Optional[str]] = mapped_column(nullable=True, index=True) - schema_json: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) + name: Mapped[str] = mapped_column(nullable=False) + maintainers: Mapped[str] = mapped_column(nullable=True) + lifecycle_stage: Mapped[str] = mapped_column(nullable=True) + description: Mapped[Optional[str]] = mapped_column(nullable=True) private: Mapped[bool] = mapped_column(default=False) - submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( default=deliver_update_date, onupdate=deliver_update_date ) - projects_mappings: Mapped[List["Projects"]] = relationship( - "Projects", back_populates="schema_mapping" - ) - group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( - "SchemaGroupRelations", back_populates="schema_mapping" - ) - __table_args__ = (UniqueConstraint("namespace", "name"),) + versions_mapping: Mapped[List["SchemaVersions"]] = relationship( + "SchemaVersions", back_populates="schema_mapping", cascade="all, delete-orphan" + ) -class SchemaGroups(Base): - __tablename__ = "schema_groups" +class SchemaVersions(Base): + __tablename__ = "schema_versions" - id: Mapped[int] = mapped_column(primary_key=True, index=True) - namespace: Mapped[str] = mapped_column( - ForeignKey("users.namespace", ondelete="CASCADE"), index=True + id: Mapped[int] = mapped_column(primary_key=True) + schema_id: Mapped[int] = mapped_column(ForeignKey("schema_records.id", ondelete="CASCADE")) + version: Mapped[str] = mapped_column(nullable=False) + schema_value: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) + release_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) + last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( + default=deliver_update_date, onupdate=deliver_update_date ) - 
name: Mapped[str] = mapped_column(nullable=False, index=True) - description: Mapped[Optional[str]] = mapped_column(nullable=True) + contributors: Mapped[Optional[str]] = mapped_column(nullable=True) + release_notes: Mapped[Optional[str]] = mapped_column(nullable=True) - schema_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( - "SchemaGroupRelations", back_populates="group_mapping" - ) + __table_args__ = (UniqueConstraint("schema_id", "version"),) - __table_args__ = (UniqueConstraint("namespace", "name"),) + schema_mapping: Mapped["SchemaRecords"] = relationship( + "SchemaRecords", back_populates="versions_mapping" + ) + tags_mapping: Mapped[List["SchemaTags"]] = relationship( + "SchemaTags", back_populates="schema_mapping", lazy="joined", cascade="all, delete-orphan" + ) -class SchemaGroupRelations(Base): - __tablename__ = "schema_group_relations" +class SchemaTags(Base): + __tablename__ = "schema_tags" - schema_id: Mapped[int] = mapped_column( - ForeignKey("schemas.id", ondelete="CASCADE"), index=True, primary_key=True - ) - group_id: Mapped[int] = mapped_column( - ForeignKey("schema_groups.id", ondelete="CASCADE"), index=True, primary_key=True + id: Mapped[int] = mapped_column(primary_key=True) + tag_name: Mapped[str] = mapped_column(nullable=False) + tag_value: Mapped[str] = mapped_column(nullable=True) + schema_version_id: Mapped[int] = mapped_column( + ForeignKey("schema_versions.id", ondelete="CASCADE") ) - schema_mapping: Mapped["Schemas"] = relationship( - "Schemas", back_populates="group_relation_mapping" - ) - group_mapping: Mapped["SchemaGroups"] = relationship( - "SchemaGroups", back_populates="schema_relation_mapping" + schema_mapping: Mapped["SchemaVersions"] = relationship( + "SchemaVersions", back_populates="tags_mapping" ) +# Update the `last_update_date` when versions_mapping is modified +@event.listens_for(SchemaRecords.versions_mapping, "append") +def update_last_update_date(target, value, initiator): + 
target.last_update_date = datetime.datetime.now() + + +@event.listens_for(Session, "before_flush") +def before_flush(session, flush_context, instances): + for instance in session.dirty: + if isinstance(instance, SchemaRecords): + instance.last_update_date = datetime.datetime.now() + + +# class SchemaTagRelations(Base): +# __tablename__ = "schema_tag_relations" +# +# version_id: Mapped[int] = mapped_column( +# ForeignKey("schema_versions.id", ondelete="CASCADE"), index=True, primary_key=True +# ) +# tag_id: Mapped[int] = mapped_column( +# ForeignKey("schema_tags.id", ondelete="CASCADE"), index=True, primary_key=True +# ) +# +# tag_mapping: Mapped["SchemaRecords"] = relationship( +# "SchemaVersions", back_populates="tag_mapping" +# ) +# version_mapping: Mapped["SchemaTags"] = relationship( +# "SchemaTags", back_populates="version_mapping" +# ) + + +# class SchemaRecords(Base): +# +# __tablename__ = "schemas" +# +# id: Mapped[int] = mapped_column(primary_key=True, index=True) +# namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) +# name: Mapped[str] = mapped_column(nullable=False, index=True) +# description: Mapped[Optional[str]] = mapped_column(nullable=True, index=True) +# schema_json: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) +# private: Mapped[bool] = mapped_column(default=False) +# submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) +# last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( +# default=deliver_update_date, onupdate=deliver_update_date +# ) +# +# projects_mappings: Mapped[List["Projects"]] = relationship( +# "Projects", back_populates="schema_mapping" +# ) +# group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( +# "SchemaGroupRelations", back_populates="schema_mapping" +# ) +# +# __table_args__ = (UniqueConstraint("namespace", "name"),) +# +# +# class SchemaGroups(Base): +# +# __tablename__ = "schema_groups" +# 
+# id: Mapped[int] = mapped_column(primary_key=True, index=True) +# namespace: Mapped[str] = mapped_column( +# ForeignKey("users.namespace", ondelete="CASCADE"), index=True +# ) +# name: Mapped[str] = mapped_column(nullable=False, index=True) +# description: Mapped[Optional[str]] = mapped_column(nullable=True) +# +# schema_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( +# "SchemaGroupRelations", back_populates="group_mapping" +# ) +# +# __table_args__ = (UniqueConstraint("namespace", "name"),) +# +# +# class SchemaGroupRelations(Base): +# +# __tablename__ = "schema_group_relations" +# +# schema_id: Mapped[int] = mapped_column( +# ForeignKey("schemas.id", ondelete="CASCADE"), index=True, primary_key=True +# ) +# group_id: Mapped[int] = mapped_column( +# ForeignKey("schema_groups.id", ondelete="CASCADE"), index=True, primary_key=True +# ) +# +# schema_mapping: Mapped["SchemaRecords"] = relationship( +# "SchemaRecords", back_populates="group_relation_mapping" +# ) +# group_mapping: Mapped["SchemaGroups"] = relationship( +# "SchemaGroups", back_populates="schema_relation_mapping" +# ) + + class TarNamespace(Base): __tablename__ = "namespace_archives" diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 3bcd402..b82509f 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -249,17 +249,37 @@ class HistoryAnnotationModel(BaseModel): history: List[HistoryChangeModel] -class SchemaAnnotation(BaseModel): +class PaginationResult(BaseModel): + page: int = 0 + page_size: int = 10 + total: int + + +class SchemaVersionAnnotation(BaseModel): + """ + Schema version annotation model + """ + + version: str + contributors: Optional[str] = "" + release_notes: Optional[str] = "" + tags: List[str] + release_date: datetime.datetime + last_update_date: datetime.datetime + + +class SchemaRecordAnnotation(BaseModel): """ Schema annotation model """ namespace: str name: str - last_update_date: str - submission_date: str description: Optional[str] = 
"" - popularity_number: Optional[int] = 0 + maintainers: str = "" + lifecycle_stage: str = "" + private: bool = False + last_update_date: datetime.datetime class SchemaSearchResult(BaseModel): @@ -267,32 +287,30 @@ class SchemaSearchResult(BaseModel): Schema search result model """ - count: int - limit: int - offset: int - results: List[SchemaAnnotation] + pagination: PaginationResult + results: List[SchemaRecordAnnotation] -class SchemaGroupAnnotation(BaseModel): +class SchemaVersionSearchResult(BaseModel): """ - Schema group annotation model + Schema version search result model """ - namespace: str - name: str - description: Optional[str] = "" - schemas: List[SchemaAnnotation] + pagination: PaginationResult + results: List[SchemaVersionAnnotation] -class SchemaGroupSearchResult(BaseModel): - """ - Schema group search result model - """ +class UpdateSchemaRecordFields(BaseModel): + maintainers: Optional[str] = None + lifecycle_stage: Optional[str] = None + private: Optional[bool] = False + name: Optional[str] = None - count: int - limit: int - offset: int - results: List[SchemaGroupAnnotation] + +class UpdateSchemaVersionFields(BaseModel): + contributors: Optional[str] = None + schema_value: Optional[dict] = None + release_notes: Optional[str] = None class TarNamespaceModel(BaseModel): diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index cf2cfbe..83a045d 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -31,7 +31,8 @@ HistorySamples, Projects, Samples, - Schemas, + SchemaRecords, + SchemaVersions, Subsamples, TarNamespace, UpdateTypes, @@ -371,10 +372,10 @@ def create( schema_namespace, schema_name = schema_path_converter(pep_schema) with Session(self._sa_engine) as session: schema_mapping = session.scalar( - select(Schemas).where( + select(SchemaRecords).where( and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, + SchemaRecords.namespace == schema_namespace, + SchemaRecords.name 
== schema_name, ) ) ) @@ -693,12 +694,16 @@ def _convert_update_schema_id(session: Session, update_values: dict): return None """ if "pep_schema" in update_values: - schema_namespace, schema_name = schema_path_converter(update_values["pep_schema"]) + schema_namespace, schema_name = schema_path_converter( + update_values["pep_schema"] + ) # TODO: fix it. schema_mapping = session.scalar( - select(Schemas).where( + select(SchemaVersions) + .join(SchemaRecords) + .where( and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, + SchemaRecords.namespace == schema_namespace, + SchemaRecords.name == schema_name, ) ) ) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 4294476..687112c 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -1,25 +1,27 @@ import logging +from typing import List, Optional, Union, Dict + from sqlalchemy import Select, and_, delete, func, or_, select from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified from pepdbagent.const import PKG_NAME -from pepdbagent.db_utils import BaseEngine, SchemaGroupRelations, SchemaGroups, Schemas, User +from pepdbagent.db_utils import BaseEngine, SchemaRecords, SchemaTags, SchemaVersions, User from pepdbagent.exceptions import ( SchemaAlreadyExistsError, SchemaAlreadyInGroupError, SchemaDoesNotExistError, - SchemaGroupAlreadyExistsError, - SchemaGroupDoesNotExistError, - SchemaIsNotInGroupError, ) from pepdbagent.models import ( - SchemaAnnotation, - SchemaGroupAnnotation, - SchemaGroupSearchResult, + SchemaRecordAnnotation, + SchemaVersionAnnotation, + PaginationResult, + SchemaVersionSearchResult, SchemaSearchResult, + UpdateSchemaRecordFields, + UpdateSchemaVersionFields, ) _LOGGER = logging.getLogger(PKG_NAME) @@ -27,7 +29,7 @@ class PEPDatabaseSchema: """ - Class that represents Schemas in Database. + Class that represents SchemaRecords in Database. 
While using this class, user can create, retrieve, delete, and update schemas from database """ @@ -39,223 +41,79 @@ def __init__(self, pep_db_engine: BaseEngine): self._sa_engine = pep_db_engine.engine self._pep_db_engine = pep_db_engine - def get(self, namespace: str, name: str) -> dict: + def get(self, namespace: str, name: str, version: str) -> dict: """ Get schema from the database. :param namespace: user namespace :param name: schema name + :param version: schema version :return: schema dict """ with Session(self._sa_engine) as session: schema_obj = session.scalar( - select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) - ) - - if not schema_obj: - raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") - - return schema_obj.schema_json - - def info(self, namespace: str, name: str) -> SchemaAnnotation: - """ - Get schema information from the database. - - :param namespace: user namespace - :param name: schema name - - :return: SchemaAnnotation object: - - namespace: schema namespace - - name: schema name - - last_update_date: last update date - - submission_date: submission date - - description: schema description - """ - - with Session(self._sa_engine) as session: - schema_obj = session.scalar( - select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) - ) - - if not schema_obj: - raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") - - return SchemaAnnotation( - namespace=schema_obj.namespace, - name=schema_obj.name, - last_update_date=str(schema_obj.last_update_date), - submission_date=str(schema_obj.submission_date), - description=schema_obj.description, - popularity_number=len(schema_obj.projects_mappings), - ) - - def search( - self, - namespace: str = None, - search_str: str = "", - limit: int = 100, - offset: int = 0, - order_by: str = "update_date", - order_desc: bool = False, - ) -> SchemaSearchResult: - """ - Search schemas in the database. 
- - :param namespace: user namespace [Default: None]. If None, search in all namespaces - :param search_str: query string. [Default: ""]. If empty, return all schemas - :param limit: limit number of schemas [Default: 100] - :param offset: offset number of schemas [Default: 0] - :param order_by: sort the result-set by the information - Options: ["name", "update_date", "submission_date"] - [Default: update_date] - :param order_desc: Sort the records in descending order. [Default: False] - - :return: list of schema dicts - """ - - statement = select(Schemas) - statement = self._add_condition(statement, namespace, search_str) - statement = statement.limit(limit).offset(offset) - statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) - - return_list = [] - - with Session(self._sa_engine) as session: - results = session.scalars(statement) - - for result in results: - return_list.append( - SchemaAnnotation( - namespace=result.namespace, - name=result.name, - last_update_date=str(result.last_update_date), - submission_date=str(result.submission_date), - description=result.description, - # popularity_number=sum(result.projects_mappings), + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, ) ) - - return SchemaSearchResult( - count=self._count_search(namespace=namespace, search_str=search_str), - limit=limit, - offset=offset, - results=return_list, - ) - - def _count_search(self, namespace: str = None, search_str: str = "") -> int: - """ - Count number of found schemas - - :param namespace: user namespace [Default: None]. If None, search in all namespaces - :param search_str: query string. [Default: ""]. 
If empty, return all schemas - - :return: list of schema dicts - """ - statement = select(func.count(Schemas.id)) - - statement = self._add_condition(statement, namespace, search_str) - - with Session(self._sa_engine) as session: - result = session.execute(statement).one() - - return result[0] - - @staticmethod - def _add_order_by_keyword( - statement: Select, by: str = "update_date", desc: bool = False - ) -> Select: - """ - Add order by clause to sqlalchemy statement - - :param statement: sqlalchemy representation of a SELECT statement. - :param by: sort the result-set by the information - Options: ["name", "update_date", "submission_date"] - [Default: "update_date"] - :param desc: Sort the records in descending order. [Default: False] - :return: sqlalchemy representation of a SELECT statement with order by keyword - """ - if by == "update_date": - order_by_obj = Schemas.last_update_date - elif by == "name": - order_by_obj = Schemas.name - elif by == "submission_date": - order_by_obj = Schemas.submission_date - else: - _LOGGER.warning( - f"order by: '{by}' statement is unavailable. 
Projects are sorted by 'update_date'" ) - order_by_obj = Schemas.last_update_date - - if desc and by == "name": - order_by_obj = order_by_obj.desc() - - elif by != "name" and not desc: - order_by_obj = order_by_obj.desc() - return statement.order_by(order_by_obj) + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") - @staticmethod - def _add_condition( - statement: Select, - namespace: str = None, - search_str: str = None, - ) -> Select: - if search_str: - sql_search_str = f"%{search_str}%" - search_query = or_( - Schemas.name.ilike(sql_search_str), - Schemas.description.ilike(sql_search_str), - ) - statement = statement.where(search_query) - if namespace: - statement = statement.where(Schemas.namespace == namespace) - return statement + return schema_obj.schema_value def create( self, namespace: str, name: str, - schema: dict, + schema_value: dict, + version: str = "default", description: str = "", - # private: bool = False, # TODO: for simplicity was not implemented yet - overwrite: bool = False, - update_only: bool = False, + lifecycle_stage: str = "", + maintainers: str = "", + contributors: str = "", + release_notes: str = "", + tags: Optional[Union[List[str], str, Dict[str, str], List[Dict[str, str]]]] = None, + private: bool = False, # TODO: for simplicity was not implemented yet ) -> None: """ Create or update schema in the database. 
:param namespace: user namespace :param name: schema name - :param schema: schema dict + :param schema_value: schema dict + :param version: schema version [Default: "1.0.0"] :param description: schema description [Default: ""] - :param overwrite: overwrite schema if exists [Default: False] - :param update_only: update only schema if exists [Default: False] + :param lifecycle_stage: schema lifecycle stage [Default: ""] + :param maintainers: schema maintainers [Default: ""] + :param contributors: schema contributors [Default: ""] + :param release_notes: schema release notes [Default: ""] + :param tags: schema tags [Default: None] + :param private: schema privacy [Default: False] + + :return: None """ - if description: - schema["description"] = description - else: - description = schema.get("description", "") - - if self.exist(namespace, name): - if overwrite: - self.update(namespace, name, schema, description) - return None - elif update_only: - self.update(namespace, name, schema, description) - return None - else: - raise SchemaAlreadyExistsError(f"Schema '{name}' already exists in the database") + tags = self._unify_tags(tags) - if update_only: - raise SchemaDoesNotExistError( - f"Schema '{name}' does not exist in the database" - f"Cannot update schema that does not exist" + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) ) - with Session(self._sa_engine) as session: + if schema_obj: + raise SchemaAlreadyExistsError(f"Schema '{name}' already exists in the database") + user = session.scalar(select(User).where(User.namespace == namespace)) if not user: @@ -263,71 +121,134 @@ def create( session.add(user) session.commit() - schema_obj = Schemas( + schema_obj = SchemaRecords( namespace=namespace, name=name, - schema_json=schema, description=description, + maintainers=maintainers, + lifecycle_stage=lifecycle_stage, + private=private, ) + 
session.add(schema_obj) + + schema_version_obj = SchemaVersions( + schema_mapping=schema_obj, + version=version, + schema_value=schema_value, + release_notes=release_notes, + contributors=contributors, + ) + + for tag_name, tag_value in tags.items(): + tag_obj = session.scalar(select(SchemaTags).where(SchemaTags.tag_name == tag_name)) + if not tag_obj: + tag_obj = SchemaTags( + tag_name=tag_name, tag_value=tag_value, schema_mapping=schema_version_obj + ) + session.add(tag_obj) + + session.add(schema_version_obj) session.commit() - def update( + return None + + def add_version( self, namespace: str, name: str, - schema: dict, - description: str = "", - # private: bool = False, # TODO: for simplicity was not implemented yet + version: str, + schema_value: dict, + release_notes: str = "", + contributors: str = "", + overwrite: bool = False, + tags: Optional[Union[List[str], str, Dict[str, str], List[Dict[str, str]]]] = None, ) -> None: - """ - Update schema in the database. - :param namespace: user namespace - :param name: schema name - :param schema: schema dict - :param description: schema description [Default: ""] - - :return: None - """ + tags = self._unify_tags(tags) with Session(self._sa_engine) as session: schema_obj = session.scalar( - select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) ) - if not schema_obj: - raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + raise SchemaDoesNotExistError( + f"Schema '{name}' does not exist in the database. Unable to add version." 
+ ) - schema_obj.schema_json = schema - schema_obj.description = description - flag_modified(schema_obj, "schema_json") + version_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, + ) + ) + ) - session.commit() + if version_obj: + if not overwrite: + raise SchemaAlreadyExistsError( + f"Schema '{name}' with version '{version}' already exists in the database" + ) - def delete(self, namespace: str, name: str) -> None: - """ - Delete schema from the database. + return self.update_schema_version( + namespace, + name, + version, + update_fields=UpdateSchemaVersionFields( + schema_value=schema_value, + release_notes=release_notes, + contributors=contributors, + ), + ) - :param namespace: user namespace - :param name: schema name + schema_version_obj = SchemaVersions( + schema_id=schema_obj.id, + version=version, + schema_value=schema_value, + release_notes=release_notes, + contributors=contributors, + ) - :return: None - """ + for tag_name, tag_value in tags: + tag_obj = session.scalar(select(SchemaTags).where(SchemaTags.tag_name == tag_name)) + if not tag_obj: + tag_obj = SchemaTags( + tag_name=tag_name, tag_value=tag_value, schema_mapping=schema_version_obj + ) + session.add(tag_obj) - with Session(self._sa_engine) as session: - schema_obj = session.scalar( - select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) - ) + session.add(schema_version_obj) + session.commit() - if not schema_obj: - raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + return None - session.delete(schema_obj) + def update_schema_version( + self, + namespace: str, + name: str, + version: str, + update_fields: Union[UpdateSchemaVersionFields, dict], + ) -> None: + """ """ + # flag_modified(update_fields, "schema_value") + ... 
- session.commit() + def update_schema_record( + self, + namespace: str, + name: str, + update_fields: UpdateSchemaRecordFields, + ) -> None: + """ """ + ... - def exist(self, namespace: str, name: str) -> bool: + def schema_exist(self, namespace: str, name: str) -> bool: """ Check if schema exists in the database. @@ -339,302 +260,450 @@ def exist(self, namespace: str, name: str) -> bool: with Session(self._sa_engine) as session: schema_obj = session.scalar( - select(Schemas).where(and_(Schemas.namespace == namespace, Schemas.name == name)) + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) ) return True if schema_obj else False - def group_create(self, namespace: str, name: str, description: str = "") -> None: + def version_exist(self, namespace: str, name: str, version: str) -> bool: """ - Create schema group in the database. + Check if schema version exists in the database. :param namespace: user namespace - :param name: schema group name - :param description: schema group description [Default: ""] + :param name: schema name + :param version: schema version - :return: None + :return: True if schema version exists, False otherwise """ - try: - with Session(self._sa_engine) as session: - session.add( - SchemaGroups( - namespace=namespace, - name=name, - description=description, + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, ) ) - session.commit() - - except IntegrityError: - raise SchemaGroupAlreadyExistsError + ) + return True if schema_obj else False - def group_get(self, namespace: str, name: str) -> SchemaGroupAnnotation: + def get_schema_info(self, namespace: str, name: str) -> SchemaRecordAnnotation: """ - Get schema group from the database. 
+ Get schema information from the database. :param namespace: user namespace - :param name: schema group name + :param name: schema name - :return: SchemaGroupAnnotation object: - - namespace: schema group namespace - - name: schema group name - - description: schema group description - - schemas: list of SchemaAnnotation objects + :return: SchemaRecordAnnotation """ with Session(self._sa_engine) as session: - schema_group_obj = session.scalar( - select(SchemaGroups).where( - and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + schema_obj = session.scalar( + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) ) ) - if not schema_group_obj: - raise SchemaGroupDoesNotExistError( - f"Schema group '{name}' does not exist in the database" - ) - - schemas = [] - for schema_relation in schema_group_obj.schema_relation_mapping: - schema_annotation = schema_relation.schema_mapping - schemas.append( - SchemaAnnotation( - namespace=schema_annotation.namespace, - name=schema_annotation.name, - last_update_date=str(schema_annotation.last_update_date), - submission_date=str(schema_annotation.submission_date), - desciription=schema_annotation.description, - ) - ) + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") - return SchemaGroupAnnotation( - namespace=schema_group_obj.namespace, - name=schema_group_obj.name, - description=schema_group_obj.description, - schemas=schemas, + return SchemaRecordAnnotation( + namespace=schema_obj.namespace, + name=schema_obj.name, + description=schema_obj.description, + maintainers=schema_obj.maintainers, + private=schema_obj.private, + last_update_date=schema_obj.last_update_date, ) - def group_search( - self, namespace: str = None, search_str: str = "", limit: int = 100, offset: int = 0 - ) -> SchemaGroupSearchResult: + def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVersionAnnotation: """ - 
Search schema groups in the database. + Get schema version information from the database. - :param namespace: user namespace [Default: None]. If None, search in all namespaces - :param search_str: query string. [Default: ""]. If empty, return all schema groups - :param limit: limit of the search - :param offset: offset of the search - - :return: SchemaGroupSearchResult object: - - count: number of found schema groups - - limit: limit number of schema groups - - offset: offset number of schema groups - - results: list of SchemaGroupAnnotation objects - """ + :param namespace: user namespace + :param name: schema name + :param version: schema version - statement = select(SchemaGroups) - statement = self._add_group_condition( - statement=statement, namespace=namespace, search_str=search_str - ) + :return: SchemaVersionAnnotation + """ with Session(self._sa_engine) as session: - results = session.scalars(statement) - - return_results = [] - for result in results: - return_results.append( - SchemaGroupAnnotation( - namespace=result.namespace, - name=result.name, - description=result.description, - schemas=[], + version_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, ) ) + ) - return SchemaGroupSearchResult( - count=self._group_search_count(namespace, search_str), - limit=limit, - offset=offset, - results=return_results, - ) + if not version_obj: + raise SchemaDoesNotExistError( + f"Schema '{name}' with version '{version}' does not exist in the database" + ) - @staticmethod - def _add_group_condition( - statement: Select, + return SchemaVersionAnnotation( + version=version_obj.version, + contributors=version_obj.contributors, + release_notes=version_obj.release_notes, + tags=[tag.tag_name for tag in version_obj.tags_mapping], + release_date=version_obj.release_date, + 
last_update_date=version_obj.last_update_date, + ) + + def query_schemas( + self, namespace: str = None, search_str: str = "", - ) -> Select: + page: int = 0, + page_size: int = 10, + order_by: str = "update_date", + order_desc: bool = False, + ) -> SchemaSearchResult: """ - Add query condition to statement in group search + Search schemas in the database with pagination. + + :param namespace: user namespace [Default: None]. If None, search in all namespaces + :param search_str: query string. [Default: ""]. If empty, return all schemas + :param page: page number [Default: 0] + :param page_size: number of schemas per page [Default: 0] + :param order_by: sort the result-set by the information + Options: ["name", "update_date"] + [Default: update_date] + :param order_desc: Sort the records in descending order. [Default: False] - :param statement: Select statement - :param namespace: Namespace of schema group [Default: None]. If none set, all search in all namespaces - :param search_str: Search string to look for schemas. 
Search in name and description of the group + :return: { + pagination: {page: int, + page_size: int, + total: int}, + results: [SchemaRecordAnnotation] """ - if search_str: - sql_search_str = f"%{search_str}%" - search_query = or_( - SchemaGroups.name.ilike(sql_search_str), - SchemaGroups.description.ilike(sql_search_str), - ) - statement = statement.where(search_query) + + search_str = search_str.lower() if search_str else "" + + where_statement = or_( + SchemaRecords.name.ilike(f"%{search_str}%"), + SchemaRecords.maintainers.ilike(f"%{search_str}%"), + SchemaRecords.description.ilike(f"%{search_str}%"), + ) if namespace: - statement = statement.where(SchemaGroups.namespace == namespace) - return statement + where_statement = and_(where_statement, SchemaRecords.namespace == namespace) + + with Session(self._sa_engine) as session: + total = session.scalar(select(func.count(SchemaRecords.id)).where(where_statement)) + statement = ( + select(SchemaRecords) + .where(where_statement) + .limit(page_size) + .offset(page * page_size) + ) + statement = self._add_order_by_schemas_keyword(statement, by=order_by, desc=order_desc) + results_objects = session.scalars(statement) - def _group_search_count(self, namespace: str = None, search_str: str = ""): + return SchemaSearchResult( + pagination=PaginationResult( + page=page, + page_size=page_size, + total=total, + ), + results=[ + SchemaRecordAnnotation( + namespace=result.namespace, + name=result.name, + description=result.description, + maintainers=result.maintainers, + private=result.private, + last_update_date=result.last_update_date, + ) + for result in results_objects + ], + ) + + def query_schema_version( + self, + namespace: str, + name: str, + tag: str = None, + search_str: str = "", + page: int = 0, + page_size: int = 10, + ) -> SchemaVersionSearchResult: """ - Count number of found group of schemas + Search schema versions in the database with pagination. - :param namespace: user namespace [Default: None]. 
If None, search in all namespaces + :param namespace: user namespace + :param name: schema name + :param tag: tag name. [Default: None]. If None, return versions with all tags :param search_str: query string. [Default: ""]. If empty, return all schemas + :param page: result page number [Default: 0] + :param page_size: number of schemas per page [Default: 10] - :return: list of schema dicts + :return: { + pagination: {page: int, + page_size: int, + total: int}, + results: [SchemaVersionAnnotation] """ - statement = select(func.count(SchemaGroups.id)) - statement = self._add_group_condition(statement, namespace, search_str) + search_str = search_str.lower() if search_str else "" with Session(self._sa_engine) as session: - result = session.execute(statement).one() + schema_obj = session.scalar( + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + where_statement = or_( + SchemaVersions.version.ilike(f"%{search_str}%"), + SchemaVersions.release_notes.ilike(f"%{search_str}%"), + ) + if tag: + where_statement = and_(where_statement, SchemaTags.tag_name == tag) + + total = session.scalar(select(func.count(SchemaVersions.id)).where(where_statement)) - return result[0] + results_objects = session.scalars( + select(SchemaVersions) + .where(where_statement) + .limit(page_size) + .offset(page * page_size) + ) - def group_delete(self, namespace: str, name: str) -> None: + return SchemaVersionSearchResult( + pagination=PaginationResult( + page=page, + page_size=page_size, + total=total, + ), + results=[ + SchemaVersionAnnotation( + version=result.version, + contributors=result.contributors, + release_notes=result.release_notes, + tags=[tag.tag_name for tag in result.tags_mapping], + release_date=result.release_date, + last_update_date=result.last_update_date, + ) + for result in results_objects + ], + ) + + def
delete_schema(self, namespace: str, name: str) -> None: """ - Delete schema group from the database. + Delete schema from the database. :param namespace: user namespace - :param name: schema group name - + :param name: schema name :return: None """ - if not self.group_exist(namespace, name): - raise SchemaGroupDoesNotExistError( - f"Schema group '{name}' does not exist in the database" + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) ) + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + session.delete(schema_obj) + session.commit() + + def delete_version(self, namespace: str, name: str, version: str) -> None: + """ + Delete version of the schema + + :param namespace: Namespace of the schema + :param name: Name of the schema + :param version: Version of the Schema + + :raise: SchemaDoesNotExistError if version doesn't exist + :return: None + """ with Session(self._sa_engine) as session: - session.execute( - delete(SchemaGroups).where( - and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, + ) ) ) + if not schema_obj: + raise SchemaDoesNotExistError( + f"Schema '{name}' with version '{version}' does not exist in the database. Unable to update version." + ) + session.delete(schema_obj) session.commit() - def group_add_schema( - self, namespace: str, name: str, schema_namespace: str, schema_name: str + def add_tag_to_schema( + self, + namespace: str, + name: str, + version: str, + tag: Optional[Union[List[str], str, Dict[str, str]]], ) -> None: """ - Add schema to the schema group. 
+ Add tag to the schema - :param namespace: user namespace - :param name: schema group name - :param schema_namespace: schema namespace - :param schema_name: schema name + :param namespace: Namespace of the schema + :param name: Name of the schema + :param version: Version of the Schema + :param tag: Tag to be added. Can be a string, list of strings or dictionaries + :raise: SchemaDoesNotExistError if version doesn't exist :return: None """ - try: - with Session(self._sa_engine) as session: - group_mapping = session.scalar( - select(SchemaGroups).where( - and_( - SchemaGroups.namespace == namespace, - SchemaGroups.name == name, - ) - ) - ) - - if not group_mapping: - raise SchemaGroupDoesNotExistError( - f"Group of Schemas with namespace='{namespace}' and name='{name}' does not exist" - ) + tag = self._unify_tags(tag) - schema_mapping = session.scalar( - select(Schemas).where( - and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, - ) + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, ) ) - - if not schema_mapping: - raise SchemaDoesNotExistError( - f"Schema with namespace='{schema_namespace}' and name='{schema_name}' does not exist" + ) + if not schema_obj: + raise SchemaDoesNotExistError( + f"Schema '{name}' with version '{version}' does not exist in the database. Unable to add tag." 
+ ) + if isinstance(tag, str): + tag = [tag] + + for tag_name, tag_value in tag.items(): + tag_obj = session.scalar(select(SchemaTags).where(SchemaTags.tag_name == tag_name)) + if not tag_obj: + tag_obj = SchemaTags( + tag_name=tag_name, tag_value=tag_value, schema_mapping=schema_obj ) - - session.add( - SchemaGroupRelations( - schema_id=schema_mapping.id, - group_id=group_mapping.id, + session.add(tag_obj) + else: + raise SchemaAlreadyInGroupError( + f"Tag '{tag_name}' already exists in the schema" ) - ) - session.commit() - except IntegrityError: - raise SchemaAlreadyInGroupError - def group_remove_schema( - self, namespace: str, name: str, schema_namespace: str, schema_name: str - ) -> None: + session.commit() + + def remove_tag_from_schema(self, namespace: str, name: str, version: str, tag: str) -> None: """ - Remove schema from the schema group. + Remove tag from the schema - :param namespace: user namespace - :param name: schema group name - :param schema_namespace: schema namespace - :param schema_name: schema name + :param namespace: Namespace of the schema + :param name: Name of the schema + :param version: Version of the Schema + :param tag: Tag to be removed + :raise: SchemaDoesNotExistError if version doesn't exist :return: None """ - - try: - with Session(self._sa_engine) as session: - delete_statement = delete(SchemaGroupRelations).where( + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( and_( - SchemaGroupRelations.schema_id - == select(Schemas.id) - .where( - and_( - Schemas.namespace == schema_namespace, - Schemas.name == schema_name, - ) - ) - .scalar_subquery(), - SchemaGroupRelations.group_id - == select(SchemaGroups.id) - .where( - and_( - SchemaGroups.namespace == namespace, - SchemaGroups.name == name, - ) - ) - .scalar_subquery(), + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + 
SchemaVersions.version == version, ) ) + ) + if not schema_obj: + raise SchemaDoesNotExistError( + f"Schema '{name}' with version '{version}' does not exist in the database. Unable to remove tag." + ) - session.execute(delete_statement) - session.commit() - except IntegrityError: - raise SchemaIsNotInGroupError("Schema not found in the group") + tag_obj = session.scalar( + select(SchemaTags).where( + SchemaTags.tag_name == tag, SchemaTags.schema_version_id == schema_obj.id + ) + ) + if not tag_obj: + raise SchemaDoesNotExistError(f"Tag '{tag}' does not exist in the schema") + + session.delete(tag_obj) + session.commit() - def group_exist(self, namespace: str, name: str) -> bool: + @staticmethod + def _add_order_by_schemas_keyword( + statement: Select, by: str = "update_date", desc: bool = False + ) -> Select: """ - Check if schema group exists in the database. + Add order by clause to sqlalchemy statement - :param namespace: user namespace - :param name: schema group name + :param statement: sqlalchemy representation of a SELECT statement. + :param by: sort the result-set by the information + Options: ["name", "update_date"] + [Default: "update_date"] + :param desc: Sort the records in descending order. [Default: False] + :return: sqlalchemy representation of a SELECT statement with order by keyword + """ + if by == "update_date": + order_by_obj = SchemaRecords.last_update_date + elif by == "name": + order_by_obj = SchemaRecords.name + else: + _LOGGER.warning( + f"order by: '{by}' statement is unavailable. 
Projects are sorted by 'update_date'" + ) + order_by_obj = SchemaRecords.last_update_date + + if desc and by == "name": + order_by_obj = order_by_obj.desc() - :return: True if schema group exists, False otherwise + elif by != "name" and not desc: + order_by_obj = order_by_obj.desc() + + return statement.order_by(order_by_obj) + + def _unify_tags( + self, tags: Optional[Union[List[str], str, Dict[str, str], List[Dict[str, str]]]] + ) -> [Dict[str, str]]: """ + Convert provided tags to one standard - with Session(self._sa_engine) as session: - schema_group_obj = session.scalar( - select(SchemaGroups).where( - and_(SchemaGroups.namespace == namespace, SchemaGroups.name == name) + :param tags: tags to be converted from types: str, dict, list of str, list of dict + + :raise: ValueError if tags are not in the correct format + :return: dictionary of tags + """ + if tags is None: + tags = {} + elif isinstance(tags, str): + tags = {tags: None} + elif isinstance(tags, dict): + pass + elif isinstance(tags, list): + if all(isinstance(tag, str) for tag in tags): + tags = {tag: None for tag in tags} + else: + raise ValueError( + f"tags should be a list of strings or a list of dictionaries. Tag values: {tags}" ) + else: + raise ValueError( + f"tags should be a list of strings or a list of dictionaries. 
Tag values: {tags}" ) - return True if schema_group_obj else False + return tags diff --git a/tests/test_schema.py b/tests/test_schema.py index 52e329c..29819bb 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -17,207 +17,87 @@ class TestSamples: ) def test_get(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: - schema = agent.schema.get(namespace=namespace, name=name) - assert agent.schema.exist(namespace=namespace, name=name) + assert agent.schema.schema_exist(namespace=namespace, name=name) + assert agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + schema = agent.schema.get(namespace=namespace, name=name, version="1.0.0") assert schema - @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace1", "2.0.0"], - ], - ) - def test_delete(self, namespace, name): - with PEPDBAgentContextManager(add_schemas=True) as agent: - assert agent.schema.exist(namespace=namespace, name=name) - agent.schema.delete(namespace=namespace, name=name) - assert not agent.schema.exist(namespace=namespace, name=name) + def test_search_schema(self): ... - @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace1", "2.0.0"], - ], - ) - def test_update(self, namespace, name): - with PEPDBAgentContextManager(add_schemas=True) as agent: - schema = agent.schema.get(namespace=namespace, name=name) - schema["new"] = "hello" - agent.schema.update(namespace=namespace, name=name, schema=schema) - assert agent.schema.exist(namespace=namespace, name=name) - assert schema == agent.schema.get(namespace=namespace, name=name) + def test_search_schema_namespace(self): ... 
- @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace1", "2.0.0"], - ], - ) - def test_get_annotation(self, namespace, name): - with PEPDBAgentContextManager(add_schemas=True) as agent: - schema_annot = agent.schema.info(namespace=namespace, name=name) - assert schema_annot - assert schema_annot.model_fields_set == { - "namespace", - "name", - "last_update_date", - "submission_date", - "description", - "popularity_number", - } + def test_update_schema(self): ... - @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace1", "2.0.0"], - ], - ) - def test_update_annotation(self, namespace, name): - with PEPDBAgentContextManager(add_schemas=True) as agent: - schema_annot = agent.schema.info(namespace=namespace, name=name) - schema = agent.schema.get(namespace=namespace, name=name) - agent.schema.update( - namespace=namespace, name=name, schema=schema, description="new desc" - ) - assert schema_annot != agent.schema.info(namespace=namespace, name=name) + def test_update_schema_update_date(self): ... - @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace2", "bedboss"], - ], - ) - def test_annotation_popular(self, namespace, name): - with PEPDBAgentContextManager(add_data=True, add_schemas=True) as agent: - agent.project.update( - namespace="namespace1", - name="amendments1", - update_dict={"pep_schema": "namespace2/bedboss"}, - ) - schema_annot = agent.schema.info(namespace=namespace, name=name) - assert schema_annot.popularity_number == 1 + def test_add_schema_version(self): ... - def test_search(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - results = agent.schema.search(namespace="namespace2") - assert results - assert results.count == 3 - assert len(results.results) == 3 + def test_update_schema_version(self): ... 
- def test_search_offset(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - results = agent.schema.search(namespace="namespace2", offset=1) - assert results - assert results.count == 3 - assert len(results.results) == 2 + def test_search_schema_version(self): ... - def test_search_limit(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - results = agent.schema.search(namespace="namespace2", limit=1) - assert results - assert results.count == 3 - assert len(results.results) == 1 - - def test_search_limit_offset(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - results = agent.schema.search(namespace="namespace2", limit=2, offset=2) - assert results - assert results.count == 3 - assert len(results.results) == 1 - - def test_search_query(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - results = agent.schema.search(namespace="namespace2", search_str="bedb") - assert results - assert results.count == 2 - assert len(results.results) == 2 + def test_search_schema_version_with_tags(self): ... 
@pytest.mark.parametrize( "namespace, name", [ - ["namespace1", "2.0.0"], + ["namespace2", "bedmaker"], ], ) - def test_create_group(self, namespace, name): + def test_schema_delete(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: - group_name = "new_group" - agent.schema.group_create( - namespace=namespace, name=group_name, description="new group" - ) - assert agent.schema.group_exist(namespace=namespace, name=group_name) + assert agent.schema.schema_exist(namespace=namespace, name=name) + agent.schema.delete_schema(namespace=namespace, name=name) + assert not agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + assert not agent.schema.schema_exist(namespace=namespace, name=name) @pytest.mark.parametrize( "namespace, name", [ - ["namespace1", "2.0.0"], + ["namespace2", "bedmaker"], ], ) - def test_delete_group(self, namespace, name): + def test_schema_version_delete(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: - group_name = "new_group" - agent.schema.group_create( - namespace=namespace, name=group_name, description="new group" - ) - assert agent.schema.group_exist(namespace=namespace, name=group_name) - agent.schema.group_delete(namespace=namespace, name=group_name) - assert not agent.schema.group_exist(namespace=namespace, name=group_name) + assert agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + agent.schema.delete_version(namespace=namespace, name=name, version="1.0.0") + assert not agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + assert agent.schema.schema_exist(namespace=namespace, name=name) - @pytest.mark.parametrize( - "namespace, name", - [ - ["namespace1", "2.0.0"], - ], - ) - def test_add_to_group(self, namespace, name): + +class TestSchemaTags: + def test_insert_tags(self): with PEPDBAgentContextManager(add_schemas=True) as agent: - group_name = "new_group" - agent.schema.group_create( - 
namespace=namespace, name=group_name, description="new group" - ) - agent.schema.group_add_schema( - namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace + new_tag1 = "new_tag" + new_tag2 = "tag2" + agent.schema.add_tag_to_schema( + "namespace1", "2.0.0", "1.0.0", tag=[new_tag1, new_tag2] ) - group_annot = agent.schema.group_get(namespace=namespace, name=group_name) - assert group_annot.schemas[0].name == name + + result = agent.schema.get_version_info("namespace1", "2.0.0", "1.0.0") + + assert new_tag1 in result.tags + assert new_tag2 in result.tags + + def test_insert_one_tag(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + new_tag1 = "new_tag" + agent.schema.add_tag_to_schema("namespace1", "2.0.0", "1.0.0", tag=new_tag1) + result = agent.schema.get_version_info("namespace1", "2.0.0", "1.0.0") + assert new_tag1 in result.tags @pytest.mark.parametrize( "namespace, name", [ - ["namespace1", "2.0.0"], + ["namespace2", "bedmaker"], ], ) - def test_remove_from_group(self, namespace, name): + def test_delete_tag(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: - group_name = "new_group" - agent.schema.group_create( - namespace=namespace, name=group_name, description="new group" - ) - agent.schema.group_add_schema( - namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace - ) - group_annot = agent.schema.group_get(namespace=namespace, name=group_name) - assert len(group_annot.schemas) == 1 - - agent.schema.group_remove_schema( - namespace=namespace, name=group_name, schema_name=name, schema_namespace=namespace - ) - group_annot = agent.schema.group_get(namespace=namespace, name=group_name) - assert len(group_annot.schemas) == 0 - - def test_search_group(self): - with PEPDBAgentContextManager(add_schemas=True) as agent: - group_name1 = "new_group1" - group_name2 = "new2" - group_name3 = "new_group3" - agent.schema.group_create( - namespace="namespace1", 
name=group_name1, description="new group" - ) - agent.schema.group_create(namespace="namespace1", name=group_name2, description="new") - agent.schema.group_create( - namespace="namespace1", name=group_name3, description="new group" - ) - - results = agent.schema.group_search(search_str="new_group") - - assert results.count == 2 - assert len(results.results) == 2 + new_tag1 = "new_tag" + agent.schema.add_tag_to_schema(namespace, name, "1.0.0", tag=new_tag1) + result = agent.schema.get_version_info(namespace, name, "1.0.0") + assert new_tag1 in result.tags + agent.schema.remove_tag_from_schema(namespace, name, "1.0.0", tag=new_tag1) + result = agent.schema.get_version_info(namespace, name, "1.0.0") + assert not new_tag1 in result.tags diff --git a/tests/utils.py b/tests/utils.py index 2afc623..7505708 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -120,7 +120,16 @@ def _add_schemas(self): for name, path in item.items(): file_dict = read_yaml_file(path) - pepdb_con.schema.create(namespace=namespace, name=name[0:-5], schema=file_dict) + pepdb_con.schema.create( + namespace=namespace, + name=name[0:-5], + version="1.0.0", + schema_value=file_dict, + maintainers="Teddy", + contributors="Teddy, John", + release_notes="Initial release", + tags=["latest", "gf1"], + ) @property def agent(self) -> PEPDatabaseAgent: From 9d9aaa9ba306d7b936489795ed36101abffbd619 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 12 Mar 2025 15:12:14 -0400 Subject: [PATCH 02/22] Accommodated new schema structure to projects --- pepdbagent/modules/annotation.py | 12 ++--- pepdbagent/modules/project.py | 33 +++++++++----- pepdbagent/modules/schema.py | 76 +++++++++++++++++++++++++++++--- pepdbagent/modules/user.py | 2 +- pepdbagent/utils.py | 4 +- 5 files changed, 101 insertions(+), 26 deletions(-) diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 8f755b0..4698cda 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ 
-212,7 +212,7 @@ def _get_single_annotation( last_update_date=str(query_result.last_update_date), digest=query_result.digest, pep_schema=( - f"{query_result.schema_mapping.namespace}/{query_result.schema_mapping.name}" + f"{query_result.schema_mapping.schema_mapping.namespace}/{query_result.schema_mapping.schema_mapping.name}" if query_result.schema_mapping else None ), @@ -326,6 +326,7 @@ def _get_projects( statement = self._add_date_filter_if_provided( statement, filter_by, filter_start_date, filter_end_date ) + statement = statement statement = self._add_order_by_keyword(statement, by=order_by, desc=order_desc) statement = statement.limit(limit).offset(offset) if pep_type: @@ -333,7 +334,8 @@ def _get_projects( results_list = [] with Session(self._sa_engine) as session: - results = session.scalars(statement) + # Unique should be called because of the join with schema_mapping + results = session.scalars(statement).unique() for result in results: results_list.append( AnnotationModel( @@ -347,7 +349,7 @@ def _get_projects( last_update_date=str(result.last_update_date), digest=result.digest, pep_schema=( - f"{result.schema_mapping.namespace}/{result.schema_mapping.name}" + f"{result.schema_mapping.schema_mapping.namespace}/{result.schema_mapping.schema_mapping.name}" if result.schema_mapping else None ), @@ -548,7 +550,7 @@ def get_by_rp_list( statement = select(Projects).where(or_(*or_statement_list)) anno_results = [] with Session(self._sa_engine) as session: - query_result = session.scalars(statement) + query_result = session.scalars(statement).unique() for result in query_result: project_obj = result annot = AnnotationModel( @@ -562,7 +564,7 @@ def get_by_rp_list( last_update_date=str(project_obj.last_update_date), digest=project_obj.digest, pep_schema=( - f"{project_obj.schema_mapping.namespace}/{project_obj.schema_mapping.name}" + 
f"{project_obj.schema_mapping.schema_mapping.namespace}/{project_obj.schema_mapping.schema_mapping.name}:{project_obj.schema_mapping.version}" if project_obj.schema_mapping else None ), diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 83a045d..386a064 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -369,10 +369,13 @@ def create( number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY]) if pep_schema: - schema_namespace, schema_name = schema_path_converter(pep_schema) + schema_namespace, schema_name, _ = schema_path_converter(pep_schema) with Session(self._sa_engine) as session: + schema_mapping = session.scalar( - select(SchemaRecords).where( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( and_( SchemaRecords.namespace == schema_namespace, SchemaRecords.name == schema_name, @@ -694,18 +697,23 @@ def _convert_update_schema_id(session: Session, update_values: dict): return None """ if "pep_schema" in update_values: - schema_namespace, schema_name = schema_path_converter( + schema_namespace, schema_name, schema_version = schema_path_converter( update_values["pep_schema"] ) # TODO: fix it. - schema_mapping = session.scalar( - select(SchemaVersions) - .join(SchemaRecords) - .where( - and_( - SchemaRecords.namespace == schema_namespace, - SchemaRecords.name == schema_name, - ) + + if schema_version: + where_clause = and_( + SchemaRecords.namespace == schema_namespace, + SchemaRecords.name == schema_name, + SchemaVersions.version == schema_version, ) + else: + where_clause = and_( + SchemaRecords.namespace == schema_namespace, + SchemaRecords.name == schema_name, + ) + schema_mapping = session.scalar( + select(SchemaVersions).join(SchemaRecords).where(where_clause) ) if not schema_mapping: raise SchemaDoesNotExistError( @@ -713,6 +721,7 @@ def _convert_update_schema_id(session: Session, update_values: dict): f"Project won't be updated." 
) update_values["schema_id"] = schema_mapping.id + del update_values["pep_schema"] def _update_samples( self, @@ -1070,7 +1079,7 @@ def fork( fork_prj.forked_from_id = original_prj.id fork_prj.pop = original_prj.pop fork_prj.submission_date = original_prj.submission_date - fork_prj.pep_schema = original_prj.pep_schema + fork_prj.schema_id = original_prj.schema_id fork_prj.description = description or original_prj.description session.commit() diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 687112c..282b0d9 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -235,18 +235,82 @@ def update_schema_version( version: str, update_fields: Union[UpdateSchemaVersionFields, dict], ) -> None: - """ """ - # flag_modified(update_fields, "schema_value") - ... + """ + Update schema version in the database. + + :param namespace: user namespace + :param name: schema name + :param version: schema version + :param update_fields: fields to be updated. Fields are optional, and include: + - contributors: str + - schema_value: dict + - release_notes: str + """ + + update_fields = UpdateSchemaVersionFields(**update_fields) + update_fields = update_fields.model_dump(exclude_unset=True, exclude_defaults=True) + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, + ) + ) + ) + + if not schema_obj: + raise SchemaDoesNotExistError( + f"Schema '{name}' with version '{version}' does not exist in the database. Unable to update version." 
+ ) + + for field, value in update_fields.items(): + setattr(schema_obj, field, value) + if field == "schema_value": + flag_modified(schema_obj, field) + + session.commit() def update_schema_record( self, namespace: str, name: str, - update_fields: UpdateSchemaRecordFields, + update_fields: Union[UpdateSchemaRecordFields, dict], ) -> None: - """ """ - ... + """ + Update schema record in the database. + + :param namespace: user namespace + :param name: schema name + :param update_fields: fields to be updated. Fields are optional, and include: + - maintainers: str + - lifecycle_stage: str + - private: bool + - name: str + """ + + update_fields = UpdateSchemaRecordFields(**update_fields) + + update_fields = update_fields.model_dump(exclude_unset=True, exclude_defaults=True) + + with Session(self._sa_engine) as session: + schema_obj = session.scalar( + select(SchemaRecords).where( + and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name) + ) + ) + + if not schema_obj: + raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + + for field, value in update_fields.items(): + setattr(schema_obj, field, value) + + session.commit() def schema_exist(self, namespace: str, name: str) -> bool: """ diff --git a/pepdbagent/modules/user.py b/pepdbagent/modules/user.py index 125c7c1..e6e1ce0 100644 --- a/pepdbagent/modules/user.py +++ b/pepdbagent/modules/user.py @@ -177,7 +177,7 @@ def get_favorites(self, namespace: str) -> AnnotationList: last_update_date=str(prj.last_update_date), submission_date=str(prj.submission_date), digest=prj.digest, - pep_schema=prj.pep_schema, + pep_schema=f"{prj.schema_mapping.schema_mapping.namespace}/{prj.schema_mapping.schema_mapping.name}:{prj.schema_mapping.version}", pop=prj.pop, stars_number=prj.number_of_stars, forked_from=( diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index 2c4f990..34eecb0 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -80,7 +80,7 @@ def 
registry_path_converter(registry_path: str) -> Tuple[str, str, str]: raise RegistryPathError(f"Error in: '{registry_path}'") -def schema_path_converter(schema_path: str) -> Tuple[str, str]: +def schema_path_converter(schema_path: str) -> Tuple[str, str, str]: """ Convert schema path to namespace, name @@ -89,7 +89,7 @@ def schema_path_converter(schema_path: str) -> Tuple[str, str]: """ if "/" in schema_path: namespace, name = schema_path.split("/") - return namespace, name + return namespace, name, None raise RegistryPathError(f"Error in: '{schema_path}'") From f9f57043278da19851ed1346ad4725b3869c6415 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 13 Mar 2025 23:12:40 -0400 Subject: [PATCH 03/22] Added tests and fixed code --- pepdbagent/const.py | 2 + pepdbagent/models.py | 2 + pepdbagent/modules/project.py | 21 ++-- pepdbagent/modules/schema.py | 140 ++++++++++++++---------- pepdbagent/utils.py | 10 +- tests/test_schema.py | 194 ++++++++++++++++++++++++++++++---- tests/utils.py | 7 +- 7 files changed, 279 insertions(+), 97 deletions(-) diff --git a/pepdbagent/const.py b/pepdbagent/const.py index aae67f6..235afaf 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -22,3 +22,5 @@ PEPHUB_SAMPLE_ID_KEY = "ph_id" MAX_HISTORY_SAMPLES_NUMBER = 2000 + +DEFAULT_TAG_VERSION = "default" diff --git a/pepdbagent/models.py b/pepdbagent/models.py index b82509f..05fd1c5 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -260,6 +260,8 @@ class SchemaVersionAnnotation(BaseModel): Schema version annotation model """ + namespace: str + name: str version: str contributors: Optional[str] = "" release_notes: Optional[str] = "" diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 386a064..9d1b1cf 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -369,7 +369,7 @@ def create( number_of_samples = len(proj_dict[SAMPLE_RAW_DICT_KEY]) if pep_schema: - schema_namespace, schema_name, _ = 
schema_path_converter(pep_schema) + schema_namespace, schema_name, schema_version = schema_path_converter(pep_schema) with Session(self._sa_engine) as session: schema_mapping = session.scalar( @@ -379,6 +379,7 @@ def create( and_( SchemaRecords.namespace == schema_namespace, SchemaRecords.name == schema_name, + SchemaVersions.version == schema_version, ) ) ) @@ -699,19 +700,13 @@ def _convert_update_schema_id(session: Session, update_values: dict): if "pep_schema" in update_values: schema_namespace, schema_name, schema_version = schema_path_converter( update_values["pep_schema"] - ) # TODO: fix it. + ) + where_clause = and_( + SchemaRecords.namespace == schema_namespace, + SchemaRecords.name == schema_name, + SchemaVersions.version == schema_version, + ) - if schema_version: - where_clause = and_( - SchemaRecords.namespace == schema_namespace, - SchemaRecords.name == schema_name, - SchemaVersions.version == schema_version, - ) - else: - where_clause = and_( - SchemaRecords.namespace == schema_namespace, - SchemaRecords.name == schema_name, - ) schema_mapping = session.scalar( select(SchemaVersions).join(SchemaRecords).where(where_clause) ) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 282b0d9..3f732de 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -7,7 +7,7 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from pepdbagent.const import PKG_NAME +from pepdbagent.const import PKG_NAME, DEFAULT_TAG_VERSION from pepdbagent.db_utils import BaseEngine, SchemaRecords, SchemaTags, SchemaVersions, User from pepdbagent.exceptions import ( SchemaAlreadyExistsError, @@ -75,7 +75,7 @@ def create( namespace: str, name: str, schema_value: dict, - version: str = "default", + version: str = DEFAULT_TAG_VERSION, description: str = "", lifecycle_stage: str = "", maintainers: str = "", @@ -215,13 +215,11 @@ def add_version( contributors=contributors, ) - for tag_name, tag_value in 
tags: - tag_obj = session.scalar(select(SchemaTags).where(SchemaTags.tag_name == tag_name)) - if not tag_obj: - tag_obj = SchemaTags( - tag_name=tag_name, tag_value=tag_value, schema_mapping=schema_version_obj - ) - session.add(tag_obj) + for tag_name, tag_value in tags.items(): + tag_obj = SchemaTags( + tag_name=tag_name, tag_value=tag_value, schema_mapping=schema_version_obj + ) + session.add(tag_obj) session.add(schema_version_obj) session.commit() @@ -246,8 +244,8 @@ def update_schema_version( - schema_value: dict - release_notes: str """ - - update_fields = UpdateSchemaVersionFields(**update_fields) + if isinstance(update_fields, dict): + update_fields = UpdateSchemaVersionFields(**update_fields) update_fields = update_fields.model_dump(exclude_unset=True, exclude_defaults=True) with Session(self._sa_engine) as session: @@ -293,7 +291,8 @@ def update_schema_record( - name: str """ - update_fields = UpdateSchemaRecordFields(**update_fields) + if isinstance(update_fields, dict): + update_fields = UpdateSchemaRecordFields(**update_fields) update_fields = update_fields.model_dump(exclude_unset=True, exclude_defaults=True) @@ -382,6 +381,7 @@ def get_schema_info(self, namespace: str, name: str) -> SchemaRecordAnnotation: maintainers=schema_obj.maintainers, private=schema_obj.private, last_update_date=schema_obj.last_update_date, + lifecycle_stage=schema_obj.lifecycle_stage, ) def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVersionAnnotation: @@ -414,6 +414,8 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer ) return SchemaVersionAnnotation( + namespace=version_obj.schema_mapping.namespace, + name=version_obj.schema_mapping.name, version=version_obj.version, contributors=version_obj.contributors, release_notes=version_obj.release_notes, @@ -471,24 +473,24 @@ def query_schemas( statement = self._add_order_by_schemas_keyword(statement, by=order_by, desc=order_desc) results_objects = 
session.scalars(statement) - return SchemaSearchResult( - pagination=PaginationResult( - page=page, - page_size=page_size, - total=total, - ), - results=[ - SchemaRecordAnnotation( - namespace=result.namespace, - name=result.name, - description=result.description, - maintainers=result.maintainers, - private=result.private, - last_update_date=result.last_update_date, - ) - for result in results_objects - ], - ) + return SchemaSearchResult( + pagination=PaginationResult( + page=page, + page_size=page_size, + total=total, + ), + results=[ + SchemaRecordAnnotation( + namespace=result.namespace, + name=result.name, + description=result.description, + maintainers=result.maintainers, + private=result.private, + last_update_date=result.last_update_date, + ) + for result in results_objects + ], + ) def query_schema_version( self, @@ -528,41 +530,65 @@ def query_schema_version( if not schema_obj: raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") - where_statement = or_( - SchemaVersions.version.ilike(f"%{search_str}%"), - SchemaVersions.release_notes.ilike(f"%{search_str}%"), + where_statement = and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + or_( + SchemaVersions.version.ilike(f"%{search_str}%"), + SchemaVersions.release_notes.ilike(f"%{search_str}%"), + ), ) + if tag: where_statement = and_(where_statement, SchemaTags.tag_name == tag) + total_statement = ( + select(func.count(SchemaVersions.id)) + .join(SchemaRecords) + .join(SchemaTags) + .where(where_statement) + ) + find_statement = ( + select(SchemaVersions) + .join(SchemaRecords) + .join(SchemaTags) + .where(where_statement) + ) - total = session.scalar(select(func.count(SchemaVersions.id)).where(where_statement)) + else: + total_statement = ( + select(func.count(SchemaVersions.id)) + .join(SchemaRecords) + .where(where_statement) + ) + find_statement = select(SchemaVersions).join(SchemaRecords).where(where_statement) + + total = 
session.scalar(total_statement) results_objects = session.scalars( - select(SchemaVersions) - .where(where_statement) - .limit(page_size) - .offset(page * page_size) + find_statement.limit(page_size).offset(page * page_size) + ).unique() + + return SchemaVersionSearchResult( + pagination=PaginationResult( + page=page, + page_size=page_size, + total=total, + ), + results=[ + SchemaVersionAnnotation( + namespace=result.schema_mapping.namespace, + name=result.schema_mapping.name, + version=result.version, + contributors=result.contributors, + release_notes=result.release_notes, + tags=[tag.tag_name for tag in result.tags_mapping], + release_date=result.release_date, + last_update_date=result.last_update_date, + ) + for result in results_objects + ], ) - return SchemaVersionSearchResult( - pagination=PaginationResult( - page=page, - page_size=page_size, - total=total, - ), - results=[ - SchemaVersionAnnotation( - version=result.version, - contributors=result.contributors, - release_notes=result.release_notes, - tags=[tag.tag_name for tag in result.tags_mapping], - release_date=result.release_date, - last_update_date=result.last_update_date, - ) - for result in results_objects - ], - ) - def delete_schema(self, namespace: str, name: str) -> None: """ Delete schema from the database. 
diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index 34eecb0..b13073f 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -85,11 +85,15 @@ def schema_path_converter(schema_path: str) -> Tuple[str, str, str]: Convert schema path to namespace, name :param schema_path: schema path that has structure: "namespace/name.yaml" - :return: tuple(namespace, name) + :return: tuple(namespace, name, version) """ if "/" in schema_path: - namespace, name = schema_path.split("/") - return namespace, name, None + namespace, name_tag = schema_path.split("/") + if ":" in name_tag: + name, version = name_tag.split(":") + return namespace, name, version + + return namespace, name_tag, "default" raise RegistryPathError(f"Error in: '{schema_path}'") diff --git a/tests/test_schema.py b/tests/test_schema.py index 29819bb..34892cc 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,7 +1,13 @@ +from distutils.ccompiler import new_compiler + import pytest from .utils import PEPDBAgentContextManager +from pepdbagent.models import UpdateSchemaVersionFields, UpdateSchemaRecordFields + +DEFAULT_SCHEMA_VERSION = "default" + @pytest.mark.skipif( not PEPDBAgentContextManager().db_setup(), @@ -18,25 +24,157 @@ class TestSamples: def test_get(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: assert agent.schema.schema_exist(namespace=namespace, name=name) - assert agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") - schema = agent.schema.get(namespace=namespace, name=name, version="1.0.0") + assert agent.schema.version_exist( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) + schema = agent.schema.get( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) assert schema - def test_search_schema(self): ... 
+ def test_update_schema(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + new_maintainers = "New Maintainer" + new_lifecycle_stage = "New Stage" + new_private = True + new_name = "new_schema_name" + + agent.schema.update_schema_record( + "namespace1", + "2.0.0", + UpdateSchemaRecordFields( + maintainers=new_maintainers, + lifecycle_stage=new_lifecycle_stage, + private=new_private, + name=new_name, + ), + ) + result = agent.schema.get_schema_info("namespace1", new_name) + assert result.maintainers == new_maintainers + assert result.lifecycle_stage == new_lifecycle_stage + assert result.private == new_private + assert result.name == new_name - def test_search_schema_namespace(self): ... + def test_update_schema_update_date(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + version_name = "2.0.0" + version_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + + first_time = agent.schema.get_schema_info("namespace1", "2.0.0").last_update_date + + agent.schema.add_version( + "namespace1", + version_name, + "pablo1", + schema_value=version_schema, + contributors="Teddy", + release_notes="Initial release", + ) + result = agent.schema.get_schema_info("namespace1", "2.0.0") + assert result.last_update_date != first_time - def test_update_schema(self): ... 
+ def test_add_schema_version(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + new_schema = { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + } + new_contributors = "New Maintainer" + new_release_notes = "New release" + agent.schema.update_schema_version( + "namespace1", + "2.0.0", + DEFAULT_SCHEMA_VERSION, + UpdateSchemaVersionFields( + schema_value=new_schema, + contributors=new_contributors, + release_notes=new_release_notes, + ), + ) + result = agent.schema.get_version_info("namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION) + assert result.contributors == new_contributors + assert result.release_notes == new_release_notes + assert agent.schema.get("namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION) == new_schema - def test_update_schema_update_date(self): ... + def test_search_schema_version(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + result = agent.schema.query_schema_version("namespace1", "2.0.0") + assert result.pagination.total == 1 + assert len(result.results) == 1 - def test_add_schema_version(self): ... + def test_search_schema_version_with_tags(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + schema1 = { + "type": "object", + "properties": { + "name1": {"type": "string"}, + "age1": {"type": "integer"}, + }, + } + schema2 = { + "type": "object", + "properties": { + "name2": {"type": "string"}, + "age2": {"type": "integer"}, + }, + } + + agent.schema.add_version( + "namespace1", + "2.0.0", + "bino1", + schema_value=schema1, + tags=["tag1", "bioinfo"], + release_notes="computer change", + ) + agent.schema.add_version( + "namespace1", + "2.0.0", + "bino2", + schema_value=schema2, + tags=["bioinfo"], + release_notes="language", + ) - def test_update_schema_version(self): ... + result = agent.schema.query_schema_version("namespace1", "2.0.0", tag="tag1") - def test_search_schema_version(self): ... 
+ assert result.pagination.total == 1 + assert len(result.results) == 1 - def test_search_schema_version_with_tags(self): ... + result = agent.schema.query_schema_version("namespace1", "2.0.0", tag="bioinfo") + + assert result.pagination.total == 2 + assert len(result.results) == 2 + + def test_search_schema(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + result = agent.schema.query_schemas(search_str="bed") + assert result.pagination.total == 3 + assert len(result.results) == 3 + + def test_search_schema_namespace(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + result = agent.schema.query_schemas("namespace1") + assert result.pagination.total == 2 + assert result.results[0].namespace == "namespace1" + assert result.results[0].name == "2.0.0" + + def test_search_schema_page_number(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + result = agent.schema.query_schemas("namespace2", page_size=2, page=1) + assert result.pagination.total == 3 + assert result.pagination.page == 1 + assert result.pagination.page_size == 2 + assert len(result.results) == 1 @pytest.mark.parametrize( "namespace, name", @@ -48,7 +186,9 @@ def test_schema_delete(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: assert agent.schema.schema_exist(namespace=namespace, name=name) agent.schema.delete_schema(namespace=namespace, name=name) - assert not agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + assert not agent.schema.version_exist( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) assert not agent.schema.schema_exist(namespace=namespace, name=name) @pytest.mark.parametrize( @@ -59,9 +199,15 @@ def test_schema_delete(self, namespace, name): ) def test_schema_version_delete(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: - assert agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") - 
agent.schema.delete_version(namespace=namespace, name=name, version="1.0.0") - assert not agent.schema.version_exist(namespace=namespace, name=name, version="1.0.0") + assert agent.schema.version_exist( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) + agent.schema.delete_version( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) + assert not agent.schema.version_exist( + namespace=namespace, name=name, version=DEFAULT_SCHEMA_VERSION + ) assert agent.schema.schema_exist(namespace=namespace, name=name) @@ -71,10 +217,10 @@ def test_insert_tags(self): new_tag1 = "new_tag" new_tag2 = "tag2" agent.schema.add_tag_to_schema( - "namespace1", "2.0.0", "1.0.0", tag=[new_tag1, new_tag2] + "namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION, tag=[new_tag1, new_tag2] ) - result = agent.schema.get_version_info("namespace1", "2.0.0", "1.0.0") + result = agent.schema.get_version_info("namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION) assert new_tag1 in result.tags assert new_tag2 in result.tags @@ -82,8 +228,10 @@ def test_insert_tags(self): def test_insert_one_tag(self): with PEPDBAgentContextManager(add_schemas=True) as agent: new_tag1 = "new_tag" - agent.schema.add_tag_to_schema("namespace1", "2.0.0", "1.0.0", tag=new_tag1) - result = agent.schema.get_version_info("namespace1", "2.0.0", "1.0.0") + agent.schema.add_tag_to_schema( + "namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION, tag=new_tag1 + ) + result = agent.schema.get_version_info("namespace1", "2.0.0", DEFAULT_SCHEMA_VERSION) assert new_tag1 in result.tags @pytest.mark.parametrize( @@ -95,9 +243,11 @@ def test_insert_one_tag(self): def test_delete_tag(self, namespace, name): with PEPDBAgentContextManager(add_schemas=True) as agent: new_tag1 = "new_tag" - agent.schema.add_tag_to_schema(namespace, name, "1.0.0", tag=new_tag1) - result = agent.schema.get_version_info(namespace, name, "1.0.0") + agent.schema.add_tag_to_schema(namespace, name, DEFAULT_SCHEMA_VERSION, tag=new_tag1) + result = 
agent.schema.get_version_info(namespace, name, DEFAULT_SCHEMA_VERSION) assert new_tag1 in result.tags - agent.schema.remove_tag_from_schema(namespace, name, "1.0.0", tag=new_tag1) - result = agent.schema.get_version_info(namespace, name, "1.0.0") + agent.schema.remove_tag_from_schema( + namespace, name, DEFAULT_SCHEMA_VERSION, tag=new_tag1 + ) + result = agent.schema.get_version_info(namespace, name, DEFAULT_SCHEMA_VERSION) assert not new_tag1 in result.tags diff --git a/tests/utils.py b/tests/utils.py index 7505708..93f4934 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -95,6 +95,9 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, exc_traceback): self.db_engine.delete_schema() + def __del__(self): + self.db_engine.delete_schema() + def _insert_data(self): pepdb_con = PEPDatabaseAgent(dsn=self.url, echo=self._echo) for namespace, item in list_of_available_peps().items(): @@ -111,7 +114,7 @@ def _insert_data(self): is_private=private, project=prj, overwrite=True, - pep_schema="namespace1/2.0.0", + pep_schema="namespace1/2.0.0:default", ) def _add_schemas(self): @@ -123,7 +126,7 @@ def _add_schemas(self): pepdb_con.schema.create( namespace=namespace, name=name[0:-5], - version="1.0.0", + version="default", schema_value=file_dict, maintainers="Teddy", contributors="Teddy, John", From 842d03c05187110b38faf00ac812d1fcdb1fdf6b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 14 Mar 2025 11:26:47 -0400 Subject: [PATCH 04/22] fixed last updated date --- pepdbagent/db_utils.py | 93 ------------------------------------ pepdbagent/modules/schema.py | 3 ++ 2 files changed, 3 insertions(+), 93 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 7d7a297..e844133 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -362,99 +362,6 @@ class SchemaTags(Base): ) -# Update the `last_update_date` when versions_mapping is modified -@event.listens_for(SchemaRecords.versions_mapping, "append") -def 
update_last_update_date(target, value, initiator): - target.last_update_date = datetime.datetime.now() - - -@event.listens_for(Session, "before_flush") -def before_flush(session, flush_context, instances): - for instance in session.dirty: - if isinstance(instance, SchemaRecords): - instance.last_update_date = datetime.datetime.now() - - -# class SchemaTagRelations(Base): -# __tablename__ = "schema_tag_relations" -# -# version_id: Mapped[int] = mapped_column( -# ForeignKey("schema_versions.id", ondelete="CASCADE"), index=True, primary_key=True -# ) -# tag_id: Mapped[int] = mapped_column( -# ForeignKey("schema_tags.id", ondelete="CASCADE"), index=True, primary_key=True -# ) -# -# tag_mapping: Mapped["SchemaRecords"] = relationship( -# "SchemaVersions", back_populates="tag_mapping" -# ) -# version_mapping: Mapped["SchemaTags"] = relationship( -# "SchemaTags", back_populates="version_mapping" -# ) - - -# class SchemaRecords(Base): -# -# __tablename__ = "schemas" -# -# id: Mapped[int] = mapped_column(primary_key=True, index=True) -# namespace: Mapped[str] = mapped_column(ForeignKey("users.namespace", ondelete="CASCADE")) -# name: Mapped[str] = mapped_column(nullable=False, index=True) -# description: Mapped[Optional[str]] = mapped_column(nullable=True, index=True) -# schema_json: Mapped[dict] = mapped_column(JSON, server_default=FetchedValue()) -# private: Mapped[bool] = mapped_column(default=False) -# submission_date: Mapped[datetime.datetime] = mapped_column(default=deliver_update_date) -# last_update_date: Mapped[Optional[datetime.datetime]] = mapped_column( -# default=deliver_update_date, onupdate=deliver_update_date -# ) -# -# projects_mappings: Mapped[List["Projects"]] = relationship( -# "Projects", back_populates="schema_mapping" -# ) -# group_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( -# "SchemaGroupRelations", back_populates="schema_mapping" -# ) -# -# __table_args__ = (UniqueConstraint("namespace", "name"),) -# -# -# class 
SchemaGroups(Base): -# -# __tablename__ = "schema_groups" -# -# id: Mapped[int] = mapped_column(primary_key=True, index=True) -# namespace: Mapped[str] = mapped_column( -# ForeignKey("users.namespace", ondelete="CASCADE"), index=True -# ) -# name: Mapped[str] = mapped_column(nullable=False, index=True) -# description: Mapped[Optional[str]] = mapped_column(nullable=True) -# -# schema_relation_mapping: Mapped[List["SchemaGroupRelations"]] = relationship( -# "SchemaGroupRelations", back_populates="group_mapping" -# ) -# -# __table_args__ = (UniqueConstraint("namespace", "name"),) -# -# -# class SchemaGroupRelations(Base): -# -# __tablename__ = "schema_group_relations" -# -# schema_id: Mapped[int] = mapped_column( -# ForeignKey("schemas.id", ondelete="CASCADE"), index=True, primary_key=True -# ) -# group_id: Mapped[int] = mapped_column( -# ForeignKey("schema_groups.id", ondelete="CASCADE"), index=True, primary_key=True -# ) -# -# schema_mapping: Mapped["SchemaRecords"] = relationship( -# "SchemaRecords", back_populates="group_relation_mapping" -# ) -# group_mapping: Mapped["SchemaGroups"] = relationship( -# "SchemaGroups", back_populates="schema_relation_mapping" -# ) - - class TarNamespace(Base): __tablename__ = "namespace_archives" diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 3f732de..4f3aee1 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -207,6 +207,8 @@ def add_version( ), ) + schema_obj.last_update_date = func.now() + schema_version_obj = SchemaVersions( schema_id=schema_obj.id, version=version, @@ -265,6 +267,7 @@ def update_schema_version( raise SchemaDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database. Unable to update version." 
) + schema_obj.last_update_date = func.now() for field, value in update_fields.items(): setattr(schema_obj, field, value) From ec98ab947b30a670688a6af59851d68668c4320e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 14 Mar 2025 16:10:40 -0400 Subject: [PATCH 05/22] More fixes --- pepdbagent/exceptions.py | 16 +++++++-------- pepdbagent/models.py | 2 +- pepdbagent/modules/project.py | 3 --- pepdbagent/modules/schema.py | 38 +++++++++++++++++++---------------- tests/test_schema.py | 4 +--- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index caf152b..ef27a4d 100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -129,21 +129,21 @@ def __init__(self, msg=""): super().__init__(f"""Schema already exists. {msg}""") -class SchemaGroupDoesNotExistError(PEPDatabaseAgentError): +class SchemaVersionDoesNotExistError(PEPDatabaseAgentError): def __init__(self, msg=""): - super().__init__(f"""Schema group does not exist. {msg}""") + super().__init__(f"""Schema version does not exist. {msg}""") -class SchemaGroupAlreadyExistsError(PEPDatabaseAgentError): +class SchemaVersionAlreadyExistsError(PEPDatabaseAgentError): def __init__(self, msg=""): - super().__init__(f"""Schema group already exists. {msg}""") + super().__init__(f"""Schema version already exists. {msg}""") -class SchemaAlreadyInGroupError(PEPDatabaseAgentError): +class SchemaTagDoesNotExistError(PEPDatabaseAgentError): def __init__(self, msg=""): - super().__init__(f"""Schema already in the group. {msg}""") + super().__init__(f"""Schema tag does not exist. {msg}""") -class SchemaIsNotInGroupError(PEPDatabaseAgentError): +class SchemaTagAlreadyExistsError(PEPDatabaseAgentError): def __init__(self, msg=""): - super().__init__(f"""Schema not found in group. {msg}""") + super().__init__(f"""Schema tag already exists. 
{msg}""") diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 05fd1c5..59e6cf0 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -265,7 +265,7 @@ class SchemaVersionAnnotation(BaseModel): version: str contributors: Optional[str] = "" release_notes: Optional[str] = "" - tags: List[str] + tags: Dict[str, Union[str, None]] = {} release_date: datetime.datetime last_update_date: datetime.datetime diff --git a/pepdbagent/modules/project.py b/pepdbagent/modules/project.py index 9d1b1cf..d5ee5a7 100644 --- a/pepdbagent/modules/project.py +++ b/pepdbagent/modules/project.py @@ -34,7 +34,6 @@ SchemaRecords, SchemaVersions, Subsamples, - TarNamespace, UpdateTypes, User, ) @@ -48,8 +47,6 @@ SchemaDoesNotExistError, ) from pepdbagent.models import ( - TarNamespaceModel, - TarNamespaceModelReturn, HistoryAnnotationModel, HistoryChangeModel, ProjectDict, diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 4f3aee1..1f0d9d6 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -2,8 +2,7 @@ from typing import List, Optional, Union, Dict -from sqlalchemy import Select, and_, delete, func, or_, select -from sqlalchemy.exc import IntegrityError +from sqlalchemy import Select, and_, func, or_, select from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified @@ -11,8 +10,11 @@ from pepdbagent.db_utils import BaseEngine, SchemaRecords, SchemaTags, SchemaVersions, User from pepdbagent.exceptions import ( SchemaAlreadyExistsError, - SchemaAlreadyInGroupError, + SchemaVersionDoesNotExistError, SchemaDoesNotExistError, + SchemaTagAlreadyExistsError, + SchemaTagDoesNotExistError, + SchemaVersionAlreadyExistsError, ) from pepdbagent.models import ( SchemaRecordAnnotation, @@ -66,7 +68,9 @@ def get(self, namespace: str, name: str, version: str) -> dict: ) if not schema_obj: - raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + raise 
SchemaVersionDoesNotExistError( + f"Schema '{name}' does not exist in the database" + ) return schema_obj.schema_value @@ -192,7 +196,7 @@ def add_version( if version_obj: if not overwrite: - raise SchemaAlreadyExistsError( + raise SchemaVersionAlreadyExistsError( f"Schema '{name}' with version '{version}' already exists in the database" ) @@ -264,7 +268,7 @@ def update_schema_version( ) if not schema_obj: - raise SchemaDoesNotExistError( + raise SchemaVersionDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database. Unable to update version." ) schema_obj.last_update_date = func.now() @@ -412,7 +416,7 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer ) if not version_obj: - raise SchemaDoesNotExistError( + raise SchemaVersionDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database" ) @@ -422,7 +426,7 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer version=version_obj.version, contributors=version_obj.contributors, release_notes=version_obj.release_notes, - tags=[tag.tag_name for tag in version_obj.tags_mapping], + tags={tag.tag_name: tag.tag_value for tag in version_obj.tags_mapping}, release_date=version_obj.release_date, last_update_date=version_obj.last_update_date, ) @@ -584,7 +588,7 @@ def query_schema_version( version=result.version, contributors=result.contributors, release_notes=result.release_notes, - tags=[tag.tag_name for tag in result.tags_mapping], + tags={tag.tag_name: tag.tag_value for tag in result.tags_mapping}, release_date=result.release_date, last_update_date=result.last_update_date, ) @@ -622,7 +626,7 @@ def delete_version(self, namespace: str, name: str, version: str) -> None: :param name: Name of the schema :param version: Version of the Schema - :raise: SchemaDoesNotExistError if version doesn't exist + :raise: SchemaVersionDoesNotExistError if version doesn't exist :return: None """ with 
Session(self._sa_engine) as session: @@ -638,7 +642,7 @@ def delete_version(self, namespace: str, name: str, version: str) -> None: ) ) if not schema_obj: - raise SchemaDoesNotExistError( + raise SchemaVersionDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database. Unable to update version." ) @@ -660,7 +664,7 @@ def add_tag_to_schema( :param version: Version of the Schema :param tag: Tag to be added. Can be a string, list of strings or dictionaries - :raise: SchemaDoesNotExistError if version doesn't exist + :raise: SchemaVersionDoesNotExistError if version doesn't exist :return: None """ @@ -679,7 +683,7 @@ def add_tag_to_schema( ) ) if not schema_obj: - raise SchemaDoesNotExistError( + raise SchemaVersionDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database. Unable to add tag." ) if isinstance(tag, str): @@ -693,7 +697,7 @@ def add_tag_to_schema( ) session.add(tag_obj) else: - raise SchemaAlreadyInGroupError( + raise SchemaTagAlreadyExistsError( f"Tag '{tag_name}' already exists in the schema" ) @@ -708,7 +712,7 @@ def remove_tag_from_schema(self, namespace: str, name: str, version: str, tag: s :param version: Version of the Schema :param tag: Tag to be removed - :raise: SchemaDoesNotExistError if version doesn't exist + :raise: SchemaVersionDoesNotExistError if version doesn't exist :return: None """ with Session(self._sa_engine) as session: @@ -724,7 +728,7 @@ def remove_tag_from_schema(self, namespace: str, name: str, version: str, tag: s ) ) if not schema_obj: - raise SchemaDoesNotExistError( + raise SchemaVersionDoesNotExistError( f"Schema '{name}' with version '{version}' does not exist in the database. Unable to remove tag." 
) @@ -734,7 +738,7 @@ def remove_tag_from_schema(self, namespace: str, name: str, version: str, tag: s ) ) if not tag_obj: - raise SchemaDoesNotExistError(f"Tag '{tag}' does not exist in the schema") + raise SchemaTagDoesNotExistError(f"Tag '{tag}' does not exist in the schema") session.delete(tag_obj) session.commit() diff --git a/tests/test_schema.py b/tests/test_schema.py index 34892cc..1b8f12f 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,5 +1,3 @@ -from distutils.ccompiler import new_compiler - import pytest from .utils import PEPDBAgentContextManager @@ -250,4 +248,4 @@ def test_delete_tag(self, namespace, name): namespace, name, DEFAULT_SCHEMA_VERSION, tag=new_tag1 ) result = agent.schema.get_version_info(namespace, name, DEFAULT_SCHEMA_VERSION) - assert not new_tag1 in result.tags + assert new_tag1 not in result.tags From 299b53cbcae5d4dc8cb64686c1d9b2df05d4bb8f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Sat, 15 Mar 2025 14:16:27 -0400 Subject: [PATCH 06/22] Updated version and models --- pepdbagent/_version.py | 2 +- pepdbagent/models.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index ea370a8..5e036fa 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.12.0" +__version__ = "0.12.0_dev0" diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 59e6cf0..58b3873 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -307,6 +307,7 @@ class UpdateSchemaRecordFields(BaseModel): lifecycle_stage: Optional[str] = None private: Optional[bool] = False name: Optional[str] = None + description: Optional[str] = None class UpdateSchemaVersionFields(BaseModel): From 16652bfd345b2955319aaf36c6b930a09e8246a4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 17 Mar 2025 13:06:49 -0400 Subject: [PATCH 07/22] Fixed tags --- pepdbagent/modules/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff 
--git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 1f0d9d6..00eb3a3 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -786,7 +786,9 @@ def _unify_tags( :raise: ValueError if tags are not in the correct format :return: dictionary of tags """ - if tags is None: + if not tags: + tags = {} + if tags == (None,): tags = {} elif isinstance(tags, str): tags = {tags: None} From 1f94ebef9a9ddd4d5e74e25ed8382c324c9bb834 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 18 Mar 2025 17:29:29 -0400 Subject: [PATCH 08/22] 1. Added number of schemas for users. 2. Added order to versions for schema record --- pepdbagent/db_utils.py | 10 ++++++- pepdbagent/models.py | 1 + pepdbagent/modules/annotation.py | 4 +-- pepdbagent/modules/namespace.py | 45 ++------------------------------ pepdbagent/modules/schema.py | 12 ++++++++- tests/test_schema.py | 10 ++++++- tests/test_updates.py | 10 +++---- tests/utils.py | 6 ++--- 8 files changed, 42 insertions(+), 56 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index e844133..5c66fa3 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -199,10 +199,14 @@ class User(Base): order_by="Stars.star_date.desc()", ) number_of_projects: Mapped[int] = mapped_column(default=0) + number_of_schemas: Mapped[int] = mapped_column(default=0) projects_mapping: Mapped[List["Projects"]] = relationship( "Projects", back_populates="namespace_mapping" ) + schemas_mapping: Mapped[List["SchemaRecords"]] = relationship( + "SchemaRecords", back_populates="user_mapping" + ) class Stars(Base): @@ -318,8 +322,12 @@ class SchemaRecords(Base): __table_args__ = (UniqueConstraint("namespace", "name"),) versions_mapping: Mapped[List["SchemaVersions"]] = relationship( - "SchemaVersions", back_populates="schema_mapping", cascade="all, delete-orphan" + "SchemaVersions", + back_populates="schema_mapping", + cascade="all, delete-orphan", + order_by="SchemaVersions.version", ) + 
user_mapping: Mapped["User"] = relationship("User", back_populates="schemas_mapping") class SchemaVersions(Base): diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 58b3873..fa9bda5 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -158,6 +158,7 @@ class NamespaceInfo(BaseModel): namespace: str number_of_projects: int + number_of_schemas: int class ListOfNamespaceInfo(BaseModel): diff --git a/pepdbagent/modules/annotation.py b/pepdbagent/modules/annotation.py index 4698cda..6481796 100644 --- a/pepdbagent/modules/annotation.py +++ b/pepdbagent/modules/annotation.py @@ -212,7 +212,7 @@ def _get_single_annotation( last_update_date=str(query_result.last_update_date), digest=query_result.digest, pep_schema=( - f"{query_result.schema_mapping.schema_mapping.namespace}/{query_result.schema_mapping.schema_mapping.name}" + f"{query_result.schema_mapping.schema_mapping.namespace}/{query_result.schema_mapping.schema_mapping.name}:{query_result.schema_mapping.version}" if query_result.schema_mapping else None ), @@ -349,7 +349,7 @@ def _get_projects( last_update_date=str(result.last_update_date), digest=result.digest, pep_schema=( - f"{result.schema_mapping.schema_mapping.namespace}/{result.schema_mapping.schema_mapping.name}" + f"{result.schema_mapping.schema_mapping.namespace}/{result.schema_mapping.schema_mapping.name}:{result.schema_mapping.version}" if result.schema_mapping else None ), diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 332c1cb..61559dc 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -174,49 +174,6 @@ def _add_condition( ) return statement - # old function, that counts namespace info based on Projects table - # def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: - # """ - # Get list of top n namespaces in the database - # - # :param limit: limit of results (top namespace ) - # :return: number_of_namespaces: int - # limit: int - # results: 
{ namespace: str - # number_of_projects: int - # } - # """ - # total_number_of_namespaces = self._count_namespace() - # - # statement = ( - # select( - # func.count(Projects.namespace).label("number_of_projects"), - # Projects.namespace, - # ) - # .select_from(Projects) - # .where(Projects.private.is_(False)) - # .limit(limit) - # .order_by(text("number_of_projects desc")) - # .group_by(Projects.namespace) - # ) - # - # with Session(self._sa_engine) as session: - # query_results = session.execute(statement).all() - # - # list_of_results = [] - # for result in query_results: - # list_of_results.append( - # NamespaceInfo( - # namespace=result.namespace, - # number_of_projects=result.number_of_projects, - # ) - # ) - # return ListOfNamespaceInfo( - # number_of_namespaces=total_number_of_namespaces, - # limit=limit, - # results=list_of_results, - # ) - def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: """ Get list of top n namespaces in the database @@ -228,6 +185,7 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: limit: int results: { namespace: str number_of_projects: int + number_of_schemas: int } """ with Session(self._sa_engine) as session: @@ -241,6 +199,7 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: NamespaceInfo( namespace=result.namespace, number_of_projects=result.number_of_projects, + number_of_schemas=result.number_of_schemas, ) ) return ListOfNamespaceInfo( diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 00eb3a3..8714ee6 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -125,6 +125,8 @@ def create( session.add(user) session.commit() + user.number_of_schemas += 1 + schema_obj = SchemaRecords( namespace=namespace, name=name, @@ -572,7 +574,9 @@ def query_schema_version( total = session.scalar(total_statement) results_objects = session.scalars( - find_statement.limit(page_size).offset(page * page_size) + 
find_statement.order_by(SchemaVersions.version.desc()) + .limit(page_size) + .offset(page * page_size) ).unique() return SchemaVersionSearchResult( @@ -615,6 +619,12 @@ def delete_schema(self, namespace: str, name: str) -> None: if not schema_obj: raise SchemaDoesNotExistError(f"Schema '{name}' does not exist in the database") + statement = select(User).where(User.namespace == namespace) + user = session.scalar(statement) + if user: + user.number_of_schemas -= 1 + session.commit() + session.delete(schema_obj) session.commit() diff --git a/tests/test_schema.py b/tests/test_schema.py index 1b8f12f..7b0c26f 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -4,7 +4,7 @@ from pepdbagent.models import UpdateSchemaVersionFields, UpdateSchemaRecordFields -DEFAULT_SCHEMA_VERSION = "default" +DEFAULT_SCHEMA_VERSION = "1.0.0" @pytest.mark.skipif( @@ -208,6 +208,14 @@ def test_schema_version_delete(self, namespace, name): ) assert agent.schema.schema_exist(namespace=namespace, name=name) + def test_number_of_schemas_in_namespace(self): + with PEPDBAgentContextManager(add_schemas=True) as agent: + for k in agent.namespace.info().results: + if k.namespace == "namespace1": + assert k.number_of_schemas == 2 + if k.namespace == "namespace2": + assert k.number_of_schemas == 3 + class TestSchemaTags: def test_insert_tags(self): diff --git a/tests/test_updates.py b/tests/test_updates.py index e057024..628772e 100644 --- a/tests/test_updates.py +++ b/tests/test_updates.py @@ -99,16 +99,16 @@ def test_update_project_description(self, namespace, name, new_description): def test_update_project_schema(self, namespace, name, new_schema): with PEPDBAgentContextManager(add_data=True) as agent: prj_annot = agent.annotation.get(namespace=namespace, name=name) - assert prj_annot.results[0].pep_schema == "namespace1/2.0.0" + assert prj_annot.results[0].pep_schema == "namespace1/2.0.0:1.0.0" agent.project.update( namespace=namespace, name=name, tag="default", - 
update_dict={"pep_schema": "namespace2/bedboss"}, + update_dict={"pep_schema": "namespace2/bedboss:1.0.0"}, ) prj_annot = agent.annotation.get(namespace=namespace, name=name) - assert prj_annot.results[0].pep_schema == "namespace2/bedboss" + assert prj_annot.results[0].pep_schema == "namespace2/bedboss:1.0.0" @pytest.mark.parametrize( "namespace, name, new_description", @@ -155,8 +155,8 @@ def test_update_whole_project(self, namespace, name): @pytest.mark.parametrize( "namespace, name, pep_schema", [ - ["namespace1", "amendments1", "namespace2/bedmaker"], - ["namespace2", "derive", "namespace2/bedbuncher"], + ["namespace1", "amendments1", "namespace2/bedmaker:1.0.0"], + ["namespace2", "derive", "namespace2/bedbuncher:1.0.0"], ], ) def test_update_pep_schema(self, namespace, name, pep_schema): diff --git a/tests/utils.py b/tests/utils.py index 93f4934..fc9cc89 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -114,7 +114,7 @@ def _insert_data(self): is_private=private, project=prj, overwrite=True, - pep_schema="namespace1/2.0.0:default", + pep_schema="namespace1/2.0.0:1.0.0", # TODO: test without this line ) def _add_schemas(self): @@ -126,12 +126,12 @@ def _add_schemas(self): pepdb_con.schema.create( namespace=namespace, name=name[0:-5], - version="default", + version="1.0.0", schema_value=file_dict, maintainers="Teddy", contributors="Teddy, John", release_notes="Initial release", - tags=["latest", "gf1"], + tags={"maturity_level": "trial_use"}, ) @property From 6b2cefb64663ac62c440f6c55a40586a9ab42c43 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 18 Mar 2025 17:45:49 -0400 Subject: [PATCH 09/22] Updated namespace info return model --- pepdbagent/models.py | 2 +- pepdbagent/modules/namespace.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index fa9bda5..73305cc 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -167,7 +167,7 @@ class ListOfNamespaceInfo(BaseModel): """ 
number_of_namespaces: int - limit: int + page_size: int results: List[NamespaceInfo] diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 61559dc..8c52a75 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -204,7 +204,7 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: ) return ListOfNamespaceInfo( number_of_namespaces=len(list_of_results), - limit=limit, + page_size=limit, results=list_of_results, ) From 113dbd54dcb82f793fb8e1bbd02c9a98926d791a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 19 Mar 2025 12:38:40 -0400 Subject: [PATCH 10/22] Updated namespace info return model 2 --- pepdbagent/models.py | 16 ++++++++-------- pepdbagent/modules/namespace.py | 27 +++++++++++++++++++++------ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 73305cc..9b10d8e 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -52,6 +52,12 @@ def is_private_should_be_bool(cls, v): return v +class PaginationResult(BaseModel): + page: int = 0 + page_size: int = 10 + total: int + + class AnnotationList(BaseModel): """ Annotation return model. 
@@ -166,8 +172,8 @@ class ListOfNamespaceInfo(BaseModel): Namespace information response model """ - number_of_namespaces: int - page_size: int + + pagination: PaginationResult results: List[NamespaceInfo] @@ -250,12 +256,6 @@ class HistoryAnnotationModel(BaseModel): history: List[HistoryChangeModel] -class PaginationResult(BaseModel): - page: int = 0 - page_size: int = 10 - total: int - - class SchemaVersionAnnotation(BaseModel): """ Schema version annotation model diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 8c52a75..0dff6b1 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -18,6 +18,7 @@ NamespaceStats, TarNamespaceModel, TarNamespaceModelReturn, + PaginationResult, ) from pepdbagent.utils import tuple_converter @@ -174,13 +175,16 @@ def _add_condition( ) return statement - def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: + def info(self, page: int = 0, page_size: int = DEFAULT_LIMIT_INFO, order_by: str = "number_of_projects") -> ListOfNamespaceInfo: """ Get list of top n namespaces in the database ! Warning: this function counts number of all projects in namespaces. ! it does not filter private projects (It was done for efficiency reasons) - :param limit: limit of results (top namespace ) + :param page: page number + :param page_size: number of namespaces to show + :param order_by: order by field. 
Options: number_of_projects, number_of_schemas [Default: number_of_projects] + :return: number_of_namespaces: int limit: int results: { namespace: str @@ -188,9 +192,17 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: number_of_schemas: int } """ + + statement = select(User) + + if order_by == "number_of_projects": + statement = statement.order_by(User.number_of_projects.desc()) + elif order_by == "number_of_schemas": + statement = statement.order_by(User.number_of_schemas.desc()) + with Session(self._sa_engine) as session: results = session.scalars( - select(User).limit(limit).order_by(User.number_of_projects.desc()) + statement.limit(page_size).offset(page_size*page) ) list_of_results = [] @@ -203,9 +215,12 @@ def info(self, limit: int = DEFAULT_LIMIT_INFO) -> ListOfNamespaceInfo: ) ) return ListOfNamespaceInfo( - number_of_namespaces=len(list_of_results), - page_size=limit, - results=list_of_results, + pagination=PaginationResult( + page=page, + page_size=page_size, + total=len(list_of_results), + ), + results=list_of_results ) def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats: From e3dc82e84ff49e5e2cfd97029e71c19fa63bc0dd Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 19 Mar 2025 12:58:42 -0400 Subject: [PATCH 11/22] Added latest version to schema annotation --- pepdbagent/db_utils.py | 2 +- pepdbagent/models.py | 2 +- pepdbagent/modules/namespace.py | 13 ++++++++----- pepdbagent/modules/schema.py | 2 ++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pepdbagent/db_utils.py b/pepdbagent/db_utils.py index 5c66fa3..d048dca 100644 --- a/pepdbagent/db_utils.py +++ b/pepdbagent/db_utils.py @@ -325,7 +325,7 @@ class SchemaRecords(Base): "SchemaVersions", back_populates="schema_mapping", cascade="all, delete-orphan", - order_by="SchemaVersions.version", + order_by="SchemaVersions.version.desc()", ) user_mapping: Mapped["User"] = relationship("User", back_populates="schemas_mapping") diff 
--git a/pepdbagent/models.py b/pepdbagent/models.py index 9b10d8e..01fb5ab 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -172,7 +172,6 @@ class ListOfNamespaceInfo(BaseModel): Namespace information response model """ - pagination: PaginationResult results: List[NamespaceInfo] @@ -281,6 +280,7 @@ class SchemaRecordAnnotation(BaseModel): description: Optional[str] = "" maintainers: str = "" lifecycle_stage: str = "" + latest_version: str private: bool = False last_update_date: datetime.datetime diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 0dff6b1..28c7acd 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -175,7 +175,12 @@ def _add_condition( ) return statement - def info(self, page: int = 0, page_size: int = DEFAULT_LIMIT_INFO, order_by: str = "number_of_projects") -> ListOfNamespaceInfo: + def info( + self, + page: int = 0, + page_size: int = DEFAULT_LIMIT_INFO, + order_by: str = "number_of_projects", + ) -> ListOfNamespaceInfo: """ Get list of top n namespaces in the database ! Warning: this function counts number of all projects in namespaces. 
@@ -201,9 +206,7 @@ def info(self, page: int = 0, page_size: int = DEFAULT_LIMIT_INFO, order_by: str statement = statement.order_by(User.number_of_schemas.desc()) with Session(self._sa_engine) as session: - results = session.scalars( - statement.limit(page_size).offset(page_size*page) - ) + results = session.scalars(statement.limit(page_size).offset(page_size * page)) list_of_results = [] for result in results: @@ -220,7 +223,7 @@ def info(self, page: int = 0, page_size: int = DEFAULT_LIMIT_INFO, order_by: str page_size=page_size, total=len(list_of_results), ), - results=list_of_results + results=list_of_results, ) def stats(self, namespace: str = None, monthly: bool = False) -> NamespaceStats: diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 8714ee6..caca7b1 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -387,6 +387,7 @@ def get_schema_info(self, namespace: str, name: str) -> SchemaRecordAnnotation: namespace=schema_obj.namespace, name=schema_obj.name, description=schema_obj.description, + latest_version=schema_obj.versions_mapping[0].version, maintainers=schema_obj.maintainers, private=schema_obj.private, last_update_date=schema_obj.last_update_date, @@ -492,6 +493,7 @@ def query_schemas( SchemaRecordAnnotation( namespace=result.namespace, name=result.name, + latest_version=result.versions_mapping[0].version, description=result.description, maintainers=result.maintainers, private=result.private, From cd11e2084f2b2b3454d2ad80a3d2790f1e311543 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 19 Mar 2025 13:17:56 -0400 Subject: [PATCH 12/22] Added special "latest" version to schema version function --- pepdbagent/const.py | 3 +- pepdbagent/modules/schema.py | 64 +++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/pepdbagent/const.py b/pepdbagent/const.py index 235afaf..cf0577b 100644 --- a/pepdbagent/const.py +++ b/pepdbagent/const.py @@ -23,4 +23,5 
@@ MAX_HISTORY_SAMPLES_NUMBER = 2000 -DEFAULT_TAG_VERSION = "default" +DEFAULT_TAG_VERSION = "1.0.0" +LATEST_SCHEMA_VERSION = "latest" diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index caca7b1..1f28eaf 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import Session from sqlalchemy.orm.attributes import flag_modified -from pepdbagent.const import PKG_NAME, DEFAULT_TAG_VERSION +from pepdbagent.const import PKG_NAME, DEFAULT_TAG_VERSION, LATEST_SCHEMA_VERSION from pepdbagent.db_utils import BaseEngine, SchemaRecords, SchemaTags, SchemaVersions, User from pepdbagent.exceptions import ( SchemaAlreadyExistsError, @@ -55,17 +55,32 @@ def get(self, namespace: str, name: str, version: str) -> dict: """ with Session(self._sa_engine) as session: - schema_obj = session.scalar( - select(SchemaVersions) - .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) - .where( - and_( - SchemaRecords.namespace == namespace, - SchemaRecords.name == name, - SchemaVersions.version == version, + if version == LATEST_SCHEMA_VERSION: + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + ) + ) + .order_by(SchemaVersions.version.desc()) + ) + + else: + + schema_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, + ) ) ) - ) if not schema_obj: raise SchemaVersionDoesNotExistError( @@ -406,17 +421,28 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer """ with Session(self._sa_engine) as session: - version_obj = session.scalar( - select(SchemaVersions) - .join(SchemaRecords, SchemaRecords.id == 
SchemaVersions.schema_id) - .where( - and_( - SchemaRecords.namespace == namespace, - SchemaRecords.name == name, - SchemaVersions.version == version, + + # if user provided "latest" version + if version == LATEST_SCHEMA_VERSION: + version_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where(and_(SchemaRecords.namespace == namespace, SchemaRecords.name == name)) + .order_by(SchemaVersions.version.desc()) + .limit(1) + ) + else: + version_obj = session.scalar( + select(SchemaVersions) + .join(SchemaRecords, SchemaRecords.id == SchemaVersions.schema_id) + .where( + and_( + SchemaRecords.namespace == namespace, + SchemaRecords.name == name, + SchemaVersions.version == version, + ) ) ) - ) if not version_obj: raise SchemaVersionDoesNotExistError( From 53787b12009490ea8cba071acd43b2c9871d2b64 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 20 Mar 2025 11:15:50 -0400 Subject: [PATCH 13/22] added fetch schema function --- pepdbagent/modules/schema.py | 92 +++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 1f28eaf..6a21fa3 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -460,6 +460,97 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer last_update_date=version_obj.last_update_date, ) + def fetch_schemas( + self, + namespace: str = None, + name: str = None, + maintainer: str = None, + lifecycle_stage: str = None, + latest_version: str = None, + page: int = 0, + page_size: int = 10, + order_by: str = "update_date", + order_desc: bool = False, + ) -> SchemaSearchResult: + """ + Get schemas matching the provided filters. + If no filters are provided, return all schemas. + + :param namespace: user namespace [Default: None].
If None, search in all namespaces + :param name: schema name [Default: None] + :param maintainer: schema maintainer [Default: None] + :param lifecycle_stage: schema lifecycle stage [Default: None] + :param latest_version: schema latest version [Default: None] + + :param page: page number [Default: 0] + :param page_size: number of schemas per page [Default: 10] + :param order_by: sort the result-set by the information + Options: ["name", "update_date"] + [Default: update_date] + :param order_desc: Sort the records in descending order. [Default: False] + + :return: { + pagination: {page: int, + page_size: int, + total: int}, + results: [SchemaRecordAnnotation] + """ + + # filters = [ + # SchemaRecords.namespace == namespace if namespace else None, + # SchemaRecords.name == name if name else None, + # SchemaRecords.maintainers == maintainer if maintainer else None, + # SchemaRecords.lifecycle_stage == lifecycle_stage if lifecycle_stage else None, + # ] + filters = [ + SchemaRecords.namespace.ilike(f"%{namespace}%") if namespace else None, + SchemaRecords.name.ilike(f"%{name}%") if name else None, + SchemaRecords.maintainers.ilike(f"%{maintainer}%") if maintainer else None, + ( + SchemaRecords.lifecycle_stage.ilike(f"%{lifecycle_stage}%") + if lifecycle_stage + else None + ), + ] + + # Remove None values before applying and_ + conditions = [f for f in filters if f is not None] + + statement = ( + select(SchemaRecords).where(and_(*conditions)) if conditions else select(SchemaRecords) + ) + statement_count = ( + select(func.count(SchemaRecords.id)).where(and_(*conditions)) + if conditions + else select(func.count(SchemaRecords.id)) + ) + + with Session(self._sa_engine) as session: + total = session.scalar(statement_count) + + statement = self._add_order_by_schemas_keyword(statement, by=order_by, desc=order_desc) + + results_objects = session.scalars(statement.limit(page_size).offset(page * page_size)) + return SchemaSearchResult( + pagination=PaginationResult( +
page=page, + page_size=page_size, + total=total, + ), + results=[ + SchemaRecordAnnotation( + namespace=result.namespace, + name=result.name, + latest_version=result.versions_mapping[0].version, + description=result.description, + maintainers=result.maintainers, + private=result.private, + last_update_date=result.last_update_date, + ) + for result in results_objects + ], + ) + def query_schemas( self, namespace: str = None, @@ -492,7 +583,6 @@ def query_schemas( where_statement = or_( SchemaRecords.name.ilike(f"%{search_str}%"), - SchemaRecords.maintainers.ilike(f"%{search_str}%"), SchemaRecords.description.ilike(f"%{search_str}%"), ) if namespace: From e38bd26ab6799062b7615dae42a33c47a859641b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Mar 2025 15:48:10 -0400 Subject: [PATCH 14/22] few endpoint specification fixes --- .github/workflows/black.yml | 4 ++-- pepdbagent/models.py | 9 +++++---- pepdbagent/modules/namespace.py | 3 ++- pepdbagent/modules/schema.py | 16 ++++++++-------- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index 8b48ddf..052e2ec 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -6,6 +6,6 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 - uses: psf/black@stable diff --git a/pepdbagent/models.py b/pepdbagent/models.py index 01fb5ab..b37b181 100644 --- a/pepdbagent/models.py +++ b/pepdbagent/models.py @@ -162,7 +162,8 @@ class NamespaceInfo(BaseModel): Model with information about namespace """ - namespace: str + namespace_name: str + contact_url: Optional[str] = None number_of_projects: int number_of_schemas: int @@ -261,7 +262,7 @@ class SchemaVersionAnnotation(BaseModel): """ namespace: str - name: str + schema_name: str version: str contributors: Optional[str] = "" release_notes: Optional[str] = "" @@ 
-276,11 +277,11 @@ class SchemaRecordAnnotation(BaseModel): """ namespace: str - name: str + schema_name: str description: Optional[str] = "" maintainers: str = "" lifecycle_stage: str = "" - latest_version: str + latest_released_version: str private: bool = False last_update_date: datetime.datetime diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 28c7acd..5563693 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -212,7 +212,8 @@ def info( for result in results: list_of_results.append( NamespaceInfo( - namespace=result.namespace, + namespace_name=result.namespace, + contact_url=f"https://github.com/{result.namespace}", number_of_projects=result.number_of_projects, number_of_schemas=result.number_of_schemas, ) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 6a21fa3..fe216ad 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -400,9 +400,9 @@ def get_schema_info(self, namespace: str, name: str) -> SchemaRecordAnnotation: return SchemaRecordAnnotation( namespace=schema_obj.namespace, - name=schema_obj.name, + schema_name=schema_obj.name, description=schema_obj.description, - latest_version=schema_obj.versions_mapping[0].version, + latest_released_version=schema_obj.versions_mapping[0].version, maintainers=schema_obj.maintainers, private=schema_obj.private, last_update_date=schema_obj.last_update_date, @@ -451,7 +451,7 @@ def get_version_info(self, namespace: str, name: str, version: str) -> SchemaVer return SchemaVersionAnnotation( namespace=version_obj.schema_mapping.namespace, - name=version_obj.schema_mapping.name, + schema_name=version_obj.schema_mapping.name, version=version_obj.version, contributors=version_obj.contributors, release_notes=version_obj.release_notes, @@ -540,8 +540,8 @@ def fetch_schemas( results=[ SchemaRecordAnnotation( namespace=result.namespace, - name=result.name, - 
latest_version=result.versions_mapping[0].version, + schema_name=result.name, + latest_released_version=result.versions_mapping[0].version, description=result.description, maintainers=result.maintainers, private=result.private, @@ -608,8 +608,8 @@ def query_schemas( results=[ SchemaRecordAnnotation( namespace=result.namespace, - name=result.name, - latest_version=result.versions_mapping[0].version, + schema_name=result.name, + latest_released_version=result.versions_mapping[0].version, description=result.description, maintainers=result.maintainers, private=result.private, @@ -706,7 +706,7 @@ def query_schema_version( results=[ SchemaVersionAnnotation( namespace=result.schema_mapping.namespace, - name=result.schema_mapping.name, + schema_name=result.schema_mapping.name, version=result.version, contributors=result.contributors, release_notes=result.release_notes, From 8aa0af0f93822c5f297fa78306978c7f78b838a3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 14:05:00 -0400 Subject: [PATCH 15/22] fixed schema connector for projects --- pepdbagent/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pepdbagent/utils.py b/pepdbagent/utils.py index b13073f..fc96684 100644 --- a/pepdbagent/utils.py +++ b/pepdbagent/utils.py @@ -93,7 +93,7 @@ def schema_path_converter(schema_path: str) -> Tuple[str, str, str]: name, version = name_tag.split(":") return namespace, name, version - return namespace, name_tag, "default" + return namespace, name_tag, "latest" raise RegistryPathError(f"Error in: '{schema_path}'") From cc2eccb0ce03577e441ef62797ce39c9db51e831 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 15:20:02 -0400 Subject: [PATCH 16/22] fixed tests --- pepdbagent/_version.py | 2 +- tests/test_schema.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index 5e036fa..b3f45a8 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 
+1 @@ -__version__ = "0.12.0_dev0" +__version__ = "0.12.0_dev1" diff --git a/tests/test_schema.py b/tests/test_schema.py index 7b0c26f..90a05d9 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -51,7 +51,7 @@ def test_update_schema(self): assert result.maintainers == new_maintainers assert result.lifecycle_stage == new_lifecycle_stage assert result.private == new_private - assert result.name == new_name + assert result.schema_name == new_name def test_update_schema_update_date(self): with PEPDBAgentContextManager(add_schemas=True) as agent: @@ -164,7 +164,7 @@ def test_search_schema_namespace(self): result = agent.schema.query_schemas("namespace1") assert result.pagination.total == 2 assert result.results[0].namespace == "namespace1" - assert result.results[0].name == "2.0.0" + assert result.results[0].schema_name == "2.0.0" def test_search_schema_page_number(self): with PEPDBAgentContextManager(add_schemas=True) as agent: @@ -211,9 +211,9 @@ def test_schema_version_delete(self, namespace, name): def test_number_of_schemas_in_namespace(self): with PEPDBAgentContextManager(add_schemas=True) as agent: for k in agent.namespace.info().results: - if k.namespace == "namespace1": + if k.namespace_name == "namespace1": assert k.number_of_schemas == 2 - if k.namespace == "namespace2": + if k.namespace_name == "namespace2": assert k.number_of_schemas == 3 From b2e1b9a058dceb2a85a21472b162e93c2300735f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 16:00:12 -0400 Subject: [PATCH 17/22] lint --- pepdbagent/__init__.py | 2 +- pepdbagent/exceptions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pepdbagent/__init__.py b/pepdbagent/__init__.py index cced953..a8b67f2 100644 --- a/pepdbagent/__init__.py +++ b/pepdbagent/__init__.py @@ -1,4 +1,4 @@ -""" Package-level data """ +"""Package-level data""" import coloredlogs import logmuse diff --git a/pepdbagent/exceptions.py b/pepdbagent/exceptions.py index ef27a4d..17be697 
100644 --- a/pepdbagent/exceptions.py +++ b/pepdbagent/exceptions.py @@ -1,4 +1,4 @@ -""" Custom error types """ +"""Custom error types""" class PEPDatabaseAgentError(Exception): From 96a1612696e5798f595f6a7c96642e7e2f12a2b2 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 16:04:01 -0400 Subject: [PATCH 18/22] fixed schema test --- tests/test_schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_schema.py b/tests/test_schema.py index 90a05d9..6d6b15d 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -163,8 +163,8 @@ def test_search_schema_namespace(self): with PEPDBAgentContextManager(add_schemas=True) as agent: result = agent.schema.query_schemas("namespace1") assert result.pagination.total == 2 - assert result.results[0].namespace == "namespace1" - assert result.results[0].schema_name == "2.0.0" + assert "namespace1" in [f.namepsace for f in result.results] + assert "2.0.0" in [f.schema_name for f in result.results] def test_search_schema_page_number(self): with PEPDBAgentContextManager(add_schemas=True) as agent: From 0a989feaa7354a23f9312cdd986ca00fb953ef22 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 16:05:45 -0400 Subject: [PATCH 19/22] lint --- tests/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_schema.py b/tests/test_schema.py index 6d6b15d..d1890fc 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -164,7 +164,7 @@ def test_search_schema_namespace(self): result = agent.schema.query_schemas("namespace1") assert result.pagination.total == 2 assert "namespace1" in [f.namepsace for f in result.results] - assert "2.0.0" in [f.schema_name for f in result.results] + assert "2.0.0" in [f.schema_name for f in result.results] def test_search_schema_page_number(self): with PEPDBAgentContextManager(add_schemas=True) as agent: From 7c374796077e005b545e9fd03bf63dc3ecfd146a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi 
Date: Wed, 26 Mar 2025 16:08:53 -0400 Subject: [PATCH 20/22] typo --- tests/test_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_schema.py b/tests/test_schema.py index d1890fc..e677c97 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -163,7 +163,7 @@ def test_search_schema_namespace(self): with PEPDBAgentContextManager(add_schemas=True) as agent: result = agent.schema.query_schemas("namespace1") assert result.pagination.total == 2 - assert "namespace1" in [f.namepsace for f in result.results] + assert "namespace1" in [f.namespace for f in result.results] assert "2.0.0" in [f.schema_name for f in result.results] def test_search_schema_page_number(self): From 89f29e61ef295afad82fd0314e6dedbea635ea20 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Mar 2025 16:25:03 -0400 Subject: [PATCH 21/22] fixed incorrect total size for namespaces info --- pepdbagent/modules/namespace.py | 3 ++- pepdbagent/modules/schema.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pepdbagent/modules/namespace.py b/pepdbagent/modules/namespace.py index 5563693..7766fb6 100644 --- a/pepdbagent/modules/namespace.py +++ b/pepdbagent/modules/namespace.py @@ -207,6 +207,7 @@ def info( with Session(self._sa_engine) as session: results = session.scalars(statement.limit(page_size).offset(page_size * page)) + total_number_of_namespaces = session.execute(select(func.count(User.id))).one()[0] list_of_results = [] for result in results: @@ -222,7 +223,7 @@ def info( pagination=PaginationResult( page=page, page_size=page_size, - total=len(list_of_results), + total=total_number_of_namespaces, ), results=list_of_results, ) diff --git a/pepdbagent/modules/schema.py b/pepdbagent/modules/schema.py index 6a22387..fe216ad 100644 --- a/pepdbagent/modules/schema.py +++ b/pepdbagent/modules/schema.py @@ -230,7 +230,6 @@ def add_version( schema_obj.last_update_date = func.now() - schema_version_obj = SchemaVersions( 
schema_id=schema_obj.id, version=version, From 460d6a280688509f885fb7965f8d9cc537cca114 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 27 Mar 2025 11:53:20 -0400 Subject: [PATCH 22/22] updated version and changelog --- docs/changelog.md | 8 ++++++++ pepdbagent/_version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 76ac388..9fee62b 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,10 +2,18 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. + +## [0.12.0] -- 2025-03-27 +- New database model for schemas +- Added schema version and schema record +- Added tags to schemas + + ## [0.11.1] -- 2024-09-04 - Added archive table of namespaces - Added sort by stars + ## [0.11.0] -- 2024-07-24 - Added validation schemas diff --git a/pepdbagent/_version.py b/pepdbagent/_version.py index b3f45a8..ea370a8 100644 --- a/pepdbagent/_version.py +++ b/pepdbagent/_version.py @@ -1 +1 @@ -__version__ = "0.12.0_dev1" +__version__ = "0.12.0"