From 83c7063b804d80d188dad0c4bf95580f42f0ac84 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 11 Dec 2025 19:32:38 -0500 Subject: [PATCH 1/2] fixed typos --- pephub/dependencies.py | 6 +++--- pephub/routers/api/v1/search.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pephub/dependencies.py b/pephub/dependencies.py index 7b2644ba..d231892d 100644 --- a/pephub/dependencies.py +++ b/pephub/dependencies.py @@ -111,10 +111,10 @@ def jwt_encode_user_data(user_data: dict, exp: datetime = None) -> str: hf_model_sparse = os.environ.get("HF_MODEL_SPARSE", SPARSE_ENCODER_MODEL) if token is None: sparse_model = None - _LOGGER_PEPHUB.warning("No HF_TOKEN provided, sparce model disabled.") + _LOGGER_PEPHUB.warning("No HF_TOKEN provided, sparse model disabled.") else: sparse_model = SparseEncoder(hf_model_sparse, token=token) - _LOGGER_PEPHUB.info(f"Sparce model in use: {hf_model_sparse}") + _LOGGER_PEPHUB.info(f"Sparse model in use: {hf_model_sparse}") ## Qdrant connection @@ -403,7 +403,7 @@ def get_sentence_transformer() -> Embedding: def get_sparse_model() -> Union[SparseEncoder, None]: """ - Return sparce encoder model + Return sparse encoder model """ return sparse_model diff --git a/pephub/routers/api/v1/search.py b/pephub/routers/api/v1/search.py index f1e672d9..d5c3618d 100644 --- a/pephub/routers/api/v1/search.py +++ b/pephub/routers/api/v1/search.py @@ -54,7 +54,7 @@ async def search_for_pep( query: SearchQuery, qdrant: QdrantClient = Depends(get_qdrant), model: Embedding = Depends(get_sentence_transformer), - model_sparce: SparseEncoder = Depends(get_sparse_model), + model_sparse: SparseEncoder = Depends(get_sparse_model), agent: PEPDatabaseAgent = Depends(get_db), namespace_access: List[str] = Depends(get_namespace_access_list), ) -> SearchReturnModel: @@ -74,8 +74,8 @@ async def search_for_pep( if qdrant is not None: dense_query = list(list(model.embed(query.query))[0]) - if model_sparce: - sparse_result = model_sparce.encode(query.query).coalesce() + if model_sparse: + sparse_result = model_sparse.encode(query.query).coalesce() sparse_embeddings = SparseVector( indices=sparse_result.indices().tolist()[0], values=sparse_result.values().tolist(), From 5d19987c9b74aa434136735770b23acfedad9c57 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Dec 2025 11:35:47 -0500 Subject: [PATCH 2/2] updated sparse encoder default model --- pephub/_version.py | 2 +- pephub/const.py | 2 +- pephub/dependencies.py | 16 +++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pephub/_version.py b/pephub/_version.py index 07c3e7b8..66a62a27 100644 --- a/pephub/_version.py +++ b/pephub/_version.py @@ -1 +1 @@ -__version__ = "0.15.6" +__version__ = "0.15.7" diff --git a/pephub/const.py b/pephub/const.py index 53fd674f..739fdbba 100644 --- a/pephub/const.py +++ b/pephub/const.py @@ -65,7 +65,7 @@ # figure 4 # great speed to accuracy tradeoff DENSE_ENCODER_MODEL = "sentence-transformers/all-MiniLM-L6-v2" -SPARSE_ENCODER_MODEL = "naver/splade-v3" +SPARSE_ENCODER_MODEL = "prithivida/Splade_PP_en_v2" EIDO_TEMPLATES_DIRNAME = "templates/eido" EIDO_TEMPLATES_PATH = os.path.join( diff --git a/pephub/dependencies.py b/pephub/dependencies.py index d231892d..31f104b4 100644 --- a/pephub/dependencies.py +++ b/pephub/dependencies.py @@ -107,14 +107,16 @@ def jwt_encode_user_data(user_data: dict, exp: datetime = None) -> str: ) # embedding_model = None -token = os.environ.get("HF_TOKEN", None) +# token = os.environ.get("HF_TOKEN", None) +# hf_model_sparse = os.environ.get("HF_MODEL_SPARSE", SPARSE_ENCODER_MODEL) +# if token is None: +# sparse_model = None +# _LOGGER_PEPHUB.warning("No HF_TOKEN provided, sparse model disabled.") +# else: + hf_model_sparse = os.environ.get("HF_MODEL_SPARSE", SPARSE_ENCODER_MODEL) -if token is None: - sparse_model = None - _LOGGER_PEPHUB.warning("No HF_TOKEN provided, sparse model disabled.") -else: - sparse_model = SparseEncoder(hf_model_sparse, token=token) - _LOGGER_PEPHUB.info(f"Sparse model in use: {hf_model_sparse}") +sparse_model = SparseEncoder(hf_model_sparse) +_LOGGER_PEPHUB.info(f"Sparse model in use: {hf_model_sparse}") ## Qdrant connection