Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/deploy-api-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
image: ${{ steps.build-image.outputs.image }}

- name: Deploy Amazon ECS task definition
uses: aws-actions/amazon-ecs-deploy-task-definition@v1
uses: aws-actions/amazon-ecs-deploy-task-definition@v2
with:
task-definition: ${{ steps.task-def.outputs.task-definition }}
service: pephub-service-dev
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/deploy-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
image: ${{ steps.build-image.outputs.image }}

- name: Deploy Amazon ECS task definition
uses: aws-actions/amazon-ecs-deploy-task-definition@v1
uses: aws-actions/amazon-ecs-deploy-task-definition@v2
with:
task-definition: ${{ steps.task-def.outputs.task-definition }}
service: pephub-service-primary
Expand Down
5 changes: 3 additions & 2 deletions deployment/dockerhub/dev.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -------------
# BUILD BACKEND
# -------------
FROM python:3.10-slim
FROM python:3.13-slim
LABEL authors="Nathan LeRoy, Nathan Sheffield, Oleksandr Khoroshevskyi"

RUN apt-get update
Expand All @@ -17,6 +17,7 @@ WORKDIR /app
COPY . /app

RUN python -m pip install --upgrade pip
RUN pip install -r requirements/requirements-all.txt --no-cache-dir
RUN python -m pip install uv
RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

CMD ["uvicorn", "pephub.main:app", "--host", "0.0.0.0", "--port", "80"]
7 changes: 4 additions & 3 deletions deployment/dockerhub/primary.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# -------------
# BUILD BACKEND
# -------------
FROM python:3.10-slim
LABEL authors="Nathan LeRoy, Nathan Sheffield"
FROM python:3.13-slim
LABEL authors="Nathan LeRoy, Nathan Sheffield, Oleksandr Khoroshevskyi"

RUN apt-get update
RUN apt-get install -y gcc
Expand All @@ -17,6 +17,7 @@ WORKDIR /app
COPY . /app

RUN python -m pip install --upgrade pip
RUN pip install -r requirements/requirements-all.txt --no-cache-dir
RUN python -m pip install uv
RUN uv pip install -r requirements/requirements-all.txt --no-cache-dir --system

CMD ["uvicorn", "pephub.main:app", "--host", "0.0.0.0", "--port", "80"]
2 changes: 1 addition & 1 deletion pephub/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.15.3"
__version__ = "0.15.4"
8 changes: 5 additions & 3 deletions pephub/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,11 @@
DEFAULT_PEP_SCHEMA = "databio/pep:2.1.0"
DEFAULT_TAG = "default"

DEFAULT_QDRANT_SCORE_THRESHOLD = (
0.72 # empirical value, highly dependent on the model used
)
# DEFAULT_QDRANT_SCORE_THRESHOLD = (
# 0.72 # empirical value, highly dependent on the model used
# )

DEFAULT_QDRANT_SCORE_THRESHOLD = 0.15

ARCHIVE_URL_PATH = "https://cloud2.databio.org/pephub/"

Expand Down
89 changes: 45 additions & 44 deletions pephub/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,57 @@ def jwt_encode_user_data(user_data: dict, exp: datetime = None) -> str:
)

# sentence_transformer model
_LOGGER_PEPHUB.info(f"HF MODEL IN USE: {os.getenv('HF_MODEL', DEFAULT_HF_MODEL)}")
embedding_model = Embedding(
model_name=os.getenv("HF_MODEL", DEFAULT_HF_MODEL), max_length=512
)
# embedding_model = None


## Qdrant connection
def parse_boolean_env_var(env_var: str) -> bool:
    """
    Interpret the text of an environment variable as a boolean flag.

    The comparison is case-insensitive. Only the spellings "true", "1",
    "t", "y" and "yes" count as True; every other string (including the
    empty string) is False.
    """
    truthy_spellings = {"true", "1", "t", "y", "yes"}
    return env_var.lower() in truthy_spellings


def initialize_qdrant_client() -> Union[QdrantClient, None]:
    """
    Initialize Qdrant client if enabled

    The QDRANT_ENABLED environment variable decides whether a client is
    created at all. When it is truthy, the client is built from QDRANT_HOST,
    QDRANT_PORT and QDRANT_API_KEY (falling back to DEFAULT_QDRANT_HOST /
    DEFAULT_QDRANT_PORT / no API key) and the connection is probed once with
    list_full_snapshots().

    Returns the client when the probe succeeds. Returns None when Qdrant is
    disabled (a warning is logged) or when creating/probing the client raises
    (the error is logged and swallowed so the caller can carry on without
    Qdrant).
    """
    if not parse_boolean_env_var(os.environ.get("QDRANT_ENABLED", "false")):
        # Adjacent literals are concatenated by the parser. A backslash
        # continuation inside the string would instead splice the next
        # source line's indentation into the logged message.
        _LOGGER_PEPHUB.warning(
            "QDRANT_ENABLED is not set to true. Qdrant features will be disabled. "
            "To enable Qdrant, set the environment variable QDRANT_ENABLED to 'true'."
        )
        return None

    try:
        client = QdrantClient(
            url=os.environ.get("QDRANT_HOST", DEFAULT_QDRANT_HOST),
            port=os.environ.get("QDRANT_PORT", DEFAULT_QDRANT_PORT),
            api_key=os.environ.get("QDRANT_API_KEY", None),
        )
        # Round-trip to the server so an unreachable host is detected here
        # rather than on the first search request.
        client.list_full_snapshots()
    except Exception as e:
        # Deliberately broad: any failure at this point just disables Qdrant.
        _LOGGER_PEPHUB.error(f"Error connecting to Qdrant: {e}")
        return None

    return client


qdrant = initialize_qdrant_client()


def get_qdrant() -> Union[QdrantClient, None]:
    """
    Hand back the module-level `qdrant` object.

    The value is whatever `initialize_qdrant_client()` produced when this
    module was loaded: a QdrantClient, or None.
    """
    return qdrant


def generate_random_auth_code() -> str:
"""
Generate a random 32-digit code.
Expand Down Expand Up @@ -337,50 +382,6 @@ def verify_user_can_fork(
raise HTTPException(401, "Unauthorized to fork this repo")


def parse_boolean_env_var(env_var: str) -> bool:
    """
    Turn an environment-variable string into a bool.

    Matching ignores case. The accepted "on" spellings are "true", "1",
    "t", "y" and "yes"; anything else is treated as False.
    """
    lowered = env_var.lower()
    return any(lowered == accepted for accepted in ("true", "1", "t", "y", "yes"))


def get_qdrant_enabled() -> bool:
    """
    Report whether the QDRANT_ENABLED environment variable switches Qdrant on.

    An unset variable is read as "false".
    """
    raw_flag = os.getenv("QDRANT_ENABLED", "false")
    return parse_boolean_env_var(raw_flag)


def get_qdrant(
    qdrant_enabled: bool = Depends(get_qdrant_enabled),
) -> Union[QdrantClient, None]:  # type: ignore
    """
    Return connection to qdrant client

    Generator-style dependency that yields exactly once: a QdrantClient whose
    connection probe succeeded, or None when Qdrant is disabled or the probe
    raised ResponseHandlingException.

    The client is built from QDRANT_HOST, QDRANT_PORT and QDRANT_API_KEY,
    falling back to DEFAULT_QDRANT_HOST / DEFAULT_QDRANT_PORT / no API key.
    """
    # Qdrant is disabled: yield None and stop. Without the explicit return
    # the generator would fall through, build a client and yield a second
    # time when resumed.
    if not qdrant_enabled:
        yield None
        return

    client = QdrantClient(
        url=os.environ.get("QDRANT_HOST", DEFAULT_QDRANT_HOST),
        port=os.environ.get("QDRANT_PORT", DEFAULT_QDRANT_PORT),
        api_key=os.environ.get("QDRANT_API_KEY", None),
    )
    try:
        # test the connection first
        client.list_full_snapshots()
    except ResponseHandlingException as e:
        print(f"Error getting qdrant client: {e}")
        client = None

    # The yield sits outside the try block so that an exception thrown into
    # the generator by the consumer is not caught above and answered with a
    # second yield. No cleanup follows; the connection is not closed here.
    yield client


def get_sentence_transformer() -> Embedding:
"""
Return sentence transformer encoder
Expand Down
152 changes: 42 additions & 110 deletions pephub/routers/api/v1/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
get_qdrant,
get_sentence_transformer,
)
from ...models import SearchQuery
from ...models import SearchQuery, SearchReturnModel
from qdrant_client.models import ScoredPoint
from pepdbagent.models import Namespace

load_dotenv()

Expand All @@ -35,142 +37,72 @@ async def search_for_namespaces(


# perform a search
@search.post("/", summary="Search for a PEP")
@search.post("/", summary="Search for a PEP", response_model=SearchReturnModel)
async def search_for_pep(
query: SearchQuery,
qdrant: QdrantClient = Depends(get_qdrant),
model: Embedding = Depends(get_sentence_transformer),
agent: PEPDatabaseAgent = Depends(get_db),
namespace_access: List[str] = Depends(get_namespace_access_list),
):
) -> SearchReturnModel:
"""
Perform a search for PEPs. This can be done using qdrant (semantic search),
or with basic SQL string matches.
"""
limit = query.limit
offset = query.offset
score_threshold = query.score_threshold
if qdrant is not None:
try:
# get the embeding for the query
query_vec = list(model.embed(query.query))[0]

# get actual results using the limit and offset
vector_results = qdrant.search(
collection_name=(
query.collection_name or DEFAULT_QDRANT_COLLECTION_NAME
),
query_vector=query_vec,
limit=limit,
offset=offset,
score_threshold=score_threshold,
)
# get namespaces:
namespaces: list[Namespace] = agent.namespace.get(
query=query.query, admin=namespace_access, limit=limit, offset=offset
).results

# get sql results using the limit and offset
sql_results = agent.annotation.get(
query=query.query,
limit=limit,
offset=offset,
namespace=None,
admin=namespace_access,
)
if qdrant is not None:
query_vec = list(model.embed(query.query))[0]

# map the results to the format we want
vector_results_mapped = [r.model_dump() for r in vector_results]
sql_results_mapped = [
{
"id": r.digest,
"version": 0,
"score": 1.0, # Its a SQL search, so we just set the score to 1.0
"payload": {
"description": r.description,
"registry": f"{r.namespace}/{r.name}:{r.tag}",
},
"vector": None,
}
for r in sql_results.results
]
results = vector_results_mapped + sql_results_mapped
namespaces = agent.namespace.get(admin=namespace_access)
namespace_hits = [
n.namespace
for n in namespaces.results
if query.query.lower() in n.namespace.lower()
]
namespace_hits.extend(
[
n
for n in list(
set(
[
r.model_dump()["payload"]["registry"].split("/")[0]
for r in vector_results
]
)
)
if n not in namespace_hits
]
)
vector_results = qdrant.query_points(
collection_name=(query.collection_name or DEFAULT_QDRANT_COLLECTION_NAME),
query=query_vec,
limit=limit,
offset=offset,
score_threshold=score_threshold,
).points

# finally, sort the results by score
results = sorted(results, key=lambda x: x["score"], reverse=True)
return SearchReturnModel(
query=query.query,
results=vector_results,
namespace_hits=namespaces,
limit=limit,
offset=offset,
total=len(vector_results),
Copy link

Copilot AI Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The total field is set to len(vector_results), which only reflects the number of results returned in this page (limited by limit parameter), not the total number of matching results. This could mislead clients about the actual total count of search results. Consider using a count query or a different approach to determine the true total count of matching results.

Copilot uses AI. Check for mistakes.
)

return JSONResponse(
content={
"query": query.query,
"results": results,
"namespace_hits": namespace_hits,
"limit": limit,
"offset": offset,
"total": len(vector_results) + sql_results.count,
}
)
except Exception as e:
# TODO: this isnt proper error handling. Also we need to use a logger
print("Qdrant search failed, falling back to SQL search. Reason: ", e)
else:
# fallback to SQL search
namespaces = agent.namespace.get(admin=namespace_access).results
results = agent.annotation.get(
query=query.query, limit=limit, offset=offset
).results
results = agent.annotation.get(query=query.query, limit=limit, offset=offset)

# emulate qdrant response from the SQL search
# for frontend compatibility
parsed_results = [
{
"id": None,
"version": 0,
"score": None,
"payload": {
ScoredPoint(
id=f"{r.namespace}/{r.name}:{r.tag}",
version=0,
score=1.0, # SQL search, so we just set the score to 1.0
payload={
"description": r.description,
"registry": f"{r.namespace}/{r.name}:{r.tag}",
},
"vector": None,
}
for r in results
vector=None,
)
for r in results.results
]

namespace_hits = [
n.namespace
for n in namespaces
if query.query.lower() in n.namespace.lower()
]
namespace_hits.extend(
[
n
for n in list(
set(
[r["payload"]["registry"].split("/")[0] for r in parsed_results]
)
)
if n not in namespace_hits
]
)
return JSONResponse(
content={
"query": query.query,
"results": parsed_results,
"namespace_hits": namespace_hits,
}
return SearchReturnModel(
query=query.query,
results=parsed_results,
namespace_hits=namespaces,
limit=limit,
offset=offset,
total=results.count,
)
Loading
Loading