Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/branching-database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,17 @@ jobs:
uses: neondatabase/create-branch-action@v6
with:
project_id: ${{ vars.NEON_PROJECT_ID }}
branch_name: pr/${{ needs.setup.outputs.current_branch }}
parent_branch: ${{ needs.setup.outputs.base_ref_branch != '' && format('pr/{0}', needs.setup.outputs.base_ref_branch) || 'production' }}
branch_name: ${{ needs.setup.outputs.current_branch }}
branch_type: "schema-only"
parent_branch: ${{ needs.setup.outputs.base_ref_branch || 'develop' }}
api_key: ${{ secrets.NEON_API_KEY }}
expires_at: ${{ env.EXPIRES_AT }}
- name: Post Schema Diff Comment to PR
uses: neondatabase/schema-diff-action@v1
with:
project_id: ${{ vars.NEON_PROJECT_ID }}
compare_branch: pr/${{ needs.setup.outputs.current_branch }}
compare_branch: ${{ needs.setup.outputs.current_branch }}
base_branch: ${{ needs.setup.outputs.base_ref_branch || 'develop' }}
api_key: ${{ secrets.NEON_API_KEY }}

delete_db_branch:
Expand Down
96 changes: 48 additions & 48 deletions .github/workflows/copilot-setup-steps.yml
Original file line number Diff line number Diff line change
@@ -1,53 +1,53 @@
name: "Copilot Setup Steps"

on:
workflow_dispatch:
push:
paths:
- .github/workflows/copilot-setup-steps.yml
pull_request:
paths:
- .github/workflows/copilot-setup-steps.yml
workflow_dispatch:
push:
paths:
- .github/workflows/copilot-setup-steps.yml
pull_request:
paths:
- .github/workflows/copilot-setup-steps.yml

jobs:
copilot-setup-steps:
runs-on: ubuntu-latest
permissions:
contents: read

steps:
- name: Checkout code
uses: actions/checkout@v5

- name: Get branch name
id: branch_name
uses: tj-actions/branch-names@v8

- name: Create DB Branch
id: create_db_branch
uses: neondatabase/create-branch-action@v6
with:
project_id: ${{ vars.NEON_PROJECT_ID }}
branch_name: pr/${{ steps.branch_name.outputs.current_branch }}
parent_branch: ${{ steps.branch_name.outputs.base_ref_branch != '' && format('pr/{0}', steps.branch_name.outputs.base_ref_branch) || 'production' }}
branch_type: "schema-only"
api_key: ${{ secrets.NEON_API_KEY }}

- name: Write database URL to .env
run: |
echo "DATABASE_URL=${{ steps.create_db_branch.outputs.db_url_with_pooler }}" > .env
echo "✅ Database URL written to .env file"

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Set up PDM
uses: pdm-project/setup-pdm@v4
with:
python-version: "3.12"
cache: true

- name: Install dependencies
run: pdm install -G dev --frozen-lockfile
copilot-setup-steps:
runs-on: ubuntu-latest
permissions:
contents: read

steps:
- name: Checkout code
uses: actions/checkout@v5

- name: Get branch name
id: branch_name
uses: tj-actions/branch-names@v8

- name: Create DB Branch
id: create_db_branch
uses: neondatabase/create-branch-action@v6
with:
project_id: ${{ vars.NEON_PROJECT_ID }}
branch_name: ${{ steps.branch_name.outputs.current_branch }}
# This workflow has a single job and no `setup` job, so the `needs` context is
# empty here; read the base branch from the `branch_name` step instead.
# Falls back to 'develop' when there is no base ref (e.g. non-PR triggers).
parent_branch: ${{ steps.branch_name.outputs.base_ref_branch || 'develop' }}
branch_type: "schema-only"
api_key: ${{ secrets.NEON_API_KEY }}

- name: Write database URL to .env
run: |
echo "DATABASE_URL=${{ steps.create_db_branch.outputs.db_url_with_pooler }}" > .env
echo "✅ Database URL written to .env file"

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Set up PDM
uses: pdm-project/setup-pdm@v4
with:
python-version: "3.12"
cache: true

- name: Install dependencies
run: pdm install -G dev --frozen-lockfile
45 changes: 29 additions & 16 deletions .github/workflows/openapi-doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: Check & deploy API documentation
permissions:
contents: write
pull-requests: write

on:
push:
branches:
Expand All @@ -13,6 +14,7 @@ on:
- main
- staging
- develop

jobs:
generate-openapi:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -41,44 +43,55 @@ jobs:
SKIP_EXTENSIONS_SYNC: "1"
OBSRV__LOGGING_BACKEND: "none"
run: pdm run python scripts/generate-openapi.py

- name: Check if schema changed
id: check_changes
run: |
if git diff --quiet docs/openapi.json; then
echo "changed=false" >> $GITHUB_OUTPUT
else
echo "changed=true" >> $GITHUB_OUTPUT
fi
- name: Upload OpenAPI artifact
uses: actions/upload-artifact@v4
with:
name: openapi-schema
path: docs/openapi.json
retention-days: 1

- name: Commit and push changes
if: steps.check_changes.outputs.changed == 'true'
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add docs/openapi.json
git commit -m "chore: update OpenAPI documentation"
git push
uses: stefanzweifel/git-auto-commit-action@v5
with:
commit_message: "chore: update OpenAPI documentation"
file_pattern: docs/openapi.json

deploy-doc:
if: ${{ github.event_name == 'push' }}
name: Deploy API documentation on Bump.sh
runs-on: ubuntu-latest
needs: generate-openapi
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Download OpenAPI artifact
uses: actions/download-artifact@v4
with:
name: openapi-schema
path: docs

- name: Deploy API documentation
uses: bump-sh/github-action@v1
with:
doc: core
token: ${{secrets.BUMP_SH_TOKEN}}
file: docs/openapi.json

api-diff:
if: ${{ github.event_name == 'pull_request' }}
name: Check API diff on Bump.sh
runs-on: ubuntu-latest
needs: generate-openapi
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Download OpenAPI artifact
uses: actions/download-artifact@v4
with:
name: openapi-schema
path: docs

- name: Comment pull request with API diff
uses: bump-sh/github-action@v1
with:
Expand Down
5 changes: 1 addition & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,4 @@ scratch/
ipynb

# extensions scripts
extensions/**/scripts/*

# Database migrations
migrations/versions/
extensions/**/scripts/*
104 changes: 25 additions & 79 deletions app/business/info_base/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
one_chat,
)
from app.schemas.info_base.block import (
BlockEmbeddingModel,
BlockID,
BlockModel,
ResolverType,
Expand Down Expand Up @@ -101,59 +100,27 @@ def create(
"Block created successfully",
extra={"block_id": block.id, "resolver": block.resolver},
)

scheduler.add_job(
func=cls._upsert_embedding,
kwargs={"block_id": block.id},
misfire_grace_time=None,
logger.debug(
"Embedding will be created asynchronously by interval job",
extra={"block_id": block.id},
)

return block

@classmethod
async def refresh_embeddings(cls):
"""Rebuild all blocks' embeddings"""
with SessionLocal() as db_session:
blocks = db_session.exec(
sqlmodel.select(BlockModel).where(
BlockModel.resolver == "learn_english.lexical"
) # FIXME
).all()
tasks = tuple(cls._upsert_embedding(block, db_session) for block in blocks)
await asyncio.gather(*tasks)
db_session.commit()

@classmethod
async def _upsert_embedding(
cls, block: BlockModel, db_session: Opt[sqlmodel.Session] = None
) -> BlockEmbeddingModel:
"""Upsert a block's embedding

:param block: 块
:param db_session: 可选的数据库会话,如果提供则使用该会话;不会提交。
"""
from .resolver import ResolverManager

resolver = ResolverManager.new_resolver(block)
embedding = BlockEmbeddingModel(
id=block.id, # type: ignore[arg-type]
embedding=Embedding("", "text-embedding-v3").embed(resolver.get_str_for_embedding()),
)
if db_session:
db_session.merge(embedding)
return embedding
with SessionLocal() as db_session:
db_session.merge(embedding)
db_session.commit()
db_session.refresh(embedding)
return embedding
"""Rebuild all blocks' embeddings - delegates to sink embedding service"""
from app.business.sink.embedding import EmbeddingManager
await EmbeddingManager.refresh_all_block_embeddings()

@classmethod
async def fetchsert(cls, block: BlockModel, db_session: sqlmodel.Session) -> BlockModel:
"""Create if not exists, else return the existing one.

Will NOT commit the session.
"""
from app.business.sink.embedding import EmbeddingManager

resolver = ResolverManager.new_resolver(block)
existing = resolver.get_existing(db_session)
if existing is not None:
Expand All @@ -170,8 +137,8 @@ async def fetchsert(cls, block: BlockModel, db_session: sqlmodel.Session) -> Blo
db_session.add(block)
db_session.flush()
db_session.refresh(block)
# and embedding
await cls._upsert_embedding(block, db_session)
# and embedding - use sink service
await EmbeddingManager.upsert_block_embedding(block, db_session)

return block

Expand Down Expand Up @@ -205,39 +172,20 @@ def query_by_embedding(
num: int = 10,
max_distance: float = 0.3,
) -> tuple[BlockModel, ...]:
"""根据余弦相似度查询块
"""Query blocks by cosine similarity - delegates to sink embedding service

:param block_id: 用已有块的embedding查询
:param embedding: 用给定的embedding查询
:param resolver: 限定解析器类型, None则不限定
:param block_id: Use existing block's embedding for query
:param embedding: Use given embedding for query
:param resolver: Filter by resolver type, None means no filter
"""
with SessionLocal() as db_session:
if block_id is not None:
base_embedding = db_session.exec(
sqlmodel.select(BlockEmbeddingModel.embedding).where(
BlockEmbeddingModel.id == block_id
)
)
else:
if embedding is not None:
base_embedding = embedding
else:
raise ValueError("one of block_id or embedding must be provided")

similar_blocks = db_session.exec(
sqlmodel.select(BlockModel)
.select_from(BlockModel)
.join(BlockEmbeddingModel, BlockEmbeddingModel.id == BlockModel.id) # type: ignore
.where(BlockModel.resolver == resolver if resolver else True)
.where(BlockEmbeddingModel.embedding is not None)
.where(BlockEmbeddingModel.id != block_id)
.where(
BlockEmbeddingModel.embedding.cosine_distance(base_embedding) < max_distance # type: ignore
)
.limit(num)
).all()

return tuple(similar_blocks) # type: ignore
from app.business.sink.embedding import EmbeddingManager
return EmbeddingManager.query_blocks_by_embedding(
block_id=block_id,
embedding=embedding,
resolver=resolver,
num=num,
max_distance=max_distance,
)

@classmethod
async def iterate_from_block(
Expand Down Expand Up @@ -445,11 +393,9 @@ def edit_block(
db_session.refresh(block)

logger.info("Block edited successfully", extra={"block_id": block.id})

scheduler.add_job(
func=cls._upsert_embedding,
kwargs={"block_id": block.id},
misfire_grace_time=None,
logger.debug(
"Embedding will be updated asynchronously by interval job",
extra={"block_id": block.id},
)

return block
Loading
Loading