diff --git a/.github/workflows/extralit-server.build-docker-images.yml b/.github/workflows/extralit-server.build-docker-images.yml index c69463a69..ee16416ec 100644 --- a/.github/workflows/extralit-server.build-docker-images.yml +++ b/.github/workflows/extralit-server.build-docker-images.yml @@ -124,6 +124,6 @@ jobs: with: username: ${{ env.DOCKER_USERNAME }} password: ${{ env.DOCKER_PASSWORD }} - repository: $${{ env.SERVER_DOCKER_IMAGE }} + repository: ${{ env.SERVER_DOCKER_IMAGE }} readme-filepath: extralit-server/docker/server/README.md diff --git a/.github/workflows/extralit.yml b/.github/workflows/extralit.yml index 7ad51606a..6603a4d30 100644 --- a/.github/workflows/extralit.yml +++ b/.github/workflows/extralit.yml @@ -30,28 +30,15 @@ jobs: if: github.event.pull_request.draft == false services: extralit-container: - image: extralitdev/extralit-hf-space:develop + image: extralitdev/extralit-hf-space:latest ports: - 6900:6900 env: - EXTRALIT_ENABLE_TELEMETRY: 0 + HF_HUB_DISABLE_TELEMETRY: 1 # Set credentials USERNAME: extralit PASSWORD: 12345678 API_KEY: extralit.apikey - EXTRALIT_S3_ENDPOINT: "http://minio:9000" - EXTRALIT_S3_ACCESS_KEY: "minioadmin" - EXTRALIT_S3_SECRET_KEY: "minioadmin" - minio: - image: lazybit/minio - volumes: - - /data:/data - env: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - options: --name=minio --health-cmd "curl http://localhost:9000/minio/health/live" --health-interval=30s --health-timeout=10s --health-retries=3 - ports: - - 9000:9000 runs-on: ubuntu-22.04 defaults: run: @@ -95,9 +82,6 @@ jobs: # Stop log streaming kill $LOGS_PID || true - # Create a directory for local storage that the container can access - mkdir -p /tmp/extralit-files - chmod -R 777 /tmp/extralit-files - name: Set huggingface hub credentials run: | echo "HF_TOKEN_EXTRALIT_INTERNAL_TESTING=${{ secrets.HF_TOKEN_EXTRALIT_INTERNAL_TESTING }}" >> "$GITHUB_ENV" diff --git a/.kiro/steering/product.md b/.kiro/steering/product.md deleted file mode 100644 index 5931efee0..000000000 --- a/.kiro/steering/product.md +++ /dev/null @@ -1,22 +0,0 @@ -# Product Overview - -Extralit (EXTRAct LITerature) is a data extraction workflow platform designed for **LLM-assisted scientific data extraction** and **unstructured document intelligence** tasks. Built on top of Argilla, it extends capabilities with enhanced data extraction, validation, and human-in-the-loop workflows. - -## Core Value Proposition -- **Precision First**: Built for high data accuracy, ensuring reliable results -- **Human-in-the-Loop**: Seamlessly integrate human annotations to refine LLM outputs -- **Flexible & Scalable**: Available as Python SDK, CLI, and Web UI with multiple deployment options - -## Key Features -- **Schema-Driven Extraction**: Define structured schemas for context-aware, high-accuracy data extraction -- **Advanced PDF Processing**: AI-powered OCR detects complex table structures in both digital and scanned PDFs -- **Built-in Validation**: Automatically verify extracted data for accuracy -- **User-Friendly Interface**: Review, edit, and validate data with team-based consensus workflows -- **Data Flywheel**: Collect human annotations to monitor performance and build fine-tuning datasets - -## Target Use Cases -- Scientific literature data extraction -- Document intelligence tasks -- PDF processing and table extraction -- Research data validation and annotation -- Academic paper analysis and bibliography management \ No newline at end of file diff --git a/.kiro/steering/structure.md b/.kiro/steering/structure.md deleted file mode 100644 index 3b1319c1a..000000000 --- a/.kiro/steering/structure.md +++ /dev/null @@ -1,225 +0,0 @@ -# Project Structure - -## Repository Organization -This is a monorepo containing multiple related packages: - -``` -extralit/ -├── extralit-server/ # FastAPI backend server -├── extralit-frontend/ # Nuxt.js web UI -├── extralit/ # Python SDK and CLI -├── examples/ # Usage examples and deployments -└── .kiro/ # Kiro AI assistant configuration -``` - -## Backend Structure (extralit-server/) -``` -extralit-server/ -├── src/extralit_server/ -│ ├── api/ # FastAPI routes and handlers -│ │ ├── handlers/ # Request handlers by version -│ │ └── schemas/ # Pydantic models for API -│ ├── contexts/ # Business logic contexts -│ ├── models/ # SQLAlchemy database models -│ ├── jobs/ # Background job definitions -│ ├── cli/ # CLI commands -│ └── alembic/ # Database migrations -├── tests/ # Test suite -├── docker/ # Docker configurations -└── pyproject.toml # PDM configuration -``` - -### Key Backend Patterns -- **API Handlers**: Located in `api/handlers/v1/` - one file per resource -- **Database Models**: In `models/database.py` - SQLAlchemy models -- **Business Logic**: In `contexts/` - domain-specific logic -- **Background Jobs**: In `jobs/` - RQ job definitions -- **Migrations**: Use Alembic in `alembic/versions/` - -## Frontend Structure (extralit-frontend/) -``` -extralit-frontend/ -├── components/ -│ ├── base/ # Reusable UI components -│ └── features/ # Feature-specific components -├── pages/ # Nuxt.js pages (routes) -├── plugins/ # Vue plugins and extensions -├── assets/ # Static assets (SCSS, icons) -├── translation/ # i18n language files -├── v1/ # Domain and Infrastructure layers -│ ├── domain/ # Domain logic (entities, events, services, usecases) -│ │ ├── entities/ -│ │ ├── events/ -│ │ ├── services/ -│ │ └── usecases/ -│ └── infrastructure/ # Infrastructure implementations (events, repositories, services, storage, types) -│ ├── events/ -│ ├── repositories/ -│ ├── services/ -│ ├── storage/ -│ └── types/ -├── e2e/ # Playwright e2e tests -└── package.json # npm configuration -``` - -### Existing Auto-Imported Components - -, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , - - -### Key Frontend Patterns -- **Components**: Base components in `components/base/`, feature components in `components/features/` -- **Pages**: Nuxt.js file-based routing in `pages/` -- **Stores**: Pinia stores in `v1/store/` -- **Domain Logic**: Dependency injection in `v1/di/` -- **Axios**: @nuxt/axios makes API calls with `{proxy: true, browserBaseURL: "api"}` -- **Dependency Injection**: Use `useResolve` from `ts-injecty` for dependency resolution in use cases -- **View Models**: Use the simple function return pattern inspired by `useHomeViewModel.ts` - ```typescript - export const useMyViewModel = (props) => { - const dependency = useResolve(MyUseCase); - - const methodOne = () => { - // implementation - }; - - const methodTwo = async (param) => { - // implementation - }; - - return { - dependency, - methodOne, - methodTwo, - // ... all public methods and properties - }; - }; - ``` -- **Component Setup**: Components use `setup(props) { return useViewModelName(props); }` pattern -- **Styling**: SCSS in `assets/scss/` with component-scoped styles -- **Base Components**: BaseSimpleTable.vue already exists for tabular data display - -### Jest Testing Patterns -- **Test Files**: Place `.spec.js` files next to the component they test -- **Mock Setup**: Define mocks inline within `jest.mock()` calls to avoid hoisting issues: - ```javascript - // Mock dependencies inline to avoid hoisting issues - jest.mock("ts-injecty", () => ({ - useResolve: jest.fn(() => mockUseCase), - })); - - jest.mock("@nuxtjs/composition-api", () => ({ - ref: jest.fn(), - computed: jest.fn(), - watch: jest.fn(), - onMounted: jest.fn(), - })); - ``` -- **Mock Configuration**: Set up mocks in `beforeEach` by getting them from required modules: - ```javascript - beforeEach(() => { - jest.clearAllMocks(); - - const compositionApi = require("@nuxtjs/composition-api"); - mockRef = compositionApi.ref; - mockComputed = compositionApi.computed; - // Configure mock behavior... - }); - ``` -- **Component Stubs**: Use stubs in the mount options for base components: - ```javascript - wrapper = mount(ComponentName, { - propsData: { /* props */ }, - stubs: { - "BaseButton": { - template: '', - props: ["variant", "disabled", "loading"], - }, - "BaseIcon": true, - "BaseFlowModal": true, - }, - }); - ``` -- **Global Mocks**: Mock browser APIs and global functions: - ```javascript - beforeEach(() => { - // Mock window.confirm for modal dialogs - global.confirm = jest.fn(() => true); - // Mock other browser APIs as needed - global.alert = jest.fn(); - }); - - afterEach(() => { - jest.restoreAllMocks(); - }); - ``` -- **View Model Testing**: Test the public interface rather than internal implementation: - ```javascript - // Test computed properties return expected values - expect(computedFn()).toBe("expected-value"); - - // Test methods exist and are callable - expect(typeof viewModel.methodName).toBe("function"); - expect(viewModel.methodName).toBeDefined(); - - // Test reactive state objects are returned - expect(viewModel.property).toBe(mockRefObject); - ``` -- **Test Structure**: - - Use `beforeEach` to reset mock state between tests - - Use `afterEach` to clean up mocks and destroy wrappers - - Group related tests in `describe` blocks - - Test public interfaces, not internal implementation details -- **Props Testing**: Test component behavior with different prop combinations -- **Event Testing**: Verify component emits correct events with proper data -- **State Testing**: Test computed properties and reactive state changes -- **User Interaction**: Mock user actions and verify component responses -- **Error Handling**: Test error states and error recovery -- **Lifecycle Testing**: Test component mounting, updating, and destruction -- **Async Testing**: Use `async/await` for asynchronous operations -- **Mock Validation**: Ensure mocks match actual component interfaces - -## Client SDK Structure (extralit/) -``` -extralit/ -├── src/ -│ ├── extralit/ # Main SDK package -│ │ ├── cli/ # CLI commands -│ │ └── client/ # API client -│ └── extralit/ # Extralit-specific extensions -├── tests/ # Test suite -├── docs/ # Documentation -└── pyproject.toml # PDM configuration -``` - -## Examples and Deployments -``` -examples/ -├── custom_field/ # Custom field examples -├── document_extraction/ # Document processing examples -├── deployments/ -│ ├── docker/ # Docker Compose setups -│ └── k8s/ # Kubernetes manifests -└── webhooks/ # Webhook integration examples -``` - -## Configuration Files -- **Backend**: `extralit-server/pyproject.toml` (PDM), `.env.dev`, `.env.test` -- **Frontend**: `extralit-frontend/package.json` (npm), `nuxt.config.ts` -- **SDK**: `extralit/pyproject.toml` (PDM) -- **Docker**: `docker-compose.yaml` for local development -- **K8s**: `Tiltfile` for Kubernetes development - -## Development Workflow -1. **Backend changes**: Work in `extralit-server/src/extralit_server/` -2. **Frontend changes**: Work in `extralit-frontend/components/` or `extralit-frontend/pages/` -3. **SDK changes**: Work in `extralit/src/extralit/` -4. **Tests**: Each package has its own `tests/` directory -5. **Documentation**: Use `extralit/docs/` for SDK docs - -## File Naming Conventions -- **Python**: snake_case for files and modules -- **Vue/TypeScript**: PascalCase for components, camelCase for utilities -- **API endpoints**: kebab-case in URLs, snake_case in Python -- **Database**: snake_case for tables and columns -- **CSS classes**: kebab-case with BEM methodology where applicable diff --git a/.kiro/steering/tech.md b/.kiro/steering/tech.md deleted file mode 100644 index 1de5b217e..000000000 --- a/.kiro/steering/tech.md +++ /dev/null @@ -1,130 +0,0 @@ ---- -inclusion: always ---- - -# Technology Stack & Development Guidelines - -## Architecture Overview -Extralit is a monorepo with 3 main packages: -- **extralit-server/**: FastAPI backend with PostgreSQL/SQLAlchemy -- **extralit-frontend/**: Nuxt.js 2.17 (Vue.js 2.7) web UI -- **extralit/**: Python SDK and CLI - -## Code Style & Conventions - -### Python (Backend & SDK) -- **Naming**: snake_case for files, modules, functions, variables -- **Type Hints**: Always use type hints with Pydantic models -- **Async/Await**: Use async patterns for database and external API calls -- **Error Handling**: Use custom exceptions from `_exceptions` modules -- **Database**: SQLAlchemy 2.0 async patterns, Alembic migrations -- **API**: FastAPI with Pydantic schemas, dependency injection -- **Build**: PDM for dependency management, not pip - -### Frontend (Vue.js/Nuxt.js) -- **Naming**: PascalCase for components, camelCase for methods/props -- **Components**: Auto-imported from `~/components` directory -- **TypeScript**: Use `