Compare commits
8 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
96ad505d48 | ||
|
|
ccb514c7cc | ||
|
|
6fb48f606f | ||
| 4b3ecfb122 | |||
| 6ab09eb94e | |||
| c993b3e048 | |||
|
|
69c4d520a3 | ||
| 1515ee0589 |
79 changed files with 5493 additions and 530 deletions
101
.github/workflows/build-and-deploy-fallback.yml
vendored
Normal file
101
.github/workflows/build-and-deploy-fallback.yml
vendored
Normal file
|
|
@ -0,0 +1,101 @@
|
||||||
|
# .github/workflows/build-and-deploy-fallback.yml
|
||||||
|
|
||||||
|
name: Build and Deploy Fallback
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: ["gemini"]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-deploy:
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
name: Build Images and Deploy to Server
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set repo name to lowercase
|
||||||
|
id: repo_name
|
||||||
|
run: echo "name=$(echo '${{ github.repository }}' | tr '[:upper:]' '[:lower:]')" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- name: Log in to GitHub Container Registry
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
registry: ghcr.io
|
||||||
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Create web-app .env file
|
||||||
|
run: echo 'GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }}' > web-app/.env
|
||||||
|
|
||||||
|
- name: Build and push web-app image 🚀
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: ./web-app
|
||||||
|
push: true
|
||||||
|
tags: ghcr.io/${{ steps.repo_name.outputs.name }}/web-app:${{ github.sha }}
|
||||||
|
cache-from: type=gha,scope=web-app
|
||||||
|
cache-to: type=gha,mode=max,scope=web-app
|
||||||
|
|
||||||
|
- name: Ensure remote deploy directory exists
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.SERVER_HOST }}
|
||||||
|
username: ${{ secrets.SERVER_USERNAME }}
|
||||||
|
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||||
|
script: |
|
||||||
|
mkdir -p /home/github-actions/codered-astra
|
||||||
|
|
||||||
|
- name: Upload compose files to server
|
||||||
|
uses: appleboy/scp-action@v0.1.7
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.SERVER_HOST }}
|
||||||
|
username: ${{ secrets.SERVER_USERNAME }}
|
||||||
|
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||||
|
source: "docker-compose.yml,docker-compose.prod.yml"
|
||||||
|
target: "/home/github-actions/codered-astra/"
|
||||||
|
|
||||||
|
- name: Deploy to server via SSH ☁️
|
||||||
|
uses: appleboy/ssh-action@v1.0.3
|
||||||
|
env:
|
||||||
|
RUNNER_GH_ACTOR: ${{ github.actor }}
|
||||||
|
RUNNER_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
with:
|
||||||
|
host: ${{ secrets.SERVER_HOST }}
|
||||||
|
username: ${{ secrets.SERVER_USERNAME }}
|
||||||
|
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||||
|
# pass selected env vars to the remote shell so docker login works
|
||||||
|
envs: RUNNER_GITHUB_TOKEN,RUNNER_GH_ACTOR
|
||||||
|
debug: true
|
||||||
|
script: |
|
||||||
|
cd /home/github-actions/codered-astra
|
||||||
|
chmod -R o+rX rust-engine/demo-data
|
||||||
|
# wrapper to support both Docker Compose v2 and legacy v1
|
||||||
|
compose() { docker compose "$@" || docker-compose "$@"; }
|
||||||
|
# Log in to GHCR using the run's GITHUB_TOKEN so compose can pull images.
|
||||||
|
if [ -n "$RUNNER_GITHUB_TOKEN" ] && [ -n "$RUNNER_GH_ACTOR" ]; then
|
||||||
|
echo "$RUNNER_GITHUB_TOKEN" | docker login ghcr.io -u "$RUNNER_GH_ACTOR" --password-stdin || true
|
||||||
|
fi
|
||||||
|
export REPO_NAME_LOWER='${{ steps.repo_name.outputs.name }}'
|
||||||
|
export GEMINI_API_KEY='${{ secrets.GEMINI_API_KEY }}'
|
||||||
|
export MYSQL_DATABASE='${{ secrets.MYSQL_DATABASE }}'
|
||||||
|
export MYSQL_USER='${{ secrets.MYSQL_USER }}'
|
||||||
|
export MYSQL_PASSWORD='${{ secrets.MYSQL_PASSWORD }}'
|
||||||
|
export MYSQL_ROOT_PASSWORD='${{ secrets.MYSQL_ROOT_PASSWORD }}'
|
||||||
|
export IMAGE_TAG=${{ github.sha }}
|
||||||
|
# Stop and remove old containers before pulling new images
|
||||||
|
compose -f docker-compose.prod.yml down
|
||||||
|
# Clear previous logs for a clean deployment log
|
||||||
|
: > ~/astra-logs/astra-errors.log || true
|
||||||
|
compose -f docker-compose.prod.yml pull
|
||||||
|
compose -f docker-compose.prod.yml up -d
|
||||||
|
# Security hygiene: remove GHCR credentials after pulling
|
||||||
|
docker logout ghcr.io || true
|
||||||
3
.github/workflows/build-and-deploy.yml
vendored
3
.github/workflows/build-and-deploy.yml
vendored
|
|
@ -69,7 +69,7 @@ jobs:
|
||||||
host: ${{ secrets.SERVER_HOST }}
|
host: ${{ secrets.SERVER_HOST }}
|
||||||
username: ${{ secrets.SERVER_USERNAME }}
|
username: ${{ secrets.SERVER_USERNAME }}
|
||||||
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||||
source: "docker-compose.yml,docker-compose.prod.yml,rust-engine/demo-data"
|
source: "docker-compose.yml,docker-compose.prod.yml"
|
||||||
target: "/home/github-actions/codered-astra/"
|
target: "/home/github-actions/codered-astra/"
|
||||||
|
|
||||||
- name: Deploy to server via SSH ☁️
|
- name: Deploy to server via SSH ☁️
|
||||||
|
|
@ -86,7 +86,6 @@ jobs:
|
||||||
debug: true
|
debug: true
|
||||||
script: |
|
script: |
|
||||||
cd /home/github-actions/codered-astra
|
cd /home/github-actions/codered-astra
|
||||||
chmod -R o+rX rust-engine/demo-data
|
|
||||||
# wrapper to support both Docker Compose v2 and legacy v1
|
# wrapper to support both Docker Compose v2 and legacy v1
|
||||||
compose() { docker compose "$@" || docker-compose "$@"; }
|
compose() { docker compose "$@" || docker-compose "$@"; }
|
||||||
# Log in to GHCR using the run's GITHUB_TOKEN so compose can pull images.
|
# Log in to GHCR using the run's GITHUB_TOKEN so compose can pull images.
|
||||||
|
|
|
||||||
268
ARCHITECTURE.md
Normal file
268
ARCHITECTURE.md
Normal file
|
|
@ -0,0 +1,268 @@
|
||||||
|
# CodeRED-Astra Architecture
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
CodeRED-Astra is a Retrieval-Augmented Generation (RAG) system for querying ISS technical documentation using vector search, MySQL metadata storage, and Gemini AI for analysis and response generation.
|
||||||
|
|
||||||
|
## System Components
|
||||||
|
|
||||||
|
### 1. **Rust Backend** (`rust-engine/`)
|
||||||
|
High-performance Rust backend using Warp for HTTP, SQLx for MySQL, and Reqwest for external API calls.
|
||||||
|
|
||||||
|
#### Modules
|
||||||
|
|
||||||
|
**`main.rs`** - Entry point
|
||||||
|
- Initializes tracing, database, storage
|
||||||
|
- Spawns FileWorker and QueryWorker background tasks
|
||||||
|
- Serves API routes on port 8000
|
||||||
|
|
||||||
|
**`db.rs`** - Database initialization
|
||||||
|
- Connects to MySQL
|
||||||
|
- Creates `files` table (id, filename, path, description, pending_analysis, analysis_status)
|
||||||
|
- Creates `queries` table (id, status, payload, result, timestamps)
|
||||||
|
|
||||||
|
**`api.rs`** - HTTP endpoints
|
||||||
|
- `POST /api/files` - Upload file (multipart/form-data)
|
||||||
|
- `POST /api/files/import-demo` - Bulk import from demo-data directory
|
||||||
|
- `GET /api/files/list` - List all files with status
|
||||||
|
- `GET /api/files/delete?id=` - Delete file and remove from Qdrant
|
||||||
|
- `POST /api/query/create` - Create new query (returns query ID)
|
||||||
|
- `GET /api/query/status?id=` - Check query status
|
||||||
|
- `GET /api/query/result?id=` - Get query result
|
||||||
|
- `GET /api/query/cancel?id=` - Cancel in-progress query
|
||||||
|
|
||||||
|
**`file_worker.rs`** - File analysis pipeline
|
||||||
|
- **Background worker** that processes files with `pending_analysis = TRUE`
|
||||||
|
- Claims stale/queued files (requeues if stuck >10 min)
|
||||||
|
- **Stage 1**: Call Gemini 1.5 Flash for initial description
|
||||||
|
- **Stage 2**: Call Gemini 1.5 Pro for deep vector graph data (keywords, relationships)
|
||||||
|
- **Stage 3**: Generate embedding and upsert to Qdrant
|
||||||
|
- **Stage 4**: Mark file as ready (`pending_analysis = FALSE`, `analysis_status = 'Completed'`)
|
||||||
|
- Resumable: Can recover from crashes/restarts
|
||||||
|
|
||||||
|
**`worker.rs`** - Query processing pipeline
|
||||||
|
- **Background worker** that processes queries with `status = 'Queued'`
|
||||||
|
- Requeues stale InProgress jobs (>10 min)
|
||||||
|
- **Stage 1**: Embed query text
|
||||||
|
- **Stage 2**: Search top-K similar vectors in Qdrant
|
||||||
|
- **Stage 3**: Fetch file metadata from MySQL (only completed files)
|
||||||
|
- **Stage 4**: Call Gemini to analyze relationships between files
|
||||||
|
- **Stage 5**: Call Gemini for final answer synthesis (strict: no speculation)
|
||||||
|
- **Stage 6**: Save results to database
|
||||||
|
- Supports cancellation checks between stages
|
||||||
|
|
||||||
|
**`gemini_client.rs`** - Gemini API integration
|
||||||
|
- `generate_text(prompt)` - Text generation with model switching via GEMINI_MODEL env var
|
||||||
|
- `demo_text_embedding(text)` - Demo 64-dim embeddings (replace with real Gemini embeddings)
|
||||||
|
- Falls back to demo responses if GEMINI_API_KEY not set
|
||||||
|
|
||||||
|
**`vector_db.rs`** - Qdrant client
|
||||||
|
- `ensure_files_collection(dim)` - Create 'files' collection with Cosine distance
|
||||||
|
- `upsert_point(id, vector)` - Store file embedding
|
||||||
|
- `search_top_k(vector, k)` - Find k nearest neighbors
|
||||||
|
- `delete_point(id)` - Remove file from index
|
||||||
|
|
||||||
|
**`storage.rs`** - File storage utilities
|
||||||
|
- `storage_dir()` - Get storage path from ASTRA_STORAGE env or default `/app/storage`
|
||||||
|
- `ensure_storage_dir()` - Create storage directory if missing
|
||||||
|
- `save_file(filename, contents)` - Save file to storage
|
||||||
|
- `delete_file(path)` - Remove file from storage
|
||||||
|
|
||||||
|
**`models.rs`** - Data structures
|
||||||
|
- `FileRecord` - File metadata (mirrors files table)
|
||||||
|
- `QueryRecord` - Query metadata (mirrors queries table)
|
||||||
|
- `QueryStatus` enum - Queued, InProgress, Completed, Cancelled, Failed
|
||||||
|
|
||||||
|
### 2. **Web App** (`web-app/`)
|
||||||
|
React + Vite frontend with Express backend for API proxying.
|
||||||
|
|
||||||
|
#### Backend (`server.mjs`)
|
||||||
|
- Express server that proxies API calls to rust-engine:8000
|
||||||
|
- Serves React static build from `/dist`
|
||||||
|
- **Why needed**: Docker networking - React can't call rust-engine directly from browser
|
||||||
|
|
||||||
|
#### Frontend (`src/`)
|
||||||
|
- `App.jsx` - Main chat interface component
|
||||||
|
- `components/ui/chat/chat-header.jsx` - Header with debug-only "Seed Demo Data" button (visible with `?debug=1`)
|
||||||
|
- Calls `/api/files/import-demo` endpoint to bulk-load ISS PDFs
|
||||||
|
|
||||||
|
### 3. **MySQL Database**
|
||||||
|
Two tables for metadata storage:
|
||||||
|
|
||||||
|
**`files` table**
|
||||||
|
```sql
|
||||||
|
id VARCHAR(36) PRIMARY KEY
|
||||||
|
filename TEXT NOT NULL
|
||||||
|
path TEXT NOT NULL
|
||||||
|
description TEXT
|
||||||
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
pending_analysis BOOLEAN DEFAULT TRUE
|
||||||
|
analysis_status VARCHAR(32) DEFAULT 'Queued'
|
||||||
|
```
|
||||||
|
|
||||||
|
**`queries` table**
|
||||||
|
```sql
|
||||||
|
id VARCHAR(36) PRIMARY KEY
|
||||||
|
status VARCHAR(32) NOT NULL
|
||||||
|
payload JSON
|
||||||
|
result JSON
|
||||||
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. **Qdrant Vector Database**
|
||||||
|
- Collection: `files`
|
||||||
|
- Dimension: 64 (demo) - replace with real Gemini embedding dimension
|
||||||
|
- Distance: Cosine similarity
|
||||||
|
- Stores file embeddings for semantic search
|
||||||
|
|
||||||
|
### 5. **Demo Data** (`rust-engine/demo-data/`)
|
||||||
|
~20 ISS technical PDFs organized by subsystem:
|
||||||
|
- Electrical Power System (EPS)
|
||||||
|
- Environmental Control & Life Support (ECLSS)
|
||||||
|
- Command & Data Handling (C&DH)
|
||||||
|
- Structures & Mechanisms
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
### File Upload & Analysis
|
||||||
|
```
|
||||||
|
1. User uploads PDF → POST /api/files
|
||||||
|
2. API saves file to storage, inserts DB record (pending_analysis=true)
|
||||||
|
3. FileWorker claims pending file
|
||||||
|
4. Gemini 1.5 Flash generates description
|
||||||
|
5. Gemini 1.5 Pro generates vector graph data
|
||||||
|
6. Embed text → upsert to Qdrant
|
||||||
|
7. Mark file as ready (pending_analysis=false)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Query Processing
|
||||||
|
```
|
||||||
|
1. User submits query → POST /api/query/create
|
||||||
|
2. API inserts query record (status='Queued')
|
||||||
|
3. QueryWorker claims queued query
|
||||||
|
4. Embed query text
|
||||||
|
5. Search Qdrant for top-K similar files
|
||||||
|
6. Fetch file metadata from MySQL
|
||||||
|
7. Gemini analyzes relationships between files
|
||||||
|
8. Gemini synthesizes final answer (no speculation)
|
||||||
|
9. Save results to database
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment
|
||||||
|
|
||||||
|
### Development (`docker-compose.yml`)
|
||||||
|
- Local testing with hot-reload
|
||||||
|
- Bind mounts for code
|
||||||
|
|
||||||
|
### Production (`docker-compose.prod.yml`)
|
||||||
|
- Used by GitHub Actions for deployment
|
||||||
|
- Runs rust-engine as user "1004" (github-actions)
|
||||||
|
- Docker volume: `rust-storage` → `/app/storage`
|
||||||
|
- Bind mount: `/var/www/codered-astra/rust-engine/demo-data` → `/app/demo-data:ro`
|
||||||
|
- Environment variables:
|
||||||
|
- `ASTRA_STORAGE=/app/storage`
|
||||||
|
- `DEMO_DATA_DIR=/app/demo-data`
|
||||||
|
- `QDRANT_URL=http://qdrant:6333`
|
||||||
|
- `GEMINI_API_KEY=<secret>`
|
||||||
|
- `DATABASE_URL=mysql://astraadmin:password@mysql:3306/astra`
|
||||||
|
|
||||||
|
## Key Design Decisions
|
||||||
|
|
||||||
|
### 1. **Two-Stage Analysis (Flash → Pro)**
|
||||||
|
- Flash is faster/cheaper for initial description
|
||||||
|
- Pro is better for deep analysis and relationship extraction
|
||||||
|
- Enables cost-effective scaling
|
||||||
|
|
||||||
|
### 2. **Resumable Workers**
|
||||||
|
- Workers requeue stale jobs (>10 min in InProgress)
|
||||||
|
- Survives container restarts without data loss
|
||||||
|
- Atomic state transitions via SQL
|
||||||
|
|
||||||
|
### 3. **Separation of Concerns**
|
||||||
|
- FileWorker: Makes files searchable
|
||||||
|
- QueryWorker: Answers user queries
|
||||||
|
- Independent scaling and failure isolation
|
||||||
|
|
||||||
|
### 4. **Strict Answer Generation**
|
||||||
|
- Gemini prompted to not speculate
|
||||||
|
- Must state uncertainty when info is insufficient
|
||||||
|
- Prevents hallucination in critical ISS documentation
|
||||||
|
|
||||||
|
### 5. **Demo Embeddings**
|
||||||
|
- Current: 64-dim deterministic embeddings from text hash
|
||||||
|
- Production: Replace with real Gemini text embeddings API
|
||||||
|
- Allows development/testing without embedding API credits
|
||||||
|
|
||||||
|
## API Usage Examples
|
||||||
|
|
||||||
|
### Upload File
|
||||||
|
```bash
|
||||||
|
curl -F "file=@document.pdf" http://localhost:3001/api/files
|
||||||
|
```
|
||||||
|
|
||||||
|
### Import Demo Data
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3001/api/files/import-demo
|
||||||
|
```
|
||||||
|
|
||||||
|
### Create Query
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:3001/api/query/create \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"q": "What is the voltage of the ISS main bus?", "top_k": 5}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Check Status
|
||||||
|
```bash
|
||||||
|
curl http://localhost:3001/api/query/status?id=<query-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Get Result
|
||||||
|
```bash
|
||||||
|
curl http://localhost:3001/api/query/result?id=<query-id>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Future Enhancements
|
||||||
|
|
||||||
|
### High Priority
|
||||||
|
1. Real Gemini text embeddings (replace demo embeddings)
|
||||||
|
2. File status UI panel (show processing progress)
|
||||||
|
3. Health check endpoint (`/health`)
|
||||||
|
4. Data purge endpoint (clear all files/queries)
|
||||||
|
|
||||||
|
### Medium Priority
|
||||||
|
1. Streaming query responses (SSE/WebSocket)
|
||||||
|
2. Query result caching
|
||||||
|
3. File chunking for large PDFs
|
||||||
|
4. User authentication
|
||||||
|
|
||||||
|
### Low Priority
|
||||||
|
1. Multi-collection support (different document types)
|
||||||
|
2. Query history UI
|
||||||
|
3. File preview in chat
|
||||||
|
4. Export results to PDF
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Storage Permission Errors
|
||||||
|
- Ensure `/app/storage` is owned by container user
|
||||||
|
- Docker volume must be writable by user 1004 in production
|
||||||
|
|
||||||
|
### SQL Syntax Errors
|
||||||
|
- MySQL requires separate `CREATE TABLE` statements
|
||||||
|
- Cannot combine multiple DDL statements in one `sqlx::query()`
|
||||||
|
|
||||||
|
### Qdrant Connection Issues
|
||||||
|
- Check QDRANT_URL environment variable
|
||||||
|
- Ensure qdrant service is running and healthy
|
||||||
|
- Verify network connectivity between containers
|
||||||
|
|
||||||
|
### Worker Not Processing
|
||||||
|
- Check logs: `docker logs rust-engine`
|
||||||
|
- Verify database connectivity
|
||||||
|
- Look for stale InProgress jobs in queries/files tables
|
||||||
|
|
||||||
|
## Demo Presentation (3 minutes)
|
||||||
|
|
||||||
|
See `rust-engine/DEMODETAILS.md` for curated demo script with example queries.
|
||||||
219
QUICK_REFERENCE.md
Normal file
219
QUICK_REFERENCE.md
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
# CodeRED-Astra Quick Reference
|
||||||
|
|
||||||
|
## System Overview
|
||||||
|
|
||||||
|
**Two-worker architecture for ISS document RAG:**
|
||||||
|
|
||||||
|
1. **FileWorker**: Analyzes uploaded files (Flash → Pro → Embed → Qdrant)
|
||||||
|
2. **QueryWorker**: Answers queries (Embed → Search → Relationships → Answer)
|
||||||
|
|
||||||
|
Both workers are **resumable** and automatically recover from crashes.
|
||||||
|
|
||||||
|
## Core Data Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Upload PDF → Storage → MySQL (pending) → FileWorker → Qdrant → MySQL (ready)
|
||||||
|
↓
|
||||||
|
User Query → MySQL (queued) → QueryWorker → Search Qdrant → Gemini → Result
|
||||||
|
```
|
||||||
|
|
||||||
|
## Module Map
|
||||||
|
|
||||||
|
| Module | Purpose | Key Functions |
|
||||||
|
|--------|---------|---------------|
|
||||||
|
| `main.rs` | Entry point | Spawns workers, serves API |
|
||||||
|
| `db.rs` | Database init | Creates files/queries tables |
|
||||||
|
| `api.rs` | HTTP endpoints | Upload, list, delete, query CRUD |
|
||||||
|
| `file_worker.rs` | File analysis | Flash→Pro→embed→upsert |
|
||||||
|
| `worker.rs` | Query processing | Search→relationships→answer |
|
||||||
|
| `gemini_client.rs` | AI integration | Text generation, embeddings |
|
||||||
|
| `vector_db.rs` | Qdrant client | Upsert, search, delete |
|
||||||
|
| `storage.rs` | File management | Save/delete files |
|
||||||
|
| `models.rs` | Data structures | FileRecord, QueryRecord |
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Files
|
||||||
|
- `POST /api/files` - Upload file
|
||||||
|
- `POST /api/files/import-demo?force=1` - Bulk import demo PDFs
|
||||||
|
- `GET /api/files/list` - List all files with status
|
||||||
|
- `GET /api/files/delete?id=<uuid>` - Delete file
|
||||||
|
|
||||||
|
### Queries
|
||||||
|
- `POST /api/query/create` - Create query
|
||||||
|
- `GET /api/query/status?id=<uuid>` - Check status
|
||||||
|
- `GET /api/query/result?id=<uuid>` - Get result
|
||||||
|
- `GET /api/query/cancel?id=<uuid>` - Cancel query
|
||||||
|
|
||||||
|
## Database Schema
|
||||||
|
|
||||||
|
### files
|
||||||
|
- `id` - UUID primary key
|
||||||
|
- `filename` - Original filename
|
||||||
|
- `path` - Storage path
|
||||||
|
- `description` - Gemini Flash description
|
||||||
|
- `pending_analysis` - FALSE when ready for search
|
||||||
|
- `analysis_status` - Queued/InProgress/Completed/Failed
|
||||||
|
|
||||||
|
### queries
|
||||||
|
- `id` - UUID primary key
|
||||||
|
- `status` - Queued/InProgress/Completed/Cancelled/Failed
|
||||||
|
- `payload` - JSON query params `{"q": "...", "top_k": 5}`
|
||||||
|
- `result` - JSON result `{"summary": "...", "related_files": [...], "relationships": "...", "final_answer": "..."}`
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
### Required
|
||||||
|
- `GEMINI_API_KEY` - Gemini API key
|
||||||
|
- `DATABASE_URL` - MySQL connection string
|
||||||
|
- `QDRANT_URL` - Qdrant URL (default: http://qdrant:6333)
|
||||||
|
|
||||||
|
### Optional
|
||||||
|
- `ASTRA_STORAGE` - Storage directory (default: /app/storage)
|
||||||
|
- `DEMO_DATA_DIR` - Demo data directory (default: /app/demo-data)
|
||||||
|
- `GEMINI_MODEL` - Override Gemini model (default: gemini-1.5-pro)
|
||||||
|
|
||||||
|
## Worker States
|
||||||
|
|
||||||
|
### FileWorker
|
||||||
|
1. **Queued** - File uploaded, awaiting processing
|
||||||
|
2. **InProgress** - Currently being analyzed
|
||||||
|
3. **Completed** - Ready for search (pending_analysis=FALSE)
|
||||||
|
4. **Failed** - Error during processing
|
||||||
|
|
||||||
|
### QueryWorker
|
||||||
|
1. **Queued** - Query created, awaiting processing
|
||||||
|
2. **InProgress** - Currently searching/analyzing
|
||||||
|
3. **Completed** - Result available
|
||||||
|
4. **Cancelled** - User cancelled
|
||||||
|
5. **Failed** - Error during processing
|
||||||
|
|
||||||
|
## Gemini Prompts
|
||||||
|
|
||||||
|
### FileWorker Stage 1 (Flash)
|
||||||
|
```
|
||||||
|
Describe the file '{filename}' and extract all key components, keywords,
|
||||||
|
and details for later vectorization. Be comprehensive and factual.
|
||||||
|
```
|
||||||
|
|
||||||
|
### FileWorker Stage 2 (Pro)
|
||||||
|
```
|
||||||
|
Given the file '{filename}' and its description: {desc}
|
||||||
|
Generate a set of vector graph data (keywords, use cases, relationships)
|
||||||
|
that can be used for broad and precise search. Only include what is
|
||||||
|
directly supported by the file.
|
||||||
|
```
|
||||||
|
|
||||||
|
### QueryWorker Stage 4 (Relationships)
|
||||||
|
```
|
||||||
|
You are an assistant analyzing relationships STRICTLY within the provided files.
|
||||||
|
Query: {query}
|
||||||
|
Files: {file_list}
|
||||||
|
Tasks:
|
||||||
|
1) Summarize key details from the files relevant to the query.
|
||||||
|
2) Describe relationships and linkages strictly supported by these files.
|
||||||
|
3) List important follow-up questions that could be answered only using the provided files.
|
||||||
|
Rules: Do NOT guess or invent. If information is insufficient in the files, explicitly state that.
|
||||||
|
```
|
||||||
|
|
||||||
|
### QueryWorker Stage 5 (Final Answer)
|
||||||
|
```
|
||||||
|
You are to compose a final answer to the user query using only the information from the files.
|
||||||
|
Query: {query}
|
||||||
|
Files considered: {file_list}
|
||||||
|
Relationship analysis: {relationships}
|
||||||
|
Requirements:
|
||||||
|
- Use only information present in the files and analysis above.
|
||||||
|
- If the answer is uncertain or cannot be determined from the files, clearly state that limitation.
|
||||||
|
- Avoid speculation or assumptions.
|
||||||
|
Provide a concise, structured answer.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker Architecture
|
||||||
|
|
||||||
|
### Services
|
||||||
|
- **rust-engine** - Warp API + workers (port 8000)
|
||||||
|
- **web-app** - Express + React (port 3001)
|
||||||
|
- **mysql** - MySQL 9.1 (port 3306)
|
||||||
|
- **qdrant** - Qdrant vector DB (port 6333)
|
||||||
|
- **phpmyadmin** - DB admin UI (port 8080)
|
||||||
|
|
||||||
|
### Volumes (Production)
|
||||||
|
- `rust-storage:/app/storage` - File storage (writable)
|
||||||
|
- `/var/www/codered-astra/rust-engine/demo-data:/app/demo-data:ro` - Demo PDFs (read-only)
|
||||||
|
- `~/astra-logs:/var/log` - Log files
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
### 1. SQL Syntax Error
|
||||||
|
**Problem**: `error near 'CREATE TABLE'`
|
||||||
|
**Cause**: Multiple CREATE TABLE in one query
|
||||||
|
**Fix**: Split into separate `sqlx::query()` calls
|
||||||
|
|
||||||
|
### 2. Permission Denied
|
||||||
|
**Problem**: `Permission denied (os error 13)`
|
||||||
|
**Cause**: Container user can't write to storage
|
||||||
|
**Fix**: Use Docker volume, ensure ownership matches container user
|
||||||
|
|
||||||
|
### 3. Worker Not Processing
|
||||||
|
**Problem**: Files/queries stuck in Queued
|
||||||
|
**Cause**: Worker crashed or not started
|
||||||
|
**Fix**: Check logs, ensure workers spawned in main.rs
|
||||||
|
|
||||||
|
### 4. Qdrant Connection Failed
|
||||||
|
**Problem**: `qdrant upsert/search failed`
|
||||||
|
**Cause**: Qdrant not running or wrong URL
|
||||||
|
**Fix**: Verify QDRANT_URL, check qdrant container health
|
||||||
|
|
||||||
|
## Development Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build and run locally
|
||||||
|
cd rust-engine
|
||||||
|
cargo build
|
||||||
|
cargo run
|
||||||
|
|
||||||
|
# Check code
|
||||||
|
cargo check
|
||||||
|
|
||||||
|
# Run with logs
|
||||||
|
RUST_LOG=info cargo run
|
||||||
|
|
||||||
|
# Docker compose (dev)
|
||||||
|
docker-compose up --build
|
||||||
|
|
||||||
|
# Docker compose (production)
|
||||||
|
docker-compose -f docker-compose.prod.yml up -d
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
docker logs rust-engine -f
|
||||||
|
|
||||||
|
# Rebuild single service
|
||||||
|
docker-compose build rust-engine
|
||||||
|
docker-compose up -d rust-engine
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing Flow
|
||||||
|
|
||||||
|
1. Start services: `docker-compose up -d`
|
||||||
|
2. Import demo data: `curl -X POST http://localhost:3001/api/files/import-demo`
|
||||||
|
3. Wait for FileWorker to complete (~30 seconds for 20 files)
|
||||||
|
4. Check file status: `curl http://localhost:3001/api/files/list`
|
||||||
|
5. Create query: `curl -X POST http://localhost:3001/api/query/create -H "Content-Type: application/json" -d '{"q": "ISS main bus voltage", "top_k": 5}'`
|
||||||
|
6. Check status: `curl http://localhost:3001/api/query/status?id=<id>`
|
||||||
|
7. Get result: `curl http://localhost:3001/api/query/result?id=<id>`
|
||||||
|
|
||||||
|
## Performance Notes
|
||||||
|
|
||||||
|
- FileWorker: ~1-2 sec per file (demo embeddings)
|
||||||
|
- QueryWorker: ~3-5 sec per query (search + 2 Gemini calls)
|
||||||
|
- Qdrant search: <100ms for 1000s of vectors
|
||||||
|
- MySQL queries: <10ms for simple selects
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
- Store GEMINI_API_KEY in GitHub Secrets (production)
|
||||||
|
- Use environment variables for all credentials
|
||||||
|
- Don't commit `.env` files
|
||||||
|
- Restrict phpmyadmin to internal network only
|
||||||
|
- Use HTTPS in production deployment
|
||||||
75
README.md
Normal file
75
README.md
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
# CodeRED-Astra 🚀
|
||||||
|
|
||||||
|
A hackathon-ready project with React frontend and Rust backend engine.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Setup environment
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your credentials
|
||||||
|
|
||||||
|
# 2. Start everything with Docker
|
||||||
|
docker-compose up --build
|
||||||
|
|
||||||
|
# 3. Access your app
|
||||||
|
# Frontend: http://localhost
|
||||||
|
# API: http://localhost:8000
|
||||||
|
# Database Admin: http://127.0.0.1:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
**Frontend (React + Vite)**:
|
||||||
|
```bash
|
||||||
|
cd web-app
|
||||||
|
npm install
|
||||||
|
npm run dev # http://localhost:5173
|
||||||
|
```
|
||||||
|
|
||||||
|
**Backend (Rust)**:
|
||||||
|
```bash
|
||||||
|
cd rust-engine
|
||||||
|
cargo run # http://localhost:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- **Frontend**: React 18 + Vite + Tailwind CSS
|
||||||
|
- **Backend**: Rust + Warp + SQLx
|
||||||
|
- **Database**: MySQL 8.0 + phpMyAdmin
|
||||||
|
- **API**: RESTful endpoints with CORS enabled
|
||||||
|
- **Docker**: Full containerization for easy deployment
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
├── web-app/ # React frontend
|
||||||
|
│ ├── src/
|
||||||
|
│ │ ├── App.jsx # Main component
|
||||||
|
│ │ └── main.jsx # Entry point
|
||||||
|
│ └── Dockerfile
|
||||||
|
├── rust-engine/ # Rust backend
|
||||||
|
│ ├── src/
|
||||||
|
│ │ └── main.rs # API server
|
||||||
|
│ └── Dockerfile
|
||||||
|
├── docker-compose.yml # Full stack orchestration
|
||||||
|
└── .env.example # Environment template
|
||||||
|
```
|
||||||
|
|
||||||
|
## Team Workflow
|
||||||
|
|
||||||
|
- **Frontend devs**: Work in `web-app/src/`, use `/api/*` for backend calls
|
||||||
|
- **Backend devs**: Work in `rust-engine/src/`, add endpoints to main.rs
|
||||||
|
- **Database**: Access phpMyAdmin at http://127.0.0.1:8080
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
✅ Hot reload for both frontend and backend
|
||||||
|
✅ Automatic API proxying from React to Rust
|
||||||
|
✅ Database connection with graceful fallback
|
||||||
|
✅ CORS configured for cross-origin requests
|
||||||
|
✅ Production-ready Docker containers
|
||||||
|
✅ Health monitoring and status dashboard
|
||||||
|
|
||||||
|
Ready for your hackathon! See `DEVELOPMENT.md` for detailed setup instructions.
|
||||||
8
docker-compose.prod.yml
Normal file
8
docker-compose.prod.yml
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
services:
|
||||||
|
fallback-web-app:
|
||||||
|
image: ghcr.io/${REPO_NAME_LOWER}/web-app:${IMAGE_TAG}
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:3034:3000"
|
||||||
|
environment:
|
||||||
|
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||||
|
|
@ -13,8 +13,6 @@ services:
|
||||||
- DATABASE_URL=mysql://${MYSQL_USER}:${MYSQL_PASSWORD}@mysql:3306/${MYSQL_DATABASE}
|
- DATABASE_URL=mysql://${MYSQL_USER}:${MYSQL_PASSWORD}@mysql:3306/${MYSQL_DATABASE}
|
||||||
- RUST_ENGINE_URL=http://rust-engine:8000
|
- RUST_ENGINE_URL=http://rust-engine:8000
|
||||||
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
- GEMINI_API_KEY=${GEMINI_API_KEY}
|
||||||
volumes:
|
|
||||||
- rust-storage:/app/storage:ro
|
|
||||||
depends_on:
|
depends_on:
|
||||||
- mysql # <-- Updated dependency
|
- mysql # <-- Updated dependency
|
||||||
- rust-engine
|
- rust-engine
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
|
|
||||||
|
|
@ -1,244 +0,0 @@
|
||||||
import React, {
|
|
||||||
useCallback,
|
|
||||||
useEffect,
|
|
||||||
useMemo,
|
|
||||||
useRef,
|
|
||||||
useState,
|
|
||||||
} from "react";
|
|
||||||
import ChatHeader from "src/components/ui/chat/chat-header";
|
|
||||||
import ChatWindow from "src/components/ui/chat/chat-window";
|
|
||||||
import MessageInput from "src/components/ui/chat/message-input";
|
|
||||||
import {
|
|
||||||
createQuery,
|
|
||||||
getQueryResult,
|
|
||||||
getQueryStatus,
|
|
||||||
listFiles,
|
|
||||||
} from "src/lib/api";
|
|
||||||
|
|
||||||
const createId = () =>
|
|
||||||
globalThis.crypto?.randomUUID?.() ?? `id-${Date.now()}-${Math.random()}`;
|
|
||||||
|
|
||||||
const INTRO_MESSAGE = {
|
|
||||||
id: "intro",
|
|
||||||
role: "assistant",
|
|
||||||
content:
|
|
||||||
"Ask me about the demo PDFs and I'll respond with the best matches pulled from the processed files.",
|
|
||||||
};
|
|
||||||
|
|
||||||
export default function ChatLayout() {
|
|
||||||
const [messages, setMessages] = useState([INTRO_MESSAGE]);
|
|
||||||
const [isProcessing, setIsProcessing] = useState(false);
|
|
||||||
const [files, setFiles] = useState([]);
|
|
||||||
const [errorToast, setErrorToast] = useState("");
|
|
||||||
const pollAbortRef = useRef(null);
|
|
||||||
|
|
||||||
const showError = useCallback((message) => {
|
|
||||||
setErrorToast(message);
|
|
||||||
window.setTimeout(() => setErrorToast(""), 5000);
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const refreshFiles = useCallback(async () => {
|
|
||||||
try {
|
|
||||||
const latest = await listFiles();
|
|
||||||
setFiles(latest);
|
|
||||||
} catch (error) {
|
|
||||||
showError(error.message ?? "Failed to load files");
|
|
||||||
}
|
|
||||||
}, [showError]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
refreshFiles();
|
|
||||||
}, [refreshFiles]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
return () => {
|
|
||||||
if (pollAbortRef.current) {
|
|
||||||
pollAbortRef.current.aborted = true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const buildAssistantMarkdown = useCallback((result) => {
|
|
||||||
if (!result || typeof result !== "object") {
|
|
||||||
return "I could not find a response for that request.";
|
|
||||||
}
|
|
||||||
|
|
||||||
const finalAnswer = result.final_answer?.trim();
|
|
||||||
const relationships = result.relationships?.trim();
|
|
||||||
const relatedFiles = Array.isArray(result.related_files)
|
|
||||||
? result.related_files
|
|
||||||
: [];
|
|
||||||
|
|
||||||
const fileLines = relatedFiles
|
|
||||||
.filter((f) => f && typeof f === "object")
|
|
||||||
.map((file) => {
|
|
||||||
const filename = file.filename || file.id || "download";
|
|
||||||
const storageUrl = file.storage_url || `/storage/${filename}`;
|
|
||||||
const linkTarget = storageUrl.startsWith("/storage/")
|
|
||||||
? `/storage/${encodeURIComponent(storageUrl.replace("/storage/", ""))}`
|
|
||||||
: storageUrl;
|
|
||||||
const description = file.description?.trim();
|
|
||||||
const score =
|
|
||||||
typeof file.score === "number"
|
|
||||||
? ` _(score: ${file.score.toFixed(3)})_`
|
|
||||||
: "";
|
|
||||||
const detail = description ? ` — ${description}` : "";
|
|
||||||
return `- [${filename}](${linkTarget})${detail}${score}`;
|
|
||||||
});
|
|
||||||
|
|
||||||
let content =
|
|
||||||
finalAnswer ||
|
|
||||||
"I could not determine an answer from the indexed documents yet.";
|
|
||||||
|
|
||||||
if (fileLines.length) {
|
|
||||||
content += `\n\n**Related Files**\n${fileLines.join("\n")}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (relationships && relationships !== finalAnswer) {
|
|
||||||
content += `\n\n---\n${relationships}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fileLines.length && (!finalAnswer || finalAnswer.length < 10)) {
|
|
||||||
content +=
|
|
||||||
"\n\n_No analyzed documents matched yet. Try seeding demo data or wait for processing to finish._";
|
|
||||||
}
|
|
||||||
|
|
||||||
return content;
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const waitForResult = useCallback(async (id) => {
|
|
||||||
const abortState = { aborted: false };
|
|
||||||
pollAbortRef.current = abortState;
|
|
||||||
const timeoutMs = 120_000;
|
|
||||||
const intervalMs = 1_500;
|
|
||||||
const started = Date.now();
|
|
||||||
|
|
||||||
while (!abortState.aborted) {
|
|
||||||
if (Date.now() - started > timeoutMs) {
|
|
||||||
throw new Error("Timed out waiting for the query to finish");
|
|
||||||
}
|
|
||||||
|
|
||||||
const statusPayload = await getQueryStatus(id);
|
|
||||||
const status = statusPayload?.status;
|
|
||||||
|
|
||||||
if (status === "Completed") {
|
|
||||||
const resultPayload = await getQueryResult(id);
|
|
||||||
return resultPayload?.result;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (status === "Failed") {
|
|
||||||
const resultPayload = await getQueryResult(id);
|
|
||||||
const reason = resultPayload?.result?.error || "Query failed";
|
|
||||||
throw new Error(reason);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (status === "Cancelled") {
|
|
||||||
throw new Error("Query was cancelled");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (status === "not_found") {
|
|
||||||
throw new Error("Query was not found");
|
|
||||||
}
|
|
||||||
|
|
||||||
await new Promise((resolve) => window.setTimeout(resolve, intervalMs));
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new Error("Query polling was aborted");
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const handleSend = useCallback(
|
|
||||||
async (text) => {
|
|
||||||
if (isProcessing) {
|
|
||||||
showError("Please wait for the current response to finish.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const userEntry = {
|
|
||||||
id: createId(),
|
|
||||||
role: "user",
|
|
||||||
content: text,
|
|
||||||
};
|
|
||||||
setMessages((prev) => [...prev, userEntry]);
|
|
||||||
|
|
||||||
const placeholderId = createId();
|
|
||||||
setMessages((prev) => [
|
|
||||||
...prev,
|
|
||||||
{
|
|
||||||
id: placeholderId,
|
|
||||||
role: "assistant",
|
|
||||||
content: "_Analyzing indexed documents..._",
|
|
||||||
pending: true,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
|
|
||||||
setIsProcessing(true);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const payload = { q: text, top_k: 5 };
|
|
||||||
const created = await createQuery(payload);
|
|
||||||
const result = await waitForResult(created.id);
|
|
||||||
const content = buildAssistantMarkdown(result);
|
|
||||||
setMessages((prev) =>
|
|
||||||
prev.map((message) =>
|
|
||||||
message.id === placeholderId
|
|
||||||
? { ...message, content, pending: false }
|
|
||||||
: message,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
} catch (error) {
|
|
||||||
const message = error?.message || "Something went wrong.";
|
|
||||||
setMessages((prev) =>
|
|
||||||
prev.map((entry) =>
|
|
||||||
entry.id === placeholderId
|
|
||||||
? {
|
|
||||||
...entry,
|
|
||||||
content: `⚠️ ${message}`,
|
|
||||||
pending: false,
|
|
||||||
error: true,
|
|
||||||
}
|
|
||||||
: entry,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
showError(message);
|
|
||||||
} finally {
|
|
||||||
pollAbortRef.current = null;
|
|
||||||
setIsProcessing(false);
|
|
||||||
refreshFiles();
|
|
||||||
}
|
|
||||||
},
|
|
||||||
[
|
|
||||||
isProcessing,
|
|
||||||
showError,
|
|
||||||
refreshFiles,
|
|
||||||
waitForResult,
|
|
||||||
buildAssistantMarkdown,
|
|
||||||
],
|
|
||||||
);
|
|
||||||
|
|
||||||
const handleDeleteAll = useCallback(() => {
|
|
||||||
if (!window.confirm("Delete all messages?")) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
setMessages([INTRO_MESSAGE]);
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
const latestFileSummary = useMemo(() => {
|
|
||||||
if (!files.length) return "No files indexed yet.";
|
|
||||||
const pending = files.filter((f) => f.pending_analysis).length;
|
|
||||||
const ready = files.length - pending;
|
|
||||||
return `${ready} ready • ${pending} processing`;
|
|
||||||
}, [files]);
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="flex flex-col flex-start w-full max-w-3xl gap-4 p-4">
|
|
||||||
<ChatHeader
|
|
||||||
onClear={handleDeleteAll}
|
|
||||||
busy={isProcessing}
|
|
||||||
fileSummary={latestFileSummary}
|
|
||||||
errorMessage={errorToast}
|
|
||||||
/>
|
|
||||||
<ChatWindow messages={messages} />
|
|
||||||
<MessageInput onSend={handleSend} disabled={isProcessing} />
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
import { Flame } from "lucide-react";
|
|
||||||
import { motion } from "motion/react";
|
|
||||||
|
|
||||||
export default function FlameButton({ onClick, disabled = false }) {
|
|
||||||
return (
|
|
||||||
<motion.button
|
|
||||||
onClick={onClick}
|
|
||||||
className={`bg-gray-700 p-2 rounded-2xl border-2 border-gray-600 ${
|
|
||||||
disabled ? "cursor-not-allowed" : "cursor-pointer"
|
|
||||||
}`}
|
|
||||||
whileHover={disabled ? undefined : { scale: 1.1 }}
|
|
||||||
whileTap={disabled ? undefined : { scale: 0.9 }}
|
|
||||||
disabled={disabled}
|
|
||||||
style={{ opacity: disabled ? 0.5 : 1 }}
|
|
||||||
>
|
|
||||||
<Flame />
|
|
||||||
</motion.button>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
||||||
import React, { useEffect, useMemo, useState } from "react";
|
|
||||||
import { motion } from "motion/react";
|
|
||||||
import { Rocket } from "lucide-react";
|
|
||||||
import DeleteButton from "src/components/ui/button/delete-button";
|
|
||||||
import SchematicButton from "src/components/ui/button/schematic-button";
|
|
||||||
|
|
||||||
export default function ChatHeader({
|
|
||||||
title = "Title of Chat",
|
|
||||||
onClear,
|
|
||||||
busy = false,
|
|
||||||
fileSummary,
|
|
||||||
errorMessage,
|
|
||||||
}) {
|
|
||||||
const isDebug = useMemo(() => {
|
|
||||||
const p = new URLSearchParams(window.location.search);
|
|
||||||
return p.get("debug") === "1";
|
|
||||||
}, []);
|
|
||||||
const [ingesting, setIngesting] = useState(false);
|
|
||||||
const [toast, setToast] = useState("");
|
|
||||||
const [externalToast, setExternalToast] = useState("");
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (!errorMessage) return;
|
|
||||||
setExternalToast(errorMessage);
|
|
||||||
const timer = window.setTimeout(() => setExternalToast(""), 5000);
|
|
||||||
return () => window.clearTimeout(timer);
|
|
||||||
}, [errorMessage]);
|
|
||||||
|
|
||||||
async function triggerDemoIngest() {
|
|
||||||
try {
|
|
||||||
setIngesting(true);
|
|
||||||
const res = await fetch("/api/files/import-demo", { method: "POST" });
|
|
||||||
const json = await res.json().catch(() => ({}));
|
|
||||||
const imported = json.imported ?? "?";
|
|
||||||
const skipped = json.skipped ?? "?";
|
|
||||||
const summary = `Imported: ${imported}, Skipped: ${skipped}`;
|
|
||||||
setToast(json.error ? `${summary} - ${json.error}` : summary);
|
|
||||||
setTimeout(() => setToast(""), 4000);
|
|
||||||
} catch (e) {
|
|
||||||
setToast("Import failed");
|
|
||||||
setTimeout(() => setToast(""), 4000);
|
|
||||||
} finally {
|
|
||||||
setIngesting(false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="w-full flex justify-center">
|
|
||||||
<header className="text-slate-100 fixed top-4 max-w-3xl w-full px-4">
|
|
||||||
<div className="flex justify-between items-center gap-4">
|
|
||||||
<SchematicButton />
|
|
||||||
<div className="flex items-center gap-3">
|
|
||||||
<h1 className="text-lg font-semibold shadow-md shadow-indigo-600 bg-gray-900 px-6 py-2 rounded-4xl border-2 border-gray-800">
|
|
||||||
{title}
|
|
||||||
</h1>
|
|
||||||
{fileSummary && (
|
|
||||||
<div className="text-xs text-slate-300 bg-gray-800/80 border border-gray-700 rounded px-3 py-1">
|
|
||||||
{fileSummary}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
<DeleteButton onClick={onClear} disabled={busy} />
|
|
||||||
{isDebug && (
|
|
||||||
<motion.button
|
|
||||||
onClick={triggerDemoIngest}
|
|
||||||
className="bg-gray-800 border-2 border-gray-700 rounded-xl px-3 py-2 flex items-center gap-2"
|
|
||||||
whileHover={{ scale: 1.05 }}
|
|
||||||
whileTap={{ scale: 0.95 }}
|
|
||||||
disabled={ingesting}
|
|
||||||
>
|
|
||||||
<Rocket size={16} />
|
|
||||||
{ingesting ? "Seeding…" : "Seed Demo Data"}
|
|
||||||
</motion.button>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{toast && (
|
|
||||||
<div className="mt-2 text-xs text-slate-300 bg-gray-800/80 border border-gray-700 rounded px-2 py-1 inline-block">
|
|
||||||
{toast}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{externalToast && (
|
|
||||||
<div className="mt-2 text-xs text-red-300 bg-red-900/40 border border-red-700 rounded px-2 py-1 inline-block">
|
|
||||||
{externalToast}
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
</header>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
@ -1,119 +0,0 @@
|
||||||
import React, { useState, useRef, useEffect } from "react";
|
|
||||||
import DownButton from "src/components/ui/button/down-button";
|
|
||||||
import { motion } from "motion/react";
|
|
||||||
import { BotMessageSquare } from "lucide-react";
|
|
||||||
|
|
||||||
export default function MessageInput({ onSend, disabled = false }) {
|
|
||||||
const [text, setText] = useState("");
|
|
||||||
const textareaRef = useRef(null);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
// ensure correct initial height
|
|
||||||
if (textareaRef.current) textareaRef.current.style.height = "auto";
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
async function handleSubmit(e) {
|
|
||||||
e.preventDefault();
|
|
||||||
if (!text.trim() || disabled) return;
|
|
||||||
onSend(text.trim());
|
|
||||||
|
|
||||||
// create query on backend
|
|
||||||
try {
|
|
||||||
if (onMessage)
|
|
||||||
onMessage("assistant", "Queued: sending request to server...");
|
|
||||||
const createRes = await fetch(`/api/query/create`, {
|
|
||||||
method: "POST",
|
|
||||||
headers: { "Content-Type": "application/json" },
|
|
||||||
body: JSON.stringify({ q: text, top_k: 5 }),
|
|
||||||
});
|
|
||||||
const createJson = await createRes.json();
|
|
||||||
const id = createJson.id;
|
|
||||||
if (!id) throw new Error("no id returned");
|
|
||||||
|
|
||||||
// poll status
|
|
||||||
let status = "Queued";
|
|
||||||
if (onMessage) onMessage("assistant", `Status: ${status}`);
|
|
||||||
while (status !== "Completed" && status !== "Failed") {
|
|
||||||
await new Promise((r) => setTimeout(r, 1000));
|
|
||||||
const sRes = await fetch(`/api/query/status?id=${id}`);
|
|
||||||
const sJson = await sRes.json();
|
|
||||||
status = sJson.status;
|
|
||||||
if (onMessage) onMessage("assistant", `Status: ${status}`);
|
|
||||||
if (status === "Cancelled") break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (status === "Completed") {
|
|
||||||
const resultRes = await fetch(`/api/query/result?id=${id}`);
|
|
||||||
const resultJson = await resultRes.json();
|
|
||||||
const final =
|
|
||||||
resultJson?.result?.final_answer ||
|
|
||||||
JSON.stringify(resultJson?.result || {});
|
|
||||||
if (onMessage) onMessage("assistant", final);
|
|
||||||
} else {
|
|
||||||
if (onMessage)
|
|
||||||
onMessage("assistant", `Query status ended as: ${status}`);
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.error(err);
|
|
||||||
if (onMessage) onMessage("assistant", `Error: ${err.message}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
setText("");
|
|
||||||
}
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="w-full flex justify-center">
|
|
||||||
<footer className="fixed bottom-6 max-w-3xl w-full px-4">
|
|
||||||
<div className="flex flex-col gap-4">
|
|
||||||
<div>
|
|
||||||
<DownButton></DownButton>
|
|
||||||
</div>
|
|
||||||
<form
|
|
||||||
onSubmit={handleSubmit}
|
|
||||||
className="bg-gray-900 rounded-2xl border-2 border-gray-800 shadow-lg shadow-indigo-600"
|
|
||||||
>
|
|
||||||
<div className="flex p-2 shadow-xl items-center">
|
|
||||||
<textarea
|
|
||||||
ref={textareaRef}
|
|
||||||
value={text}
|
|
||||||
onChange={(e) => {
|
|
||||||
if (disabled) return;
|
|
||||||
setText(e.target.value);
|
|
||||||
// auto-resize
|
|
||||||
const ta = textareaRef.current;
|
|
||||||
if (ta) {
|
|
||||||
ta.style.height = "auto";
|
|
||||||
ta.style.height = `${ta.scrollHeight}px`;
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
onKeyDown={(e) => {
|
|
||||||
// Enter to submit, Shift+Enter for newline
|
|
||||||
if (e.key === "Enter" && !e.shiftKey) {
|
|
||||||
e.preventDefault();
|
|
||||||
handleSubmit(e);
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
placeholder="Type a message..."
|
|
||||||
rows={1}
|
|
||||||
className="flex-1 mx-2 rounded-md shadow-2sx border-none focus:border-none focus:outline-none resize-none overflow-auto max-h-40"
|
|
||||||
disabled={disabled}
|
|
||||||
/>
|
|
||||||
<motion.button
|
|
||||||
type="submit"
|
|
||||||
className={`flex gap-2 px-4 py-2 bg-gray-700 rounded-xl ml-4 items-center ${
|
|
||||||
disabled ? "cursor-not-allowed" : ""
|
|
||||||
}`}
|
|
||||||
whileHover={disabled ? undefined : { scale: 1.1 }}
|
|
||||||
whileTap={disabled ? undefined : { scale: 0.9 }}
|
|
||||||
disabled={disabled}
|
|
||||||
style={{ opacity: disabled ? 0.5 : 1 }}
|
|
||||||
>
|
|
||||||
<BotMessageSquare />
|
|
||||||
</motion.button>
|
|
||||||
</div>
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
import { StrictMode } from "react";
|
|
||||||
import { createRoot } from "react-dom/client";
|
|
||||||
import "./index.css";
|
|
||||||
import App from "./app/index.jsx";
|
|
||||||
|
|
||||||
createRoot(document.getElementById("root")).render(
|
|
||||||
<StrictMode>
|
|
||||||
<App />
|
|
||||||
</StrictMode>
|
|
||||||
);
|
|
||||||
3032
rust-engine/Cargo.lock
generated
Normal file
3032
rust-engine/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
25
rust-engine/Cargo.toml
Normal file
25
rust-engine/Cargo.toml
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# rust-engine/Cargo.toml
|
||||||
|
|
||||||
|
[package]
|
||||||
|
name = "rust-engine"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
tokio = { version = "1.38.0", features = ["full"] }
|
||||||
|
warp = { version = "0.4.2", features = ["server", "multipart"] }
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = "1.0"
|
||||||
|
sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "mysql", "chrono", "uuid", "macros"] }
|
||||||
|
chrono = { version = "0.4", features = ["serde"] }
|
||||||
|
tracing = "0.1"
|
||||||
|
tracing-subscriber = "0.3"
|
||||||
|
dotenvy = "0.15.7" # Switched from unmaintained 'dotenv'
|
||||||
|
anyhow = "1.0"
|
||||||
|
uuid = { version = "1", features = ["serde", "v4"] }
|
||||||
|
reqwest = { version = "0.12.24", features = ["json", "rustls-tls"] }
|
||||||
|
async-trait = "0.1"
|
||||||
|
tokio-util = "0.7"
|
||||||
|
futures-util = "0.3"
|
||||||
|
lazy_static = "1.4"
|
||||||
|
bytes = "1.4"
|
||||||
48
rust-engine/DEMODETAILS.md
Normal file
48
rust-engine/DEMODETAILS.md
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
## Demo Runbook: ISS Systems (3-minute showcase)
|
||||||
|
|
||||||
|
This demo uses ~20 public NASA PDFs covering ISS Electrical Power, ECLSS, Avionics, and Structures. They live in `rust-engine/demo-data` and are automatically ingested via the server.
|
||||||
|
|
||||||
|
### 1) Seed demo data (one-click)
|
||||||
|
|
||||||
|
- Trigger ingestion (cloud): POST `/api/files/import-demo` (UI button available when `?debug=1` is present)
|
||||||
|
- The backend copies PDFs into storage, inserts DB rows with `pending_analysis = true`, and the FileWorker processes them.
|
||||||
|
- Processing pipeline per file:
|
||||||
|
- Gemini Flash → comprehensive description (facts/keywords/components)
|
||||||
|
- Gemini Pro → deep vector graph data (keywords/use cases/relationships)
|
||||||
|
- Embed + upsert to Qdrant, mark file ready (`pending_analysis = false`)
|
||||||
|
|
||||||
|
Tip: You can list files at `GET /api/files/list`. Ready files will start to appear as analysis completes.
|
||||||
|
|
||||||
|
### 2) Showcase flow (suggested script)
|
||||||
|
|
||||||
|
1. “We ingested real ISS technical PDFs. The worker analyzes each file with Gemini and builds vector graph data for robust retrieval.”
|
||||||
|
2. Show the files list. Point out a couple of recognizable titles.
|
||||||
|
3. Run two queries (examples below) and open their results (the app calls `POST /api/query/create` then polls `/api/query/result`).
|
||||||
|
4. Highlight the grounded answer: ‘related_files’, ‘relationships’, and ‘final_answer’ fields.
|
||||||
|
5. Call out that if info isn’t present in the PDFs, the system explicitly states uncertainty (no guessing).
|
||||||
|
|
||||||
|
### 3) Demo queries (pick 2–3)
|
||||||
|
|
||||||
|
- Electrical Power System (EPS)
|
||||||
|
- “Trace the power path from the P6 solar array to the BCDU. Where are likely ground fault points?”
|
||||||
|
- “What is the role of the DC Switching Unit in array power management?”
|
||||||
|
- ECLSS
|
||||||
|
- “Which modules are part of water recovery, and how does the Oxygen Generator Assembly interface?”
|
||||||
|
- “Summarize the CDRA cycle and downstream subsystems it impacts.”
|
||||||
|
- C&DH / Avionics
|
||||||
|
- “In the US Lab, a blue/white wire connects to MDM ‘LAB1’. What are possible data pathways?”
|
||||||
|
- “Describe the onboard LAN segments and links to MDMs.”
|
||||||
|
- Structures / Robotics
|
||||||
|
- “Where does the Latching End Effector connect on S1 truss?”
|
||||||
|
- “What is the Mobile Transporter’s role in SSRMS operations?”
|
||||||
|
|
||||||
|
### 4) Reset/refresh (optional)
|
||||||
|
|
||||||
|
- POST `/api/files/import-demo?force=1` to overwrite by filename and re-queue analysis.
|
||||||
|
|
||||||
|
### Appendix: Example sources
|
||||||
|
|
||||||
|
- EPS: 20110014867, 20040171627, 19900007297, 20120002931, 20100029672
|
||||||
|
- ECLSS: 20170008316, 20070019910, 20080039691, 20100029191, 20070019929
|
||||||
|
- C&DH: 20000012543, 20100029690, 19950014639, 20010023477, 19980227289
|
||||||
|
- Structures/Robotics: 20020054238, 20010035542, 20140001008, Destiny fact sheet, 20020088289
|
||||||
90
rust-engine/Dockerfile
Normal file
90
rust-engine/Dockerfile
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
# syntax=docker/dockerfile:1.7
|
||||||
|
# rust-engine/Dockerfile
|
||||||
|
|
||||||
|
# --- Stage 1: Builder ---
|
||||||
|
# Use a stable Rust version
|
||||||
|
FROM rust:slim AS builder
|
||||||
|
WORKDIR /usr/src/app
|
||||||
|
|
||||||
|
# Install build dependencies needed for sqlx
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
pkg-config \
|
||||||
|
libssl-dev \
|
||||||
|
curl \
|
||||||
|
build-essential \
|
||||||
|
ca-certificates \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|
||||||
|
# Allow optional override of toolchain (e.g., nightly or a pinned version). Leave empty to use image default.
|
||||||
|
ARG RUSTUP_TOOLCHAIN=
|
||||||
|
|
||||||
|
# Use rustup and cargo from the official Rust image location
|
||||||
|
ENV PATH="/usr/local/cargo/bin:${PATH}"
|
||||||
|
|
||||||
|
# Copy manifest files first to leverage Docker layer caching for dependencies
|
||||||
|
COPY Cargo.toml Cargo.lock rust-toolchain.toml ./
|
||||||
|
|
||||||
|
# Ensure the pinned toolchain from rust-toolchain.toml (or provided ARG) is installed only if missing
|
||||||
|
RUN set -eux; \
|
||||||
|
if [ -n "${RUSTUP_TOOLCHAIN}" ]; then \
|
||||||
|
if ! rustup toolchain list | grep -q "^${RUSTUP_TOOLCHAIN}"; then \
|
||||||
|
rustup toolchain install "${RUSTUP_TOOLCHAIN}"; \
|
||||||
|
fi; \
|
||||||
|
rustup default "${RUSTUP_TOOLCHAIN}"; \
|
||||||
|
else \
|
||||||
|
if [ -f rust-toolchain.toml ]; then \
|
||||||
|
TOOLCHAIN=$(sed -n 's/^channel *= *"\(.*\)"/\1/p' rust-toolchain.toml | head -n1); \
|
||||||
|
if [ -n "$TOOLCHAIN" ]; then \
|
||||||
|
if ! rustup toolchain list | grep -q "^$TOOLCHAIN"; then \
|
||||||
|
rustup toolchain install "$TOOLCHAIN"; \
|
||||||
|
fi; \
|
||||||
|
rustup default "$TOOLCHAIN"; \
|
||||||
|
fi; \
|
||||||
|
fi; \
|
||||||
|
fi; \
|
||||||
|
rustup show active-toolchain || true
|
||||||
|
|
||||||
|
# Create a dummy src to allow cargo to download dependencies into the cache layer
|
||||||
|
RUN mkdir -p src && echo "fn main() { println!(\"cargo cache build\"); }" > src/main.rs
|
||||||
|
|
||||||
|
# Warm up dependency caches without compiling a dummy binary
|
||||||
|
RUN --mount=type=cache,target=/usr/local/cargo/registry,sharing=locked \
|
||||||
|
--mount=type=cache,target=/usr/local/cargo/git,sharing=locked \
|
||||||
|
cargo fetch
|
||||||
|
|
||||||
|
|
||||||
|
# Remove dummy main.rs before copying the real source
|
||||||
|
RUN rm -f src/main.rs
|
||||||
|
COPY src ./src
|
||||||
|
# Build the real binary
|
||||||
|
RUN cargo build --release --locked
|
||||||
|
|
||||||
|
# --- Stage 2: Final, small image ---
|
||||||
|
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
# Install only necessary runtime dependencies (no upgrade, just ca-certificates)
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Add a non-root user for security
|
||||||
|
RUN useradd --system --uid 10001 --no-create-home --shell /usr/sbin/nologin appuser
|
||||||
|
|
||||||
|
# Copy the compiled binary from the builder stage
|
||||||
|
|
||||||
|
COPY --from=builder /usr/src/app/target/release/rust-engine /usr/local/bin/rust-engine
|
||||||
|
|
||||||
|
# Create writable storage and logs directories for appuser
|
||||||
|
RUN chown appuser:appuser /usr/local/bin/rust-engine \
|
||||||
|
&& mkdir -p /var/log /app/storage /app/demo-data \
|
||||||
|
&& touch /var/log/astra-errors.log \
|
||||||
|
&& chown -R appuser:appuser /var/log /app
|
||||||
|
|
||||||
|
# Set working directory to a writable location
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Switch to non-root user
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
# Redirect all output to /var/log/astra-errors.log for easy monitoring
|
||||||
|
ENTRYPOINT ["/bin/sh", "-c", "/usr/local/bin/rust-engine >> /var/log/astra-errors.log 2>&1"]
|
||||||
81
rust-engine/README.md
Normal file
81
rust-engine/README.md
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
# Rust Engine API and Worker
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
- HTTP API (warp) under /api for file management and query lifecycle
|
||||||
|
- MySQL for metadata, Qdrant for vector similarity
|
||||||
|
- Background worker resumes queued work and re-queues stale InProgress jobs at startup
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
|
- DATABASE_URL: mysql://USER:PASS@HOST:3306/DB
|
||||||
|
- QDRANT_URL: default <http://qdrant:6333>
|
||||||
|
- GEMINI_API_KEY: used for Gemini content generation (optional in demo)
|
||||||
|
|
||||||
|
## Endpoints (JSON)
|
||||||
|
|
||||||
|
- POST /api/files (multipart)
|
||||||
|
- Form: file=@path
|
||||||
|
- Response: {"success": true}
|
||||||
|
|
||||||
|
- GET /api/files/list
|
||||||
|
- Response: {"files": [{"id","filename","path","description"}]}
|
||||||
|
|
||||||
|
- GET /api/files/delete?id=<file_id>
|
||||||
|
- Response: {"deleted": true|false}
|
||||||
|
|
||||||
|
- POST /api/query/create
|
||||||
|
- Body: {"q": "text", "top_k": 5}
|
||||||
|
- Response: {"id": "uuid"}
|
||||||
|
|
||||||
|
- GET /api/query/status?id=<query_id>
|
||||||
|
- Response: {"status": "Queued"|"InProgress"|"Completed"|"Cancelled"|"Failed"|"not_found"}
|
||||||
|
|
||||||
|
- GET /api/query/result?id=<query_id>
|
||||||
|
- Response (Completed):
|
||||||
|
{
|
||||||
|
"result": {
|
||||||
|
"summary": "Found N related files",
|
||||||
|
"related_files": [
|
||||||
|
{"id","filename","path","description","score"}
|
||||||
|
],
|
||||||
|
"relationships": "...",
|
||||||
|
"final_answer": "..."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- GET /api/query/cancel?id=<query_id>
|
||||||
|
- Response: {"cancelled": true}
|
||||||
|
|
||||||
|
## Worker behavior
|
||||||
|
|
||||||
|
- Ensures Qdrant collection exists (dim 64, cosine)
|
||||||
|
- Re-queues InProgress older than 10 minutes
|
||||||
|
- Processing stages:
|
||||||
|
1) Set InProgress
|
||||||
|
2) Embed query text (demo now; pluggable Gemini later)
|
||||||
|
3) Search Qdrant top_k (default 5)
|
||||||
|
4) Join file metadata (MySQL)
|
||||||
|
5) Gemini step: relationship analysis (strictly from provided files)
|
||||||
|
6) Gemini step: final answer (no speculation; say unknown if insufficient)
|
||||||
|
7) Persist result (JSON) and set Completed
|
||||||
|
- Checks for cancellation between stages
|
||||||
|
|
||||||
|
## Local quickstart
|
||||||
|
|
||||||
|
1. docker compose up -d mysql qdrant
|
||||||
|
2. set env DATABASE_URL and QDRANT_URL
|
||||||
|
3. cargo run
|
||||||
|
4. (optional) import demo PDFs
|
||||||
|
- Ensure demo files are located in `rust-engine/demo-data` (default) or set `DEMO_DATA_DIR` env var to a folder containing PDFs.
|
||||||
|
- Call the endpoint:
|
||||||
|
- POST <http://localhost:8000/api/files/import-demo>
|
||||||
|
- Optional query `?force=1` to overwrite existing by filename
|
||||||
|
- Or run the PowerShell helper:
|
||||||
|
- `./scripts/import_demo.ps1` (adds all PDFs in demo-data)
|
||||||
|
- `./scripts/import_demo.ps1 -Force` (overwrite existing)
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Replace demo embeddings with real Gemini calls for production
|
||||||
|
- Add auth to endpoints if needed (API key/JWT)
|
||||||
BIN
rust-engine/demo-data/132-1-Final.pdf
Normal file
BIN
rust-engine/demo-data/132-1-Final.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/179225main_iss_poster_back.pdf
Normal file
BIN
rust-engine/demo-data/179225main_iss_poster_back.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/19790004570.pdf
Normal file
BIN
rust-engine/demo-data/19790004570.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/19880012104.pdf
Normal file
BIN
rust-engine/demo-data/19880012104.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/19890016674.pdf
Normal file
BIN
rust-engine/demo-data/19890016674.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/19920015843.pdf
Normal file
BIN
rust-engine/demo-data/19920015843.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/19950014639.pdf
Normal file
BIN
rust-engine/demo-data/19950014639.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20040171627.pdf
Normal file
BIN
rust-engine/demo-data/20040171627.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20050207388.pdf
Normal file
BIN
rust-engine/demo-data/20050207388.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20050210002.pdf
Normal file
BIN
rust-engine/demo-data/20050210002.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20080014096.pdf
Normal file
BIN
rust-engine/demo-data/20080014096.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20100029672.pdf
Normal file
BIN
rust-engine/demo-data/20100029672.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20110014867.pdf
Normal file
BIN
rust-engine/demo-data/20110014867.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20120002931.pdf
Normal file
BIN
rust-engine/demo-data/20120002931.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20190028718.pdf
Normal file
BIN
rust-engine/demo-data/20190028718.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/20200003149.pdf
Normal file
BIN
rust-engine/demo-data/20200003149.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/473486main_iss_atcs_overview.pdf
Normal file
BIN
rust-engine/demo-data/473486main_iss_atcs_overview.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/8Mod6Prob1.pdf
Normal file
BIN
rust-engine/demo-data/8Mod6Prob1.pdf
Normal file
Binary file not shown.
BIN
rust-engine/demo-data/ICES_2023_311 final 5 15 23.pdf
Normal file
BIN
rust-engine/demo-data/ICES_2023_311 final 5 15 23.pdf
Normal file
Binary file not shown.
Binary file not shown.
4
rust-engine/rust-toolchain.toml
Normal file
4
rust-engine/rust-toolchain.toml
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
[toolchain]
|
||||||
|
channel = "1.88.0"
|
||||||
|
# components = ["rustfmt", "clippy"]
|
||||||
|
# targets = ["x86_64-unknown-linux-gnu"]
|
||||||
346
rust-engine/src/api.rs
Normal file
346
rust-engine/src/api.rs
Normal file
|
|
@ -0,0 +1,346 @@
|
||||||
|
use crate::vector_db::QdrantClient;
|
||||||
|
use crate::storage;
|
||||||
|
use anyhow::Result;
|
||||||
|
use bytes::Buf;
|
||||||
|
use futures_util::TryStreamExt;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use sqlx::{MySqlPool, Row};
|
||||||
|
use warp::{multipart::FormData, Filter, Rejection, Reply};
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
struct DeleteQuery {
|
||||||
|
id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn routes(pool: MySqlPool) -> impl Filter<Extract = impl Reply, Error = Rejection> + Clone {
|
||||||
|
let pool_filter = warp::any().map(move || pool.clone());
|
||||||
|
|
||||||
|
// Import demo files from demo-data directory
|
||||||
|
let import_demo = warp::path!("files" / "import-demo")
|
||||||
|
.and(warp::post())
|
||||||
|
.and(
|
||||||
|
warp::query::<std::collections::HashMap<String, String>>()
|
||||||
|
.or(warp::any().map(|| std::collections::HashMap::new()))
|
||||||
|
.unify()
|
||||||
|
)
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_import_demo);
|
||||||
|
|
||||||
|
// Upload file
|
||||||
|
let upload = warp::path("files")
|
||||||
|
.and(warp::post())
|
||||||
|
.and(warp::multipart::form().max_length(50_000_000)) // 50MB per part default; storage is filesystem-backed
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_upload);
|
||||||
|
|
||||||
|
// Delete file
|
||||||
|
let delete = warp::path!("files" / "delete")
|
||||||
|
.and(warp::get())
|
||||||
|
.and(warp::query::<DeleteQuery>())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_delete);
|
||||||
|
|
||||||
|
// List files
|
||||||
|
let list = warp::path!("files" / "list")
|
||||||
|
.and(warp::get())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_list);
|
||||||
|
|
||||||
|
// Create query
|
||||||
|
let create_q = warp::path!("query" / "create")
|
||||||
|
.and(warp::post())
|
||||||
|
.and(warp::body::json())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_create_query);
|
||||||
|
|
||||||
|
// Query status
|
||||||
|
let status = warp::path!("query" / "status")
|
||||||
|
.and(warp::get())
|
||||||
|
.and(warp::query::<DeleteQuery>())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_query_status);
|
||||||
|
|
||||||
|
// Query result
|
||||||
|
let result = warp::path!("query" / "result")
|
||||||
|
.and(warp::get())
|
||||||
|
.and(warp::query::<DeleteQuery>())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_query_result);
|
||||||
|
|
||||||
|
// Cancel
|
||||||
|
let cancel = warp::path!("query" / "cancel")
|
||||||
|
.and(warp::get())
|
||||||
|
.and(warp::query::<DeleteQuery>())
|
||||||
|
.and(pool_filter.clone())
|
||||||
|
.and_then(handle_cancel_query);
|
||||||
|
|
||||||
|
let api = upload.or(import_demo).or(delete).or(list).or(create_q).or(status).or(result).or(cancel);
|
||||||
|
warp::path("api").and(api)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_upload(mut form: FormData, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
let mut created_files = Vec::new();
|
||||||
|
while let Some(field) = form.try_next().await.map_err(|_| warp::reject())? {
|
||||||
|
let _name = field.name().to_string();
|
||||||
|
let filename = field
|
||||||
|
.filename()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| format!("upload-{}", uuid::Uuid::new_v4()));
|
||||||
|
|
||||||
|
// Read stream of Buf into a Vec<u8>
|
||||||
|
let data = field
|
||||||
|
.stream()
|
||||||
|
.map_ok(|mut buf| {
|
||||||
|
let mut v = Vec::new();
|
||||||
|
while buf.has_remaining() {
|
||||||
|
let chunk = buf.chunk();
|
||||||
|
v.extend_from_slice(chunk);
|
||||||
|
let n = chunk.len();
|
||||||
|
buf.advance(n);
|
||||||
|
}
|
||||||
|
v
|
||||||
|
})
|
||||||
|
.try_fold(Vec::new(), |mut acc, chunk_vec| async move {
|
||||||
|
acc.extend_from_slice(&chunk_vec);
|
||||||
|
Ok(acc)
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())?;
|
||||||
|
|
||||||
|
// Save file
|
||||||
|
let path = storage::save_file(&filename, &data).map_err(|_| warp::reject())?;
|
||||||
|
|
||||||
|
// Insert file record with pending_analysis = true, description = NULL
|
||||||
|
let id = uuid::Uuid::new_v4().to_string();
|
||||||
|
sqlx::query("INSERT INTO files (id, filename, path, description, pending_analysis, analysis_status) VALUES (?, ?, ?, ?, ?, 'Queued')")
|
||||||
|
.bind(&id)
|
||||||
|
.bind(&filename)
|
||||||
|
.bind(path.to_str().unwrap())
|
||||||
|
.bind(Option::<String>::None)
|
||||||
|
.bind(true)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB insert error: {}", e);
|
||||||
|
warp::reject()
|
||||||
|
})?;
|
||||||
|
created_files.push(serde_json::json!({
|
||||||
|
"id": id,
|
||||||
|
"filename": filename,
|
||||||
|
"pending_analysis": true,
|
||||||
|
"analysis_status": "Queued"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({
|
||||||
|
"uploaded": created_files.len(),
|
||||||
|
"files": created_files
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_import_demo(params: std::collections::HashMap<String, String>, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
use std::fs;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
let force = params.get("force").map(|v| v == "1" || v.eq_ignore_ascii_case("true")).unwrap_or(false);
|
||||||
|
let demo_dir_setting = std::env::var("DEMO_DATA_DIR").unwrap_or_else(|_| "demo-data".to_string());
|
||||||
|
let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
|
||||||
|
|
||||||
|
// Build a list of plausible demo-data locations so local runs and containers both work.
|
||||||
|
let mut candidates: Vec<PathBuf> = Vec::new();
|
||||||
|
let configured = PathBuf::from(&demo_dir_setting);
|
||||||
|
let mut push_candidate = |path: PathBuf| {
|
||||||
|
if !candidates.iter().any(|existing| existing == &path) {
|
||||||
|
candidates.push(path);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
push_candidate(base.join(&configured));
|
||||||
|
push_candidate(PathBuf::from(&demo_dir_setting));
|
||||||
|
push_candidate(base.join("rust-engine").join(&configured));
|
||||||
|
push_candidate(base.join("rust-engine").join("demo-data"));
|
||||||
|
push_candidate(base.join("demo-data"));
|
||||||
|
if let Ok(exe_path) = std::env::current_exe() {
|
||||||
|
if let Some(exe_dir) = exe_path.parent() {
|
||||||
|
push_candidate(exe_dir.join(&configured));
|
||||||
|
push_candidate(exe_dir.join("demo-data"));
|
||||||
|
push_candidate(exe_dir.join("rust-engine").join(&configured));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut attempted: Vec<PathBuf> = Vec::new();
|
||||||
|
let mut resolved_dir: Option<PathBuf> = None;
|
||||||
|
for candidate in candidates {
|
||||||
|
if candidate.exists() && candidate.is_dir() {
|
||||||
|
resolved_dir = Some(candidate);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
attempted.push(candidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
let src_dir = match resolved_dir {
|
||||||
|
Some(path) => path,
|
||||||
|
None => {
|
||||||
|
let attempted_paths: Vec<String> = attempted
|
||||||
|
.into_iter()
|
||||||
|
.map(|p| p.display().to_string())
|
||||||
|
.collect();
|
||||||
|
return Ok(warp::reply::json(&serde_json::json!({
|
||||||
|
"imported": 0,
|
||||||
|
"skipped": 0,
|
||||||
|
"error": format!("demo dir not found (checked: {})", attempted_paths.join(", "))
|
||||||
|
})));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut imported = 0;
|
||||||
|
let mut skipped = 0;
|
||||||
|
for entry in fs::read_dir(&src_dir).map_err(|_| warp::reject())? {
|
||||||
|
let entry = entry.map_err(|_| warp::reject())?;
|
||||||
|
let path = entry.path();
|
||||||
|
if path.extension().and_then(|e| e.to_str()).map(|e| e.eq_ignore_ascii_case("pdf")).unwrap_or(false) {
|
||||||
|
let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("unknown.pdf").to_string();
|
||||||
|
|
||||||
|
// check if exists
|
||||||
|
if !force {
|
||||||
|
if let Some(_) = sqlx::query("SELECT id FROM files WHERE filename = ?")
|
||||||
|
.bind(&filename)
|
||||||
|
.fetch_optional(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())? {
|
||||||
|
skipped += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read and save to storage
|
||||||
|
let data = fs::read(&path).map_err(|_| warp::reject())?;
|
||||||
|
let stored_path = storage::save_file(&filename, &data).map_err(|_| warp::reject())?;
|
||||||
|
|
||||||
|
// insert or upsert db record
|
||||||
|
let id = uuid::Uuid::new_v4().to_string();
|
||||||
|
if force {
|
||||||
|
let _ = sqlx::query("DELETE FROM files WHERE filename = ?")
|
||||||
|
.bind(&filename)
|
||||||
|
.execute(&pool)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
sqlx::query("INSERT INTO files (id, filename, path, description, pending_analysis, analysis_status) VALUES (?, ?, ?, ?, ?, 'Queued')")
|
||||||
|
.bind(&id)
|
||||||
|
.bind(&filename)
|
||||||
|
.bind(stored_path.to_str().unwrap())
|
||||||
|
.bind(Option::<String>::None)
|
||||||
|
.bind(true)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB insert error: {}", e);
|
||||||
|
warp::reject()
|
||||||
|
})?;
|
||||||
|
imported += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({ "imported": imported, "skipped": skipped })))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_delete(q: DeleteQuery, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
if let Some(row) = sqlx::query("SELECT path FROM files WHERE id = ?")
|
||||||
|
.bind(&q.id)
|
||||||
|
.fetch_optional(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())?
|
||||||
|
{
|
||||||
|
let path: String = row.get("path");
|
||||||
|
let _ = storage::delete_file(std::path::Path::new(&path));
|
||||||
|
// Remove from Qdrant
|
||||||
|
let qdrant_url = std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://qdrant:6333".to_string());
|
||||||
|
let qdrant = QdrantClient::new(&qdrant_url);
|
||||||
|
let _ = qdrant.delete_point(&q.id).await;
|
||||||
|
let _ = sqlx::query("DELETE FROM files WHERE id = ?").bind(&q.id).execute(&pool).await;
|
||||||
|
return Ok(warp::reply::json(&serde_json::json!({"deleted": true})));
|
||||||
|
}
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"deleted": false})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_list(pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
let rows = sqlx::query("SELECT id, filename, path, description, pending_analysis, analysis_status FROM files ORDER BY created_at DESC LIMIT 500")
|
||||||
|
.fetch_all(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB list error: {}", e);
|
||||||
|
warp::reject()
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let files: Vec<serde_json::Value> = rows
|
||||||
|
.into_iter()
|
||||||
|
.map(|r| {
|
||||||
|
let id: String = r.get("id");
|
||||||
|
let filename: String = r.get("filename");
|
||||||
|
let path: String = r.get("path");
|
||||||
|
let description: Option<String> = r.get("description");
|
||||||
|
let pending: bool = r.get("pending_analysis");
|
||||||
|
let status: Option<String> = r.try_get("analysis_status").ok();
|
||||||
|
serde_json::json!({
|
||||||
|
"id": id,
|
||||||
|
"filename": filename,
|
||||||
|
"path": path,
|
||||||
|
"description": description,
|
||||||
|
"pending_analysis": pending,
|
||||||
|
"analysis_status": status
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"files": files})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_create_query(body: serde_json::Value, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
// Insert query as queued, worker will pick it up
|
||||||
|
let id = uuid::Uuid::new_v4().to_string();
|
||||||
|
let payload = body;
|
||||||
|
sqlx::query("INSERT INTO queries (id, status, payload) VALUES (?, 'Queued', ?)")
|
||||||
|
.bind(&id)
|
||||||
|
.bind(payload)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|e| {
|
||||||
|
tracing::error!("DB insert query error: {}", e);
|
||||||
|
warp::reject()
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"id": id})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_query_status(q: DeleteQuery, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
if let Some(row) = sqlx::query("SELECT status FROM queries WHERE id = ?")
|
||||||
|
.bind(&q.id)
|
||||||
|
.fetch_optional(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())?
|
||||||
|
{
|
||||||
|
let status: String = row.get("status");
|
||||||
|
return Ok(warp::reply::json(&serde_json::json!({"status": status})));
|
||||||
|
}
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"status": "not_found"})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_query_result(q: DeleteQuery, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
if let Some(row) = sqlx::query("SELECT result FROM queries WHERE id = ?")
|
||||||
|
.bind(&q.id)
|
||||||
|
.fetch_optional(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())?
|
||||||
|
{
|
||||||
|
let result: Option<serde_json::Value> = row.get("result");
|
||||||
|
return Ok(warp::reply::json(&serde_json::json!({"result": result})));
|
||||||
|
}
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"result": null})))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_cancel_query(q: DeleteQuery, pool: MySqlPool) -> Result<impl Reply, Rejection> {
|
||||||
|
// Mark as cancelled; worker must check status before heavy steps
|
||||||
|
sqlx::query("UPDATE queries SET status = 'Cancelled' WHERE id = ?")
|
||||||
|
.bind(&q.id)
|
||||||
|
.execute(&pool)
|
||||||
|
.await
|
||||||
|
.map_err(|_| warp::reject())?;
|
||||||
|
Ok(warp::reply::json(&serde_json::json!({"cancelled": true})))
|
||||||
|
}
|
||||||
42
rust-engine/src/db.rs
Normal file
42
rust-engine/src/db.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
use sqlx::MySqlPool;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
pub async fn init_db(database_url: &str) -> Result<MySqlPool, sqlx::Error> {
|
||||||
|
let pool = MySqlPool::connect(database_url).await?;
|
||||||
|
|
||||||
|
// Create tables if they don't exist. Simple schema for demo/hackathon use.
|
||||||
|
// Note: MySQL requires separate statements for each CREATE TABLE
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
CREATE TABLE IF NOT EXISTS files (
|
||||||
|
id VARCHAR(36) PRIMARY KEY,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
path TEXT NOT NULL,
|
||||||
|
description TEXT,
|
||||||
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
pending_analysis BOOLEAN DEFAULT TRUE,
|
||||||
|
analysis_status VARCHAR(32) DEFAULT 'Queued'
|
||||||
|
)
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.execute(&pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
sqlx::query(
|
||||||
|
r#"
|
||||||
|
CREATE TABLE IF NOT EXISTS queries (
|
||||||
|
id VARCHAR(36) PRIMARY KEY,
|
||||||
|
status VARCHAR(32) NOT NULL,
|
||||||
|
payload JSON,
|
||||||
|
result JSON,
|
||||||
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
|
||||||
|
)
|
||||||
|
"#,
|
||||||
|
)
|
||||||
|
.execute(&pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
info!("Database initialized");
|
||||||
|
Ok(pool)
|
||||||
|
}
|
||||||
133
rust-engine/src/file_worker.rs
Normal file
133
rust-engine/src/file_worker.rs
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
use crate::gemini_client::{demo_text_embedding, generate_text_with_model, DEMO_EMBED_DIM};
|
||||||
|
use crate::vector;
|
||||||
|
use crate::vector_db::QdrantClient;
|
||||||
|
use sqlx::MySqlPool;
|
||||||
|
use anyhow::Result;
|
||||||
|
use tracing::{info, error};
|
||||||
|
|
||||||
|
pub struct FileWorker {
|
||||||
|
pool: MySqlPool,
|
||||||
|
qdrant: QdrantClient,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileWorker {
|
||||||
|
pub fn new(pool: MySqlPool) -> Self {
|
||||||
|
let qdrant_url = std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://qdrant:6333".to_string());
|
||||||
|
let qdrant = QdrantClient::new(&qdrant_url);
|
||||||
|
Self { pool, qdrant }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn run(&self) {
|
||||||
|
info!("FileWorker starting");
|
||||||
|
if let Err(e) = self.qdrant.ensure_files_collection(DEMO_EMBED_DIM).await {
|
||||||
|
error!("Failed to ensure Qdrant collection: {}", e);
|
||||||
|
}
|
||||||
|
loop {
|
||||||
|
match self.fetch_and_claim().await {
|
||||||
|
Ok(Some(fid)) => {
|
||||||
|
info!("Processing file {}", fid);
|
||||||
|
if let Err(e) = self.process_file(&fid).await {
|
||||||
|
error!("Error processing file {}: {}", fid, e);
|
||||||
|
if let Err(mark_err) = self.mark_failed(&fid, &format!("{}", e)).await {
|
||||||
|
error!("Failed to mark file {} as failed: {}", fid, mark_err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("FileWorker fetch error: {}", e);
|
||||||
|
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_and_claim(&self) -> Result<Option<String>> {
|
||||||
|
// Claim files that are queued or stuck in progress for >10min
|
||||||
|
if let Some(row) = sqlx::query(
|
||||||
|
"SELECT id FROM files WHERE (analysis_status = 'Queued' OR (analysis_status = 'InProgress' AND created_at < (NOW() - INTERVAL 10 MINUTE))) AND pending_analysis = TRUE LIMIT 1"
|
||||||
|
)
|
||||||
|
.fetch_optional(&self.pool)
|
||||||
|
.await? {
|
||||||
|
use sqlx::Row;
|
||||||
|
let id: String = row.get("id");
|
||||||
|
// Mark as in-progress
|
||||||
|
let _ = sqlx::query("UPDATE files SET analysis_status = 'InProgress' WHERE id = ?")
|
||||||
|
.bind(&id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(Some(id))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_file(&self, file_id: &str) -> Result<()> {
|
||||||
|
use sqlx::Row;
|
||||||
|
let row = sqlx::query("SELECT filename, path FROM files WHERE id = ?")
|
||||||
|
.bind(file_id)
|
||||||
|
.fetch_one(&self.pool)
|
||||||
|
.await?;
|
||||||
|
let filename: String = row.get("filename");
|
||||||
|
let _path: String = row.get("path");
|
||||||
|
|
||||||
|
// Stage 1: Gemini 2.5 Flash for description
|
||||||
|
let desc = generate_text_with_model(
|
||||||
|
"gemini-2.5-flash",
|
||||||
|
&format!(
|
||||||
|
"Describe the file '{filename}' and extract all key components, keywords, and details for later vectorization. Be comprehensive and factual."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap_or_else(|e| format!("[desc error: {}]", e));
|
||||||
|
sqlx::query("UPDATE files SET description = ?, analysis_status = 'InProgress' WHERE id = ?")
|
||||||
|
.bind(&desc)
|
||||||
|
.bind(file_id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// Stage 2: Gemini 2.5 Pro for deep vector graph data
|
||||||
|
let vector_graph = generate_text_with_model(
|
||||||
|
"gemini-2.5-pro",
|
||||||
|
&format!(
|
||||||
|
"Given the file '{filename}' and its description: {desc}\nGenerate a set of vector graph data (keywords, use cases, relationships) that can be used for broad and precise search. Only include what is directly supported by the file."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap_or_else(|e| format!("[vector error: {}]", e));
|
||||||
|
|
||||||
|
// Stage 3: Embed and upsert to Qdrant
|
||||||
|
let emb = demo_text_embedding(&vector_graph).await?;
|
||||||
|
match self.qdrant.upsert_point(file_id, emb.clone()).await {
|
||||||
|
Ok(_) => {
|
||||||
|
let _ = vector::store_embedding(file_id, emb.clone());
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
error!("Qdrant upsert failed for {}: {}", file_id, err);
|
||||||
|
let _ = vector::store_embedding(file_id, emb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark file as ready
|
||||||
|
sqlx::query("UPDATE files SET pending_analysis = FALSE, analysis_status = 'Completed' WHERE id = ?")
|
||||||
|
.bind(file_id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn mark_failed(&self, file_id: &str, reason: &str) -> Result<()> {
|
||||||
|
sqlx::query("UPDATE files SET analysis_status = 'Failed', pending_analysis = TRUE WHERE id = ?")
|
||||||
|
.bind(file_id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
sqlx::query("UPDATE files SET description = COALESCE(description, ?) WHERE id = ?")
|
||||||
|
.bind(format!("[analysis failed: {}]", reason))
|
||||||
|
.bind(file_id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
89
rust-engine/src/gemini_client.rs
Normal file
89
rust-engine/src/gemini_client.rs
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
// NOTE: This file provides lightweight helpers around the Gemini API. For the
|
||||||
|
// hackathon demo we fall back to deterministic strings when the API key is not
|
||||||
|
// configured so the flows still work end-to-end.
|
||||||
|
|
||||||
|
pub const DEMO_EMBED_DIM: usize = 64;
|
||||||
|
|
||||||
|
/// Demo text embedding (replace with real Gemini text embedding API)
|
||||||
|
pub async fn demo_text_embedding(text: &str) -> Result<Vec<f32>> {
|
||||||
|
let mut v = vec![0f32; DEMO_EMBED_DIM];
|
||||||
|
for (i, b) in text.as_bytes().iter().enumerate() {
|
||||||
|
let idx = i % v.len();
|
||||||
|
v[idx] += (*b as f32) / 255.0;
|
||||||
|
}
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate text using the default model (GEMINI_MODEL or gemini-2.5-pro).
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub async fn generate_text(prompt: &str) -> Result<String> {
|
||||||
|
let model = std::env::var("GEMINI_MODEL").unwrap_or_else(|_| "gemini-2.5-pro".to_string());
|
||||||
|
generate_text_with_model(&model, prompt).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate text with an explicit Gemini model. Falls back to a deterministic
|
||||||
|
/// response when the API key is not set so the demo still runs.
|
||||||
|
pub async fn generate_text_with_model(model: &str, prompt: &str) -> Result<String> {
|
||||||
|
let api_key = match std::env::var("GEMINI_API_KEY") {
|
||||||
|
Ok(k) if !k.is_empty() => k,
|
||||||
|
_ => {
|
||||||
|
return Ok(format!(
|
||||||
|
"[demo] Gemini ({}) not configured. Prompt preview: {}",
|
||||||
|
model,
|
||||||
|
truncate(prompt, 240)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let url = format!(
|
||||||
|
"https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}",
|
||||||
|
model, api_key
|
||||||
|
);
|
||||||
|
|
||||||
|
let body = json!({
|
||||||
|
"contents": [ { "parts": [ { "text": prompt } ] } ]
|
||||||
|
});
|
||||||
|
|
||||||
|
let client = Client::new();
|
||||||
|
let resp = client.post(&url).json(&body).send().await?;
|
||||||
|
let status = resp.status();
|
||||||
|
let txt = resp.text().await?;
|
||||||
|
if !status.is_success() {
|
||||||
|
return Ok(format!(
|
||||||
|
"[demo] Gemini ({}) error {}: {}",
|
||||||
|
model,
|
||||||
|
status,
|
||||||
|
truncate(&txt, 240)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Part { text: Option<String> }
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Content { parts: Vec<Part> }
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Candidate { content: Content }
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Response { candidates: Option<Vec<Candidate>> }
|
||||||
|
|
||||||
|
let data: Response = serde_json::from_str(&txt).unwrap_or(Response { candidates: None });
|
||||||
|
let out = data
|
||||||
|
.candidates
|
||||||
|
.and_then(|mut v| v.pop())
|
||||||
|
.and_then(|c| c.content.parts.into_iter().find_map(|p| p.text))
|
||||||
|
.unwrap_or_else(|| "[demo] Gemini returned empty response".to_string());
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn truncate(s: &str, max: usize) -> String {
|
||||||
|
if s.len() <= max {
|
||||||
|
s.to_string()
|
||||||
|
} else {
|
||||||
|
format!("{}…", &s[..max])
|
||||||
|
}
|
||||||
|
}
|
||||||
58
rust-engine/src/main.rs
Normal file
58
rust-engine/src/main.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
mod file_worker;
|
||||||
|
mod api;
|
||||||
|
mod db;
|
||||||
|
mod gemini_client;
|
||||||
|
mod models;
|
||||||
|
mod storage;
|
||||||
|
mod vector;
|
||||||
|
mod worker;
|
||||||
|
mod vector_db;
|
||||||
|
|
||||||
|
use std::env;
|
||||||
|
use std::error::Error;
|
||||||
|
use tracing::info;
|
||||||
|
use warp::Filter;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
// Initialize tracing
|
||||||
|
tracing_subscriber::fmt::init();
|
||||||
|
|
||||||
|
// Load environment variables
|
||||||
|
dotenvy::dotenv().ok();
|
||||||
|
|
||||||
|
let database_url = env::var("DATABASE_URL")
|
||||||
|
.unwrap_or_else(|_| "mysql://astraadmin:password@mysql:3306/astra".to_string());
|
||||||
|
|
||||||
|
info!("Starting Rust Engine...");
|
||||||
|
|
||||||
|
// Ensure storage dir
|
||||||
|
storage::ensure_storage_dir().expect("storage dir");
|
||||||
|
|
||||||
|
// Initialize DB
|
||||||
|
let pool = db::init_db(&database_url).await.map_err(|e| -> Box<dyn Error> { Box::new(e) })?;
|
||||||
|
|
||||||
|
// Spawn query worker
|
||||||
|
let worker = worker::Worker::new(pool.clone());
|
||||||
|
tokio::spawn(async move { worker.run().await });
|
||||||
|
|
||||||
|
// Spawn file analysis worker
|
||||||
|
let file_worker = file_worker::FileWorker::new(pool.clone());
|
||||||
|
tokio::spawn(async move { file_worker.run().await });
|
||||||
|
|
||||||
|
// API routes
|
||||||
|
let api_routes = api::routes(pool.clone())
|
||||||
|
.with(warp::cors()
|
||||||
|
.allow_any_origin()
|
||||||
|
.allow_headers(vec!["content-type", "authorization"])
|
||||||
|
.allow_methods(vec!["GET", "POST", "PUT", "DELETE", "OPTIONS"]))
|
||||||
|
.with(warp::log("rust_engine"));
|
||||||
|
|
||||||
|
info!("Rust Engine started on http://0.0.0.0:8000");
|
||||||
|
|
||||||
|
warp::serve(api_routes)
|
||||||
|
.run(([0, 0, 0, 0], 8000))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
61
rust-engine/src/models.rs
Normal file
61
rust-engine/src/models.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
use chrono::{DateTime, Utc};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
|
pub struct FileRecord {
|
||||||
|
pub id: String,
|
||||||
|
pub filename: String,
|
||||||
|
pub path: String,
|
||||||
|
pub description: Option<String>,
|
||||||
|
pub created_at: Option<DateTime<Utc>>,
|
||||||
|
pub pending_analysis: bool, // true if file is not yet ready for search
|
||||||
|
pub analysis_status: String, // 'Queued', 'InProgress', 'Completed', 'Failed'
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileRecord {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn new(filename: impl Into<String>, path: impl Into<String>, description: Option<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
id: Uuid::new_v4().to_string(),
|
||||||
|
filename: filename.into(),
|
||||||
|
path: path.into(),
|
||||||
|
description,
|
||||||
|
created_at: None,
|
||||||
|
pending_analysis: true,
|
||||||
|
analysis_status: "Queued".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
|
pub enum QueryStatus {
|
||||||
|
Queued,
|
||||||
|
InProgress,
|
||||||
|
Completed,
|
||||||
|
Cancelled,
|
||||||
|
Failed,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||||
|
pub struct QueryRecord {
|
||||||
|
pub id: String,
|
||||||
|
pub status: QueryStatus,
|
||||||
|
pub payload: serde_json::Value,
|
||||||
|
pub result: Option<serde_json::Value>,
|
||||||
|
pub created_at: Option<DateTime<Utc>>,
|
||||||
|
pub updated_at: Option<DateTime<Utc>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueryRecord {
|
||||||
|
pub fn new(payload: serde_json::Value) -> Self {
|
||||||
|
Self {
|
||||||
|
id: Uuid::new_v4().to_string(),
|
||||||
|
status: QueryStatus::Queued,
|
||||||
|
payload,
|
||||||
|
result: None,
|
||||||
|
created_at: None,
|
||||||
|
updated_at: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
34
rust-engine/src/storage.rs
Normal file
34
rust-engine/src/storage.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use std::fs;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
pub fn storage_dir() -> PathBuf {
|
||||||
|
std::env::var("ASTRA_STORAGE")
|
||||||
|
.map(PathBuf::from)
|
||||||
|
.unwrap_or_else(|_| PathBuf::from("/app/storage"))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ensure_storage_dir() -> Result<()> {
|
||||||
|
let dir = storage_dir();
|
||||||
|
if !dir.exists() {
|
||||||
|
fs::create_dir_all(&dir)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn save_file(filename: &str, contents: &[u8]) -> Result<PathBuf> {
|
||||||
|
ensure_storage_dir()?;
|
||||||
|
let mut path = storage_dir();
|
||||||
|
path.push(filename);
|
||||||
|
let mut f = fs::File::create(&path)?;
|
||||||
|
f.write_all(contents)?;
|
||||||
|
Ok(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delete_file(path: &Path) -> Result<()> {
|
||||||
|
if path.exists() {
|
||||||
|
fs::remove_file(path)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
24
rust-engine/src/vector.rs
Normal file
24
rust-engine/src/vector.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref VECTOR_STORE: Mutex<HashMap<String, Vec<f32>>> = Mutex::new(HashMap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn store_embedding(id: &str, emb: Vec<f32>) -> Result<()> {
|
||||||
|
let mut s = VECTOR_STORE.lock().unwrap();
|
||||||
|
s.insert(id.to_string(), emb);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_top_k(_query_emb: &[f32], k: usize) -> Result<Vec<String>> {
|
||||||
|
// Very naive: return up to k ids from the store.
|
||||||
|
let s = VECTOR_STORE.lock().unwrap();
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for key in s.keys().take(k) {
|
||||||
|
out.push(key.clone());
|
||||||
|
}
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
103
rust-engine/src/vector_db.rs
Normal file
103
rust-engine/src/vector_db.rs
Normal file
|
|
@ -0,0 +1,103 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use reqwest::Client;
|
||||||
|
use serde_json::json;
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct QdrantClient {
|
||||||
|
base: String,
|
||||||
|
client: Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QdrantClient {
|
||||||
|
|
||||||
|
/// Delete a point from collection 'files' by id
|
||||||
|
pub async fn delete_point(&self, id: &str) -> Result<()> {
|
||||||
|
let url = format!("{}/collections/files/points/delete", self.base);
|
||||||
|
let body = json!({
|
||||||
|
"points": [id]
|
||||||
|
});
|
||||||
|
let resp = self.client.post(&url).json(&body).send().await?;
|
||||||
|
let status = resp.status();
|
||||||
|
if status.is_success() {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
let t = resp.text().await.unwrap_or_default();
|
||||||
|
Err(anyhow::anyhow!("qdrant delete failed: {} - {}", status, t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn new(base: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
base: base.trim_end_matches('/').to_string(),
|
||||||
|
client: Client::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Upsert a point into collection `files` with id and vector
|
||||||
|
pub async fn upsert_point(&self, id: &str, vector: Vec<f32>) -> Result<()> {
|
||||||
|
let url = format!("{}/collections/files/points", self.base);
|
||||||
|
let body = json!({
|
||||||
|
"points": [{
|
||||||
|
"id": id,
|
||||||
|
"vector": vector,
|
||||||
|
"payload": {"type": "file"}
|
||||||
|
}]
|
||||||
|
});
|
||||||
|
|
||||||
|
let resp = self.client.post(&url).json(&body).send().await?;
|
||||||
|
let status = resp.status();
|
||||||
|
if status.is_success() {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
let t = resp.text().await.unwrap_or_default();
|
||||||
|
Err(anyhow::anyhow!("qdrant upsert failed: {} - {}", status, t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ensure the 'files' collection exists with the given dimension and distance metric
|
||||||
|
pub async fn ensure_files_collection(&self, dim: usize) -> Result<()> {
|
||||||
|
let url = format!("{}/collections/files", self.base);
|
||||||
|
let body = json!({
|
||||||
|
"vectors": {"size": dim, "distance": "Cosine"}
|
||||||
|
});
|
||||||
|
let resp = self.client.put(&url).json(&body).send().await?;
|
||||||
|
// 200 OK or 201 Created means ready; 409 Conflict means already exists
|
||||||
|
if resp.status().is_success() || resp.status().as_u16() == 409 {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
let status = resp.status();
|
||||||
|
let t = resp.text().await.unwrap_or_default();
|
||||||
|
Err(anyhow::anyhow!("qdrant ensure collection failed: {} - {}", status, t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search top-k nearest points from 'files', return (id, score)
|
||||||
|
pub async fn search_top_k(&self, vector: Vec<f32>, k: usize) -> Result<Vec<(String, f32)>> {
|
||||||
|
let url = format!("{}/collections/files/points/search", self.base);
|
||||||
|
let body = json!({
|
||||||
|
"vector": vector,
|
||||||
|
"limit": k
|
||||||
|
});
|
||||||
|
let resp = self.client.post(&url).json(&body).send().await?;
|
||||||
|
let status = resp.status();
|
||||||
|
if !status.is_success() {
|
||||||
|
let t = resp.text().await.unwrap_or_default();
|
||||||
|
return Err(anyhow::anyhow!("qdrant search failed: {} - {}", status, t));
|
||||||
|
}
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Hit { id: serde_json::Value, score: f32 }
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct Data { result: Vec<Hit> }
|
||||||
|
let data: Data = resp.json().await?;
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for h in data.result {
|
||||||
|
// id can be string or number; handle string
|
||||||
|
if let Some(s) = h.id.as_str() {
|
||||||
|
out.push((s.to_string(), h.score));
|
||||||
|
} else {
|
||||||
|
out.push((h.id.to_string(), h.score));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
}
|
||||||
266
rust-engine/src/worker.rs
Normal file
266
rust-engine/src/worker.rs
Normal file
|
|
@ -0,0 +1,266 @@
|
||||||
|
use crate::gemini_client::{demo_text_embedding, generate_text_with_model, DEMO_EMBED_DIM};
|
||||||
|
use crate::models::{QueryRecord, QueryStatus};
|
||||||
|
use crate::vector;
|
||||||
|
use crate::vector_db::QdrantClient;
|
||||||
|
use anyhow::Result;
|
||||||
|
use sqlx::MySqlPool;
|
||||||
|
use std::time::Duration;
|
||||||
|
use tracing::{error, info};
|
||||||
|
|
||||||
|
pub struct Worker {
|
||||||
|
pool: MySqlPool,
|
||||||
|
qdrant: QdrantClient,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Worker {
|
||||||
|
pub fn new(pool: MySqlPool) -> Self {
|
||||||
|
let qdrant_url = std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://qdrant:6333".to_string());
|
||||||
|
let qdrant = QdrantClient::new(&qdrant_url);
|
||||||
|
Self { pool, qdrant }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn run(&self) {
|
||||||
|
info!("Worker starting");
|
||||||
|
|
||||||
|
// Ensure qdrant collection exists
|
||||||
|
if let Err(e) = self.qdrant.ensure_files_collection(DEMO_EMBED_DIM).await {
|
||||||
|
error!("Failed to ensure Qdrant collection: {}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Requeue stale InProgress jobs older than cutoff (e.g., 10 minutes)
|
||||||
|
if let Err(e) = self.requeue_stale_inprogress(10 * 60).await {
|
||||||
|
error!("Failed to requeue stale jobs: {}", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Claim next queued query
|
||||||
|
match self.fetch_and_claim().await {
|
||||||
|
Ok(Some(mut q)) => {
|
||||||
|
info!("Processing query {}", q.id);
|
||||||
|
if let Err(e) = self.process_query(&mut q).await {
|
||||||
|
error!("Error processing {}: {}", q.id, e);
|
||||||
|
let _ = self.mark_failed(&q.id, &format!("{}", e)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None) => {
|
||||||
|
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Worker fetch error: {}", e);
|
||||||
|
tokio::time::sleep(Duration::from_secs(5)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_and_claim(&self) -> Result<Option<QueryRecord>> {
|
||||||
|
// Note: MySQL transactional SELECT FOR UPDATE handling is more complex; for this hackathon scaffold
|
||||||
|
// we do a simple two-step: select one queued id, then update it to InProgress if it is still queued.
|
||||||
|
if let Some(row) = sqlx::query("SELECT id, payload FROM queries WHERE status = 'Queued' ORDER BY created_at LIMIT 1")
|
||||||
|
.fetch_optional(&self.pool)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
use sqlx::Row;
|
||||||
|
let id: String = row.get("id");
|
||||||
|
let payload: serde_json::Value = row.get("payload");
|
||||||
|
|
||||||
|
let updated = sqlx::query("UPDATE queries SET status = 'InProgress' WHERE id = ? AND status = 'Queued'")
|
||||||
|
.bind(&id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if updated.rows_affected() == 1 {
|
||||||
|
let mut q = QueryRecord::new(payload);
|
||||||
|
q.id = id;
|
||||||
|
q.status = QueryStatus::InProgress;
|
||||||
|
return Ok(Some(q));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn process_query(&self, q: &mut QueryRecord) -> Result<()> {
|
||||||
|
// Stage 1: set InProgress (idempotent)
|
||||||
|
self.update_status(&q.id, QueryStatus::InProgress).await?;
|
||||||
|
|
||||||
|
// Stage 2: embed query text
|
||||||
|
let text = q.payload.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||||
|
let emb = demo_text_embedding(text).await?;
|
||||||
|
let top_k = q.payload.get("top_k").and_then(|v| v.as_u64()).unwrap_or(5) as usize;
|
||||||
|
let top_k = top_k.max(1).min(20);
|
||||||
|
|
||||||
|
// Check cancellation
|
||||||
|
if self.is_cancelled(&q.id).await? { return Ok(()); }
|
||||||
|
|
||||||
|
// Stage 3: search top-K in Qdrant
|
||||||
|
let hits = match self.qdrant.search_top_k(emb.clone(), top_k).await {
|
||||||
|
Ok(list) if !list.is_empty() => list,
|
||||||
|
Ok(_) => Vec::new(),
|
||||||
|
Err(err) => {
|
||||||
|
error!("Qdrant search failed for query {}: {}", q.id, err);
|
||||||
|
Vec::new()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let hits = if hits.is_empty() {
|
||||||
|
match vector::query_top_k(&emb, top_k) {
|
||||||
|
Ok(fallback_ids) if !fallback_ids.is_empty() => {
|
||||||
|
info!("Using in-memory fallback for query {}", q.id);
|
||||||
|
fallback_ids.into_iter().map(|id| (id, 0.0)).collect()
|
||||||
|
}
|
||||||
|
_ => Vec::new(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
hits
|
||||||
|
};
|
||||||
|
|
||||||
|
// Check cancellation
|
||||||
|
if self.is_cancelled(&q.id).await? { return Ok(()); }
|
||||||
|
|
||||||
|
// Stage 4: fetch file metadata for IDs
|
||||||
|
let mut files_json = Vec::new();
|
||||||
|
for (fid, score) in hits {
|
||||||
|
if let Some(row) = sqlx::query("SELECT id, filename, path, description FROM files WHERE id = ? AND pending_analysis = FALSE")
|
||||||
|
.bind(&fid)
|
||||||
|
.fetch_optional(&self.pool)
|
||||||
|
.await? {
|
||||||
|
use sqlx::Row;
|
||||||
|
let id: String = row.get("id");
|
||||||
|
let filename: String = row.get("filename");
|
||||||
|
let path: String = row.get("path");
|
||||||
|
let description: Option<String> = row.get("description");
|
||||||
|
files_json.push(serde_json::json!({
|
||||||
|
"id": id, "filename": filename, "path": path, "description": description, "score": score
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stage 5: call Gemini to analyze relationships and propose follow-up details strictly from provided files
|
||||||
|
let relationships_prompt = build_relationships_prompt(text, &files_json);
|
||||||
|
let (relationships, final_answer) = if files_json.is_empty() {
|
||||||
|
(
|
||||||
|
"No analyzed files are ready yet. Try seeding demo data or wait for processing to finish.".to_string(),
|
||||||
|
"I could not find any relevant documents yet. Once files finish analysis I will be able to answer.".to_string(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
let relationships = generate_text_with_model("gemini-2.5-pro", &relationships_prompt)
|
||||||
|
.await
|
||||||
|
.unwrap_or_else(|e| format!("[demo] relationships error: {}", e));
|
||||||
|
|
||||||
|
// Stage 6: final answer synthesis with strict constraints (no speculation; say unknown when insufficient)
|
||||||
|
let final_prompt = build_final_answer_prompt(text, &files_json, &relationships);
|
||||||
|
let final_answer = generate_text_with_model("gemini-2.5-pro", &final_prompt)
|
||||||
|
.await
|
||||||
|
.unwrap_or_else(|e| format!("[demo] final answer error: {}", e));
|
||||||
|
(relationships, final_answer)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Stage 7: persist results
|
||||||
|
let result = serde_json::json!({
|
||||||
|
"summary": format!("Found {} related files", files_json.len()),
|
||||||
|
"related_files": files_json,
|
||||||
|
"relationships": relationships,
|
||||||
|
"final_answer": final_answer,
|
||||||
|
});
|
||||||
|
sqlx::query("UPDATE queries SET status = 'Completed', result = ? WHERE id = ?")
|
||||||
|
.bind(result)
|
||||||
|
.bind(&q.id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_status(&self, id: &str, status: QueryStatus) -> Result<()> {
|
||||||
|
let s = match status {
|
||||||
|
QueryStatus::Queued => "Queued",
|
||||||
|
QueryStatus::InProgress => "InProgress",
|
||||||
|
QueryStatus::Completed => "Completed",
|
||||||
|
QueryStatus::Cancelled => "Cancelled",
|
||||||
|
QueryStatus::Failed => "Failed",
|
||||||
|
};
|
||||||
|
sqlx::query("UPDATE queries SET status = ? WHERE id = ?")
|
||||||
|
.bind(s)
|
||||||
|
.bind(id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn mark_failed(&self, id: &str, message: &str) -> Result<()> {
|
||||||
|
let result = serde_json::json!({"error": message});
|
||||||
|
sqlx::query("UPDATE queries SET status = 'Failed', result = ? WHERE id = ?")
|
||||||
|
.bind(result)
|
||||||
|
.bind(id)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn requeue_stale_inprogress(&self, age_secs: i64) -> Result<()> {
|
||||||
|
// MySQL: requeue items updated_at < now()-age and status = InProgress
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE queries SET status = 'Queued' WHERE status = 'InProgress' AND updated_at < (NOW() - INTERVAL ? SECOND)"
|
||||||
|
)
|
||||||
|
.bind(age_secs)
|
||||||
|
.execute(&self.pool)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn is_cancelled(&self, id: &str) -> Result<bool> {
|
||||||
|
if let Some(row) = sqlx::query("SELECT status FROM queries WHERE id = ?")
|
||||||
|
.bind(id)
|
||||||
|
.fetch_optional(&self.pool)
|
||||||
|
.await?
|
||||||
|
{
|
||||||
|
use sqlx::Row;
|
||||||
|
let s: String = row.get("status");
|
||||||
|
return Ok(s == "Cancelled");
|
||||||
|
}
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_relationships_prompt(query: &str, files: &Vec<serde_json::Value>) -> String {
|
||||||
|
let files_snippets: Vec<String> = files.iter().map(|f| format!(
|
||||||
|
"- id: {id}, filename: {name}, path: {path}, desc: {desc}",
|
||||||
|
id=f.get("id").and_then(|v| v.as_str()).unwrap_or(""),
|
||||||
|
name=f.get("filename").and_then(|v| v.as_str()).unwrap_or(""),
|
||||||
|
path=f.get("path").and_then(|v| v.as_str()).unwrap_or(""),
|
||||||
|
desc=f.get("description").and_then(|v| v.as_str()).unwrap_or("")
|
||||||
|
)).collect();
|
||||||
|
format!(
|
||||||
|
"You are an assistant analyzing relationships STRICTLY within the provided files.\n\
|
||||||
|
Query: {query}\n\
|
||||||
|
Files:\n{files}\n\
|
||||||
|
Tasks:\n\
|
||||||
|
1) Summarize key details from the files relevant to the query.\n\
|
||||||
|
2) Describe relationships and linkages strictly supported by these files.\n\
|
||||||
|
3) List important follow-up questions that could be answered only using the provided files.\n\
|
||||||
|
Rules: Do NOT guess or invent. If information is insufficient in the files, explicitly state that.",
|
||||||
|
query=query,
|
||||||
|
files=files_snippets.join("\n")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_final_answer_prompt(query: &str, files: &Vec<serde_json::Value>, relationships: &str) -> String {
|
||||||
|
let files_short: Vec<String> = files.iter().map(|f| format!(
|
||||||
|
"- {name} ({id})",
|
||||||
|
id=f.get("id").and_then(|v| v.as_str()).unwrap_or(""),
|
||||||
|
name=f.get("filename").and_then(|v| v.as_str()).unwrap_or("")
|
||||||
|
)).collect();
|
||||||
|
format!(
|
||||||
|
"You are to compose a final answer to the user query using only the information from the files.\n\
|
||||||
|
Query: {query}\n\
|
||||||
|
Files considered:\n{files}\n\
|
||||||
|
Relationship analysis:\n{rels}\n\
|
||||||
|
Requirements:\n\
|
||||||
|
- Use only information present in the files and analysis above.\n\
|
||||||
|
- If the answer is uncertain or cannot be determined from the files, clearly state that limitation.\n\
|
||||||
|
- Avoid speculation or assumptions.\n\
|
||||||
|
Provide a concise, structured answer.",
|
||||||
|
query=query,
|
||||||
|
files=files_short.join("\n"),
|
||||||
|
rels=relationships
|
||||||
|
)
|
||||||
|
}
|
||||||
4
src/features/gemini/gemini.js
Normal file
4
src/features/gemini/gemini.js
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
import { createPartFromUri, GoogleGenAI } from "@google/genai"
|
||||||
|
import 'dotenv/config'
|
||||||
|
|
||||||
|
const ai = new GoogleGenAI({ apiKey: ${} })
|
||||||
|
|
@ -2,7 +2,7 @@ FROM node:23-alpine
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm i
|
RUN npm ci
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run format && npm run build
|
RUN npm run format && npm run build
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
16
web-app/README.md
Normal file
16
web-app/README.md
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
# React + Vite
|
||||||
|
|
||||||
|
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
|
||||||
|
|
||||||
|
Currently, two official plugins are available:
|
||||||
|
|
||||||
|
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
|
||||||
|
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
|
||||||
|
|
||||||
|
## React Compiler
|
||||||
|
|
||||||
|
The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
|
||||||
|
|
||||||
|
## Expanding the ESLint configuration
|
||||||
|
|
||||||
|
If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
|
||||||
15
web-app/components.json
Normal file
15
web-app/components.json
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
"style": "default",
|
||||||
|
"tailwind": {
|
||||||
|
"config": "tailwind.config.js",
|
||||||
|
"css": "src/app/globals.css",
|
||||||
|
"baseColor": "zinc",
|
||||||
|
"cssVariables": true
|
||||||
|
},
|
||||||
|
"rsc": false,
|
||||||
|
"tsx": false,
|
||||||
|
"aliases": {
|
||||||
|
"utils": "~/lib/utils",
|
||||||
|
"components": "~/components"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,15 +1,18 @@
|
||||||
import js from "@eslint/js";
|
import js from "@eslint/js";
|
||||||
import globals from "globals";
|
import globals from "globals";
|
||||||
import reactHooks from "eslint-plugin-react-hooks";
|
import reactHooks from "eslint-plugin-react-hooks"; // Or import { configs as reactHooks } from "eslint-plugin-react-hooks";
|
||||||
import reactRefresh from "eslint-plugin-react-refresh";
|
import reactRefresh from "eslint-plugin-react-refresh";
|
||||||
|
import { defineConfig, globalIgnores } from "eslint/config";
|
||||||
|
|
||||||
export default [
|
export default defineConfig([
|
||||||
|
globalIgnores(["dist"]),
|
||||||
{
|
{
|
||||||
ignores: ["dist/**"],
|
files: ["**/*{js,jsx}"],
|
||||||
},
|
extends: [
|
||||||
js.configs.recommended,
|
js.configs.recommended,
|
||||||
{
|
reactHooks.configs["recommended-latest"],
|
||||||
files: ["**/*.{js,jsx}"],
|
reactRefresh.configs.vite,
|
||||||
|
],
|
||||||
languageOptions: {
|
languageOptions: {
|
||||||
ecmaVersion: 2020,
|
ecmaVersion: 2020,
|
||||||
globals: globals.browser,
|
globals: globals.browser,
|
||||||
|
|
@ -19,17 +22,8 @@ export default [
|
||||||
sourceType: "module",
|
sourceType: "module",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
plugins: {
|
|
||||||
"react-hooks": reactHooks,
|
|
||||||
"react-refresh": reactRefresh,
|
|
||||||
},
|
|
||||||
rules: {
|
rules: {
|
||||||
...reactHooks.configs.recommended.rules,
|
|
||||||
"react-refresh/only-export-components": [
|
|
||||||
"warn",
|
|
||||||
{ allowConstantExport: true },
|
|
||||||
],
|
|
||||||
"no-unused-vars": ["error", { varsIgnorePattern: "^[A-Z_]" }],
|
"no-unused-vars": ["error", { varsIgnorePattern: "^[A-Z_]" }],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
];
|
]);
|
||||||
7
web-app/jsconfig.json
Normal file
7
web-app/jsconfig.json
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"baseUrl": "./",
|
||||||
|
"lib": ["es2015", "dom"]
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
|
|
@ -33,7 +33,6 @@
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@eslint/js": "^9.38.0",
|
"@eslint/js": "^9.38.0",
|
||||||
"daisyui": "^5.4.7",
|
|
||||||
"eslint": "^9.38.0",
|
"eslint": "^9.38.0",
|
||||||
"eslint-plugin-import": "^2.32.0",
|
"eslint-plugin-import": "^2.32.0",
|
||||||
"eslint-plugin-react": "^7.37.5",
|
"eslint-plugin-react": "^7.37.5",
|
||||||
|
|
@ -2907,16 +2906,6 @@
|
||||||
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/daisyui": {
|
|
||||||
"version": "5.4.7",
|
|
||||||
"resolved": "https://registry.npmjs.org/daisyui/-/daisyui-5.4.7.tgz",
|
|
||||||
"integrity": "sha512-2wYO61vTPCXk7xEBgnzLZAYoE0xS5IRLu/GSq0vORpB+cTrtubdx69NnA0loc0exvCY1s2fYL4lGZtFHe2ohNQ==",
|
|
||||||
"dev": true,
|
|
||||||
"license": "MIT",
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/saadeghi/daisyui?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/data-uri-to-buffer": {
|
"node_modules/data-uri-to-buffer": {
|
||||||
"version": "4.0.1",
|
"version": "4.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
|
||||||
|
|
@ -40,7 +40,6 @@
|
||||||
"packageManager": ">=npm@10.9.0",
|
"packageManager": ">=npm@10.9.0",
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@eslint/js": "^9.38.0",
|
"@eslint/js": "^9.38.0",
|
||||||
"daisyui": "^5.4.7",
|
|
||||||
"eslint": "^9.38.0",
|
"eslint": "^9.38.0",
|
||||||
"eslint-plugin-import": "^2.32.0",
|
"eslint-plugin-import": "^2.32.0",
|
||||||
"eslint-plugin-react": "^7.37.5",
|
"eslint-plugin-react": "^7.37.5",
|
||||||
BIN
web-app/public/pdfs/SRHB.pdf
Normal file
BIN
web-app/public/pdfs/SRHB.pdf
Normal file
Binary file not shown.
BIN
web-app/public/pdfs/falcon-users-guide-2025-05-09.pdf
Normal file
BIN
web-app/public/pdfs/falcon-users-guide-2025-05-09.pdf
Normal file
Binary file not shown.
BIN
web-app/public/pdfs/spacex-falcon-9-data-sheet.pdf
Normal file
BIN
web-app/public/pdfs/spacex-falcon-9-data-sheet.pdf
Normal file
Binary file not shown.
BIN
web-app/public/pdfs/system-safety-handbook.pdf
Normal file
BIN
web-app/public/pdfs/system-safety-handbook.pdf
Normal file
Binary file not shown.
54
web-app/server.mjs
Normal file
54
web-app/server.mjs
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
import express from 'express';
|
||||||
|
import path from 'node:path';
|
||||||
|
import helmet from 'helmet';
|
||||||
|
import cors from 'cors';
|
||||||
|
import fetch from 'node-fetch';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
|
||||||
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dirname = path.dirname(__filename);
|
||||||
|
|
||||||
|
const app = express();
|
||||||
|
const PORT = Number(process.env.PORT) || 3000;
|
||||||
|
const HOST = process.env.HOST || '0.0.0.0';
|
||||||
|
const RUST_ENGINE_BASE =
|
||||||
|
process.env.RUST_ENGINE_BASE ||
|
||||||
|
process.env.RUST_ENGINE_URL ||
|
||||||
|
'http://rust-engine:8000';
|
||||||
|
|
||||||
|
app.set('trust proxy', true);
|
||||||
|
app.use(helmet({ contentSecurityPolicy: false }));
|
||||||
|
app.use(cors());
|
||||||
|
app.use(express.json());
|
||||||
|
|
||||||
|
app.get('/api/healthz', (_req, res) => {
|
||||||
|
res.json({ status: 'ok', upstream: RUST_ENGINE_BASE });
|
||||||
|
});
|
||||||
|
|
||||||
|
// Proxy minimal API needed by the UI to the rust-engine container
|
||||||
|
app.post('/api/files/import-demo', async (req, res) => {
|
||||||
|
try {
|
||||||
|
const qs = req.url.includes('?') ? req.url.substring(req.url.indexOf('?')) : '';
|
||||||
|
const url = `${RUST_ENGINE_BASE}/api/files/import-demo${qs}`;
|
||||||
|
const upstream = await fetch(url, { method: 'POST', headers: { 'content-type': 'application/json' }, body: req.body ? JSON.stringify(req.body) : undefined });
|
||||||
|
const text = await upstream.text();
|
||||||
|
res.status(upstream.status).type(upstream.headers.get('content-type') || 'application/json').send(text);
|
||||||
|
} catch (err) {
|
||||||
|
console.error('import-demo proxy failed:', err);
|
||||||
|
res.status(502).json({ error: 'proxy_failed' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Serve static frontend
|
||||||
|
const distDir = path.resolve(__dirname, 'dist');
|
||||||
|
app.use(express.static(distDir));
|
||||||
|
|
||||||
|
// SPA fallback (Express 5 requires middleware instead of bare '*')
|
||||||
|
app.use((req, res) => {
|
||||||
|
res.sendFile(path.join(distDir, 'index.html'));
|
||||||
|
});
|
||||||
|
|
||||||
|
app.listen(PORT, HOST, () => {
|
||||||
|
console.log(`Web app server listening on http://${HOST}:${PORT}`);
|
||||||
|
console.log(`Proxying to rust engine at ${RUST_ENGINE_BASE}`);
|
||||||
|
});
|
||||||
|
|
@ -5,7 +5,6 @@ function App() {
|
||||||
return (
|
return (
|
||||||
<div className="dark min-h-screen bg-gray-950 text-white flex justify-center pt-12">
|
<div className="dark min-h-screen bg-gray-950 text-white flex justify-center pt-12">
|
||||||
<ChatLayout />
|
<ChatLayout />
|
||||||
<div></div>
|
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
50
web-app/src/components/layouts/chat-layout.jsx
Normal file
50
web-app/src/components/layouts/chat-layout.jsx
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
import React, { useState } from "react";
|
||||||
|
import ChatHeader from "src/components/ui/chat/chat-header";
|
||||||
|
import ChatWindow from "src/components/ui/chat/chat-window";
|
||||||
|
import MessageInput from "src/components/ui/chat/message-input";
|
||||||
|
import { GoogleGenAI } from "@google/genai";
|
||||||
|
|
||||||
|
let userInput = [];
|
||||||
|
|
||||||
|
const ai = new GoogleGenAI({ apiKey: import.meta.env.GEMINI_API_KEY });
|
||||||
|
|
||||||
|
async function AIResponse(userInputArray) {
|
||||||
|
const response = await ai.models.generateContent({
|
||||||
|
model: "gemini-2.5-flash",
|
||||||
|
contents: userInputArray,
|
||||||
|
});
|
||||||
|
|
||||||
|
return response.text;
|
||||||
|
}
|
||||||
|
|
||||||
|
export default function ChatLayout() {
|
||||||
|
const [messages, setMessages] = useState([
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: "Hello — I can help you with code, explanations, and more.",
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
|
async function handleSend(text) {
|
||||||
|
const userMsg = { role: "user", content: text };
|
||||||
|
userInput.push(text);
|
||||||
|
const res = await AIResponse(userInput);
|
||||||
|
setMessages((s) => [...s, userMsg]);
|
||||||
|
setTimeout(() => {
|
||||||
|
setMessages((s) => [...s, { role: "assistant", content: res }]);
|
||||||
|
}, 600);
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleDeleteAll() {
|
||||||
|
if (!window.confirm("Delete all messages?")) return;
|
||||||
|
setMessages([]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="flex flex-col flex-start w-full max-w-3xl gap-4 p-4">
|
||||||
|
<ChatHeader onDeleteAll={handleDeleteAll} />
|
||||||
|
<ChatWindow messages={messages} />
|
||||||
|
<MessageInput onSend={handleSend} onDeleteAll={handleDeleteAll} />
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
15
web-app/src/components/ui/button/delete-button.jsx
Normal file
15
web-app/src/components/ui/button/delete-button.jsx
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
import { Flame } from "lucide-react";
|
||||||
|
import { motion } from "motion/react";
|
||||||
|
|
||||||
|
export default function FlameButton({ onClick }) {
|
||||||
|
return (
|
||||||
|
<motion.button
|
||||||
|
onClick={onClick}
|
||||||
|
className="bg-gray-700 cursor-pointer p-2 rounded-2xl border-2 border-gray-600"
|
||||||
|
whileHover={{ scale: 1.1 }}
|
||||||
|
whileTap={{ scale: 0.9 }}
|
||||||
|
>
|
||||||
|
<Flame />
|
||||||
|
</motion.button>
|
||||||
|
);
|
||||||
|
}
|
||||||
65
web-app/src/components/ui/chat/chat-header.jsx
Normal file
65
web-app/src/components/ui/chat/chat-header.jsx
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
import React, { useMemo, useState } from "react";
|
||||||
|
import { motion } from "motion/react";
|
||||||
|
import { Rocket } from "lucide-react";
|
||||||
|
import DeleteButton from "src/components/ui/button/delete-button";
|
||||||
|
import FileList from "src/components/ui/file/file-list";
|
||||||
|
|
||||||
|
export default function ChatHeader({
|
||||||
|
title = "Schematic Spelunker",
|
||||||
|
onDeleteAll,
|
||||||
|
}) {
|
||||||
|
const isDebug = useMemo(() => {
|
||||||
|
const p = new URLSearchParams(window.location.search);
|
||||||
|
return p.get("debug") === "1";
|
||||||
|
}, []);
|
||||||
|
const [ingesting, setIngesting] = useState(false);
|
||||||
|
const [toast, setToast] = useState("");
|
||||||
|
|
||||||
|
async function triggerDemoIngest() {
|
||||||
|
try {
|
||||||
|
setIngesting(true);
|
||||||
|
const res = await fetch("/api/files/import-demo", { method: "POST" });
|
||||||
|
const json = await res.json().catch(() => ({}));
|
||||||
|
setToast(
|
||||||
|
`Imported: ${json.imported ?? "?"}, Skipped: ${json.skipped ?? "?"}`
|
||||||
|
);
|
||||||
|
setTimeout(() => setToast(""), 4000);
|
||||||
|
} catch (e) {
|
||||||
|
setToast("Import failed");
|
||||||
|
setTimeout(() => setToast(""), 4000);
|
||||||
|
} finally {
|
||||||
|
setIngesting(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="w-full flex justify-center">
|
||||||
|
<header className="text-slate-100 fixed top-4 max-w-3xl w-full px-4">
|
||||||
|
<div className="flex justify-between items-center gap-4">
|
||||||
|
<FileList />
|
||||||
|
<h1 className=" text-sm lg:text-lg font-semibold shadow-md shadow-indigo-600 bg-gray-900 px-6 py-2 rounded-4xl border-2 border-gray-800">
|
||||||
|
{title}
|
||||||
|
</h1>
|
||||||
|
<DeleteButton onClick={onDeleteAll} />
|
||||||
|
{isDebug && (
|
||||||
|
<motion.button
|
||||||
|
onClick={triggerDemoIngest}
|
||||||
|
className="bg-gray-800 border-2 border-gray-700 rounded-xl px-3 py-2 flex items-center gap-2"
|
||||||
|
whileHover={{ scale: 1.05 }}
|
||||||
|
whileTap={{ scale: 0.95 }}
|
||||||
|
disabled={ingesting}
|
||||||
|
>
|
||||||
|
<Rocket size={16} />
|
||||||
|
{ingesting ? "Seeding…" : "Seed Demo Data"}
|
||||||
|
</motion.button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
{toast && (
|
||||||
|
<div className="mt-2 text-xs text-slate-300 bg-gray-800/80 border border-gray-700 rounded px-2 py-1 inline-block">
|
||||||
|
{toast}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</header>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -4,11 +4,10 @@ import { MARKDOWN_COMPONENTS } from "src/config/markdown";
|
||||||
|
|
||||||
function MessageBubble({ message }) {
|
function MessageBubble({ message }) {
|
||||||
const isUser = message.role === "user";
|
const isUser = message.role === "user";
|
||||||
const isError = !!message.error;
|
|
||||||
return (
|
return (
|
||||||
<div className={`flex ${isUser ? "justify-end" : "justify-start"} py-2`}>
|
<div className={`flex ${isUser ? "justify-end" : "justify-start"} py-2`}>
|
||||||
<div
|
<div
|
||||||
className={`p-3 rounded-xl ${isUser ? "bg-indigo-600 text-white rounded-tr-sm" : "bg-gray-700 text-slate-100 rounded-tl-sm"} ${isError ? "border border-red-500/60 bg-red-900/50" : ""}`}
|
className={`p-3 rounded-xl ${isUser ? "bg-indigo-600 text-white rounded-tr-sm" : "bg-gray-700 text-slate-100 rounded-tl-sm"}`}
|
||||||
>
|
>
|
||||||
{isUser ? (
|
{isUser ? (
|
||||||
<div className="text-sm">{message.content}</div>
|
<div className="text-sm">{message.content}</div>
|
||||||
|
|
@ -23,21 +22,12 @@ function MessageBubble({ message }) {
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function ChatWindow({ messages }) {
|
export default function ChatWindow({ messages }) {
|
||||||
const bottomRef = useRef(null);
|
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
if (bottomRef.current) {
|
|
||||||
bottomRef.current.scrollIntoView({ behavior: "smooth" });
|
|
||||||
}
|
|
||||||
}, [messages]);
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex-1 overflow-auto px-2 pt-4 pb-32">
|
<div className="flex-1 overflow-auto px-2 pt-4 pb-32">
|
||||||
<div className="">
|
<div className="">
|
||||||
{messages.map((m, i) => (
|
{messages.map((m, i) => (
|
||||||
<MessageBubble key={m.id ?? i} message={m} />
|
<MessageBubble key={i} message={m} />
|
||||||
))}
|
))}
|
||||||
<div ref={bottomRef} />
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
79
web-app/src/components/ui/chat/message-input.jsx
Normal file
79
web-app/src/components/ui/chat/message-input.jsx
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
import React, { useState, useRef, useEffect } from "react";
|
||||||
|
import DownButton from "src/components/ui/button/down-button";
|
||||||
|
import { motion } from "motion/react";
|
||||||
|
import { BotMessageSquare } from "lucide-react";
|
||||||
|
|
||||||
|
import { GoogleGenAI } from "@google/genai";
|
||||||
|
|
||||||
|
const ai = new GoogleGenAI({ apiKey: import.meta.env.GEMINI_API_KEY });
|
||||||
|
|
||||||
|
export default function MessageInput({ onSend }) {
|
||||||
|
const [text, setText] = useState("");
|
||||||
|
const textareaRef = useRef(null);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
// ensure correct initial height
|
||||||
|
if (textareaRef.current) textareaRef.current.style.height = "auto";
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
async function handleSubmit(e) {
|
||||||
|
// send user message locally
|
||||||
|
e.preventDefault();
|
||||||
|
if (text.trim() === "") return;
|
||||||
|
onSend(text.trim());
|
||||||
|
setText("");
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="w-full flex justify-center">
|
||||||
|
<footer className="fixed bottom-6 max-w-3xl w-full px-4">
|
||||||
|
<div className="flex flex-col gap-4">
|
||||||
|
<div className="flex justify-between items-center">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<DownButton />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<form
|
||||||
|
onSubmit={handleSubmit}
|
||||||
|
className="bg-gray-900 rounded-2xl border-2 border-gray-800 shadow-lg shadow-indigo-600"
|
||||||
|
>
|
||||||
|
<div className="flex p-2 shadow-xl items-center">
|
||||||
|
<textarea
|
||||||
|
ref={textareaRef}
|
||||||
|
value={text}
|
||||||
|
onChange={(e) => {
|
||||||
|
setText(e.target.value);
|
||||||
|
// auto-resize
|
||||||
|
const ta = textareaRef.current;
|
||||||
|
if (ta) {
|
||||||
|
ta.style.height = "auto";
|
||||||
|
ta.style.height = `${ta.scrollHeight}px`;
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
onKeyDown={(e) => {
|
||||||
|
// Enter to submit, Shift+Enter for newline
|
||||||
|
if (e.key === "Enter" && !e.shiftKey) {
|
||||||
|
e.preventDefault();
|
||||||
|
handleSubmit(e);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
placeholder="Type a message..."
|
||||||
|
rows={1}
|
||||||
|
className="flex-1 mx-2 rounded-md shadow-2sx border-none focus:border-none focus:outline-none resize-none overflow-auto max-h-40"
|
||||||
|
/>
|
||||||
|
<motion.button
|
||||||
|
type="submit"
|
||||||
|
className="flex gap-2 px-4 py-2 bg-gray-700 rounded-xl ml-4 items-center"
|
||||||
|
whileHover={{ scale: 1.1 }}
|
||||||
|
whileTap={{ scale: 0.9 }}
|
||||||
|
>
|
||||||
|
<BotMessageSquare />
|
||||||
|
</motion.button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</footer>
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
@import "tailwindcss";
|
@import "tailwindcss";
|
||||||
@import "daisyui";
|
|
||||||
|
|
||||||
.dark {
|
.dark {
|
||||||
--paragraph: 235, 236, 239;
|
--paragraph: 235, 236, 239;
|
||||||
60
web-app/src/index.js
Normal file
60
web-app/src/index.js
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
import express from "express";
|
||||||
|
import bodyParser from "body-parser";
|
||||||
|
import axios from "axios";
|
||||||
|
import multer from "multer";
|
||||||
|
import path from "path";
|
||||||
|
import { fileURLToPath } from 'url';
|
||||||
|
import fs from "fs";
|
||||||
|
|
||||||
|
const app = new express();
|
||||||
|
const port = 3000;
|
||||||
|
|
||||||
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dirname = path.dirname(__filename);
|
||||||
|
|
||||||
|
app.use(express.static('public'));
|
||||||
|
app.use('/uploads', express.static('uploads'));
|
||||||
|
app.use(bodyParser.urlencoded({extended: true}));
|
||||||
|
app.use(bodyParser.json());
|
||||||
|
|
||||||
|
const storage = multer.diskStorage({
|
||||||
|
destination: function (req, file, cb) {
|
||||||
|
cb(null, path.join(__dirname,'/uploads'));
|
||||||
|
},
|
||||||
|
filename: function (req, file, cb) {
|
||||||
|
cb(null, file.originalname);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
const upload = multer({ storage: storage })
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//Render the main page
|
||||||
|
app.get("/", async (req, res) => {
|
||||||
|
try{
|
||||||
|
const response = await axios.get(`${API_URL}/all`);
|
||||||
|
res.render("file", { data: response.data });
|
||||||
|
}catch(error){
|
||||||
|
console.error(error);
|
||||||
|
res.status(500).json("Error fetching items");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
app.post("/upload", upload.single('image'), async (req, res) => {
|
||||||
|
const data = {
|
||||||
|
...req.body,
|
||||||
|
fileName: req.file.originalname,
|
||||||
|
path: req.file.path
|
||||||
|
}
|
||||||
|
try{
|
||||||
|
await axios.post(`${API_URL}/add`, data);
|
||||||
|
res.redirect("/");
|
||||||
|
}catch(error){
|
||||||
|
console.error(error);
|
||||||
|
res.status(500).json("Error uploading item");
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
app.listen(port, () => {
|
||||||
|
console.log("API is listening on port " + port);
|
||||||
|
})
|
||||||
5
web-app/src/main.jsx
Normal file
5
web-app/src/main.jsx
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
import { createRoot } from "react-dom/client";
|
||||||
|
import "./index.css";
|
||||||
|
import App from "./app/index.jsx";
|
||||||
|
|
||||||
|
createRoot(document.getElementById("root")).render(<App />);
|
||||||
|
|
@ -9,6 +9,7 @@ try {
|
||||||
console.log("Env file not found!\n" + error)
|
console.log("Env file not found!\n" + error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://vite.dev/config/
|
||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
plugins: [tailwindcss(), react(), jsconfigPaths()],
|
plugins: [tailwindcss(), react(), jsconfigPaths()],
|
||||||
resolve: {
|
resolve: {
|
||||||
Loading…
Add table
Add a link
Reference in a new issue