Merge branch 'main' into diagram

JK-le-dev 2025-10-19 07:52:47 -05:00
commit 309e0f6b73
12 changed files with 649 additions and 69 deletions


@@ -4,7 +4,7 @@ name: Build and Deploy
on:
push:
branches: ["main"]
branches: ["main", "rust-dev"]
jobs:
build-and-deploy:

ARCHITECTURE.md (new file, 268 lines)

@@ -0,0 +1,268 @@
# CodeRED-Astra Architecture
## Overview
CodeRED-Astra is a Retrieval-Augmented Generation (RAG) system for querying ISS technical documentation using vector search, MySQL metadata storage, and Gemini AI for analysis and response generation.
## System Components
### 1. **Rust Backend** (`rust-engine/`)
High-performance Rust backend using Warp for HTTP, SQLx for MySQL, and Reqwest for external API calls.
#### Modules
**`main.rs`** - Entry point
- Initializes tracing, database, storage
- Spawns FileWorker and QueryWorker background tasks
- Serves API routes on port 8000
**`db.rs`** - Database initialization
- Connects to MySQL
- Creates `files` table (id, filename, path, description, pending_analysis, analysis_status)
- Creates `queries` table (id, status, payload, result, timestamps)
**`api.rs`** - HTTP endpoints
- `POST /api/files` - Upload file (multipart/form-data)
- `POST /api/files/import-demo` - Bulk import from demo-data directory
- `GET /api/files/list` - List all files with status
- `GET /api/files/delete?id=` - Delete file and remove from Qdrant
- `POST /api/query/create` - Create new query (returns query ID)
- `GET /api/query/status?id=` - Check query status
- `GET /api/query/result?id=` - Get query result
- `GET /api/query/cancel?id=` - Cancel in-progress query
**`file_worker.rs`** - File analysis pipeline
- **Background worker** that processes files with `pending_analysis = TRUE`
- Claims stale/queued files (requeues if stuck >10 min)
- **Stage 1**: Call Gemini 2.5 Flash for initial description
- **Stage 2**: Call Gemini 2.5 Pro for deep vector graph data (keywords, relationships)
- **Stage 3**: Generate embedding and upsert to Qdrant
- **Stage 4**: Mark file as ready (`pending_analysis = FALSE`, `analysis_status = 'Completed'`)
- Resumable: Can recover from crashes/restarts
**`worker.rs`** - Query processing pipeline
- **Background worker** that processes queries with `status = 'Queued'`
- Requeues stale InProgress jobs (>10 min)
- **Stage 1**: Embed query text
- **Stage 2**: Search top-K similar vectors in Qdrant
- **Stage 3**: Fetch file metadata from MySQL (only completed files)
- **Stage 4**: Call Gemini to analyze relationships between files
- **Stage 5**: Call Gemini for final answer synthesis (strict: no speculation)
- **Stage 6**: Save results to database
- Supports cancellation checks between stages
**`gemini_client.rs`** - Gemini API integration
- `generate_text(prompt)` - Text generation with model switching via GEMINI_MODEL env var
- `demo_text_embedding(text)` - Demo 64-dim embeddings (replace with real Gemini embeddings)
- Falls back to demo responses if GEMINI_API_KEY not set
**`vector_db.rs`** - Qdrant client
- `ensure_files_collection(dim)` - Create 'files' collection with Cosine distance
- `upsert_point(id, vector)` - Store file embedding
- `search_top_k(vector, k)` - Find k nearest neighbors
- `delete_point(id)` - Remove file from index
**`storage.rs`** - File storage utilities
- `storage_dir()` - Get storage path from ASTRA_STORAGE env or default `/app/storage`
- `ensure_storage_dir()` - Create storage directory if missing
- `save_file(filename, contents)` - Save file to storage
- `delete_file(path)` - Remove file from storage
**`models.rs`** - Data structures
- `FileRecord` - File metadata (mirrors files table)
- `QueryRecord` - Query metadata (mirrors queries table)
- `QueryStatus` enum - Queued, InProgress, Completed, Cancelled, Failed
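A minimal sketch of these structures in plain Rust (field types are assumptions; the real `models.rs` presumably also derives sqlx/serde traits):

```rust
// Sketch of the data structures described above. Field names mirror the
// files table; exact types and trait derives are assumptions.
#[derive(Debug, Clone, PartialEq)]
pub enum QueryStatus {
    Queued,
    InProgress,
    Completed,
    Cancelled,
    Failed,
}

impl QueryStatus {
    // Matches the VARCHAR(32) status column values.
    pub fn as_str(&self) -> &'static str {
        match self {
            QueryStatus::Queued => "Queued",
            QueryStatus::InProgress => "InProgress",
            QueryStatus::Completed => "Completed",
            QueryStatus::Cancelled => "Cancelled",
            QueryStatus::Failed => "Failed",
        }
    }

    pub fn parse(s: &str) -> Option<Self> {
        match s {
            "Queued" => Some(QueryStatus::Queued),
            "InProgress" => Some(QueryStatus::InProgress),
            "Completed" => Some(QueryStatus::Completed),
            "Cancelled" => Some(QueryStatus::Cancelled),
            "Failed" => Some(QueryStatus::Failed),
            _ => None,
        }
    }
}

#[derive(Debug, Clone)]
pub struct FileRecord {
    pub id: String,
    pub filename: String,
    pub path: String,
    pub description: Option<String>,
    pub pending_analysis: bool,
    pub analysis_status: String,
}

fn main() {
    let f = FileRecord {
        id: "0".into(),
        filename: "eps.pdf".into(),
        path: "/app/storage/eps.pdf".into(),
        description: None,
        pending_analysis: true,
        analysis_status: QueryStatus::Queued.as_str().into(),
    };
    println!("{} -> {}", f.filename, f.analysis_status);
}
```

Keeping the status strings centralized like this is what lets the workers do string comparisons against the `analysis_status` and `status` columns safely.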
### 2. **Web App** (`web-app/`)
React + Vite frontend with Express backend for API proxying.
#### Backend (`server.mjs`)
- Express server that proxies API calls to rust-engine:8000
- Serves React static build from `/dist`
- **Why needed**: the browser cannot resolve the `rust-engine` Docker hostname, so the Express server proxies API calls on its behalf
#### Frontend (`src/`)
- `App.jsx` - Main chat interface component
- `components/ui/chat/chat-header.jsx` - Header with debug-only "Seed Demo Data" button (visible with `?debug=1`)
- Calls `/api/files/import-demo` endpoint to bulk-load ISS PDFs
### 3. **MySQL Database**
Two tables for metadata storage:
**`files` table**
```sql
id VARCHAR(36) PRIMARY KEY
filename TEXT NOT NULL
path TEXT NOT NULL
description TEXT
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
pending_analysis BOOLEAN DEFAULT TRUE
analysis_status VARCHAR(32) DEFAULT 'Queued'
```
**`queries` table**
```sql
id VARCHAR(36) PRIMARY KEY
status VARCHAR(32) NOT NULL
payload JSON
result JSON
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
```
### 4. **Qdrant Vector Database**
- Collection: `files`
- Dimension: 64 (demo) - replace with real Gemini embedding dimension
- Distance: Cosine similarity
- Stores file embeddings for semantic search
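The cosine top-K search Qdrant performs over these embeddings can be sketched with a naive in-memory stand-in (illustrative only, not the actual Qdrant client):

```rust
// Naive in-memory stand-in for Qdrant's cosine-distance top-K search.
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
}

// Returns (id, score) pairs for the k points most similar to `query`, best first.
fn search_top_k(points: &[(String, Vec<f32>)], query: &[f32], k: usize) -> Vec<(String, f32)> {
    let mut scored: Vec<(String, f32)> = points
        .iter()
        .map(|(id, v)| (id.clone(), cosine(v, query)))
        .collect();
    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    scored.truncate(k);
    scored
}

fn main() {
    let points = vec![
        ("eps.pdf".to_string(), vec![1.0, 0.0, 0.0]),
        ("eclss.pdf".to_string(), vec![0.0, 1.0, 0.0]),
    ];
    let hits = search_top_k(&points, &[0.9, 0.1, 0.0], 1);
    println!("{:?}", hits);
}
```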
### 5. **Demo Data** (`rust-engine/demo-data/`)
~20 ISS technical PDFs organized by subsystem:
- Electrical Power System (EPS)
- Environmental Control & Life Support (ECLSS)
- Command & Data Handling (C&DH)
- Structures & Mechanisms
## Data Flow
### File Upload & Analysis
```
1. User uploads PDF → POST /api/files
2. API saves file to storage, inserts DB record (pending_analysis=true)
3. FileWorker claims pending file
4. Gemini 2.5 Flash generates description
5. Gemini 2.5 Pro generates vector graph data
6. Embed text → upsert to Qdrant
7. Mark file as ready (pending_analysis=false)
```
### Query Processing
```
1. User submits query → POST /api/query/create
2. API inserts query record (status='Queued')
3. QueryWorker claims queued query
4. Embed query text
5. Search Qdrant for top-K similar files
6. Fetch file metadata from MySQL
7. Gemini analyzes relationships between files
8. Gemini synthesizes final answer (no speculation)
9. Save results to database
```
## Deployment
### Development (`docker-compose.yml`)
- Local testing with hot-reload
- Bind mounts for code
### Production (`docker-compose.prod.yml`)
- Used by GitHub Actions for deployment
- Runs rust-engine as user "1004" (github-actions)
- Docker volume: `rust-storage` → `/app/storage`
- Bind mount: `/var/www/codered-astra/rust-engine/demo-data` → `/app/demo-data:ro`
- Environment variables:
- `ASTRA_STORAGE=/app/storage`
- `DEMO_DATA_DIR=/app/demo-data`
- `QDRANT_URL=http://qdrant:6333`
- `GEMINI_API_KEY=<secret>`
- `DATABASE_URL=mysql://astraadmin:password@mysql:3306/astra`
## Key Design Decisions
### 1. **Two-Stage Analysis (Flash → Pro)**
- Flash is faster/cheaper for initial description
- Pro is better for deep analysis and relationship extraction
- Enables cost-effective scaling
### 2. **Resumable Workers**
- Workers requeue stale jobs (>10 min in InProgress)
- Survives container restarts without data loss
- Atomic state transitions via SQL
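The stale-requeue rule can be sketched in memory as follows (the real workers express this as a single SQL UPDATE so the transition stays atomic; the `Job` struct here is hypothetical):

```rust
use std::time::{Duration, SystemTime};

// Mirrors the workers' "requeue stale jobs (>10 min in InProgress)" rule.
const STALE_AFTER: Duration = Duration::from_secs(10 * 60);

#[derive(Debug, PartialEq)]
enum JobStatus { Queued, InProgress, Completed }

struct Job {
    status: JobStatus,
    updated_at: SystemTime,
}

// Requeue any InProgress job not touched within the stale window.
// Returns how many jobs were requeued.
fn requeue_stale(jobs: &mut [Job], now: SystemTime) -> usize {
    let mut n = 0;
    for job in jobs.iter_mut() {
        let stale = now
            .duration_since(job.updated_at)
            .map(|d| d > STALE_AFTER)
            .unwrap_or(false);
        if job.status == JobStatus::InProgress && stale {
            job.status = JobStatus::Queued;
            job.updated_at = now;
            n += 1;
        }
    }
    n
}

fn main() {
    let now = SystemTime::now();
    let mut jobs = vec![
        Job { status: JobStatus::InProgress, updated_at: now - Duration::from_secs(11 * 60) },
        Job { status: JobStatus::InProgress, updated_at: now },
    ];
    println!("requeued {}", requeue_stale(&mut jobs, now));
}
```

Doing the equivalent with one `UPDATE ... WHERE status = 'InProgress' AND updated_at < ?` keeps the claim race-free even with multiple worker replicas.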
### 3. **Separation of Concerns**
- FileWorker: Makes files searchable
- QueryWorker: Answers user queries
- Independent scaling and failure isolation
### 4. **Strict Answer Generation**
- Gemini prompted to not speculate
- Must state uncertainty when info is insufficient
- Prevents hallucination in critical ISS documentation
### 5. **Demo Embeddings**
- Current: 64-dim deterministic embeddings from text hash
- Production: Replace with real Gemini text embeddings API
- Allows development/testing without embedding API credits
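A sketch of such a deterministic hash-based embedding, following the bucket-sum scheme the demo helpers use (details may differ from the real `demo_text_embedding`):

```rust
// Deterministic 64-dim demo embedding: sum each byte of the text into one of
// 64 buckets. Same text always yields the same vector, so no API credits are
// needed during development.
const DEMO_EMBED_DIM: usize = 64;

fn demo_text_embedding(text: &str) -> Vec<f32> {
    let mut v = vec![0f32; DEMO_EMBED_DIM];
    for (i, b) in text.as_bytes().iter().enumerate() {
        v[i % DEMO_EMBED_DIM] += *b as f32 / 255.0;
    }
    v
}

fn main() {
    let e = demo_text_embedding("ISS main bus voltage");
    println!("dim = {}, first = {:.3}", e.len(), e[0]);
}
```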
## API Usage Examples
### Upload File
```bash
curl -F "file=@document.pdf" http://localhost:3001/api/files
```
### Import Demo Data
```bash
curl -X POST http://localhost:3001/api/files/import-demo
```
### Create Query
```bash
curl -X POST http://localhost:3001/api/query/create \
-H "Content-Type: application/json" \
-d '{"q": "What is the voltage of the ISS main bus?", "top_k": 5}'
```
### Check Status
```bash
curl http://localhost:3001/api/query/status?id=<query-id>
```
### Get Result
```bash
curl http://localhost:3001/api/query/result?id=<query-id>
```
## Future Enhancements
### High Priority
1. Real Gemini text embeddings (replace demo embeddings)
2. File status UI panel (show processing progress)
3. Health check endpoint (`/health`)
4. Data purge endpoint (clear all files/queries)
### Medium Priority
1. Streaming query responses (SSE/WebSocket)
2. Query result caching
3. File chunking for large PDFs
4. User authentication
### Low Priority
1. Multi-collection support (different document types)
2. Query history UI
3. File preview in chat
4. Export results to PDF
## Troubleshooting
### Storage Permission Errors
- Ensure `/app/storage` is owned by container user
- Docker volume must be writable by user 1004 in production
### SQL Syntax Errors
- MySQL requires separate `CREATE TABLE` statements
- Cannot combine multiple DDL statements in one `sqlx::query()`
### Qdrant Connection Issues
- Check QDRANT_URL environment variable
- Ensure qdrant service is running and healthy
- Verify network connectivity between containers
### Worker Not Processing
- Check logs: `docker logs rust-engine`
- Verify database connectivity
- Look for stale InProgress jobs in queries/files tables
## Demo Presentation (3 minutes)
See `rust-engine/DEMODETAILS.md` for curated demo script with example queries.

QUICK_REFERENCE.md (new file, 219 lines)

@@ -0,0 +1,219 @@
# CodeRED-Astra Quick Reference
## System Overview
**Two-worker architecture for ISS document RAG:**
1. **FileWorker**: Analyzes uploaded files (Flash → Pro → Embed → Qdrant)
2. **QueryWorker**: Answers queries (Embed → Search → Relationships → Answer)
Both workers are **resumable** and automatically recover from crashes.
## Core Data Flow
```
Upload PDF → Storage → MySQL (pending) → FileWorker → Qdrant → MySQL (ready)
User Query → MySQL (queued) → QueryWorker → Search Qdrant → Gemini → Result
```
## Module Map
| Module | Purpose | Key Functions |
|--------|---------|---------------|
| `main.rs` | Entry point | Spawns workers, serves API |
| `db.rs` | Database init | Creates files/queries tables |
| `api.rs` | HTTP endpoints | Upload, list, delete, query CRUD |
| `file_worker.rs` | File analysis | Flash→Pro→embed→upsert |
| `worker.rs` | Query processing | Search→relationships→answer |
| `gemini_client.rs` | AI integration | Text generation, embeddings |
| `vector_db.rs` | Qdrant client | Upsert, search, delete |
| `storage.rs` | File management | Save/delete files |
| `models.rs` | Data structures | FileRecord, QueryRecord |
## API Endpoints
### Files
- `POST /api/files` - Upload file
- `POST /api/files/import-demo?force=1` - Bulk import demo PDFs
- `GET /api/files/list` - List all files with status
- `GET /api/files/delete?id=<uuid>` - Delete file
### Queries
- `POST /api/query/create` - Create query
- `GET /api/query/status?id=<uuid>` - Check status
- `GET /api/query/result?id=<uuid>` - Get result
- `GET /api/query/cancel?id=<uuid>` - Cancel query
## Database Schema
### files
- `id` - UUID primary key
- `filename` - Original filename
- `path` - Storage path
- `description` - Gemini Flash description
- `pending_analysis` - FALSE when ready for search
- `analysis_status` - Queued/InProgress/Completed/Failed
### queries
- `id` - UUID primary key
- `status` - Queued/InProgress/Completed/Cancelled/Failed
- `payload` - JSON query params `{"q": "...", "top_k": 5}`
- `result` - JSON result `{"summary": "...", "related_files": [...], "relationships": "...", "final_answer": "..."}`
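The worker defaults a missing `top_k` to 5 and clamps it to 1..=20; that defaulting can be sketched as:

```rust
// Default and clamp the query parameters as the QueryWorker does:
// a missing top_k falls back to 5 and is clamped to the 1..=20 range.
fn effective_top_k(requested: Option<u64>) -> usize {
    let k = requested.unwrap_or(5) as usize;
    k.max(1).min(20)
}

fn main() {
    println!("{}", effective_top_k(None));      // 5
    println!("{}", effective_top_k(Some(100))); // 20
    println!("{}", effective_top_k(Some(0)));   // 1
}
```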
## Environment Variables
### Required
- `GEMINI_API_KEY` - Gemini API key
- `DATABASE_URL` - MySQL connection string
- `QDRANT_URL` - Qdrant URL (default: http://qdrant:6333)
### Optional
- `ASTRA_STORAGE` - Storage directory (default: /app/storage)
- `DEMO_DATA_DIR` - Demo data directory (default: /app/demo-data)
- `GEMINI_MODEL` - Override Gemini model (default: gemini-2.5-pro)
## Worker States
### FileWorker
1. **Queued** - File uploaded, awaiting processing
2. **InProgress** - Currently being analyzed
3. **Completed** - Ready for search (pending_analysis=FALSE)
4. **Failed** - Error during processing
### QueryWorker
1. **Queued** - Query created, awaiting processing
2. **InProgress** - Currently searching/analyzing
3. **Completed** - Result available
4. **Cancelled** - User cancelled
5. **Failed** - Error during processing
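The legal transitions between these states can be sketched as a small table (the transition set is an assumption inferred from the lists above, not code from the repo):

```rust
// Hypothetical transition table for query status, inferred from the
// QueryWorker states listed above.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Status { Queued, InProgress, Completed, Cancelled, Failed }

fn can_transition(from: Status, to: Status) -> bool {
    use Status::*;
    matches!(
        (from, to),
        (Queued, InProgress)
            | (InProgress, Completed)
            | (InProgress, Failed)
            | (InProgress, Queued) // stale-job requeue
            | (Queued, Cancelled)
            | (InProgress, Cancelled)
    )
}

fn main() {
    assert!(can_transition(Status::Queued, Status::InProgress));
    assert!(!can_transition(Status::Completed, Status::Queued));
    println!("transition table ok");
}
```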
## Gemini Prompts
### FileWorker Stage 1 (Flash)
```
Describe the file '{filename}' and extract all key components, keywords,
and details for later vectorization. Be comprehensive and factual.
```
### FileWorker Stage 2 (Pro)
```
Given the file '{filename}' and its description: {desc}
Generate a set of vector graph data (keywords, use cases, relationships)
that can be used for broad and precise search. Only include what is
directly supported by the file.
```
### QueryWorker Stage 4 (Relationships)
```
You are an assistant analyzing relationships STRICTLY within the provided files.
Query: {query}
Files: {file_list}
Tasks:
1) Summarize key details from the files relevant to the query.
2) Describe relationships and linkages strictly supported by these files.
3) List important follow-up questions that could be answered only using the provided files.
Rules: Do NOT guess or invent. If information is insufficient in the files, explicitly state that.
```
### QueryWorker Stage 5 (Final Answer)
```
You are to compose a final answer to the user query using only the information from the files.
Query: {query}
Files considered: {file_list}
Relationship analysis: {relationships}
Requirements:
- Use only information present in the files and analysis above.
- If the answer is uncertain or cannot be determined from the files, clearly state that limitation.
- Avoid speculation or assumptions.
Provide a concise, structured answer.
```
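These templates are plain string builds; a sketch of the Stage 4 builder (the helper name matches the worker code, exact whitespace is approximate):

```rust
// Builds the Stage 4 relationships prompt from the query and the JSON file
// list, following the template above.
fn build_relationships_prompt(query: &str, file_list: &str) -> String {
    format!(
        "You are an assistant analyzing relationships STRICTLY within the provided files.\n\
         Query: {query}\n\
         Files: {file_list}\n\
         Tasks:\n\
         1) Summarize key details from the files relevant to the query.\n\
         2) Describe relationships and linkages strictly supported by these files.\n\
         3) List important follow-up questions that could be answered only using the provided files.\n\
         Rules: Do NOT guess or invent. If information is insufficient in the files, explicitly state that."
    )
}

fn main() {
    let p = build_relationships_prompt("ISS main bus voltage", "[eps.pdf]");
    println!("{}", p.lines().next().unwrap());
}
```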
## Docker Architecture
### Services
- **rust-engine** - Warp API + workers (port 8000)
- **web-app** - Express + React (port 3001)
- **mysql** - MySQL 9.1 (port 3306)
- **qdrant** - Qdrant vector DB (port 6333)
- **phpmyadmin** - DB admin UI (port 8080)
### Volumes (Production)
- `rust-storage:/app/storage` - File storage (writable)
- `/var/www/codered-astra/rust-engine/demo-data:/app/demo-data:ro` - Demo PDFs (read-only)
- `~/astra-logs:/var/log` - Log files
## Common Issues
### 1. SQL Syntax Error
**Problem**: `error near 'CREATE TABLE'`
**Cause**: Multiple CREATE TABLE in one query
**Fix**: Split into separate `sqlx::query()` calls
### 2. Permission Denied
**Problem**: `Permission denied (os error 13)`
**Cause**: Container user can't write to storage
**Fix**: Use Docker volume, ensure ownership matches container user
### 3. Worker Not Processing
**Problem**: Files/queries stuck in Queued
**Cause**: Worker crashed or not started
**Fix**: Check logs, ensure workers spawned in main.rs
### 4. Qdrant Connection Failed
**Problem**: `qdrant upsert/search failed`
**Cause**: Qdrant not running or wrong URL
**Fix**: Verify QDRANT_URL, check qdrant container health
## Development Commands
```bash
# Build and run locally
cd rust-engine
cargo build
cargo run
# Check code
cargo check
# Run with logs
RUST_LOG=info cargo run
# Docker compose (dev)
docker-compose up --build
# Docker compose (production)
docker-compose -f docker-compose.prod.yml up -d
# View logs
docker logs rust-engine -f
# Rebuild single service
docker-compose build rust-engine
docker-compose up -d rust-engine
```
## Testing Flow
1. Start services: `docker-compose up -d`
2. Import demo data: `curl -X POST http://localhost:3001/api/files/import-demo`
3. Wait for FileWorker to complete (~30 seconds for 20 files)
4. Check file status: `curl http://localhost:3001/api/files/list`
5. Create query: `curl -X POST http://localhost:3001/api/query/create -H "Content-Type: application/json" -d '{"q": "ISS main bus voltage", "top_k": 5}'`
6. Check status: `curl http://localhost:3001/api/query/status?id=<id>`
7. Get result: `curl http://localhost:3001/api/query/result?id=<id>`
## Performance Notes
- FileWorker: ~1-2 sec per file (demo embeddings)
- QueryWorker: ~3-5 sec per query (search + 2 Gemini calls)
- Qdrant search: <100ms for 1000s of vectors
- MySQL queries: <10ms for simple selects
## Security Considerations
- Store GEMINI_API_KEY in GitHub Secrets (production)
- Use environment variables for all credentials
- Don't commit `.env` files
- Restrict phpmyadmin to internal network only
- Use HTTPS in production deployment


@@ -1,9 +1,8 @@
use crate::gemini_client;
use crate::vector_db::QdrantClient;
use crate::storage;
use anyhow::Result;
use bytes::Buf;
use futures_util::{StreamExt, TryStreamExt};
use futures_util::TryStreamExt;
use serde::Deserialize;
use sqlx::{MySqlPool, Row};
use warp::{multipart::FormData, Filter, Rejection, Reply};
@@ -80,10 +79,7 @@ pub fn routes(pool: MySqlPool) -> impl Filter<Extract = impl Reply, Error = Reje
}
async fn handle_upload(mut form: FormData, pool: MySqlPool) -> Result<impl Reply, Rejection> {
// qdrant client
let qdrant_url = std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://qdrant:6333".to_string());
let qdrant = QdrantClient::new(&qdrant_url);
let mut created_files = Vec::new();
while let Some(field) = form.try_next().await.map_err(|_| warp::reject())? {
let _name = field.name().to_string();
let filename = field
@@ -116,7 +112,7 @@ async fn handle_upload(mut form: FormData, pool: MySqlPool) -> Result<impl Reply
// Insert file record with pending_analysis = true, description = NULL
let id = uuid::Uuid::new_v4().to_string();
sqlx::query("INSERT INTO files (id, filename, path, description, pending_analysis) VALUES (?, ?, ?, ?, ?)")
sqlx::query("INSERT INTO files (id, filename, path, description, pending_analysis, analysis_status) VALUES (?, ?, ?, ?, ?, 'Queued')")
.bind(&id)
.bind(&filename)
.bind(path.to_str().unwrap())
@@ -128,10 +124,18 @@ async fn handle_upload(mut form: FormData, pool: MySqlPool) -> Result<impl Reply
tracing::error!("DB insert error: {}", e);
warp::reject()
})?;
// Enqueue worker task to process file (to be implemented)
created_files.push(serde_json::json!({
"id": id,
"filename": filename,
"pending_analysis": true,
"analysis_status": "Queued"
}));
}
Ok(warp::reply::json(&serde_json::json!({"success": true})))
Ok(warp::reply::json(&serde_json::json!({
"uploaded": created_files.len(),
"files": created_files
})))
}
async fn handle_import_demo(params: std::collections::HashMap<String, String>, pool: MySqlPool) -> Result<impl Reply, Rejection> {
@@ -209,7 +213,7 @@ async fn handle_delete(q: DeleteQuery, pool: MySqlPool) -> Result<impl Reply, Re
let _ = storage::delete_file(std::path::Path::new(&path));
// Remove from Qdrant
let qdrant_url = std::env::var("QDRANT_URL").unwrap_or_else(|_| "http://qdrant:6333".to_string());
let qdrant = crate::vector_db::QdrantClient::new(&qdrant_url);
let qdrant = QdrantClient::new(&qdrant_url);
let _ = qdrant.delete_point(&q.id).await;
let _ = sqlx::query("DELETE FROM files WHERE id = ?").bind(&q.id).execute(&pool).await;
return Ok(warp::reply::json(&serde_json::json!({"deleted": true})));


@@ -1,10 +1,11 @@
use sqlx::{MySql, MySqlPool};
use sqlx::MySqlPool;
use tracing::info;
pub async fn init_db(database_url: &str) -> Result<MySqlPool, sqlx::Error> {
let pool = MySqlPool::connect(database_url).await?;
// Create tables if they don't exist. Simple schema for demo/hackathon use.
// Note: MySQL requires separate statements for each CREATE TABLE
sqlx::query(
r#"
CREATE TABLE IF NOT EXISTS files (
@@ -15,8 +16,14 @@ pub async fn init_db(database_url: &str) -> Result<MySqlPool, sqlx::Error> {
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
pending_analysis BOOLEAN DEFAULT TRUE,
analysis_status VARCHAR(32) DEFAULT 'Queued'
);
)
"#,
)
.execute(&pool)
.await?;
sqlx::query(
r#"
CREATE TABLE IF NOT EXISTS queries (
id VARCHAR(36) PRIMARY KEY,
status VARCHAR(32) NOT NULL,
@@ -24,7 +31,7 @@ pub async fn init_db(database_url: &str) -> Result<MySqlPool, sqlx::Error> {
result JSON,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
)
"#,
)
.execute(&pool)


@@ -1,4 +1,5 @@
use crate::gemini_client::{generate_text, demo_text_embedding, DEMO_EMBED_DIM};
use crate::gemini_client::{demo_text_embedding, generate_text_with_model, DEMO_EMBED_DIM};
use crate::vector;
use crate::vector_db::QdrantClient;
use sqlx::MySqlPool;
use anyhow::Result;
@@ -27,6 +28,9 @@ impl FileWorker {
info!("Processing file {}", fid);
if let Err(e) = self.process_file(&fid).await {
error!("Error processing file {}: {}", fid, e);
if let Err(mark_err) = self.mark_failed(&fid, &format!("{}", e)).await {
error!("Failed to mark file {} as failed: {}", fid, mark_err);
}
}
}
Ok(None) => {
@@ -67,11 +71,17 @@ impl FileWorker {
.fetch_one(&self.pool)
.await?;
let filename: String = row.get("filename");
let path: String = row.get("path");
let _path: String = row.get("path");
// Stage 1: Gemini 2.5 Flash for description
std::env::set_var("GEMINI_MODEL", "gemini-1.5-flash");
let desc = generate_text(&format!("Describe the file '{filename}' and extract all key components, keywords, and details for later vectorization. Be comprehensive and factual.")).await.unwrap_or_else(|e| format!("[desc error: {}]", e));
let desc = generate_text_with_model(
"gemini-2.5-flash",
&format!(
"Describe the file '{filename}' and extract all key components, keywords, and details for later vectorization. Be comprehensive and factual."
),
)
.await
.unwrap_or_else(|e| format!("[desc error: {}]", e));
sqlx::query("UPDATE files SET description = ?, analysis_status = 'InProgress' WHERE id = ?")
.bind(&desc)
.bind(file_id)
@@ -79,12 +89,26 @@ impl FileWorker {
.await?;
// Stage 2: Gemini 2.5 Pro for deep vector graph data
std::env::set_var("GEMINI_MODEL", "gemini-1.5-pro");
let vector_graph = generate_text(&format!("Given the file '{filename}' and its description: {desc}\nGenerate a set of vector graph data (keywords, use cases, relationships) that can be used for broad and precise search. Only include what is directly supported by the file.")).await.unwrap_or_else(|e| format!("[vector error: {}]", e));
let vector_graph = generate_text_with_model(
"gemini-2.5-pro",
&format!(
"Given the file '{filename}' and its description: {desc}\nGenerate a set of vector graph data (keywords, use cases, relationships) that can be used for broad and precise search. Only include what is directly supported by the file."
),
)
.await
.unwrap_or_else(|e| format!("[vector error: {}]", e));
// Stage 3: Embed and upsert to Qdrant
let emb = demo_text_embedding(&vector_graph).await?;
self.qdrant.upsert_point(file_id, emb).await?;
match self.qdrant.upsert_point(file_id, emb.clone()).await {
Ok(_) => {
let _ = vector::store_embedding(file_id, emb.clone());
}
Err(err) => {
error!("Qdrant upsert failed for {}: {}", file_id, err);
let _ = vector::store_embedding(file_id, emb);
}
}
// Mark file as ready
sqlx::query("UPDATE files SET pending_analysis = FALSE, analysis_status = 'Completed' WHERE id = ?")
@@ -93,4 +117,17 @@ impl FileWorker {
.await?;
Ok(())
}
async fn mark_failed(&self, file_id: &str, reason: &str) -> Result<()> {
sqlx::query("UPDATE files SET analysis_status = 'Failed', pending_analysis = TRUE WHERE id = ?")
.bind(file_id)
.execute(&self.pool)
.await?;
sqlx::query("UPDATE files SET description = COALESCE(description, ?) WHERE id = ?")
.bind(format!("[analysis failed: {}]", reason))
.bind(file_id)
.execute(&self.pool)
.await?;
Ok(())
}
}


@@ -1,30 +1,11 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use serde_json::json;
use reqwest::Client;
use serde::Deserialize;
use serde_json::json;
// NOTE: This is a small stub to represent where you'd call the Gemini API.
// Replace with real API call and proper auth handling for production.
#[derive(Debug, Deserialize)]
pub struct GeminiTokenResponse {
pub token: String,
}
pub async fn generate_token_for_file(_path: &str) -> Result<String> {
Ok("gemini-token-placeholder".to_string())
}
/// Demo embedding generator - deterministic pseudo-embedding from filename/path
pub fn demo_embedding_from_path(path: &str) -> Vec<f32> {
// Very simple: hash bytes into a small vector
let mut v = vec![0f32; 64];
for (i, b) in path.as_bytes().iter().enumerate() {
let idx = i % v.len();
v[idx] += (*b as f32) / 255.0;
}
v
}
// NOTE: This file provides lightweight helpers around the Gemini API. For the
// hackathon demo we fall back to deterministic strings when the API key is not
// configured so the flows still work end-to-end.
pub const DEMO_EMBED_DIM: usize = 64;
@@ -38,16 +19,27 @@ pub async fn demo_text_embedding(text: &str) -> Result<Vec<f32>> {
Ok(v)
}
/// Generate text with Gemini (Generative Language API). Falls back to a demo string if GEMINI_API_KEY is not set.
/// Generate text using the default model (GEMINI_MODEL or gemini-2.5-pro).
#[allow(dead_code)]
pub async fn generate_text(prompt: &str) -> Result<String> {
let model = std::env::var("GEMINI_MODEL").unwrap_or_else(|_| "gemini-2.5-pro".to_string());
generate_text_with_model(&model, prompt).await
}
/// Generate text with an explicit Gemini model. Falls back to a deterministic
/// response when the API key is not set so the demo still runs.
pub async fn generate_text_with_model(model: &str, prompt: &str) -> Result<String> {
let api_key = match std::env::var("GEMINI_API_KEY") {
Ok(k) if !k.is_empty() => k,
_ => {
return Ok(format!("[demo] Gemini not configured. Prompt preview: {}", truncate(prompt, 240)));
return Ok(format!(
"[demo] Gemini ({}) not configured. Prompt preview: {}",
model,
truncate(prompt, 240)
));
}
};
let model = std::env::var("GEMINI_MODEL").unwrap_or_else(|_| "gemini-1.5-pro".to_string());
let url = format!(
"https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent?key={}",
model, api_key
@@ -62,7 +54,12 @@ pub async fn generate_text(prompt: &str) -> Result<String> {
let status = resp.status();
let txt = resp.text().await?;
if !status.is_success() {
return Ok(format!("[demo] Gemini error {}: {}", status, truncate(&txt, 240)));
return Ok(format!(
"[demo] Gemini ({}) error {}: {}",
model,
status,
truncate(&txt, 240)
));
}
#[derive(Deserialize)]
@@ -84,5 +81,9 @@ pub async fn generate_text(prompt: &str) -> Result<String> {
}
fn truncate(s: &str, max: usize) -> String {
if s.len() <= max { s.to_string() } else { format!("{}", &s[..max]) }
if s.len() <= max {
s.to_string()
} else {
format!("{}", &s[..max])
}
}


@@ -14,6 +14,7 @@ pub struct FileRecord {
}
impl FileRecord {
#[allow(dead_code)]
pub fn new(filename: impl Into<String>, path: impl Into<String>, description: Option<String>) -> Self {
Self {
id: Uuid::new_v4().to_string(),


@@ -1,5 +1,6 @@
use crate::gemini_client::{demo_text_embedding, DEMO_EMBED_DIM, generate_text};
use crate::gemini_client::{demo_text_embedding, generate_text_with_model, DEMO_EMBED_DIM};
use crate::models::{QueryRecord, QueryStatus};
use crate::vector;
use crate::vector_db::QdrantClient;
use anyhow::Result;
use sqlx::MySqlPool;
@@ -86,13 +87,32 @@ impl Worker {
let text = q.payload.get("q").and_then(|v| v.as_str()).unwrap_or("");
let emb = demo_text_embedding(text).await?;
let top_k = q.payload.get("top_k").and_then(|v| v.as_u64()).unwrap_or(5) as usize;
let top_k = top_k.max(1).min(20);
// Check cancellation
if self.is_cancelled(&q.id).await? { return Ok(()); }
// Stage 3: search top-K in Qdrant
let hits = self.qdrant.search_top_k(emb, top_k).await.unwrap_or_default();
let top_ids: Vec<String> = hits.iter().map(|(id, _)| id.clone()).collect();
let hits = match self.qdrant.search_top_k(emb.clone(), top_k).await {
Ok(list) if !list.is_empty() => list,
Ok(_) => Vec::new(),
Err(err) => {
error!("Qdrant search failed for query {}: {}", q.id, err);
Vec::new()
}
};
let hits = if hits.is_empty() {
match vector::query_top_k(&emb, top_k) {
Ok(fallback_ids) if !fallback_ids.is_empty() => {
info!("Using in-memory fallback for query {}", q.id);
fallback_ids.into_iter().map(|id| (id, 0.0)).collect()
}
_ => Vec::new(),
}
} else {
hits
};
// Check cancellation
if self.is_cancelled(&q.id).await? { return Ok(()); }
@@ -117,11 +137,23 @@ impl Worker {
// Stage 5: call Gemini to analyze relationships and propose follow-up details strictly from provided files
let relationships_prompt = build_relationships_prompt(text, &files_json);
let relationships = generate_text(&relationships_prompt).await.unwrap_or_else(|e| format!("[demo] relationships error: {}", e));
let (relationships, final_answer) = if files_json.is_empty() {
(
"No analyzed files are ready yet. Try seeding demo data or wait for processing to finish.".to_string(),
"I could not find any relevant documents yet. Once files finish analysis I will be able to answer.".to_string(),
)
} else {
let relationships = generate_text_with_model("gemini-2.5-pro", &relationships_prompt)
.await
.unwrap_or_else(|e| format!("[demo] relationships error: {}", e));
// Stage 6: final answer synthesis with strict constraints (no speculation; say unknown when insufficient)
let final_prompt = build_final_answer_prompt(text, &files_json, &relationships);
let final_answer = generate_text(&final_prompt).await.unwrap_or_else(|e| format!("[demo] final answer error: {}", e));
let final_answer = generate_text_with_model("gemini-2.5-pro", &final_prompt)
.await
.unwrap_or_else(|e| format!("[demo] final answer error: {}", e));
(relationships, final_answer)
};
// Stage 7: persist results
let result = serde_json::json!({


@@ -16,7 +16,7 @@
"@google/genai": "^1.25.0",
"@tailwindcss/postcss": "^4.1.14",
"@tailwindcss/vite": "^4.1.14",
"@vitejs/plugin-react": "^5.0.4",
"@vitejs/plugin-react-swc": "^3.7.0",
"bootstrap": "^5.3.8",
"bootstrap-icons": "^1.13.1",
"class-variance-authority": "^0.7.1",
@@ -26,6 +26,7 @@
"helmet": "^8.1.0",
"lucide-react": "^0.546.0",
"motion": "^12.23.24",
"node-fetch": "^3.3.2",
"pg": "^8.16.3",
"react": "^19.2.0",
"react-bootstrap": "^2.10.10",


@@ -2,25 +2,35 @@ import express from 'express';
import path from 'node:path';
import helmet from 'helmet';
import cors from 'cors';
import fetch from 'node-fetch';
import { fileURLToPath } from 'node:url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const app = express();
const PORT = process.env.PORT || 3000;
const RUST_ENGINE_BASE = process.env.RUST_ENGINE_BASE || 'http://rust-engine:8000';
const PORT = Number(process.env.PORT) || 3000;
const HOST = process.env.HOST || '0.0.0.0';
const RUST_ENGINE_BASE =
process.env.RUST_ENGINE_BASE ||
process.env.RUST_ENGINE_URL ||
'http://rust-engine:8000';
app.use(helmet());
app.set('trust proxy', true);
app.use(helmet({ contentSecurityPolicy: false }));
app.use(cors());
app.use(express.json());
app.get('/api/healthz', (_req, res) => {
res.json({ status: 'ok', upstream: RUST_ENGINE_BASE });
});
// Proxy minimal API needed by the UI to the rust-engine container
app.post('/api/files/import-demo', async (req, res) => {
try {
const qs = req.url.includes('?') ? req.url.substring(req.url.indexOf('?')) : '';
const url = `${RUST_ENGINE_BASE}/api/files/import-demo${qs}`;
const upstream = await fetch(url, { method: 'POST' });
const upstream = await fetch(url, { method: 'POST', headers: { 'content-type': 'application/json' }, body: req.body ? JSON.stringify(req.body) : undefined });
const text = await upstream.text();
res.status(upstream.status).type(upstream.headers.get('content-type') || 'application/json').send(text);
} catch (err) {
@@ -38,7 +48,7 @@ app.get('*', (req, res) => {
res.sendFile(path.join(distDir, 'index.html'));
});
app.listen(PORT, '0.0.0.0', () => {
console.log(`Web app server listening on http://0.0.0.0:${PORT}`);
app.listen(PORT, HOST, () => {
console.log(`Web app server listening on http://${HOST}:${PORT}`);
console.log(`Proxying to rust engine at ${RUST_ENGINE_BASE}`);
});


@@ -1,5 +1,5 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
import react from "@vitejs/plugin-react-swc";
import jsconfigPaths from "vite-jsconfig-paths";
import tailwindcss from "@tailwindcss/vite";