Implement file information processing queue system and Vector Graph preparation
This commit is contained in:
parent
8cda296143
commit
381b7b8858
9 changed files with 350 additions and 36 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use crate::gemini_client::{demo_text_embedding, DEMO_EMBED_DIM};
|
||||
use crate::gemini_client::{demo_text_embedding, DEMO_EMBED_DIM, generate_text};
|
||||
use crate::models::{QueryRecord, QueryStatus};
|
||||
use crate::vector_db::QdrantClient;
|
||||
use anyhow::Result;
|
||||
|
|
@ -84,21 +84,51 @@ impl Worker {
|
|||
|
||||
// Stage 2: embed query text
|
||||
let text = q.payload.get("q").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let emb = demo_text_embedding(text).await?;
|
||||
let emb = demo_text_embedding(text).await?;
|
||||
let top_k = q.payload.get("top_k").and_then(|v| v.as_u64()).unwrap_or(5) as usize;
|
||||
|
||||
// Check cancellation
|
||||
if self.is_cancelled(&q.id).await? { return Ok(()); }
|
||||
|
||||
// Stage 3: search top-K in Qdrant
|
||||
let top_ids = self.qdrant.search_top_k(emb, 5).await.unwrap_or_default();
|
||||
let hits = self.qdrant.search_top_k(emb, top_k).await.unwrap_or_default();
|
||||
let top_ids: Vec<String> = hits.iter().map(|(id, _)| id.clone()).collect();
|
||||
|
||||
// Check cancellation
|
||||
if self.is_cancelled(&q.id).await? { return Ok(()); }
|
||||
|
||||
// Stage 4: persist results
|
||||
// Stage 4: fetch file metadata for IDs
|
||||
let mut files_json = Vec::new();
|
||||
for (fid, score) in hits {
|
||||
if let Some(row) = sqlx::query("SELECT id, filename, path, description FROM files WHERE id = ? AND pending_analysis = FALSE")
|
||||
.bind(&fid)
|
||||
.fetch_optional(&self.pool)
|
||||
.await? {
|
||||
use sqlx::Row;
|
||||
let id: String = row.get("id");
|
||||
let filename: String = row.get("filename");
|
||||
let path: String = row.get("path");
|
||||
let description: Option<String> = row.get("description");
|
||||
files_json.push(serde_json::json!({
|
||||
"id": id, "filename": filename, "path": path, "description": description, "score": score
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
// Stage 5: call Gemini to analyze relationships and propose follow-up details strictly from provided files
|
||||
let relationships_prompt = build_relationships_prompt(text, &files_json);
|
||||
let relationships = generate_text(&relationships_prompt).await.unwrap_or_else(|e| format!("[demo] relationships error: {}", e));
|
||||
|
||||
// Stage 6: final answer synthesis with strict constraints (no speculation; say unknown when insufficient)
|
||||
let final_prompt = build_final_answer_prompt(text, &files_json, &relationships);
|
||||
let final_answer = generate_text(&final_prompt).await.unwrap_or_else(|e| format!("[demo] final answer error: {}", e));
|
||||
|
||||
// Stage 7: persist results
|
||||
let result = serde_json::json!({
|
||||
"summary": format!("Found {} related files", top_ids.len()),
|
||||
"related_file_ids": top_ids,
|
||||
"summary": format!("Found {} related files", files_json.len()),
|
||||
"related_files": files_json,
|
||||
"relationships": relationships,
|
||||
"final_answer": final_answer,
|
||||
});
|
||||
sqlx::query("UPDATE queries SET status = 'Completed', result = ? WHERE id = ?")
|
||||
.bind(result)
|
||||
|
|
@ -158,3 +188,47 @@ impl Worker {
|
|||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the prompt for the relationship-analysis step.
///
/// * `query` - the user's query text, inserted verbatim after `Query:`.
/// * `files` - JSON objects describing the candidate files. Each one is
///   rendered as a single bullet line from its `id`, `filename`, `path` and
///   `description` keys.
///
/// Returns the complete prompt text. A key that is absent or whose value is
/// not a JSON string (for example a `null` description) is rendered as an
/// empty string rather than causing an error.
///
/// Accepts a slice instead of `&Vec<_>`; call sites that pass
/// `&Vec<serde_json::Value>` keep compiling through deref coercion.
fn build_relationships_prompt(query: &str, files: &[serde_json::Value]) -> String {
    /// Returns the string stored under `key`, or `""` when it is missing or not a string.
    fn text_field<'a>(file: &'a serde_json::Value, key: &str) -> &'a str {
        file.get(key).and_then(|v| v.as_str()).unwrap_or("")
    }

    // One bullet line per file; the lines are joined with '\n' below.
    let files_snippets: Vec<String> = files
        .iter()
        .map(|f| {
            format!(
                "- id: {id}, filename: {name}, path: {path}, desc: {desc}",
                id = text_field(f, "id"),
                name = text_field(f, "filename"),
                path = text_field(f, "path"),
                desc = text_field(f, "description")
            )
        })
        .collect();

    // The trailing `\` continuations drop the newline and the indentation of
    // the following source line, so only the explicit `\n` escapes end up in
    // the prompt.
    format!(
        "You are an assistant analyzing relationships STRICTLY within the provided files.\n\
         Query: {query}\n\
         Files:\n{files}\n\
         Tasks:\n\
         1) Summarize key details from the files relevant to the query.\n\
         2) Describe relationships and linkages strictly supported by these files.\n\
         3) List important follow-up questions that could be answered only using the provided files.\n\
         Rules: Do NOT guess or invent. If information is insufficient in the files, explicitly state that.",
        query = query,
        files = files_snippets.join("\n")
    )
}
|
||||
|
||||
/// Builds the prompt for the final-answer synthesis step.
///
/// * `query` - the user's query text, inserted verbatim after `Query:`.
/// * `files` - JSON objects describing the files that were considered. Each
///   one is listed as `- <filename> (<id>)`.
/// * `relationships` - text of the earlier relationship analysis, inserted
///   verbatim after `Relationship analysis:`.
///
/// Returns the complete prompt text. A `filename` or `id` key that is absent
/// or not a JSON string is rendered as an empty string.
///
/// Accepts a slice instead of `&Vec<_>`; call sites that pass
/// `&Vec<serde_json::Value>` keep compiling through deref coercion.
fn build_final_answer_prompt(
    query: &str,
    files: &[serde_json::Value],
    relationships: &str,
) -> String {
    /// Returns the string stored under `key`, or `""` when it is missing or not a string.
    fn text_field<'a>(file: &'a serde_json::Value, key: &str) -> &'a str {
        file.get(key).and_then(|v| v.as_str()).unwrap_or("")
    }

    // Short listing: only the name and id of each file, one per line.
    let files_short: Vec<String> = files
        .iter()
        .map(|f| {
            format!(
                "- {name} ({id})",
                id = text_field(f, "id"),
                name = text_field(f, "filename")
            )
        })
        .collect();

    // The trailing `\` continuations drop the newline and the indentation of
    // the following source line, so only the explicit `\n` escapes end up in
    // the prompt.
    format!(
        "You are to compose a final answer to the user query using only the information from the files.\n\
         Query: {query}\n\
         Files considered:\n{files}\n\
         Relationship analysis:\n{rels}\n\
         Requirements:\n\
         - Use only information present in the files and analysis above.\n\
         - If the answer is uncertain or cannot be determined from the files, clearly state that limitation.\n\
         - Avoid speculation or assumptions.\n\
         Provide a concise, structured answer.",
        query = query,
        files = files_short.join("\n"),
        rels = relationships
    )
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue