Multi-Document Retrieval
Query across multiple compiled documents using the cross-document strategy with graph-based score boosting.
import asyncio
from vectorless import Engine
async def main():
    """Compile several reports, inspect their document graph, and query them.

    The example compiles three PDF reports, prints the cross-document graph
    edges for each compiled document, runs one query restricted to those
    documents and one query with ``workspace_scope=True``, and finally
    removes the compiled documents again.

    Cleanup runs in a ``finally`` block so that documents compiled before a
    failure are still removed.
    """
    engine = Engine(
        api_key="sk-...",
        model="gpt-4o",
    )

    # Compile multiple documents
    docs = ["./report-q1.pdf", "./report-q2.pdf", "./report-q3.pdf"]
    doc_ids = []
    try:
        for path in docs:
            result = await engine.compile(path=path)
            doc_ids.append(result.doc_id)
            print(f"Compiled: {path} → {result.doc_id}")

        # Check the cross-document graph
        graph = await engine.get_graph()
        # NOTE(review): the truthiness check suggests get_graph() may return
        # None or an empty graph — confirm against the Engine API.
        if graph:
            print(f"\nGraph: {graph.node_count()} docs, {graph.edge_count()} edges")
            for doc_id in doc_ids:
                neighbors = graph.get_neighbors(doc_id)
                for edge in neighbors:
                    print(f" {doc_id[:8]}... → {edge.target_doc_id[:8]}... ({edge.weight:.2f})")

        # Query across all documents
        response = await engine.ask(
            "Compare quarterly revenue trends",
            doc_ids=doc_ids,
        )
        for item in response.items:
            print(f"\n[{item.doc_id[:8]}...] Confidence: {item.confidence:.2f}")
            print(item.content[:300])

        # Or query entire workspace
        response = await engine.ask(
            "What documents discuss risk factors?",
            workspace_scope=True,
        )
        print(f"\nFound in {len(response.items)} document(s)")
    finally:
        # Cleanup: remove every document that was successfully compiled,
        # even if a later step raised.
        for doc_id in doc_ids:
            await engine.remove_document(doc_id)


if __name__ == "__main__":
    asyncio.run(main())
Key Concepts
Document Graph
After compiling, documents are connected in a graph based on shared keywords. The graph enables:
- Score boosting — High-confidence results in one document raise the scores of its neighboring documents in the graph
- Relationship discovery — Automatically find related documents
- Cross-referencing — Results from connected documents are surfaced together