Skip to main content

Multi-Document Retrieval

Query across multiple compiled documents using the cross-document strategy with graph-based score boosting.

import asyncio
from vectorless import Engine

async def main():
    """Compile several reports, inspect their graph, query them, then clean up.

    Steps, in order:
      1. Compile each PDF in ``docs`` and collect the returned document IDs.
      2. Fetch the cross-document graph and, if one is returned, print its
         node/edge counts and each compiled document's neighbor edges.
      3. Ask one question scoped to the compiled documents and print every
         result item (truncated document ID, confidence, first 300 chars).
      4. Ask a second question across the whole workspace and print how many
         items came back.
      5. Remove every document compiled by this run.

    The removal step runs in a ``finally`` block so that documents compiled
    before a failure are still removed from the workspace.
    """
    engine = Engine(
        api_key="sk-...",
        model="gpt-4o",
    )

    # Compile multiple documents
    docs = ["./report-q1.pdf", "./report-q2.pdf", "./report-q3.pdf"]
    doc_ids = []

    try:
        for path in docs:
            result = await engine.compile(path=path)
            # Record the ID immediately so cleanup covers partial progress.
            doc_ids.append(result.doc_id)
            print(f"Compiled: {path} → {result.doc_id}")

        # Check the cross-document graph
        graph = await engine.get_graph()
        if graph:
            print(f"\nGraph: {graph.node_count()} docs, {graph.edge_count()} edges")
            for doc_id in doc_ids:
                neighbors = graph.get_neighbors(doc_id)
                for edge in neighbors:
                    # IDs are cut to 8 characters purely to keep the line short.
                    print(f" {doc_id[:8]}... → {edge.target_doc_id[:8]}... ({edge.weight:.2f})")

        # Query across all documents
        response = await engine.ask(
            "Compare quarterly revenue trends",
            doc_ids=doc_ids,
        )

        for item in response.items:
            print(f"\n[{item.doc_id[:8]}...] Confidence: {item.confidence:.2f}")
            print(item.content[:300])

        # Or query entire workspace
        response = await engine.ask(
            "What documents discuss risk factors?",
            workspace_scope=True,
        )

        print(f"\nFound in {len(response.items)} document(s)")
    finally:
        # Cleanup: remove whatever was compiled, even if a step above raised.
        for doc_id in doc_ids:
            await engine.remove_document(doc_id)

# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())

Key Concepts

Document Graph

After compiling, documents are connected in a graph based on shared keywords. The graph enables:

  • Score boosting — High-confidence results in one document boost the scores of its neighboring documents
  • Relationship discovery — Automatically find related documents
  • Cross-referencing — Results from connected documents are surfaced together