Multi-Document Retrieval

Query across multiple compiled documents using the cross-document strategy with graph-based score boosting.

import asyncio
from vectorless import Engine

async def main() -> None:
    """Compile several reports, inspect their graph, and query across them.

    Steps, in order:

    1. Compile each PDF and collect the returned document IDs.
    2. Print the cross-document graph (node/edge counts and each
       document's outgoing edges with their weights), if one is returned.
    3. Ask one question scoped to the compiled documents and print each
       result item's confidence and the first 300 characters of content.
    4. Ask a second question scoped to the entire workspace.
    5. Remove every document that was compiled by this run.

    Step 5 runs in a ``finally`` block so that documents compiled before a
    failure are still removed when a later step raises.
    """
    engine = Engine(
        api_key="sk-...",
        model="gpt-4o",
    )

    # Compile multiple documents
    docs = ["./report-q1.pdf", "./report-q2.pdf", "./report-q3.pdf"]
    doc_ids = []

    try:
        for path in docs:
            result = await engine.compile(path=path)
            doc_ids.append(result.doc_id)
            print(f"Compiled: {path} → {result.doc_id}")

        # Check the cross-document graph
        graph = await engine.get_graph()
        if graph:
            print(f"\nGraph: {graph.node_count()} docs, {graph.edge_count()} edges")
            for doc_id in doc_ids:
                neighbors = graph.get_neighbors(doc_id)
                for edge in neighbors:
                    print(f"  {doc_id[:8]}... → {edge.target_doc_id[:8]}... ({edge.weight:.2f})")

        # Query across all documents
        response = await engine.ask(
            "Compare quarterly revenue trends",
            doc_ids=doc_ids,
        )

        for item in response.items:
            print(f"\n[{item.doc_id[:8]}...] Confidence: {item.confidence:.2f}")
            print(item.content[:300])

        # Or query entire workspace
        response = await engine.ask(
            "What documents discuss risk factors?",
            workspace_scope=True,
        )

        print(f"\nFound in {len(response.items)} document(s)")
    finally:
        # Cleanup: only IDs appended above are removed, so a failure partway
        # through compilation still cleans up whatever was compiled.
        for doc_id in doc_ids:
            await engine.remove_document(doc_id)

# Script entry point: run the main() coroutine to completion on a new event loop.
asyncio.run(main())

Key Concepts

Document Graph

After compiling, documents are connected in a graph based on shared keywords. The graph enables:

Score boosting — High-confidence results in one document boost the scores of its neighboring documents
Relationship discovery — Automatically find related documents
Cross-referencing — Results from connected documents are surfaced together

Key Concepts

Document Graph

Key Concepts

Document Graph