Document Question Answering

This example demonstrates how to create an agent that can answer questions about documents using the file input capability of LLMs like GPT-4o.

Document QA Tool

First, we’ll create a tool that can load and process documents:

from brain.agents.tool import tool
from brain.agents.models import LocalFile
from pydantic import BaseModel
from typing import List, Optional
import os

class DocumentReference(BaseModel):
    filename: str
    page_number: Optional[int] = None

class DocumentQuery(BaseModel):
    query: str
    document_references: List[DocumentReference]
    use_summarization: bool = False

class DocumentAnswer(BaseModel):
    answer: str
    sources: List[DocumentReference]

@tool()
async def process_document_query(query: DocumentQuery) -> DocumentAnswer:
    """
    Process a query against one or more documents

    Args:
        query: Document query with references to files to search

    Returns:
        An answer to the query with source references
    """
    # Collect file references
    files = []

    for ref in query.document_references:
        filepath = os.path.join("documents", ref.filename)
        if not os.path.exists(filepath):
            return DocumentAnswer(
                answer=f"Error: Document {ref.filename} not found.",
                sources=[]
            )

        files.append(LocalFile(name=filepath))

    # In a real implementation, you might use specialized document processing
    # For this example, we'll rely on the LLM to process the document directly

    # Create a message to send to the LLM
    message_content = f"Question: {query.query}\n\n"
    message_content += "Please analyze the provided documents to answer this question."

    if query.use_summarization:
        message_content += " Provide a concise summary."

    # Note: this simplified example does not actually call the LLM; the
    # `files` and `message_content` built above only illustrate the inputs
    # a real call would need (see the sketch after this example).

    # Mock response for example purposes
    return DocumentAnswer(
        answer=f"This is a mock answer to the query: {query.query}. In a real implementation, " +
               "this would be the result of processing the documents with an LLM.",
        sources=query.document_references
    )
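
For reference, here is a hedged sketch of what a real tool body might look like: it forwards the collected files to the LLM using the same generate_messages call shown in the next section. The llm argument and the helper name answer_with_llm are assumptions for illustration, not part of the Brain API:

from brain.agents.models import Message

# Hypothetical helper (sketch): pass the collected files to a model that
# supports file input and wrap its reply in a DocumentAnswer.
async def answer_with_llm(llm, query: DocumentQuery) -> DocumentAnswer:
    files = [
        LocalFile(name=os.path.join("documents", ref.filename))
        for ref in query.document_references
    ]
    prompt = (
        f"Question: {query.query}\n\n"
        "Please analyze the provided documents to answer this question."
    )
    if query.use_summarization:
        prompt += " Provide a concise summary."

    response = await llm.generate_messages(
        messages=[Message(role="user", content=prompt)],
        files=files,
        model="gpt-4o",
    )
    return DocumentAnswer(
        answer=response[0].content,
        sources=query.document_references,
    )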

Setting Up the Document QA Agent

Now, let’s create an agent that uses the document processing tool:

import asyncio
import os
from brain.agents.agent import Agent
from brain.agents.llm.openai import OpenAIBaseLLM
from brain.agents.models import LocalFile, Message
from brain.agents.callback import callback

@callback("message_stream.assistant")
async def stream_to_console(agent, event, stream):
    print("\nAssistant: ", end="", flush=True)
    async for chunk in stream:
        if hasattr(chunk, "chunk"):
            print(chunk.chunk, end="", flush=True)

async def direct_document_qa(llm, query, document_path):
    """
    Process a document query directly using the LLM's file processing capability
    """
    # Check if file exists
    if not os.path.exists(document_path):
        return f"Error: Document not found at {document_path}"

    # Create a file reference
    document_file = LocalFile(name=document_path)

    # Create messages for the LLM
    messages = [
        Message(
            role="user",
            content=f"Please analyze the attached document and answer this question: {query}"
        )
    ]

    # Generate response with the document
    response = await llm.generate_messages(
        messages=messages,
        files=[document_file],
        model="gpt-4o" # Use a model that supports file input
    )

    # Return the response content
    return response[0].content

async def main():
    # Initialize the LLM
    llm = OpenAIBaseLLM(
        api_key=os.environ.get("OPENAI_API_KEY"),
        default_model="gpt-4o"  # Using a model that supports file input
    )

    # Create the agent with the document processing tool
    agent = Agent(
        llm=llm,
        tools=[process_document_query],
        instructions="""
        You are a document analysis assistant. You can answer questions about documents
        by using the process_document_query tool.

        For simple document queries, you can also directly analyze documents that are
        uploaded by the user.
        """,
        callbacks=[stream_to_console]
    )

    # Example document path (you would need to create this directory and add documents)
    document_dir = "documents"
    os.makedirs(document_dir, exist_ok=True)

    # Run a conversation loop
    print("Document QA Agent (type 'exit' to quit)")
    print(f"Available documents will be looked for in the '{document_dir}' directory")

    while True:
        # Get user input
        user_input = input("\nYou: ")
        if user_input.lower() == "exit":
            break

        # Direct analysis command, in the form:
        #   Analyze document: <filename>: <question>
        if user_input.startswith("Analyze document:"):
            parts = user_input.split(":", 2)
            if len(parts) == 3:
                document_name = parts[1].strip()
                query = parts[2].strip()

                document_path = os.path.join(document_dir, document_name)

                # Use direct document analysis
                response = await direct_document_qa(llm, query, document_path)
                print(f"\nAssistant: {response}")
                continue

        # Process with the agent
        await agent.run(user_input)
        print()  # Add a newline after the response

if __name__ == "__main__":
    asyncio.run(main())
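
A session with the direct-analysis command might look like this (the filename and question are illustrative; place a real file in the documents directory first):

Document QA Agent (type 'exit' to quit)
Documents are read from the 'documents' directory

You: Analyze document: report.pdf: What are the key findings?

Assistant: <the model's answer, based on the attached file>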

Complete Example

In a complete implementation, you would combine the two snippets above and harden them with error handling and other refinements. The approach described here demonstrates the basic structure of a document QA system built with Malevich Brain.
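
As one example of such hardening, a minimal sketch of a wrapper around direct_document_qa is shown below; since the exception types raised by the LLM client are not specified here, a broad except clause stands in for them:

async def safe_document_qa(llm, query: str, document_path: str) -> str:
    """Run direct_document_qa and convert failures into readable messages (sketch)."""
    try:
        return await direct_document_qa(llm, query, document_path)
    except FileNotFoundError:
        return f"Error: could not open {document_path}."
    except Exception as exc:  # the client's exception types are not documented here
        return f"Error while querying the model: {exc}"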