Document Question Answering#
This example demonstrates how to create an agent that can answer questions about documents using the file input capability of LLMs like GPT-4o.
Document QA Tool#
First, we’ll create a tool that can load and process documents:
from brain.agents.tool import tool
from brain.agents.models import LocalFile
from pydantic import BaseModel
from typing import List, Optional
import os
class DocumentReference(BaseModel):
    """A pointer to a document on disk, optionally narrowed to one page."""

    # Filename relative to the "documents" directory (joined in process_document_query).
    filename: str
    # Specific page to cite; None means the whole document.
    page_number: Optional[int] = None
class DocumentQuery(BaseModel):
    """A natural-language question to run against one or more documents."""

    # The question to answer.
    query: str
    # Documents to consult when answering.
    document_references: List[DocumentReference]
    # When True, the prompt asks for a concise summary instead of a full answer.
    use_summarization: bool = False
class DocumentAnswer(BaseModel):
    """The result of a DocumentQuery, with the documents it was drawn from."""

    # Answer text (or an error message when a referenced document is missing).
    answer: str
    # Documents supporting the answer; empty on error.
    sources: List[DocumentReference]
@tool()
async def process_document_query(query: DocumentQuery) -> DocumentAnswer:
    """
    Process a query against one or more documents.

    Args:
        query: Document query with references to files to search

    Returns:
        An answer to the query with source references, or an error answer
        (empty sources) if any referenced document is missing on disk.
    """
    # Resolve every reference to a file on disk, bailing out on the first
    # document that cannot be found.
    files = []
    for reference in query.document_references:
        path = os.path.join("documents", reference.filename)
        if not os.path.exists(path):
            return DocumentAnswer(
                answer=f"Error: Document {reference.filename} not found.",
                sources=[],
            )
        files.append(LocalFile(name=path))

    # In a real implementation, you might use specialized document processing.
    # For this example, we'll rely on the LLM to process the document directly.

    # Assemble the prompt that would be sent to the LLM.
    prompt_parts = [
        f"Question: {query.query}\n\n",
        "Please analyze the provided documents to answer this question.",
    ]
    if query.use_summarization:
        prompt_parts.append(" Provide a concise summary.")
    message_content = "".join(prompt_parts)

    # Note: In a real implementation, you would use the LLM to process the
    # documents and extract the answer. This is a simplified example.

    # Mock response for example purposes.
    return DocumentAnswer(
        answer=(
            f"This is a mock answer to the query: {query.query}. In a real implementation, "
            "this would be the result of processing the documents with an LLM."
        ),
        sources=query.document_references,
    )
Setting Up the Document QA Agent#
Now, let’s create an agent that uses the document processing tool:
import asyncio
import os
from brain.agents.agent import Agent
from brain.agents.llm.openai import OpenAIBaseLLM
from brain.agents.models import LocalFile, Message
from brain.agents.callback import callback
@callback("message_stream.assistant")
async def stream_to_console(agent, event, stream):
    """Echo assistant output chunks to stdout as they arrive from the stream."""
    print("\nAssistant: ", end="", flush=True)
    _missing = object()  # sentinel so a chunk attribute holding None still prints
    async for piece in stream:
        text = getattr(piece, "chunk", _missing)
        if text is not _missing:
            print(text, end="", flush=True)
async def direct_document_qa(llm, query, document_path):
    """
    Process a document query directly using the LLM's file processing capability.

    Args:
        llm: LLM client exposing an async ``generate_messages`` method.
        query: Natural-language question about the document.
        document_path: Filesystem path of the document to attach.

    Returns:
        The content of the first LLM response, or an error string if the
        document does not exist.
    """
    # Guard clause: nothing to analyze if the file is absent.
    if not os.path.exists(document_path):
        return f"Error: Document not found at {document_path}"

    prompt = f"Please analyze the attached document and answer this question: {query}"
    user_message = Message(role="user", content=prompt)

    # Hand the document to the LLM alongside the question.
    responses = await llm.generate_messages(
        messages=[user_message],
        files=[LocalFile(name=document_path)],
        model="gpt-4o"  # Use a model that supports file input
    )

    # The answer is the content of the first returned message.
    return responses[0].content
async def main():
    """Run an interactive document QA loop until the user types 'exit'."""
    # LLM configured for file input.
    llm = OpenAIBaseLLM(
        api_key=os.environ.get("OPENAI_API_KEY"),
        default_model="gpt-4o"  # Using a model that supports file input
    )

    # Agent wired up with the document tool and the streaming callback.
    agent = Agent(
        llm=llm,
        tools=[process_document_query],
        instructions="""
        You are a document analysis assistant. You can answer questions about documents
        by using the process_document_query tool.

        For simple document queries, you can also directly analyze documents that are
        uploaded by the user.
        """,
        callbacks=[stream_to_console]
    )

    # Example document path (you would need to create this directory and add documents)
    document_dir = "documents"
    os.makedirs(document_dir, exist_ok=True)

    print("Document QA Agent (type 'exit' to quit)")
    print(f"Available documents will be looked for in the '{document_dir}' directory")

    while True:
        user_input = input("\nYou: ")
        if user_input.lower() == "exit":
            break

        # "Analyze document:<name>:<query>" bypasses the agent and sends the
        # file straight to the LLM.
        if user_input.startswith("Analyze document:"):
            pieces = user_input.split(":", 2)
            if len(pieces) == 3:
                doc_name = pieces[1].strip()
                doc_query = pieces[2].strip()
                doc_path = os.path.join(document_dir, doc_name)

                answer = await direct_document_qa(llm, doc_query, doc_path)
                print(f"\nAssistant: {answer}")
                continue

        # Everything else is handled by the agent itself.
        await agent.run(user_input)
        print()  # Add a newline after the response


if __name__ == "__main__":
    asyncio.run(main())
Complete Example#
For the complete implementation, the code would be combined and enhanced with error handling and additional features. The approach described above demonstrates the basic structure of a document QA system using Malevich Brain.