Document Question Answering
===========================

This example demonstrates how to build an agent that answers questions about
documents using the file input capability of LLMs such as GPT-4o.

Document QA Tool
----------------

First, we'll create a tool that loads and processes documents:

.. code-block:: python

    import os
    from typing import List, Optional

    from pydantic import BaseModel

    from brain.agents.models import LocalFile
    from brain.agents.tool import tool


    class DocumentReference(BaseModel):
        filename: str
        page_number: Optional[int] = None


    class DocumentQuery(BaseModel):
        query: str
        document_references: List[DocumentReference]
        use_summarization: bool = False


    class DocumentAnswer(BaseModel):
        answer: str
        sources: List[DocumentReference]


    @tool()
    async def process_document_query(query: DocumentQuery) -> DocumentAnswer:
        """
        Process a query against one or more documents.

        Args:
            query: Document query with references to the files to search.

        Returns:
            An answer to the query with source references.
        """
        # Collect file references, failing fast on missing documents
        files = []
        for ref in query.document_references:
            filepath = os.path.join("documents", ref.filename)
            if not os.path.exists(filepath):
                return DocumentAnswer(
                    answer=f"Error: Document {ref.filename} not found.",
                    sources=[],
                )
            files.append(LocalFile(name=filepath))

        # Build the message that would be sent to the LLM. In a real
        # implementation you might use specialized document processing;
        # for this example, we rely on the LLM to read the files directly.
        message_content = f"Question: {query.query}\n\n"
        message_content += "Please analyze the provided documents to answer this question."

        if query.use_summarization:
            message_content += " Provide a concise summary."

        # Note: in a real implementation you would send ``message_content``
        # and ``files`` to the LLM and extract the answer. This simplified
        # example returns a mock response instead.
        return DocumentAnswer(
            answer=(
                f"This is a mock answer to the query: {query.query}. "
                "In a real implementation, this would be the result of "
                "processing the documents with an LLM."
            ),
            sources=query.document_references,
        )
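The mock return keeps the example self-contained. As a rough sketch of what
the real step could look like, the snippet below forwards the message and the
collected ``files`` to the LLM using the same ``generate_messages`` call that
appears in the next section. The module-level ``llm`` instance and the
``answer_with_llm`` helper name are illustrative assumptions, not part of the
tool API:

.. code-block:: python

    import os

    from brain.agents.llm.openai import OpenAIBaseLLM
    from brain.agents.models import Message

    # Assumption for illustration: a module-level LLM instance the tool can
    # reach; the helper name ``answer_with_llm`` is hypothetical.
    llm = OpenAIBaseLLM(
        api_key=os.environ.get("OPENAI_API_KEY"),
        default_model="gpt-4o",
    )


    async def answer_with_llm(query: DocumentQuery, files: List[LocalFile]) -> DocumentAnswer:
        """Replace the mock response with an actual LLM call over the files."""
        message_content = f"Question: {query.query}\n\n"
        message_content += "Please analyze the provided documents to answer this question."
        if query.use_summarization:
            message_content += " Provide a concise summary."

        # Same generate_messages call as in direct_document_qa below
        response = await llm.generate_messages(
            messages=[Message(role="user", content=message_content)],
            files=files,
            model="gpt-4o",  # a model that supports file input
        )
        return DocumentAnswer(
            answer=response[0].content,
            sources=query.document_references,
        )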
Setting Up the Document QA Agent
--------------------------------

Now, let's create an agent that uses the document processing tool:

.. code-block:: python

    import asyncio
    import os

    from brain.agents.agent import Agent
    from brain.agents.callback import callback
    from brain.agents.llm.openai import OpenAIBaseLLM
    from brain.agents.models import LocalFile, Message


    @callback("message_stream.assistant")
    async def stream_to_console(agent, event, stream):
        """Stream the assistant's response to the console as it arrives."""
        print("\nAssistant: ", end="", flush=True)
        async for chunk in stream:
            if hasattr(chunk, "chunk"):
                print(chunk.chunk, end="", flush=True)


    async def direct_document_qa(llm, query, document_path):
        """
        Process a document query directly using the LLM's file input capability.
        """
        if not os.path.exists(document_path):
            return f"Error: Document not found at {document_path}"

        # Create a file reference for the LLM
        document_file = LocalFile(name=document_path)

        messages = [
            Message(
                role="user",
                content=f"Please analyze the attached document and answer this question: {query}",
            )
        ]

        # Generate a response with the document attached
        response = await llm.generate_messages(
            messages=messages,
            files=[document_file],
            model="gpt-4o",  # use a model that supports file input
        )

        return response[0].content


    async def main():
        # Initialize the LLM with a model that supports file input
        llm = OpenAIBaseLLM(
            api_key=os.environ.get("OPENAI_API_KEY"),
            default_model="gpt-4o",
        )

        # Create the agent with the document processing tool
        agent = Agent(
            llm=llm,
            tools=[process_document_query],
            instructions="""
            You are a document analysis assistant. You can answer questions about
            documents by using the process_document_query tool.

            For simple document queries, you can also directly analyze documents
            that are uploaded by the user.
            """,
            callbacks=[stream_to_console],
        )

        # Create the documents directory if needed; add your documents there
        document_dir = "documents"
        os.makedirs(document_dir, exist_ok=True)

        # Run a conversation loop
        print("Document QA Agent (type 'exit' to quit)")
        print(f"Documents are read from the '{document_dir}' directory")

        while True:
            user_input = input("\nYou: ")
            if user_input.lower() == "exit":
                break

            # "Analyze document: <name>: <question>" triggers direct analysis
            if user_input.startswith("Analyze document:"):
                parts = user_input.split(":", 2)
                if len(parts) == 3:
                    document_name = parts[1].strip()
                    query = parts[2].strip()
                    document_path = os.path.join(document_dir, document_name)

                    response = await direct_document_qa(llm, query, document_path)
                    print(f"\nAssistant: {response}")
                    continue

            # Otherwise, let the agent decide how to answer
            await agent.run(user_input)
            print()  # newline after the streamed response


    if __name__ == "__main__":
        asyncio.run(main())

Complete Example
----------------

To assemble the complete example, combine the two snippets above into a single
module and add error handling around the LLM calls. The code above demonstrates
the basic structure of a document QA system built with Malevich Brain.
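As a minimal sketch of that error handling, the snippet below runs the
``direct_document_qa`` path once, outside the interactive loop, and wraps it
in a ``try``/``except``. The document name and question are placeholders, and
a production system would catch narrower exception types than ``Exception``:

.. code-block:: python

    import asyncio
    import os

    from brain.agents.llm.openai import OpenAIBaseLLM


    async def one_shot_example():
        llm = OpenAIBaseLLM(
            api_key=os.environ.get("OPENAI_API_KEY"),
            default_model="gpt-4o",
        )
        try:
            answer = await direct_document_qa(
                llm,
                query="What are the key findings?",  # placeholder question
                document_path=os.path.join("documents", "report.pdf"),  # placeholder file
            )
            print(answer)
        except Exception as exc:
            # Catch-all for brevity; real code should handle specific network
            # and API errors and decide whether to retry or surface them.
            print(f"Document QA failed: {exc}")


    asyncio.run(one_shot_example())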