From 90ac5e9e82007571dcde38dec8ed98f960f86165 Mon Sep 17 00:00:00 2001 From: Gaetan Hurel Date: Thu, 26 Jun 2025 14:52:20 +0200 Subject: [PATCH] create simple react agent --- simple-react-agent/README.md | 96 +++++++++++++ simple-react-agent/log_analyzer.py | 142 +++++++++++++++++++ simple-react-agent/loghub | 1 + simple-react-agent/main.py | 213 +++++++++++++++++++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 simple-react-agent/README.md create mode 100644 simple-react-agent/log_analyzer.py create mode 120000 simple-react-agent/loghub create mode 100644 simple-react-agent/main.py diff --git a/simple-react-agent/README.md b/simple-react-agent/README.md new file mode 100644 index 0000000..132e8e1 --- /dev/null +++ b/simple-react-agent/README.md @@ -0,0 +1,96 @@ +# Simple ReAct Agent for Log Analysis + +This directory contains a simple ReAct (Reasoning and Acting) agent implementation for log analysis and system administration tasks. + +## Overview + +The simple ReAct agent follows a straightforward pattern: +1. **Receives** user input +2. **Reasons** about what tools to use +3. **Acts** by executing tools when needed +4. **Responds** with the final result + +## Features + +- **Single Agent**: One agent handles all tasks +- **Shell Access**: Execute system commands safely +- **Log Analysis**: Specialized log analysis capabilities +- **Interactive Chat**: Stream responses with tool usage visibility +- **Conversation History**: Maintains context across interactions + +## Architecture + +``` +User Input → ReAct Agent → Tools (Shell + Log Analyzer) → Response +``` + +## Files + +- `main.py`: Main application with ReAct agent implementation +- `log_analyzer.py`: Specialized tool for analyzing log files +- `loghub/`: Symbolic link to log files directory + +## Tools Available + +1. **Shell Tool**: Execute system commands + - System monitoring (`top`, `ps`, `df`, etc.) + - File operations + - Network diagnostics + +2. 
**Log Analyzer Tool**: Analyze log files with different modes: + - `error_patterns`: Find and categorize error messages + - `frequency`: Analyze frequency of different log patterns + - `timeline`: Show chronological patterns of events + - `summary`: Provide an overall summary of the log file + +## Usage + +```bash +cd simple-react-agent +python main.py +``` + +### Example Interactions + +``` +User: Analyze the Apache logs for error patterns +Agent: šŸ”§ Using tool: analyze_log_file + Args: {'file_path': 'Apache/Apache_2k.log', 'analysis_type': 'error_patterns'} + šŸ“‹ Tool result: Found 15 error patterns in Apache logs... + +User: Check disk usage on the system +Agent: šŸ”§ Using tool: shell + Args: {'command': 'df -h'} + šŸ“‹ Tool result: Filesystem usage information... +``` + +## Pros and Cons + +### āœ… Pros +- **Simple to understand**: Single agent, clear flow +- **Easy to debug**: Linear execution path +- **Quick setup**: Minimal configuration required +- **Resource efficient**: Lower computational overhead +- **Good for**: Simple tasks, learning, rapid prototyping + +### āŒ Cons +- **Limited specialization**: One agent handles everything +- **No parallel processing**: Sequential tool execution +- **Scaling challenges**: Complex tasks may overwhelm single agent +- **Less sophisticated**: No coordination between specialized experts + +## When to Use + +Choose the simple ReAct agent when: +- You need a straightforward log analysis tool +- Your use cases are relatively simple +- You want to understand LangGraph basics +- Resource usage is a concern +- You prefer simplicity over sophistication + +## Requirements + +```bash +pip install langchain-openai langgraph langchain-community +export OPENAI_API_KEY="your-api-key" +``` diff --git a/simple-react-agent/log_analyzer.py b/simple-react-agent/log_analyzer.py new file mode 100644 index 0000000..ad7149d --- /dev/null +++ b/simple-react-agent/log_analyzer.py @@ -0,0 +1,142 @@ +import os +import re +from collections 
"""Log-file analysis tool for the simple ReAct agent.

Exposes ``analyze_log_file`` (a LangChain tool) with four analysis modes:
"error_patterns", "frequency", "timeline", and "summary". Private helpers
implement each mode over the raw list of log lines.
"""

import os
import re
from collections import Counter
from typing import List, Dict, Any

try:
    from langchain_core.tools import tool
except ImportError:  # Keep the module importable (and unit-testable) without langchain.
    def tool(func):
        """No-op stand-in for langchain's @tool decorator."""
        return func

# Masking rules used by _analyze_frequency, compiled once at import time.
# NOTE: the generic digit rule (_NUM_RE) must be applied LAST. Masking digits
# first destroys IP/UUID tokens before their patterns get a chance to match
# (the original code ran NUM first, so 'IP'/'UUID' could never appear).
_UUID_RE = re.compile(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}')
_IP_RE = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
_NUM_RE = re.compile(r'\d+')


@tool
def analyze_log_file(file_path: str, analysis_type: str = "error_patterns") -> Dict[str, Any]:
    """
    Analyze log files for common sysadmin debugging patterns.

    Args:
        file_path: Path to the log file (relative to loghub directory)
        analysis_type: Type of analysis - "error_patterns", "frequency", "timeline", or "summary"

    Returns:
        Dictionary with analysis results, or an {"error": ...} dictionary on failure.
    """
    try:
        # Relative paths are resolved inside the loghub/ directory.
        if not file_path.startswith('/'):
            full_path = f"loghub/{file_path}"
        else:
            full_path = file_path

        if not os.path.exists(full_path):
            return {"error": f"File not found: {full_path}"}

        # errors='ignore': log files frequently contain stray non-UTF-8 bytes.
        with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()

        if analysis_type == "error_patterns":
            return _analyze_error_patterns(lines, file_path)
        elif analysis_type == "frequency":
            return _analyze_frequency(lines, file_path)
        elif analysis_type == "timeline":
            return _analyze_timeline(lines, file_path)
        elif analysis_type == "summary":
            return _analyze_summary(lines, file_path)
        else:
            return {"error": f"Unknown analysis type: {analysis_type}"}

    except Exception as e:
        # Tool boundary: report failures as data instead of raising into the agent loop.
        return {"error": f"Error analyzing file: {str(e)}"}


def _analyze_error_patterns(lines: List[str], file_path: str) -> Dict[str, Any]:
    """Analyze error patterns in log lines."""
    error_keywords = ['error', 'fail', 'exception', 'critical', 'fatal', 'denied', 'refused', 'timeout']

    error_lines = []
    error_counts = Counter()

    for i, line in enumerate(lines, 1):
        line_lower = line.lower()
        for keyword in error_keywords:
            if keyword in line_lower:
                error_lines.append(f"Line {i}: {line.strip()}")
                # Each line is attributed to the FIRST matching keyword only.
                error_counts[keyword] += 1
                break

    return {
        "file": file_path,
        "analysis_type": "error_patterns",
        "total_lines": len(lines),
        "error_lines_count": len(error_lines),
        "error_keywords_frequency": dict(error_counts.most_common()),
        "sample_errors": error_lines[:10],  # First 10 error lines
        "summary": f"Found {len(error_lines)} error-related lines out of {len(lines)} total lines"
    }


def _analyze_frequency(lines: List[str], file_path: str) -> Dict[str, Any]:
    """Analyze frequency patterns in logs."""
    # Mask variable values (UUIDs, IPs, numbers) so structurally identical
    # lines collapse into one pattern. Order matters: specific tokens first,
    # generic digits last (see the note next to the compiled regexes above).
    patterns = Counter()

    for line in lines:
        cleaned = _UUID_RE.sub('UUID', line)
        cleaned = _IP_RE.sub('IP', cleaned)
        cleaned = _NUM_RE.sub('NUM', cleaned)
        patterns[cleaned.strip()] += 1

    return {
        "file": file_path,
        "analysis_type": "frequency",
        "total_lines": len(lines),
        "unique_patterns": len(patterns),
        "most_common_patterns": [{"pattern": p, "count": c} for p, c in patterns.most_common(10)],
        "summary": f"Found {len(patterns)} unique patterns in {len(lines)} lines"
    }


def _analyze_timeline(lines: List[str], file_path: str) -> Dict[str, Any]:
    """Analyze timeline patterns in logs."""
    timestamps = []

    # Try to extract timestamps (simplified for demo)
    timestamp_patterns = [
        r'(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})',  # Jun 14 15:16:01
        r'(\[\w{3}\s+\w{3}\s+\d{2}\s+\d{2}:\d{2}:\d{2}\s+\d{4}\])',  # [Sun Dec 04 04:47:44 2005]
    ]

    for line in lines[:100]:  # Sample first 100 lines for demo
        for pattern in timestamp_patterns:
            match = re.search(pattern, line)
            if match:
                timestamps.append(match.group(1))
                break

    return {
        "file": file_path,
        "analysis_type": "timeline",
        "total_lines": len(lines),
        "timestamps_found": len(timestamps),
        "sample_timestamps": timestamps[:10],
        "summary": f"Extracted {len(timestamps)} timestamps from first 100 lines"
    }


def _analyze_summary(lines: List[str], file_path: str) -> Dict[str, Any]:
    """Provide a general summary of the log file."""
    total_lines = len(lines)

    # Basic statistics (guard against division by zero on empty files).
    avg_line_length = sum(len(line) for line in lines) / total_lines if total_lines > 0 else 0
    empty_lines = sum(1 for line in lines if not line.strip())

    # Sample content: first five non-empty lines.
    sample_lines = [line.strip() for line in lines[:5] if line.strip()]

    return {
        "file": file_path,
        "analysis_type": "summary",
        "total_lines": total_lines,
        "empty_lines": empty_lines,
        "average_line_length": round(avg_line_length, 2),
        "sample_content": sample_lines,
        "summary": f"Log file with {total_lines} lines, average length {avg_line_length:.1f} characters"
    }
"""Interactive CLI around a single ReAct agent with shell + log-analysis tools."""

import os

from langchain.chat_models import init_chat_model
from langchain_community.tools.shell.tool import ShellTool
# AIMessage hoisted to module level (it was re-imported inside
# stream_agent_updates on every turn); HumanMessage was already here.
from langchain_core.messages import AIMessage, HumanMessage
from langgraph.prebuilt import create_react_agent

from log_analyzer import analyze_log_file


def create_agent():
    """Create and return a ReAct agent with shell and log analysis capabilities."""

    # Initialize the chat model (using OpenAI GPT-4)
    # Make sure you have set your OPENAI_API_KEY environment variable
    llm = init_chat_model("openai:gpt-4o-mini")

    # Define the tools available to the agent
    shell_tool = ShellTool()
    tools = [shell_tool, analyze_log_file]

    # Create a ReAct agent with system prompt
    system_prompt = """You are a helpful assistant with access to shell commands and log analysis capabilities.

You can:
1. Execute shell commands using the shell tool to interact with the system
2. Analyze log files using the analyze_log_file tool to help with debugging and system administration tasks

The log analyzer can process files in the loghub directory with different analysis types:
- "error_patterns": Find and categorize error messages
- "frequency": Analyze frequency of different log patterns
- "timeline": Show chronological patterns of events
- "summary": Provide an overall summary of the log file

When helping users:
- Be thorough in your analysis
- Explain what you're doing and why
- Use appropriate tools based on the user's request
- If analyzing logs, suggest which analysis type might be most helpful
- Always be cautious with shell commands and explain what they do

Available log files are in the loghub directory with subdirectories for different systems like:
Android, Apache, BGL, Hadoop, HDFS, HealthApp, HPC, Linux, Mac, OpenSSH, OpenStack, Proxifier, Spark, Thunderbird, Windows, Zookeeper
"""

    # Create the ReAct agent
    agent = create_react_agent(
        llm,
        tools,
        prompt=system_prompt
    )

    return agent


def stream_agent_updates(agent, user_input: str, conversation_history: list):
    """Stream agent updates for a user input with conversation history.

    Mutates conversation_history in place: appends the user's HumanMessage,
    and (when the agent produces a final answer) the matching AIMessage.
    """
    # Create a human message
    message = HumanMessage(content=user_input)

    # Add the new message to conversation history
    conversation_history.append(message)

    print("\nAgent: ", end="", flush=True)

    # Use the agent's stream method to get real-time updates with full conversation
    final_response = ""
    tool_calls_made = False

    for event in agent.stream({"messages": conversation_history}, stream_mode="updates"):
        for node_name, node_output in event.items():
            if node_name == "agent" and "messages" in node_output:
                last_message = node_output["messages"][-1]

                # Check if this is a tool call
                if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
                    tool_calls_made = True
                    for tool_call in last_message.tool_calls:
                        print(f"\nšŸ”§ Using tool: {tool_call['name']}")
                        if tool_call.get('args'):
                            print(f"   Args: {tool_call['args']}")

                # Check if this is the final response (no tool calls)
                elif hasattr(last_message, 'content') and last_message.content and not getattr(last_message, 'tool_calls', None):
                    final_response = last_message.content

            elif node_name == "tools" and "messages" in node_output:
                # Show tool results (truncated to 200 chars to keep output readable)
                for msg in node_output["messages"]:
                    if hasattr(msg, 'content'):
                        print(f"\nšŸ“‹ Tool result: {msg.content[:200]}{'...' if len(msg.content) > 200 else ''}")

    # Print the final response
    if final_response:
        if tool_calls_made:
            print(f"\n\n{final_response}")
        else:
            print(final_response)
        # Add the agent's response to conversation history
        conversation_history.append(AIMessage(content=final_response))
    else:
        print("No response generated.")

    print()  # Add newline


def visualize_agent(agent):
    """Display the agent's graph structure."""
    try:
        print("\nšŸ“Š Agent Graph Structure:")
        print("=" * 40)
        # Get the graph and display its structure
        graph = agent.get_graph()

        # Print nodes
        print("Nodes:")
        for node_id in graph.nodes:
            print(f"  - {node_id}")

        # Print edges
        print("\nEdges:")
        for edge in graph.edges:
            print(f"  - {edge}")

        print("=" * 40)
        print("This agent follows the ReAct (Reasoning and Acting) pattern:")
        print("1. Receives user input")
        print("2. Reasons about what tools to use")
        print("3. Executes tools when needed")
        print("4. Provides final response")
        print("=" * 40)

    except Exception as e:
        # Visualization is best-effort; never let it kill the session.
        print(f"Could not visualize agent: {e}")


def main():
    """Entry point: initialize the agent and run the interactive chat loop."""
    # Check if required API keys are set
    if not os.getenv("OPENAI_API_KEY"):
        print("Please set your OPENAI_API_KEY environment variable.")
        print("You can set it by running: export OPENAI_API_KEY='your-api-key-here'")
        return

    print("šŸ¤– LangGraph Log Analysis Agent")
    print("Type 'quit', 'exit', or 'q' to exit the chat.")
    print("Type 'help' or 'h' for help and examples.")
    print("Type 'graph' to see the agent structure.")
    print("Type 'clear' or 'reset' to clear conversation history.")
    print("āš ļø  WARNING: This agent has shell access - use with caution!")
    print("šŸ“Š Available log analysis capabilities:")
    print("   - Analyze log files in the loghub directory")
    print("   - Execute shell commands for system administration")
    print("   - Help with debugging and troubleshooting")
    print("-" * 60)

    # Create the agent
    try:
        agent = create_agent()
        print("āœ… Log Analysis Agent initialized successfully!")
        print("šŸ’” Try asking: 'Analyze the Apache logs for error patterns'")
        print("šŸ’” Or: 'List the available log files in the loghub directory'")

        # Show agent structure
        visualize_agent(agent)

    except Exception as e:
        print(f"āŒ Error initializing agent: {e}")
        return

    # Start the chat loop
    conversation_history = []  # Initialize conversation history

    while True:
        try:
            user_input = input("\nUser: ")
            if user_input.lower() in ["quit", "exit", "q"]:
                print("šŸ‘‹ Goodbye!")
                break
            elif user_input.lower() in ["help", "h"]:
                print("\nšŸ†˜ Help:")
                print("Commands:")
                print("  - quit/exit/q: Exit the agent")
                print("  - help/h: Show this help")
                print("  - graph: Show agent structure")
                print("\nExample queries:")
                print("  - 'Analyze the Apache logs for error patterns'")
                print("  - 'Show me a summary of the HDFS logs'")
                print("  - 'List all available log files'")
                print("  - 'Find error patterns in Linux logs'")
                print("  - 'Check disk usage on the system'")
                print("  - 'clear': Clear conversation history")
                continue
            elif user_input.lower() in ["graph", "structure"]:
                visualize_agent(agent)
                continue
            elif user_input.lower() in ["clear", "reset"]:
                conversation_history = []
                print("šŸ—‘ļø  Conversation history cleared!")
                continue

            if user_input.strip():
                stream_agent_updates(agent, user_input, conversation_history)
            else:
                print("Please enter a message.")

        except KeyboardInterrupt:
            print("\nšŸ‘‹ Goodbye!")
            break
        except Exception as e:
            # Top-level boundary: report and keep the REPL alive.
            print(f"āŒ Error: {e}")


if __name__ == "__main__":
    main()