Gaetan Hurel 7e340a6649
use 4.1
2025-06-30 16:55:41 +02:00

217 lines
9.6 KiB
Python

import os
import warnings
import readline # Enable arrow key support and command history in input()
from langchain.chat_models import init_chat_model
from langchain_community.tools.shell.tool import ShellTool
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage
from custom_tools import configured_remote_server
# Silence the "no safeguards" warnings: shell and SSH access are used
# intentionally here for sysadmin tasks. `warnings.filterwarnings` treats
# `message` as a regular expression matched against the start of the warning text.
warnings.filterwarnings("ignore", message="The shell tool has no safeguards by default. Use at your own risk.")
# NOTE(review): presumably emitted by the SSH tool behind `configured_remote_server` -- confirm in custom_tools.
warnings.filterwarnings("ignore", message="The SSH tool has no safeguards by default. Use at your own risk.")
def create_agent(model: str = "openai:gpt-4.1"):
    """Create and return a ReAct agent specialized for system administration and debugging.

    Args:
        model: Model identifier handed to ``init_chat_model``. Defaults to
            ``"openai:gpt-4.1"``, the value that used to be hard-coded here,
            so existing zero-argument callers are unaffected.

    Returns:
        The agent built by ``create_react_agent`` from the chat model, the
        local ``ShellTool`` and the ``configured_remote_server`` tool, with
        the system-administration prompt below.
    """
    # The default model is an OpenAI one, so OPENAI_API_KEY must be set in the
    # environment before this is called.
    llm = init_chat_model(model)

    # Tools available to the agent: local shell plus the pre-configured remote server.
    tools = [ShellTool(), configured_remote_server]

    # System prompt; the tool names it mentions ("terminal",
    # "configured_remote_server") are what the model is told to call.
    system_prompt = """You are an expert system administrator debugging agent with deep knowledge of Linux, macOS, BSD, and Windows systems.
## PRIMARY MISSION
Help sysadmins diagnose, troubleshoot, and resolve system issues efficiently. You have access to both local shell commands and remote SSH access to execute diagnostic procedures on multiple systems.
## CORE CAPABILITIES
1. **Local System Analysis**: Execute shell commands on the local machine (terminal tool)
2. **Remote System Analysis**: Execute commands on remote servers via SSH (configured_remote_server)
3. **OS Detection**: Automatically detect the operating system and adapt commands accordingly
4. **Issue Diagnosis**: Analyze symptoms and systematically investigate root causes
5. **Problem Resolution**: Provide solutions and execute fixes when safe to do so
## AVAILABLE TOOLS
- **terminal**: Execute commands on the local machine
- **configured_remote_server**: Execute commands on the pre-configured remote server
## OPERATING SYSTEM AWARENESS
- **First interaction**: Always detect the OS using appropriate commands (uname, systeminfo, etc.)
- **Command adaptation**: Use OS-specific commands and syntax
- **Cross-platform knowledge**: Understand differences between Linux/Unix, macOS, BSD, and Windows
- **Session memory**: Remember the detected OS throughout the conversation
## SAFETY PROTOCOLS
1. **Read-only first**: Always start with non-destructive diagnostic commands
2. **Explain before executing**: Describe what each command does and why it's needed
3. **Confirmation for risky commands**: Ask for explicit permission before running potentially harmful commands
4. **Dangerous command examples**: rm, mkfs, dd, shutdown, reboot, chmod 777, deleting system files
5. **Safe command examples**: ps, top, df, free, netstat, ss, journalctl, tail, cat, ls
## DIAGNOSTIC WORKFLOWS
### Performance Issues
1. Check system resources (CPU, memory, disk, network)
2. Identify resource-hungry processes
3. Analyze system load and bottlenecks
4. Check for hardware issues
### Service Issues
1. Check service status and logs
2. Verify dependencies and prerequisites
3. Test connectivity and permissions
4. Analyze configuration files
### Network Issues
1. Test basic connectivity (ping, traceroute)
2. Check DNS resolution
3. Verify network interfaces and routing
4. Analyze firewall rules and port accessibility
### System Stability
1. Check system logs for errors
2. Verify disk space and filesystem health
3. Monitor system temperature and hardware status
4. Check for memory leaks or corruption
## RESPONSE METHODOLOGY
1. **Listen carefully**: Understand the user's problem description
2. **Probe systematically**: Ask clarifying questions if needed
3. **Detect environment**: Identify OS and system characteristics
4. **Execute diagnostics**: Run appropriate commands with explanations
5. **Interpret results**: Analyze command outputs and explain findings
6. **Provide solutions**: Suggest fixes and implement them safely
7. **Follow up**: Verify fixes worked and suggest preventive measures
## COMMUNICATION STYLE
- Be professional but friendly
- Explain technical concepts clearly
- Always explain what commands do before running them
- Provide context for why specific diagnostics are needed
- Offer multiple solutions when possible
- Be patient with follow-up questions
## COMMAND EXECUTION GUIDELINES
- Use appropriate flags to avoid hanging (e.g., 'top -n 1', 'ps aux')
- Pipe long outputs through 'head' or 'tail' when appropriate
- Use 'timeout' command for potentially long-running diagnostics
- Always explain the output interpretation
- Suggest next steps based on findings"""

    return create_react_agent(
        llm,
        tools,
        prompt=system_prompt,
    )
def run_agent_query(agent, user_input: str, conversation_history: list) -> None:
    """Send one user turn to the agent, print every streamed step, and record the reply.

    Args:
        agent: Object exposing ``stream(inputs, stream_mode=...)``; in this
            file, the agent returned by ``create_agent``.
        user_input: Text typed by the user for this turn.
        conversation_history: Mutable list of messages, modified in place.
            The new ``HumanMessage`` is appended before the agent runs; the
            last message of the final streamed state is appended afterwards.
    """
    conversation_history.append(HumanMessage(content=user_input))

    # Pre-bind the loop variable: if the stream yields nothing, reading it
    # after the loop would otherwise raise UnboundLocalError.
    final_state = None
    for final_state in agent.stream(
        {"messages": conversation_history}, stream_mode="values"
    ):
        # Each streamed value carries a "messages" list; show the newest entry.
        final_state["messages"][-1].pretty_print()

    # Keep only the closing message of the run so the next turn sees it.
    if final_state and "messages" in final_state:
        conversation_history.append(final_state["messages"][-1])
def _print_banner() -> None:
    """Print the startup banner: chat commands, the access warning, and capabilities."""
    print("🔧 SysAdmin Debugging Agent - Powered by LangGraph")
    print("Type 'quit', 'exit', or 'q' to exit the chat.")
    print("Type 'help' or 'h' for help and examples.")
    print("Type 'clear' or 'reset' to clear conversation history.")
    print("⚠️ WARNING: This agent has local shell and remote SSH access - use with caution!")
    print("🛠️ System Administration Capabilities:")
    print(" - Local system diagnostics via shell commands")
    print(" - Remote server management via SSH connections")
    print(" - Diagnose performance issues (CPU, memory, disk, network)")
    print(" - Troubleshoot service and daemon problems")
    print(" - Analyze system logs and error messages")
    print(" - Network connectivity diagnostics")
    print(" - Cross-platform support (Linux, macOS, BSD, Windows)")
    print("-" * 70)


def _print_help() -> None:
    """Print the in-chat help: commands, example requests, and safety notes."""
    print("\n🆘 Help - SysAdmin Debugging Agent:")
    print("Commands:")
    print(" - quit/exit/q: Exit the agent")
    print(" - help/h: Show this help")
    print(" - clear/reset: Clear conversation history")
    print("\nSystem Debugging Examples:")
    print(" - 'My server is running slow, help me diagnose the issue'")
    print(" - 'Check why my Apache/Nginx service won't start'")
    print(" - 'Connect to my remote server and check system load'")
    print(" - 'Analyze high CPU usage on this system'")
    print(" - 'Check disk space on my remote server via SSH'")
    print(" - 'Troubleshoot network connectivity problems'")
    print(" - 'Check disk space and filesystem health'")
    print(" - 'Review recent system errors in logs'")
    print("\nSafety Notes:")
    print(" - Agent will ask permission before running potentially harmful commands")
    print(" - All commands are explained before execution")
    print(" - Diagnostic commands are prioritized over destructive ones")


def main():
    """Run the interactive chat loop for the SysAdmin Debugging Agent.

    Returns early (printing setup instructions) when ``OPENAI_API_KEY`` is not
    set, or when the agent cannot be created. Otherwise reads user input until
    a quit command, Ctrl-C, or end of input (Ctrl-D / exhausted stdin).
    Any other exception raised while handling a turn is printed and the loop
    continues.
    """
    if not os.getenv("OPENAI_API_KEY"):
        print("Please set your OPENAI_API_KEY environment variable.")
        print("You can set it by running: export OPENAI_API_KEY='your-api-key-here'")
        return

    _print_banner()

    try:
        agent = create_agent()
    except Exception as e:
        print(f"❌ Error initializing agent: {e}")
        return
    else:
        print("✅ SysAdmin Debugging Agent initialized successfully!")
        print("💡 Try asking: 'My system is running slow, can you help?'")
        print("💡 Or: 'Check if my web server is running properly'")
        print("💡 Or: 'Connect to my remote server and check disk space'")
        print("💡 Or: 'Analyze recent system errors'")

    conversation_history = []
    while True:
        try:
            user_input = input("\nUser: ")
            # Match commands on the trimmed, lower-cased text so stray
            # whitespace ("quit ") is not forwarded to the model.
            command = user_input.strip().lower()

            if command in ("quit", "exit", "q"):
                print("👋 Goodbye!")
                break
            if command in ("help", "h"):
                _print_help()
                continue
            if command in ("clear", "reset"):
                conversation_history = []
                print("🗑️ Conversation history cleared!")
                continue
            if not command:
                print("Please enter a message.")
                continue

            run_agent_query(agent, user_input, conversation_history)
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session: once stdin is closed, input()
            # raises it on every call, and the generic handler below would
            # otherwise loop forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the chat alive.
            print(f"❌ Error: {e}")
# Start the interactive chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()