# File listing metadata: 217 lines, 9.6 KiB, Python
import os
|
|
import warnings
|
|
import readline # Enable arrow key support and command history in input()
|
|
from langchain.chat_models import init_chat_model
|
|
from langchain_community.tools.shell.tool import ShellTool
|
|
from langgraph.prebuilt import create_react_agent
|
|
from langchain_core.messages import HumanMessage
|
|
from custom_tools import configured_remote_server
|
|
|
|
# The shell and SSH tools are used on purpose for sysadmin work, so their
# "no safeguards" warnings are silenced up front.
warnings.filterwarnings(
    action="ignore",
    message="The shell tool has no safeguards by default. Use at your own risk.",
)
warnings.filterwarnings(
    action="ignore",
    message="The SSH tool has no safeguards by default. Use at your own risk.",
)
|
|
|
|
|
|
# System prompt handed to the ReAct agent. It names the two tools the agent is
# given ("terminal" and "configured_remote_server").
# NOTE(review): "terminal" is presumably the registered name of ShellTool -
# confirm against the installed langchain_community version.
_SYSADMIN_SYSTEM_PROMPT = """You are an expert system administrator debugging agent with deep knowledge of Linux, macOS, BSD, and Windows systems.

## PRIMARY MISSION
Help sysadmins diagnose, troubleshoot, and resolve system issues efficiently. You have access to both local shell commands and remote SSH access to execute diagnostic procedures on multiple systems.

## CORE CAPABILITIES
1. **Local System Analysis**: Execute shell commands on the local machine (terminal tool)
2. **Remote System Analysis**: Execute commands on remote servers via SSH (configured_remote_server)
3. **OS Detection**: Automatically detect the operating system and adapt commands accordingly
4. **Issue Diagnosis**: Analyze symptoms and systematically investigate root causes
5. **Problem Resolution**: Provide solutions and execute fixes when safe to do so

## AVAILABLE TOOLS
- **terminal**: Execute commands on the local machine
- **configured_remote_server**: Execute commands on the pre-configured remote server

## OPERATING SYSTEM AWARENESS
- **First interaction**: Always detect the OS using appropriate commands (uname, systeminfo, etc.)
- **Command adaptation**: Use OS-specific commands and syntax
- **Cross-platform knowledge**: Understand differences between Linux/Unix, macOS, BSD, and Windows
- **Session memory**: Remember the detected OS throughout the conversation

## SAFETY PROTOCOLS
1. **Read-only first**: Always start with non-destructive diagnostic commands
2. **Explain before executing**: Describe what each command does and why it's needed
3. **Confirmation for risky commands**: Ask for explicit permission before running potentially harmful commands
4. **Dangerous command examples**: rm, mkfs, dd, shutdown, reboot, chmod 777, deleting system files
5. **Safe command examples**: ps, top, df, free, netstat, ss, journalctl, tail, cat, ls

## DIAGNOSTIC WORKFLOWS
### Performance Issues
1. Check system resources (CPU, memory, disk, network)
2. Identify resource-hungry processes
3. Analyze system load and bottlenecks
4. Check for hardware issues

### Service Issues
1. Check service status and logs
2. Verify dependencies and prerequisites
3. Test connectivity and permissions
4. Analyze configuration files

### Network Issues
1. Test basic connectivity (ping, traceroute)
2. Check DNS resolution
3. Verify network interfaces and routing
4. Analyze firewall rules and port accessibility

### System Stability
1. Check system logs for errors
2. Verify disk space and filesystem health
3. Monitor system temperature and hardware status
4. Check for memory leaks or corruption

## RESPONSE METHODOLOGY
1. **Listen carefully**: Understand the user's problem description
2. **Probe systematically**: Ask clarifying questions if needed
3. **Detect environment**: Identify OS and system characteristics
4. **Execute diagnostics**: Run appropriate commands with explanations
5. **Interpret results**: Analyze command outputs and explain findings
6. **Provide solutions**: Suggest fixes and implement them safely
7. **Follow up**: Verify fixes worked and suggest preventive measures

## COMMUNICATION STYLE
- Be professional but friendly
- Explain technical concepts clearly
- Always explain what commands do before running them
- Provide context for why specific diagnostics are needed
- Offer multiple solutions when possible
- Be patient with follow-up questions

## COMMAND EXECUTION GUIDELINES
- Use appropriate flags to avoid hanging (e.g., 'top -n 1', 'ps aux')
- Pipe long outputs through 'head' or 'tail' when appropriate
- Use 'timeout' command for potentially long-running diagnostics
- Always explain the output interpretation
- Suggest next steps based on findings"""


def create_agent(model: str = "openai:gpt-4.1"):
    """Create and return a ReAct agent specialized for system administration and debugging.

    Args:
        model: Model identifier passed straight to ``init_chat_model``.
            Defaults to ``"openai:gpt-4.1"``, the value this function always
            used before the parameter existed. With the default OpenAI model,
            the ``OPENAI_API_KEY`` environment variable must be set.

    Returns:
        The agent object built by ``create_react_agent`` from the chat model,
        a local ``ShellTool`` plus ``configured_remote_server``, and the
        sysadmin system prompt.
    """
    llm = init_chat_model(model)

    # Tools available to the agent: local shell access and the
    # pre-configured remote server tool imported from custom_tools.
    tools = [ShellTool(), configured_remote_server]

    return create_react_agent(llm, tools, prompt=_SYSADMIN_SYSTEM_PROMPT)
|
|
|
|
|
|
def run_agent_query(agent, user_input: str, conversation_history: list):
    """Run one agent query, print each streamed step, and record the reply.

    The user's text is wrapped in a ``HumanMessage`` and appended to
    ``conversation_history`` (mutated in place). The agent is then streamed
    with ``stream_mode="values"``, and the last message of every streamed
    state is pretty-printed. After the stream ends, the last message of the
    final state is appended to ``conversation_history``.

    Args:
        agent: Object exposing ``stream(inputs, stream_mode=...)`` that yields
            states containing a ``"messages"`` list.
        user_input: The raw text typed by the user.
        conversation_history: Running list of messages; extended in place.

    Returns:
        None.
    """
    conversation_history.append(HumanMessage(content=user_input))

    # Track the last streamed state explicitly: reading the loop variable
    # after the loop raises UnboundLocalError when the stream yields nothing.
    final_state = None
    for state in agent.stream({"messages": conversation_history}, stream_mode="values"):
        state["messages"][-1].pretty_print()
        final_state = state

    # Only the last message of the final state is stored in the history.
    if final_state and "messages" in final_state:
        conversation_history.append(final_state["messages"][-1])
|
|
|
|
|
|
|
|
def _print_banner():
    """Print the startup banner and the list of capabilities."""
    print("🔧 SysAdmin Debugging Agent - Powered by LangGraph")
    print("Type 'quit', 'exit', or 'q' to exit the chat.")
    print("Type 'help' or 'h' for help and examples.")
    print("Type 'clear' or 'reset' to clear conversation history.")
    print("⚠️ WARNING: This agent has local shell and remote SSH access - use with caution!")
    print("🛠️ System Administration Capabilities:")
    print(" - Local system diagnostics via shell commands")
    print(" - Remote server management via SSH connections")
    print(" - Diagnose performance issues (CPU, memory, disk, network)")
    print(" - Troubleshoot service and daemon problems")
    print(" - Analyze system logs and error messages")
    print(" - Network connectivity diagnostics")
    print(" - Cross-platform support (Linux, macOS, BSD, Windows)")
    print("-" * 70)


def _print_help():
    """Print the in-chat help text: commands, example prompts, safety notes."""
    print("\n🆘 Help - SysAdmin Debugging Agent:")
    print("Commands:")
    print(" - quit/exit/q: Exit the agent")
    print(" - help/h: Show this help")
    print(" - clear/reset: Clear conversation history")
    print("\nSystem Debugging Examples:")
    print(" - 'My server is running slow, help me diagnose the issue'")
    print(" - 'Check why my Apache/Nginx service won't start'")
    print(" - 'Connect to my remote server and check system load'")
    print(" - 'Analyze high CPU usage on this system'")
    print(" - 'Check disk space on my remote server via SSH'")
    print(" - 'Troubleshoot network connectivity problems'")
    print(" - 'Check disk space and filesystem health'")
    print(" - 'Review recent system errors in logs'")
    print("\nSafety Notes:")
    print(" - Agent will ask permission before running potentially harmful commands")
    print(" - All commands are explained before execution")
    print(" - Diagnostic commands are prioritized over destructive ones")


def main():
    """Run the interactive SysAdmin Debugging Agent chat loop.

    Returns early (after printing instructions) when ``OPENAI_API_KEY`` is not
    set, or when ``create_agent()`` raises. Otherwise reads user input in a
    loop and handles the built-in commands (quit/exit/q, help/h, clear/reset);
    every other non-empty input is forwarded to ``run_agent_query`` together
    with the running conversation history.

    The loop ends on a quit command, Ctrl-C (``KeyboardInterrupt``), or
    end-of-input (``EOFError``). Any other exception raised while handling a
    turn is printed and the loop continues.

    Returns:
        None.
    """
    # Check if required API keys are set
    if not os.getenv("OPENAI_API_KEY"):
        print("Please set your OPENAI_API_KEY environment variable.")
        print("You can set it by running: export OPENAI_API_KEY='your-api-key-here'")
        return

    _print_banner()

    # Create the agent
    try:
        agent = create_agent()
    except Exception as e:
        print(f"❌ Error initializing agent: {e}")
        return

    print("✅ SysAdmin Debugging Agent initialized successfully!")
    print("💡 Try asking: 'My system is running slow, can you help?'")
    print("💡 Or: 'Check if my web server is running properly'")
    print("💡 Or: 'Connect to my remote server and check disk space'")
    print("💡 Or: 'Analyze recent system errors'")

    # Start the chat loop
    conversation_history = []

    while True:
        try:
            user_input = input("\nUser: ")
            # Match command words regardless of surrounding whitespace or case.
            command = user_input.strip().lower()

            if command in ("quit", "exit", "q"):
                print("👋 Goodbye!")
                break
            if command in ("help", "h"):
                _print_help()
                continue
            if command in ("clear", "reset"):
                conversation_history = []
                print("🗑️ Conversation history cleared!")
                continue
            if not command:
                print("Please enter a message.")
                continue

            run_agent_query(agent, user_input, conversation_history)

        except (KeyboardInterrupt, EOFError):
            # EOFError (Ctrl-D or exhausted piped stdin) must end the loop:
            # input() would keep raising it, and the generic handler below
            # would otherwise print errors forever.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print(f"❌ Error: {e}")
|
|
|
|
|
|
# Start the interactive chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()