Testing Patterns
Comprehensive testing strategies for SpoonOS agents.
Testing Pyramid
```
      /\
     /  \        E2E Tests (few)
    /----\
   /      \      Integration Tests (some)
  /--------\
 /          \    Unit Tests (many)
/------------\
```
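One way to mirror the pyramid on disk is a layered `tests/` directory; the `unit/` and `integration/` paths below are the ones referenced by the commands in Running Tests, while the `e2e/` layer is an optional suggestion:

```
tests/
├── unit/          # fast, fully mocked (most tests live here)
├── integration/   # real LLM and tool calls, gated on API keys
└── e2e/           # a few full agent scenarios
```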
Unit Testing Tools
Testing a Custom Tool
```python
# tests/test_tools.py
import pytest
from unittest.mock import AsyncMock, patch

from my_agent.tools import PriceTool, WalletTool


@pytest.mark.asyncio
async def test_price_tool_success():
    """Test successful price fetch."""
    tool = PriceTool()

    # Mock the HTTP response
    with patch('aiohttp.ClientSession.get') as mock_get:
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value={
            "bitcoin": {"usd": 50000, "usd_24h_change": 2.5}
        })
        mock_get.return_value.__aenter__.return_value = mock_response

        result = await tool.execute(coin_id="bitcoin", currency="usd")

    assert "50,000" in result
    assert "2.50%" in result


@pytest.mark.asyncio
async def test_price_tool_not_found():
    """Test handling of unknown coin."""
    tool = PriceTool()

    with patch('aiohttp.ClientSession.get') as mock_get:
        mock_response = AsyncMock()
        mock_response.status = 200
        mock_response.json = AsyncMock(return_value={})
        mock_get.return_value.__aenter__.return_value = mock_response

        result = await tool.execute(coin_id="unknown_coin")

    assert "not found" in result.lower()


@pytest.mark.asyncio
async def test_wallet_tool_validation():
    """Test address validation."""
    tool = WalletTool()

    # Invalid address
    result = await tool.execute(address="invalid", chain="ethereum")
    assert "Error" in result

    # Valid address format
    result = await tool.execute(
        address="0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb7",
        chain="ethereum"
    )
    # Should not report a validation error
    assert "Invalid" not in result and "validation" not in result.lower()
```
Testing Tool Parameters
```python
def test_tool_parameters_schema():
    """Verify tool parameter schema."""
    tool = PriceTool()
    params = tool.parameters

    assert params["type"] == "object"
    assert "coin_id" in params["properties"]
    assert "required" in params
    assert "coin_id" in params["required"]


def test_tool_to_param():
    """Test OpenAI-compatible conversion."""
    tool = PriceTool()
    param = tool.to_param()

    assert param["type"] == "function"
    assert param["function"]["name"] == "get_price"
    assert "description" in param["function"]
```
Mocking LLM Responses
Mock ChatBot
```python
# tests/conftest.py
import pytest
from unittest.mock import AsyncMock, MagicMock

from spoon_ai.chat import ChatBot


@pytest.fixture
def mock_chatbot():
    """Create a mock ChatBot that returns predefined responses."""
    chatbot = MagicMock(spec=ChatBot)

    async def mock_chat(messages, tools=None, **kwargs):
        # Return a mock response based on the input
        last_message = messages[-1]["content"]

        if "price" in last_message.lower():
            return MagicMock(
                content="I'll check the price for you.",
                tool_calls=[{
                    "id": "call_123",
                    "function": {
                        "name": "get_price",
                        "arguments": '{"coin_id": "bitcoin"}'
                    }
                }]
            )
        else:
            return MagicMock(
                content="I don't understand that request.",
                tool_calls=None
            )

    chatbot.chat = AsyncMock(side_effect=mock_chat)
    return chatbot


@pytest.fixture
def mock_tool_response():
    """Fixture for mocking tool execution."""
    async def _mock_response(tool_name, expected_result):
        tool = MagicMock()
        tool.execute = AsyncMock(return_value=expected_result)
        return tool

    return _mock_response
```
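The `mock_tool_response` fixture above isn't exercised in the later examples; a minimal usage sketch, usable from any test module (the tool name and canned result are illustrative):

```python
@pytest.mark.asyncio
async def test_with_mocked_tool(mock_tool_response):
    """Build a stand-in tool that always returns a fixed result."""
    price_tool = await mock_tool_response("get_price", "BTC: $50,000 USD")

    result = await price_tool.execute(coin_id="bitcoin")

    assert result == "BTC: $50,000 USD"
    price_tool.execute.assert_awaited_once()
```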
Using Mocks in Tests
```python
# tests/test_agent.py
import pytest
from unittest.mock import AsyncMock, MagicMock

from my_agent import TradingAgent


@pytest.mark.asyncio
async def test_agent_price_query(mock_chatbot):
    """Test agent handles price queries."""
    agent = TradingAgent()
    agent.llm = mock_chatbot

    # Mock tool execution
    agent.tools["get_price"].execute = AsyncMock(
        return_value="BTC: $50,000 USD"
    )

    result = await agent.run("What's the Bitcoin price?")

    # Verify the tool was called
    agent.tools["get_price"].execute.assert_called_once()
    assert "50,000" in result


@pytest.mark.asyncio
async def test_agent_max_steps():
    """Test agent respects max_steps limit."""
    agent = TradingAgent()
    agent.max_steps = 2

    # Mock LLM to always request tool calls
    agent.llm.chat = AsyncMock(return_value=MagicMock(
        content="",
        tool_calls=[{"id": "1", "function": {"name": "get_price", "arguments": "{}"}}]
    ))

    await agent.run("Complex query requiring many steps")

    assert agent.current_step <= agent.max_steps
```
Integration Testing
Testing Agent with Real Tools
```python
# tests/integration/test_agent_integration.py
import os

import pytest

# Skip if no API keys are configured
pytestmark = pytest.mark.skipif(
    not os.getenv("OPENAI_API_KEY"),
    reason="Requires OPENAI_API_KEY"
)


@pytest.mark.asyncio
async def test_agent_full_flow():
    """Integration test with a real LLM."""
    from my_agent import ResearchAgent

    agent = ResearchAgent()
    await agent.initialize()

    result = await agent.run("What is 2 + 2?")
    assert "4" in result


@pytest.mark.asyncio
async def test_agent_tool_execution():
    """Test agent executes tools correctly."""
    from my_agent import TradingAgent
    from my_agent.tools import CalculatorTool

    agent = TradingAgent()
    agent.tools.add_tool(CalculatorTool())

    result = await agent.run("Calculate 100 * 5")
    assert "500" in result
```
Testing with Fixtures
```python
# tests/integration/conftest.py
import pytest

from spoon_ai.agents import SpoonReactMCP
from spoon_ai.chat import ChatBot
from spoon_ai.tools import ToolManager


@pytest.fixture(scope="session")
def real_agent():
    """Create a real agent for integration tests."""
    return SpoonReactMCP(
        name="test_agent",
        llm=ChatBot(model_name="gpt-4o-mini"),  # Use a cheaper model
        tools=ToolManager([]),
        max_steps=5
    )


@pytest.fixture
def isolated_agent(real_agent):
    """Get the agent with fresh state."""
    real_agent.reset()
    return real_agent
```
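A test consuming `isolated_agent` might look like the sketch below; the prompt, the file name, and the assumption that `run()` returns a plain string are illustrative:

```python
# tests/integration/test_agent_state.py  (illustrative file name)
import pytest


@pytest.mark.asyncio
@pytest.mark.integration
async def test_isolated_agent_responds(isolated_agent):
    """Each test starts from a reset agent, so no history leaks between tests."""
    result = await isolated_agent.run("Reply with exactly the word: pong")
    assert "pong" in result.lower()
```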
Testing MCP Tools
Mock MCP Server
```python
# tests/test_mcp_tools.py
import pytest
from unittest.mock import AsyncMock, patch

from spoon_ai.tools.mcp_tool import MCPTool


@pytest.fixture
def mock_mcp_client():
    """Mock MCP client for testing."""
    with patch('spoon_ai.tools.mcp_tool.MCPClient') as mock:
        client_instance = AsyncMock()
        client_instance.list_tools = AsyncMock(return_value=[
            {"name": "search", "description": "Search the web"}
        ])
        client_instance.call_tool = AsyncMock(return_value={
            "content": [{"type": "text", "text": "Search results..."}]
        })
        mock.return_value = client_instance
        yield client_instance


@pytest.mark.asyncio
async def test_mcp_tool_execution(mock_mcp_client):
    """Test MCP tool execution."""
    tool = MCPTool(
        name="test-mcp",
        mcp_config={"command": "npx", "args": ["-y", "test-mcp"]}
    )

    result = await tool.execute(query="test query")

    mock_mcp_client.call_tool.assert_called_once()
    assert "Search results" in result
```
Testing StateGraph Workflows
Graph Unit Tests
```python
# tests/test_graph.py
import pytest
from typing import TypedDict

from spoon_ai.graph import StateGraph, END


# Not named Test* so pytest doesn't try to collect it as a test class
class GraphState(TypedDict):
    value: int
    processed: bool


def increment(state: GraphState) -> dict:
    return {"value": state["value"] + 1}


def double(state: GraphState) -> dict:
    return {"value": state["value"] * 2}


def mark_processed(state: GraphState) -> dict:
    return {"processed": True}


@pytest.fixture
def simple_graph():
    """Create a simple test graph."""
    graph = StateGraph(GraphState)
    graph.add_node("increment", increment)
    graph.add_node("double", double)
    graph.add_node("mark", mark_processed)

    graph.set_entry_point("increment")
    graph.add_edge("increment", "double")
    graph.add_edge("double", "mark")
    graph.add_edge("mark", END)

    return graph.compile()


@pytest.mark.asyncio
async def test_graph_execution(simple_graph):
    """Test graph executes all nodes."""
    result = await simple_graph.invoke({
        "value": 5,
        "processed": False
    })

    # 5 + 1 = 6, 6 * 2 = 12
    assert result["value"] == 12
    assert result["processed"] is True


@pytest.mark.asyncio
async def test_graph_streaming(simple_graph):
    """Test graph streaming output."""
    outputs = []
    async for event in simple_graph.astream({
        "value": 1,
        "processed": False
    }):
        outputs.append(event)

    assert len(outputs) == 3  # One per node
```
Testing Conditional Edges
```python
def should_continue(state: GraphState) -> str:
    if state["value"] > 100:
        return "end"
    return "continue"


@pytest.fixture
def conditional_graph():
    """Graph with conditional routing."""
    graph = StateGraph(GraphState)
    graph.add_node("double", double)
    graph.add_node("finish", mark_processed)

    graph.set_entry_point("double")
    graph.add_conditional_edges(
        "double",
        should_continue,
        {"continue": "double", "end": "finish"}
    )
    graph.add_edge("finish", END)

    return graph.compile()


@pytest.mark.asyncio
async def test_conditional_loop(conditional_graph):
    """Test conditional edge routing."""
    result = await conditional_graph.invoke({
        "value": 10,
        "processed": False
    })

    # 10 -> 20 -> 40 -> 80 -> 160 (> 100, stop)
    assert result["value"] == 160
    assert result["processed"] is True
```
Debugging Patterns
Debug Logging
```python
# debug_utils.py
import logging
import json
from functools import wraps

logger = logging.getLogger(__name__)


def log_tool_call(func):
    """Decorator to log tool calls."""
    @wraps(func)
    async def wrapper(*args, **kwargs):
        logger.debug(f"Tool call: {func.__name__}")
        logger.debug(f"Args: {args}, Kwargs: {kwargs}")
        try:
            result = await func(*args, **kwargs)
            logger.debug(f"Result: {str(result)[:200]}...")
            return result
        except Exception as e:
            logger.error(f"Tool error: {e}")
            raise
    return wrapper


def log_agent_step(agent, step_num, action, result):
    """Log an agent execution step."""
    logger.info(json.dumps({
        "agent": agent.name,
        "step": step_num,
        "action": action,
        "result_preview": str(result)[:100]
    }))
```
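One way to attach `log_tool_call` without editing the tool itself is to wrap an instance's `execute` method for a debugging session; reassigning `execute` mirrors how the tests above patch it (`PriceTool` is the tool from the unit-test examples):

```python
from debug_utils import log_tool_call
from my_agent.tools import PriceTool

tool = PriceTool()
# Every call to tool.execute(...) is now logged at DEBUG level
tool.execute = log_tool_call(tool.execute)
```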
Test Debugging
```python
@pytest.mark.asyncio
async def test_with_debug_output(caplog):
    """Test with captured logs."""
    import logging
    caplog.set_level(logging.DEBUG)

    agent = TradingAgent()
    result = await agent.run("Test query")

    # Check the logs for debugging clues
    assert "Tool call" in caplog.text

    # Print all logs if the test fails
    for record in caplog.records:
        print(f"{record.levelname}: {record.message}")
```
Snapshot Testing
```python
# tests/test_snapshots.py
import json
from pathlib import Path

import pytest


@pytest.fixture
def snapshot_dir():
    # Keep snapshots next to the tests so they persist between runs
    return Path(__file__).parent / "snapshots"


def test_tool_output_snapshot(snapshot_dir):
    """Compare tool output against a stored snapshot."""
    # A consistent, mocked result standing in for real tool output
    result = {
        "symbol": "BTC",
        "price": 50000,
        "change_24h": 2.5
    }

    snapshot_file = snapshot_dir / "price_output.json"
    if snapshot_file.exists():
        expected = json.loads(snapshot_file.read_text())
        assert result == expected
    else:
        snapshot_dir.mkdir(exist_ok=True)
        snapshot_file.write_text(json.dumps(result, indent=2))
        pytest.skip("Snapshot created")
```
Test Configuration
pytest.ini
```ini
[pytest]
asyncio_mode = auto
testpaths = tests
python_files = test_*.py
python_functions = test_*
addopts = -v --tb=short
markers =
    slow: marks tests as slow
    integration: marks integration tests
```
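Markers declared in `pytest.ini` are attached to tests with `@pytest.mark`; a trivial illustrative example:

```python
import pytest


@pytest.mark.slow
@pytest.mark.integration
def test_marked_example():
    """Select with `pytest -m integration`; skip with `pytest -m "not slow"`."""
    assert True
```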
Running Tests
```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=my_agent --cov-report=html

# Run only unit tests
pytest tests/unit/

# Run integration tests
pytest tests/integration/ -m integration

# Run a specific test
pytest tests/test_tools.py::test_price_tool_success -v

# Debug mode
pytest --pdb --pdbcls=IPython.terminal.debugger:TerminalPdb
```
CI/CD Integration
GitHub Actions
```yaml
# .github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install pytest pytest-asyncio pytest-cov

      - name: Run unit tests
        run: pytest tests/unit/ --cov=my_agent

      - name: Run integration tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: pytest tests/integration/ -m integration
```
Best Practices
- Isolate Tests - Each test should be independent
- Mock External Services - Don't rely on APIs in unit tests
- Use Fixtures - Share setup code efficiently
- Test Edge Cases - Errors, timeouts, empty responses (see the timeout sketch after this list)
- Keep Tests Fast - Unit tests should run in milliseconds
- Clear Assertions - One logical assertion per test
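As an example of an edge case, the sketch below exercises a network timeout, assuming `PriceTool` (from the unit-test examples) catches the timeout and returns an error string instead of raising:

```python
import asyncio
from unittest.mock import patch

import pytest

from my_agent.tools import PriceTool


@pytest.mark.asyncio
async def test_price_tool_timeout():
    """The HTTP call times out; the tool should degrade gracefully."""
    tool = PriceTool()

    with patch('aiohttp.ClientSession.get', side_effect=asyncio.TimeoutError):
        result = await tool.execute(coin_id="bitcoin", currency="usd")

    # Assumes failures are reported as text rather than propagated as exceptions
    assert "error" in result.lower() or "timeout" in result.lower()
```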