Testing¶
Intent Kit provides comprehensive testing tools to ensure your DAGs work correctly and reliably.
Running Tests¶
```bash
# Run all tests
uv run pytest

# Run with coverage
uv run pytest --cov=intent_kit

# Run specific test file
uv run pytest tests/test_dag.py

# Run with verbose output
uv run pytest -v
```
Test Categories¶
Unit Tests¶
- Node functionality (classifiers, extractors, actions)
- DAG building and execution
- Context management
- Parameter extraction and validation
Integration Tests¶
- Complete workflow execution
- Intent routing
- Error handling and recovery
- LLM integration
Evaluation Tests¶
- Performance benchmarking
- Accuracy measurement
- Regression testing
Writing Tests¶
Example Test Structure¶
```python
import pytest
from intent_kit import DAGBuilder, run_dag
from intent_kit.core.context import DefaultContext


def test_simple_action():
    """Test basic action execution."""
    def greet(name: str, **kwargs) -> str:
        return f"Hello {name}!"

    # Build the DAG: classifier -> extractor -> action
    builder = DAGBuilder()
    builder.with_default_llm_config({
        "provider": "openrouter",
        "model": "google/gemma-2-9b-it"
    })
    builder.add_node("classifier", "classifier",
                     output_labels=["greet"],
                     description="Main classifier")
    builder.add_node("extract_name", "extractor",
                     param_schema={"name": str},
                     description="Extract name")
    builder.add_node("greet_action", "action",
                     action=greet,
                     description="Greet user")
    builder.add_edge("classifier", "extract_name", "greet")
    builder.add_edge("extract_name", "greet_action", "success")
    builder.set_entrypoints(["classifier"])
    dag = builder.build()

    # Pass the context in explicitly (keyword name assumed) so the run
    # sees any state set on it beforehand
    context = DefaultContext()
    result, context = run_dag(dag, "Hello Alice", context=context)
    assert result.data == "Hello Alice!"
```
Test Best Practices¶
- Use descriptive test names that explain what is being tested
- Test both success and failure cases
- Mock external dependencies (LLM APIs, etc.)
- Use fixtures for common setup
- Test edge cases and error conditions
Test Fixtures¶
Common DAG Fixtures¶
```python
import pytest
from intent_kit import DAGBuilder, run_dag
from intent_kit.core.context import DefaultContext


@pytest.fixture
def simple_dag():
    """Create a simple DAG for testing."""
    def greet(name: str, **kwargs) -> str:
        return f"Hello {name}!"

    builder = DAGBuilder()
    builder.add_node("classifier", "classifier",
                     output_labels=["greet"],
                     description="Main classifier",
                     llm_config={
                         "provider": "openrouter",
                         "model": "google/gemma-2-9b-it"
                     })
    builder.add_node("extract_name", "extractor",
                     param_schema={"name": str},
                     description="Extract name")
    builder.add_node("greet_action", "action",
                     action=greet,
                     description="Greet user")
    builder.add_edge("classifier", "extract_name", "greet")
    builder.add_edge("extract_name", "greet_action", "success")
    builder.set_entrypoints(["classifier"])
    return builder.build()


@pytest.fixture
def test_context():
    """Create a test context."""
    return DefaultContext()


def test_greeting_workflow(simple_dag, test_context):
    """Test the complete greeting workflow."""
    result, test_context = run_dag(simple_dag, "Hello Alice", context=test_context)
    assert result.data == "Hello Alice!"
```
Mock LLM Fixtures¶
```python
import pytest
from unittest.mock import Mock

from intent_kit import run_dag


@pytest.fixture
def mock_llm_service():
    """Mock LLM service for testing."""
    mock_service = Mock()
    mock_service.generate_text.return_value = "greet"
    return mock_service


def test_classifier_with_mock(simple_dag, test_context, mock_llm_service):
    """Test classifier with mocked LLM service."""
    # Inject the mock service into the context so no real API call is made,
    # then pass that context into the run
    test_context.set("llm_service", mock_llm_service)
    result, test_context = run_dag(simple_dag, "Hello Alice", context=test_context)
    assert result.data == "Hello Alice!"
```
Testing Different Node Types¶
Testing Classifier Nodes¶
```python
def test_classifier_node():
    """Test classifier node functionality."""
    def custom_classifier(input_text: str, **kwargs) -> str:
        if "hello" in input_text.lower():
            return "greet"
        return "unknown"

    builder = DAGBuilder()
    builder.add_node("classifier", "classifier",
                     output_labels=["greet", "unknown"],
                     description="Test classifier",
                     classification_func=custom_classifier)
    builder.add_node("greet_action", "action",
                     action=lambda **kwargs: "Hello!",
                     description="Greet action")
    builder.add_edge("classifier", "greet_action", "greet")
    builder.set_entrypoints(["classifier"])
    dag = builder.build()
    context = DefaultContext()

    # Greeting input routes to the greet action
    result, context = run_dag(dag, "Hello there", context=context)
    assert result.data == "Hello!"

    # "unknown" has no outgoing edge, so no action executes
    result, context = run_dag(dag, "Random text", context=context)
    assert result.data is None
```
Testing Extractor Nodes¶
```python
def test_extractor_node():
    """Test extractor node functionality."""
    def test_action(name: str, age: int, **kwargs) -> str:
        return f"{name} is {age} years old"

    builder = DAGBuilder()
    builder.add_node("extractor", "extractor",
                     param_schema={"name": str, "age": int},
                     description="Extract name and age",
                     output_key="extracted_params")
    builder.add_node("action", "action",
                     action=test_action,
                     description="Test action")
    builder.add_edge("extractor", "action", "success")
    builder.set_entrypoints(["extractor"])
    dag = builder.build()

    # Pre-populate the context with extracted parameters instead of
    # calling a real LLM
    context = DefaultContext()
    context.set("extracted_params", {"name": "Alice", "age": 25})
    result, context = run_dag(dag, "Test input", context=context)
    assert result.data == "Alice is 25 years old"
```
Testing Action Nodes¶
```python
def test_action_node():
    """Test action node functionality."""
    def test_action(name: str, **kwargs) -> str:
        return f"Hello {name}!"

    builder = DAGBuilder()
    builder.add_node("action", "action",
                     action=test_action,
                     description="Test action")
    builder.set_entrypoints(["action"])
    dag = builder.build()

    # Pre-populate the parameters the action expects
    context = DefaultContext()
    context.set("extracted_params", {"name": "Bob"})
    result, context = run_dag(dag, "Test input", context=context)
    assert result.data == "Hello Bob!"
```
Testing Error Conditions¶
Testing Invalid Inputs¶
```python
def test_invalid_input_handling(simple_dag, test_context):
    """Test handling of invalid inputs."""
    # Empty input should not produce a normal greeting
    result, test_context = run_dag(simple_dag, "", context=test_context)
    assert result.data is None or "error" in str(result.data).lower()

    # None input should be handled gracefully
    result, test_context = run_dag(simple_dag, None, context=test_context)
    assert result.data is None or "error" in str(result.data).lower()
```
Testing Context Errors¶
```python
def test_context_error_handling():
    """Test context error handling."""
    def failing_action(**kwargs) -> str:
        raise ValueError("Test error")

    builder = DAGBuilder()
    builder.add_node("action", "action",
                     action=failing_action,
                     description="Failing action")
    builder.set_entrypoints(["action"])
    dag = builder.build()

    # The error should be captured in the result rather than raised
    context = DefaultContext()
    result, context = run_dag(dag, "Test input", context=context)
    assert result.data is None or "error" in str(result.data).lower()
```
Integration Testing¶
Testing Complete Workflows¶
```python
def test_complete_workflow():
    """Test a complete workflow with multiple nodes."""
    def greet(name: str, **kwargs) -> str:
        return f"Hello {name}!"

    def get_weather(city: str, **kwargs) -> str:
        return f"Weather in {city} is sunny"

    # Build a DAG with two branches: greeting and weather
    builder = DAGBuilder()
    builder.add_node("classifier", "classifier",
                     output_labels=["greet", "weather"],
                     description="Main classifier")
    builder.add_node("extract_greet", "extractor",
                     param_schema={"name": str},
                     description="Extract name")
    builder.add_node("extract_weather", "extractor",
                     param_schema={"city": str},
                     description="Extract city")
    builder.add_node("greet_action", "action",
                     action=greet,
                     description="Greet action")
    builder.add_node("weather_action", "action",
                     action=get_weather,
                     description="Weather action")

    # Connect nodes
    builder.add_edge("classifier", "extract_greet", "greet")
    builder.add_edge("extract_greet", "greet_action", "success")
    builder.add_edge("classifier", "extract_weather", "weather")
    builder.add_edge("extract_weather", "weather_action", "success")
    builder.set_entrypoints(["classifier"])
    dag = builder.build()

    # Test the greeting branch
    context = DefaultContext()
    result, context = run_dag(dag, "Hello Alice", context=context)
    assert result.data == "Hello Alice!"

    # Test the weather branch
    context = DefaultContext()
    result, context = run_dag(dag, "Weather in San Francisco", context=context)
    assert result.data == "Weather in San Francisco is sunny"
```
Performance Testing¶
Use the evaluation framework for performance testing:
```python
from intent_kit.evals import run_eval, load_dataset

# Load performance test dataset
dataset = load_dataset("tests/performance_dataset.yaml")
result = run_eval(dataset, your_dag)

# Check performance metrics
print(f"Average response time: {result.avg_response_time()}ms")
print(f"Throughput: {result.throughput()} requests/second")
```
Continuous Integration¶
Tests run automatically on:

- Every pull request
- Every push to the main branch

Coverage reports are generated and tracked.
Debugging Tests¶
```bash
# Run tests with print output visible (capture disabled)
uv run pytest -s

# Run a specific test and drop into the debugger on failure
uv run pytest tests/test_dag.py::test_specific_function --pdb

# Generate an HTML coverage report
uv run pytest --cov=intent_kit --cov-report=html
```
Best Practices¶
1. Test Structure¶
- Organize tests by functionality
- Use descriptive test names
- Group related tests in classes (see the sketch below)
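As an example of class-based grouping, the sketch below collects greeting checks in one class; it reuses the `simple_dag` and `test_context` fixtures defined earlier on this page.

```python
from intent_kit import run_dag


class TestGreetingWorkflow:
    """All greeting-related checks in one place."""

    def test_greets_by_name(self, simple_dag, test_context):
        result, _ = run_dag(simple_dag, "Hello Alice", context=test_context)
        assert result.data == "Hello Alice!"

    def test_ignores_unrelated_input(self, simple_dag, test_context):
        result, _ = run_dag(simple_dag, "What time is it?", context=test_context)
        assert result.data != "Hello Alice!"
```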
2. Test Data¶
- Use realistic test data
- Test edge cases and boundary conditions
- Include both valid and invalid inputs
3. Mocking¶
- Mock external dependencies
- Use fixtures for common setup
- Test error conditions explicitly (see the sketch below)
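For error conditions, a mock whose call raises is often enough. The sketch below reuses the context-injection pattern from the mock fixture above and simulates an LLM outage with `unittest.mock`'s `side_effect`:

```python
from unittest.mock import Mock

from intent_kit import run_dag


def test_classifier_llm_failure(simple_dag, test_context):
    """Simulate an LLM outage and check it is handled gracefully."""
    failing_service = Mock()
    failing_service.generate_text.side_effect = TimeoutError("LLM unavailable")
    test_context.set("llm_service", failing_service)

    result, _ = run_dag(simple_dag, "Hello Alice", context=test_context)
    # Mirrors the error assertions used earlier on this page
    assert result.data is None or "error" in str(result.data).lower()
```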
4. Coverage¶
- Aim for high test coverage
- Focus on critical paths
- Test error handling thoroughly
5. Maintenance¶
- Keep tests up to date with code changes
- Refactor tests when needed
- Use parameterized tests for similar scenarios (see the sketch below)
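For example, `pytest.mark.parametrize` collapses similar scenarios into a single test. The sketch below assumes the `simple_dag` and `test_context` fixtures from earlier on this page.

```python
import pytest

from intent_kit import run_dag


@pytest.mark.parametrize(
    "user_input,expected",
    [
        ("Hello Alice", "Hello Alice!"),
        ("Hello Bob", "Hello Bob!"),
    ],
)
def test_greetings(simple_dag, test_context, user_input, expected):
    """One test body covers every greeting scenario listed above."""
    result, _ = run_dag(simple_dag, user_input, context=test_context)
    assert result.data == expected
```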