Utilities¶
Intent Kit provides a comprehensive set of utility modules that support core functionality, including type validation, text processing, performance monitoring, logging, and reporting.
Overview¶
The utilities module includes: - Type Coercion - Advanced type validation and conversion - Text Utils - Text processing and manipulation - Performance Utils - Performance monitoring and cost tracking - Logger - Structured logging system - Report Utils - Report generation and formatting - Typed Output - Type-safe output handling
Type Coercion¶
The type_coercion module provides robust type validation and conversion capabilities.
Basic Type Validation¶
from intent_kit.utils.type_coercion import validate_type, resolve_type
# Validate basic types
result = validate_type("42", int)
print(result) # 42
result = validate_type("3.14", float)
print(result) # 3.14
result = validate_type("true", bool)
print(result) # True
# Handle invalid types
try:
result = validate_type("not_a_number", int)
except TypeValidationError as e:
print(f"Validation error: {e}")
Complex Type Validation¶
from intent_kit.utils.type_coercion import validate_raw_content
# Validate complex structures
data = {
"name": "John",
"age": "25",
"scores": ["85", "92", "78"],
"active": "true"
}
schema = {
"name": str,
"age": int,
"scores": list,
"active": bool
}
validated_data = validate_raw_content(data, dict)
print(validated_data)
# {
# "name": "John",
# "age": 25,
# "scores": ["85", "92", "78"],
# "active": True
# }
Custom Type Validation¶
from typing import Union, List
from intent_kit.utils.type_coercion import TypeValidationError
# Define custom validation rules
def validate_email(value: str) -> str:
if "@" not in value:
raise TypeValidationError(f"Invalid email format: {value}")
return value
def validate_age(value: Union[int, str]) -> int:
age = validate_type(value, int)
if age < 0 or age > 150:
raise TypeValidationError(f"Age must be between 0 and 150: {age}")
return age
# Use custom validators
schema = {
"email": validate_email,
"age": validate_age,
"tags": List[str]
}
data = {
"email": "user@example.com",
"age": "25",
"tags": ["python", "ai", "ml"]
}
validated = validate_raw_content(data, dict)
Text Utils¶
The text_utils module provides comprehensive text processing capabilities.
Text Cleaning and Normalization¶
from intent_kit.utils.text_utils import (
clean_text, normalize_text, extract_keywords,
calculate_similarity, split_text
)
# Clean and normalize text
text = " Hello, World! How are you? "
cleaned = clean_text(text)
print(cleaned) # "Hello, World! How are you?"
normalized = normalize_text(text)
print(normalized) # "hello world how are you"
# Extract keywords
keywords = extract_keywords("Machine learning is a subset of artificial intelligence")
print(keywords) # ["machine", "learning", "subset", "artificial", "intelligence"]
Text Similarity¶
from intent_kit.utils.text_utils import calculate_similarity
# Calculate similarity between texts
text1 = "Hello, how are you?"
text2 = "Hi, how are you doing?"
text3 = "What's the weather like?"
similarity_1_2 = calculate_similarity(text1, text2)
similarity_1_3 = calculate_similarity(text1, text3)
print(f"Similarity 1-2: {similarity_1_2:.2f}") # Higher similarity
print(f"Similarity 1-3: {similarity_1_3:.2f}") # Lower similarity
Text Splitting and Chunking¶
from intent_kit.utils.text_utils import split_text, chunk_text
# Split text into sentences
text = "Hello there. How are you? I'm doing well. Thanks for asking."
sentences = split_text(text)
print(sentences)
# ["Hello there.", "How are you?", "I'm doing well.", "Thanks for asking."]
# Chunk text for processing
long_text = "This is a very long text that needs to be chunked..."
chunks = chunk_text(long_text, max_chunk_size=100, overlap=20)
print(f"Created {len(chunks)} chunks")
Advanced Text Processing¶
from intent_kit.utils.text_utils import (
extract_entities, detect_language, translate_text,
summarize_text, extract_sentiment
)
# Extract named entities
text = "John Smith works at Google in San Francisco"
entities = extract_entities(text)
print(entities) # {"PERSON": ["John Smith"], "ORG": ["Google"], "LOC": ["San Francisco"]}
# Detect language
language = detect_language("Bonjour, comment allez-vous?")
print(language) # "fr"
# Extract sentiment
sentiment = extract_sentiment("I love this product! It's amazing.")
print(sentiment) # {"positive": 0.9, "negative": 0.1}
Performance Utils¶
The perf_util module provides comprehensive performance monitoring and cost tracking.
Basic Performance Tracking¶
from intent_kit.utils.perf_util import PerfUtil
# Initialize performance utility
perf = PerfUtil()
# Track execution time
with perf.timer("data_processing"):
# Your code here
import time
time.sleep(1)
# Get timing information
timings = perf.get_timings()
print(timings) # {"data_processing": 1.002}
Cost Tracking¶
from intent_kit.utils.perf_util import PerfUtil
perf = PerfUtil()
# Track token usage and costs
perf.record_tokens(
provider="openrouter",
model="google/gemma-2-9b-it",
input_tokens=100,
output_tokens=50
)
# Get cost information
costs = perf.get_costs()
print(costs) # {"openrouter": {"google/gemma-2-9b-it": 0.0023}}
# Get total cost
total_cost = perf.get_total_cost()
print(f"Total cost: ${total_cost:.4f}")
Performance Monitoring¶
from intent_kit.utils.perf_util import PerfUtil
perf = PerfUtil()
# Monitor multiple operations
with perf.timer("classification"):
# Classification logic
pass
with perf.timer("extraction"):
# Extraction logic
pass
with perf.timer("action_execution"):
# Action execution
pass
# Get detailed performance report
report = perf.generate_report()
print(report)
# {
# "timings": {
# "classification": 0.5,
# "extraction": 1.2,
# "action_execution": 0.3
# },
# "costs": {...},
# "summary": {
# "total_time": 2.0,
# "total_cost": 0.005
# }
# }
Memory Usage Tracking¶
from intent_kit.utils.perf_util import PerfUtil
import psutil
perf = PerfUtil()
# Track memory usage
initial_memory = psutil.Process().memory_info().rss / 1024 / 1024 # MB
perf.record_memory_usage("start", initial_memory)
# After processing
final_memory = psutil.Process().memory_info().rss / 1024 / 1024
perf.record_memory_usage("end", final_memory)
memory_usage = perf.get_memory_usage()
print(f"Memory used: {memory_usage['end'] - memory_usage['start']:.2f} MB")
Logger¶
The logger module provides structured logging capabilities.
Basic Logging¶
from intent_kit.utils.logger import Logger
# Create logger instance
logger = Logger("my_component")
# Basic logging
logger.info("Component initialized")
logger.warning("Deprecated feature used")
logger.error("An error occurred", exc_info=True)
logger.debug("Debug information")
Structured Logging¶
from intent_kit.utils.logger import Logger
logger = Logger("dag_execution")
# Log with structured data
logger.info("DAG execution started", extra={
"dag_id": "booking_flow",
"user_id": "user123",
"input_length": 50
})
logger.info("Node executed", extra={
"node_id": "classifier",
"execution_time": 0.5,
"result": "weather"
})
Log Configuration¶
from intent_kit.utils.logger import Logger, configure_logging
# Configure logging
configure_logging(
level="INFO",
format="json",
output_file="logs/app.log"
)
# Create logger with specific configuration
logger = Logger("custom_logger", level="DEBUG")
Performance Logging¶
from intent_kit.utils.logger import Logger
logger = Logger("performance")
# Log performance metrics
def log_performance_metrics(operation, duration, tokens=None, cost=None):
logger.info("Performance metrics", extra={
"operation": operation,
"duration_seconds": duration,
"tokens_used": tokens,
"cost_usd": cost
})
# Usage
log_performance_metrics("llm_generation", 1.5, tokens=150, cost=0.003)
Report Utils¶
The report_utils module provides comprehensive reporting capabilities.
Basic Report Generation¶
from intent_kit.utils.report_utils import ReportGenerator
# Create report generator
reporter = ReportGenerator()
# Add data to report
reporter.add_section("execution_summary", {
"total_requests": 100,
"successful_requests": 95,
"failed_requests": 5,
"average_response_time": 1.2
})
reporter.add_section("cost_analysis", {
"total_cost": 0.25,
"cost_per_request": 0.0025,
"provider_breakdown": {
"openai": 0.15,
"anthropic": 0.10
}
})
# Generate report
report = reporter.generate_report()
print(report)
Performance Reports¶
from intent_kit.utils.report_utils import PerformanceReport
# Create performance report
perf_report = PerformanceReport()
# Add performance data
perf_report.add_execution_data({
"node_id": "classifier",
"execution_time": 0.5,
"tokens_used": 100,
"cost": 0.002
})
perf_report.add_execution_data({
"node_id": "extractor",
"execution_time": 1.2,
"tokens_used": 200,
"cost": 0.004
})
# Generate performance summary
summary = perf_report.generate_summary()
print(summary)
# {
# "total_executions": 2,
# "total_time": 1.7,
# "total_cost": 0.006,
# "average_time": 0.85,
# "node_breakdown": {...}
# }
HTML Report Generation¶
from intent_kit.utils.report_utils import HTMLReportGenerator
# Create HTML report
html_reporter = HTMLReportGenerator()
# Add sections
html_reporter.add_section("Overview", {
"title": "Execution Summary",
"content": "This report summarizes the DAG execution performance."
})
html_reporter.add_metrics("Performance Metrics", {
"Response Time": "1.2s",
"Cost": "$0.25",
"Accuracy": "95%"
})
# Generate HTML report
html_content = html_reporter.generate_html()
with open("report.html", "w") as f:
f.write(html_content)
JSON Report Export¶
from intent_kit.utils.report_utils import JSONReportExporter
# Create JSON exporter
exporter = JSONReportExporter()
# Add report data
exporter.add_data("execution_results", {
"total_requests": 100,
"success_rate": 0.95,
"average_latency": 1.2
})
# Export to JSON
json_data = exporter.export()
with open("report.json", "w") as f:
json.dump(json_data, f, indent=2)
Typed Output¶
The typed_output module provides type-safe output handling.
Basic Typed Output¶
from intent_kit.utils.typed_output import TypedOutput, OutputType
# Create typed output
output = TypedOutput(
content="Hello, world!",
output_type=OutputType.TEXT,
metadata={"confidence": 0.95}
)
# Access typed content
print(output.content) # "Hello, world!"
print(output.output_type) # OutputType.TEXT
print(output.metadata) # {"confidence": 0.95}
Structured Output¶
from intent_kit.utils.typed_output import TypedOutput, OutputType
# Create structured output
structured_output = TypedOutput(
content={
"name": "John Doe",
"age": 30,
"email": "john@example.com"
},
output_type=OutputType.JSON,
metadata={
"schema_version": "1.0",
"validation_passed": True
}
)
# Validate structured content
if structured_output.output_type == OutputType.JSON:
data = structured_output.content
print(f"Name: {data['name']}")
print(f"Age: {data['age']}")
Output Validation¶
from intent_kit.utils.typed_output import TypedOutput, OutputType, validate_output
# Create output with validation
output = TypedOutput(
content={"temperature": 25, "humidity": 60},
output_type=OutputType.JSON
)
# Validate output
is_valid = validate_output(output, {
"temperature": (int, lambda x: 0 <= x <= 50),
"humidity": (int, lambda x: 0 <= x <= 100)
})
print(f"Output is valid: {is_valid}")
Output Transformation¶
from intent_kit.utils.typed_output import TypedOutput, OutputType
# Create output
output = TypedOutput(
content="The temperature is 25°C",
output_type=OutputType.TEXT
)
# Transform output
def extract_temperature(text):
import re
match = re.search(r'(\d+)°C', text)
return int(match.group(1)) if match else None
transformed = output.transform(
extract_temperature,
OutputType.NUMBER,
metadata={"extraction_method": "regex"}
)
print(transformed.content) # 25
print(transformed.output_type) # OutputType.NUMBER
Best Practices¶
1. Type Safety¶
# Always use type validation for external data
from intent_kit.utils.type_coercion import validate_raw_content
def process_user_input(data):
schema = {
"name": str,
"age": int,
"email": str
}
try:
validated_data = validate_raw_content(data, dict)
return validated_data
except TypeValidationError as e:
logger.error(f"Validation failed: {e}")
raise
2. Performance Monitoring¶
# Use performance utilities consistently
from intent_kit.utils.perf_util import PerfUtil
perf = PerfUtil()
def expensive_operation():
with perf.timer("expensive_operation"):
# Your code here
result = complex_calculation()
# Record costs if applicable
perf.record_tokens("openrouter", "google/gemma-2-9b-it", 100, 50)
return result
3. Structured Logging¶
# Use structured logging for better observability
from intent_kit.utils.logger import Logger
logger = Logger("dag_execution")
def execute_dag(dag_id, user_input):
logger.info("DAG execution started", extra={
"dag_id": dag_id,
"input_length": len(user_input),
"timestamp": datetime.now().isoformat()
})
try:
result = dag.execute(user_input)
logger.info("DAG execution completed", extra={
"dag_id": dag_id,
"success": True,
"execution_time": result.execution_time
})
return result
except Exception as e:
logger.error("DAG execution failed", extra={
"dag_id": dag_id,
"error": str(e),
"success": False
}, exc_info=True)
raise
4. Error Handling¶
# Use utilities for robust error handling
from intent_kit.utils.type_coercion import TypeValidationError
from intent_kit.utils.logger import Logger
logger = Logger("data_processing")
def safe_data_processing(data):
try:
# Process data with type validation
validated_data = validate_raw_content(data, dict)
return process_validated_data(validated_data)
except TypeValidationError as e:
logger.warning("Data validation failed", extra={
"error": str(e),
"data_type": type(data).__name__
})
return None
except Exception as e:
logger.error("Unexpected error in data processing", exc_info=True)
raise