rust-observability
SKILL.md
Rust Observability
Production observability with tracing, metrics, and structured logging
Version Context
- tracing: 0.1.x
- tracing-subscriber: 0.3.x
- opentelemetry: 0.26.x
- metrics: 0.24.x
- metrics-exporter-prometheus: 0.17.x
When to Use This Skill
- Adding structured logging
- Instrumenting functions with traces
- Setting up OpenTelemetry
- Exposing Prometheus metrics
- Implementing health checks
- Request correlation and tracing
- Performance monitoring
Structured Logging
Basic Tracing Setup
use tracing::{info, warn, error, debug, trace};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
/// Install the global `tracing` subscriber for local development.
///
/// The filter comes from the default environment variable when it is set and
/// parses; otherwise it falls back to `info,my_app=debug`. Events are written
/// by the plain `fmt` layer.
pub fn init_tracing() {
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| "info,my_app=debug".into());
    let fmt_layer = tracing_subscriber::fmt::layer();

    tracing_subscriber::registry()
        .with(env_filter)
        .with(fmt_layer)
        .init();
}
/// Install the global `tracing` subscriber for production use.
///
/// Same filter handling as the development setup, but the fallback directive
/// is plain `info` and the `fmt` layer emits JSON.
pub fn init_json_tracing() {
    let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| "info".into());
    let json_layer = tracing_subscriber::fmt::layer().json();

    let subscriber = tracing_subscriber::registry()
        .with(env_filter)
        .with(json_layer);
    subscriber.init();
}
// Usage
fn main() {
init_tracing();
info!("Application starting");
debug!(version = env!("CARGO_PKG_VERSION"), "Debug information");
warn!(retry_count = 3, "Retry limit approaching");
error!(error = "connection failed", "Database connection error");
}
Instrumented Functions
use tracing::{instrument, info, Span};
/// Look up a user by ID inside an automatically created span.
///
/// `#[instrument]` names the span after the function and records `user_id`
/// as a field, so both events below carry it without repeating it.
#[instrument]
async fn fetch_user(user_id: &str) -> Result<User, UserError> {
    info!("Fetching user from database");

    let found = db.find_user(user_id).await?;
    info!(email = %found.email, "User fetched successfully");

    Ok(found)
}
/// Create a user inside a span with a custom name and explicit fields.
///
/// `db` and `payload` are skipped so neither is dumped into the span; only the
/// email and name are recorded up front. `user.id` is declared as an empty
/// placeholder because it is unknown until the insert succeeds. A span only
/// accepts `record` calls for fields that were declared when it was created,
/// so recording an undeclared field is silently ignored.
///
/// # Errors
///
/// Propagates any error returned by `db.users.create`.
#[instrument(
    name = "user.create",
    skip(db, payload),
    fields(
        user.email = %payload.email,
        user.name = %payload.name,
        user.id = tracing::field::Empty,
    )
)]
async fn create_user(
    db: &Database,
    payload: CreateUserRequest,
) -> Result<User, UserError> {
    let span = Span::current();
    let user = db.users.create(payload).await?;

    // Fill in the placeholder declared in `fields(...)` above.
    span.record("user.id", &user.id.to_string());

    Ok(user)
}
/// Check a username/password pair, keeping the secret out of the trace.
///
/// `skip(password)` excludes the password from the span's recorded fields;
/// `username` is still captured.
#[instrument(skip(password))]
async fn authenticate(username: &str, password: &str) -> Result<Session, AuthError> {
    let session = verify_credentials(username, password).await?;
    Ok(session)
}
OpenTelemetry Integration
Setup OpenTelemetry
use opentelemetry::{global, sdk::Resource, KeyValue};
use opentelemetry_otlp::WithExportConfig;
use tracing_subscriber::layer::SubscriberExt;
use tracing_opentelemetry::OpenTelemetryLayer;
/// Wire `tracing` to an OTLP exporter and install the global subscriber.
///
/// Steps, in order: register the trace-context propagator, build a batch
/// OTLP/tonic pipeline pointed at `http://localhost:4317` with service
/// resource attributes, then install a registry made of an env filter
/// (fallback `info`), a `fmt` layer, and the OpenTelemetry layer.
///
/// # Errors
///
/// Returns the error from `install_batch` if the pipeline cannot be built.
///
/// NOTE(review): the `opentelemetry::sdk::*` paths and the `new_pipeline()`
/// builder look like they target an older `opentelemetry` release than the
/// 0.26.x listed in this document. In later releases the SDK types
/// presumably live in the separate `opentelemetry_sdk` crate, and
/// `install_batch` may return a provider rather than a tracer — confirm
/// against the pinned versions before copying this snippet.
pub fn init_telemetry() -> Result<(), Box<dyn std::error::Error>> {
// Configure OpenTelemetry
global::set_text_map_propagator(
opentelemetry::sdk::propagation::TraceContextPropagator::new()
);
// Create OTLP exporter
// NOTE(review): the endpoint and the resource attributes below are
// hard-coded example values; adjust per deployment.
let tracer = opentelemetry_otlp::new_pipeline()
.tracing()
.with_exporter(
opentelemetry_otlp::new_exporter()
.tonic()
.with_endpoint("http://localhost:4317")
)
.with_trace_config(
opentelemetry::sdk::trace::config().with_resource(Resource::new(vec![
KeyValue::new("service.name", "my-service"),
KeyValue::new("service.version", env!("CARGO_PKG_VERSION")),
KeyValue::new("deployment.environment", "production"),
]))
)
.install_batch(opentelemetry::runtime::Tokio)?;
// Configure tracing subscriber with OpenTelemetry layer
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "info".into())
)
.with(tracing_subscriber::fmt::layer())
.with(OpenTelemetryLayer::new(tracer))
.init();
Ok(())
}
// Shutdown gracefully
pub fn shutdown_telemetry() {
global::shutdown_tracer_provider();
}
Prometheus Metrics
Initialize Metrics
use metrics::{counter, gauge, histogram, describe_counter, describe_histogram, Unit};
use metrics_exporter_prometheus::PrometheusBuilder;
/// Install the Prometheus recorder/exporter and register metric descriptions.
///
/// The exporter listens on `0.0.0.0:9090`. Descriptions are registered after
/// the recorder is installed so they attach to it.
///
/// # Errors
///
/// Returns the error from `install` if the exporter cannot be set up.
pub fn init_metrics() -> Result<(), Box<dyn std::error::Error>> {
    let listen_addr = std::net::SocketAddr::from(([0, 0, 0, 0], 9090));
    PrometheusBuilder::new()
        .with_http_listener(listen_addr)
        .install()?;

    // HTTP-level metrics.
    describe_counter!("http_requests_total", Unit::Count, "Total HTTP requests");
    describe_histogram!(
        "http_request_duration_seconds",
        Unit::Seconds,
        "HTTP request duration"
    );

    // Business-level metrics.
    describe_counter!("users_created_total", Unit::Count, "Total users created");

    Ok(())
}
Usage in Handlers
/// `POST /users` handler that records request, latency and business metrics.
///
/// Both `state` and `payload` are skipped from the span: the request body may
/// contain personal data and must not be dumped into trace fields.
///
/// The latency histogram is recorded for failed requests as well as
/// successful ones, labelled `status = "200"` or `status = "error"`, so that
/// slow failures remain visible. `users_created_total` is only incremented on
/// success.
///
/// # Errors
///
/// Propagates the error returned by `user_service.create_user`.
#[instrument(skip(state, payload))]
async fn create_user(
    State(state): State<Arc<AppState>>,
    Json(payload): Json<CreateUserRequest>,
) -> Result<Json<User>, AppError> {
    // Count every attempt, regardless of outcome.
    counter!("http_requests_total", "method" => "POST", "endpoint" => "/users").increment(1);

    let start = std::time::Instant::now();
    let result = state.user_service.create_user(payload).await;

    // Record the duration before `?` so the error path is measured too.
    let status = if result.is_ok() { "200" } else { "error" };
    histogram!(
        "http_request_duration_seconds",
        "method" => "POST",
        "endpoint" => "/users",
        "status" => status
    )
    .record(start.elapsed().as_secs_f64());

    let user = result?;

    // Business metric: only successful creations count.
    counter!("users_created_total", "source" => "api").increment(1);

    Ok(Json(user))
}
Health Checks
Health Endpoint
use axum::{extract::State, response::Json, http::StatusCode};
use serde_json::json;
/// Body returned by the health endpoint: overall status plus per-dependency detail.
#[derive(serde::Serialize)]
pub struct HealthResponse {
/// Aggregate status: `"healthy"` only when every check passed, else `"unhealthy"`.
status: String,
/// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
version: String,
/// One entry per dependency probed, in the order they were run.
checks: Vec<HealthCheck>,
}
/// Result of probing a single dependency.
#[derive(serde::Serialize)]
pub struct HealthCheck {
/// Dependency name, e.g. `"database"` or `"cache"`.
name: String,
/// `"healthy"` or `"unhealthy"`.
status: String,
/// How long the probe took, in milliseconds.
duration_ms: u64,
}
#[instrument(skip(state))]
pub async fn health_check(
State(state): State<Arc<AppState>>,
) -> Result<Json<HealthResponse>, StatusCode> {
let mut checks = Vec::new();
let mut all_healthy = true;
// Database health check
let start = std::time::Instant::now();
let db_healthy = state.database.ping().await.is_ok();
checks.push(HealthCheck {
name: "database".to_string(),
status: if db_healthy { "healthy" } else { "unhealthy" }.to_string(),
duration_ms: start.elapsed().as_millis() as u64,
});
all_healthy &= db_healthy;
// Cache health check
let start = std::time::Instant::now();
let cache_healthy = state.cache.ping().await.is_ok();
checks.push(HealthCheck {
name: "cache".to_string(),
status: if cache_healthy { "healthy" } else { "unhealthy" }.to_string(),
duration_ms: start.elapsed().as_millis() as u64,
});
all_healthy &= cache_healthy;
let response = HealthResponse {
status: if all_healthy { "healthy" } else { "unhealthy" }.to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
checks,
};
if all_healthy {
Ok(Json(response))
} else {
Err(StatusCode::SERVICE_UNAVAILABLE)
}
}
/// Readiness probe (Kubernetes): is the service able to take traffic right now?
///
/// Only the database is pinged. Answers `200 OK` when the ping succeeds and
/// `503 Service Unavailable` when it fails.
#[instrument(skip(state))]
pub async fn readiness_check(
    State(state): State<Arc<AppState>>,
) -> Result<StatusCode, StatusCode> {
    match state.database.ping().await {
        Ok(_) => Ok(StatusCode::OK),
        Err(_) => Err(StatusCode::SERVICE_UNAVAILABLE),
    }
}
Request Correlation
Request ID Middleware
use axum::{
extract::Request,
middleware::Next,
response::Response,
};
use uuid::Uuid;
/// Correlation ID attached to a request, wrapping the UUID that identifies it.
///
/// The request-ID middleware stores one of these in the request extensions.
#[derive(Clone, Debug)]
pub struct RequestId(pub Uuid);
pub async fn request_id_middleware(
mut request: Request,
next: Next,
) -> Response {
// Extract or generate request ID
let request_id = request
.headers()
.get("x-request-id")
.and_then(|h| h.to_str().ok())
.and_then(|s| Uuid::parse_str(s).ok())
.unwrap_or_else(Uuid::new_v4);
// Add to extensions
request.extensions_mut().insert(RequestId(request_id));
// Create span with request ID
let span = info_span!(
"http_request",
request_id = %request_id,
method = %request.method(),
uri = %request.uri(),
);
let _guard = span.enter();
info!("Request started");
let mut response = next.run(request).await;
// Add to response headers
response.headers_mut().insert(
"x-request-id",
request_id.to_string().parse().unwrap(),
);
info!("Request completed");
response
}
Best Practices
- Use structured fields: Log key-value pairs, not formatted strings
- Instrument at boundaries: HTTP handlers, database calls, external services
- Set appropriate levels: ERROR for failures, WARN for recoverable or degraded conditions, INFO for important events, DEBUG/TRACE for diagnostic detail
- Include correlation IDs: Request IDs, trace IDs, user IDs
- Record durations: Use histogram metrics for timing
- Implement health checks: /health and /ready endpoints
- Expose metrics: Prometheus endpoint on separate port
- Skip sensitive data: Don't log passwords, tokens, PII
- Use spans for context: Group related logs with spans
Common Dependencies
[dependencies]
# Tracing
tracing = "0.1"
# "json" enables the JSON formatter, "env-filter" enables EnvFilter
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
tracing-opentelemetry = "0.27"
# OpenTelemetry
# NOTE(review): at these versions the SDK types (Resource, trace config,
# runtime, propagators) presumably come from a separate `opentelemetry_sdk`
# crate that is not listed here — confirm against the OpenTelemetry setup code.
opentelemetry = { version = "0.26", features = ["trace"] }
opentelemetry-otlp = { version = "0.26", features = ["trace"] }
# Metrics
metrics = "0.24"
metrics-exporter-prometheus = "0.17"
# Utilities
# "v4" is needed for Uuid::new_v4
uuid = { version = "1", features = ["v4", "serde"] }