terraform-ecs
SKILL.md
Terraform ECS Cluster Provisioning
Production-ready patterns for provisioning AWS ECS clusters with Terraform using the official terraform-aws-modules/ecs module.
Quick Reference
| Command | Description |
|---|---|
terraform init |
Initialize Terraform working directory |
terraform plan |
Preview infrastructure changes |
terraform apply |
Create/update ECS cluster |
terraform destroy |
Delete ECS cluster and resources |
aws ecs list-clusters |
List all ECS clusters |
terraform output |
View cluster outputs |
Version Requirements
terraform {
required_version = ">= 1.11.0"
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 6.0"
}
}
}
Basic ECS Cluster with Fargate
module "ecs" {
source = "terraform-aws-modules/ecs/aws"
version = "~> 5.0"
cluster_name = "production-ecs"
# Capacity providers
default_capacity_provider_use_fargate = true
fargate_capacity_providers = {
FARGATE = {
default_capacity_provider_strategy = {
weight = 50
base = 1
}
}
FARGATE_SPOT = {
default_capacity_provider_strategy = {
weight = 50
}
}
}
# CloudWatch Container Insights
cluster_settings = {
name = "containerInsights"
value = "enabled"
}
tags = {
Environment = "production"
Terraform = "true"
}
}
Task Definition
resource "aws_ecs_task_definition" "app" {
family = "my-app"
network_mode = "awsvpc"
requires_compatibilities = ["FARGATE"]
cpu = "512"
memory = "1024"
execution_role_arn = aws_iam_role.ecs_task_execution.arn
task_role_arn = aws_iam_role.ecs_task.arn
container_definitions = jsonencode([
{
name = "my-app"
image = "${aws_ecr_repository.app.repository_url}:latest"
cpu = 512
memory = 1024
essential = true
portMappings = [
{
containerPort = 8080
hostPort = 8080
protocol = "tcp"
}
]
logConfiguration = {
logDriver = "awslogs"
options = {
"awslogs-group" = aws_cloudwatch_log_group.app.name
"awslogs-region" = data.aws_region.current.name
"awslogs-stream-prefix" = "ecs"
}
}
environment = [
{
name = "APP_ENV"
value = "production"
}
]
secrets = [
{
name = "DB_PASSWORD"
valueFrom = aws_secretsmanager_secret.db_password.arn
}
]
healthCheck = {
command = ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"]
interval = 30
timeout = 5
retries = 3
startPeriod = 60
}
}
])
tags = var.tags
}
Service with Load Balancer
resource "aws_ecs_service" "app" {
name = "my-service"
cluster = module.ecs.cluster_id
task_definition = aws_ecs_task_definition.app.arn
desired_count = 3
# Use capacity providers instead of launch_type
capacity_provider_strategy {
capacity_provider = "FARGATE"
weight = 1
base = 1
}
capacity_provider_strategy {
capacity_provider = "FARGATE_SPOT"
weight = 3
}
platform_version = "1.4.0"
network_configuration {
subnets = module.vpc.private_subnets
security_groups = [aws_security_group.ecs_tasks.id]
assign_public_ip = false
}
load_balancer {
target_group_arn = aws_lb_target_group.app.arn
container_name = "my-app"
container_port = 8080
}
deployment_configuration {
maximum_percent = 200
minimum_healthy_percent = 100
}
deployment_circuit_breaker {
enable = true
rollback = true
}
enable_execute_command = true
depends_on = [aws_lb_listener.app]
tags = var.tags
lifecycle {
ignore_changes = [task_definition] # Allow external deployments
}
}
Application Load Balancer
resource "aws_lb" "app" {
name = "app-alb"
internal = false
load_balancer_type = "application"
security_groups = [aws_security_group.alb.id]
subnets = module.vpc.public_subnets
tags = var.tags
}
resource "aws_lb_target_group" "app" {
name = "app-tg"
port = 8080
protocol = "HTTP"
vpc_id = module.vpc.vpc_id
target_type = "ip" # Required for awsvpc network mode
health_check {
healthy_threshold = 2
unhealthy_threshold = 2
timeout = 5
interval = 30
path = "/health"
matcher = "200"
}
tags = var.tags
}
resource "aws_lb_listener" "app" {
load_balancer_arn = aws_lb.app.arn
port = 443
protocol = "HTTPS"
ssl_policy = "ELBSecurityPolicy-TLS13-1-2-2021-06"
certificate_arn = aws_acm_certificate.app.arn
default_action {
type = "forward"
target_group_arn = aws_lb_target_group.app.arn
}
}
Auto Scaling
resource "aws_appautoscaling_target" "ecs" {
max_capacity = 10
min_capacity = 1
resource_id = "service/${module.ecs.cluster_name}/${aws_ecs_service.app.name}"
scalable_dimension = "ecs:service:DesiredCount"
service_namespace = "ecs"
}
# CPU-based scaling
resource "aws_appautoscaling_policy" "cpu" {
name = "cpu-autoscaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs.resource_id
scalable_dimension = aws_appautoscaling_target.ecs.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageCPUUtilization"
}
target_value = 70.0
scale_in_cooldown = 300
scale_out_cooldown = 60
}
}
# Memory-based scaling
resource "aws_appautoscaling_policy" "memory" {
name = "memory-autoscaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs.resource_id
scalable_dimension = aws_appautoscaling_target.ecs.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ECSServiceAverageMemoryUtilization"
}
target_value = 80.0
}
}
# Request count scaling (requires ALB)
resource "aws_appautoscaling_policy" "requests" {
name = "requests-autoscaling"
policy_type = "TargetTrackingScaling"
resource_id = aws_appautoscaling_target.ecs.resource_id
scalable_dimension = aws_appautoscaling_target.ecs.scalable_dimension
service_namespace = aws_appautoscaling_target.ecs.service_namespace
target_tracking_scaling_policy_configuration {
predefined_metric_specification {
predefined_metric_type = "ALBRequestCountPerTarget"
resource_label = "${aws_lb.app.arn_suffix}/${aws_lb_target_group.app.arn_suffix}"
}
target_value = 1000
}
}
IAM Roles
# Task Execution Role (for ECS agent)
resource "aws_iam_role" "ecs_task_execution" {
name = "ecs-task-execution-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = "ecs-tasks.amazonaws.com"
}
}]
})
}
resource "aws_iam_role_policy_attachment" "ecs_task_execution" {
role = aws_iam_role.ecs_task_execution.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}
resource "aws_iam_role_policy" "ecs_task_execution_secrets" {
name = "secrets-access"
role = aws_iam_role.ecs_task_execution.id
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"secretsmanager:GetSecretValue"
]
Resource = aws_secretsmanager_secret.db_password.arn
},
{
Effect = "Allow"
Action = [
"ssm:GetParameters"
]
Resource = "arn:aws:ssm:*:*:parameter/ecs/*"
}
]
})
}
# Task Role (for application)
resource "aws_iam_role" "ecs_task" {
name = "ecs-task-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = "ecs-tasks.amazonaws.com"
}
}]
})
}
resource "aws_iam_role_policy" "ecs_task" {
name = "app-permissions"
role = aws_iam_role.ecs_task.id
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"s3:GetObject",
"s3:PutObject"
]
Resource = "${aws_s3_bucket.app.arn}/*"
},
{
Effect = "Allow"
Action = [
"ssmmessages:CreateControlChannel",
"ssmmessages:CreateDataChannel",
"ssmmessages:OpenControlChannel",
"ssmmessages:OpenDataChannel"
]
Resource = "*"
}
]
})
}
VPC Configuration
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
name = "ecs-vpc"
cidr = "10.0.0.0/16"
azs = ["us-east-1a", "us-east-1b", "us-east-1c"]
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]
enable_nat_gateway = true
single_nat_gateway = false # One per AZ for HA
tags = var.tags
}
# Security Groups
resource "aws_security_group" "ecs_tasks" {
name = "ecs-tasks-sg"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 8080
to_port = 8080
protocol = "tcp"
security_groups = [aws_security_group.alb.id]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = var.tags
}
resource "aws_security_group" "alb" {
name = "alb-sg"
vpc_id = module.vpc.vpc_id
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = var.tags
}
Outputs
output "cluster_id" {
description = "ECS cluster ID"
value = module.ecs.cluster_id
}
output "cluster_name" {
description = "ECS cluster name"
value = module.ecs.cluster_name
}
output "service_name" {
description = "ECS service name"
value = aws_ecs_service.app.name
}
output "alb_dns_name" {
description = "ALB DNS name"
value = aws_lb.app.dns_name
}
Detailed Documentation
For comprehensive guides, see:
- Cluster Configuration - Cluster setup, capacity providers, settings
- Task Definitions - Container definitions, volumes, secrets
- Service Patterns - Deployment, load balancers, auto-scaling
Common Commands
# Update kubeconfig equivalent - configure AWS CLI
aws ecs list-services --cluster production-ecs
# Check service status
aws ecs describe-services \
--cluster production-ecs \
--services my-service
# View task logs
aws logs tail /ecs/my-app --follow
# Execute command in container
aws ecs execute-command \
--cluster production-ecs \
--task <task-id> \
--container my-app \
--interactive \
--command "/bin/sh"
Best Practices
- Use capacity provider strategy instead of launch_type for flexibility
- Enable deployment circuit breaker with rollback for safety
- Use Fargate Spot for cost optimization (up to 70% savings)
- Pin platform version explicitly (e.g., "1.4.0")
- Enable Container Insights for observability
- Use secrets manager for sensitive configuration
- Separate task execution and task roles for least privilege
- Use target_type = "ip" for target groups with awsvpc mode