boto3-ecs

SKILL.md

AWS Boto3 ECS Integration

Complete patterns for managing Amazon ECS clusters, services, and tasks using AWS Boto3 SDK.

Quick Reference

Client Initialization

import boto3
from typing import Optional

def get_ecs_client(region_name: str = 'us-east-1',
                   profile_name: Optional[str] = None):
    """Return a boto3 ECS client bound to a region and optional profile.

    Args:
        region_name: AWS region the session is created for.
        profile_name: Named credentials profile handed to the session;
            ``None`` is passed through unchanged.

    Returns:
        The ``'ecs'`` client created from the new session.
    """
    session_options = {
        'region_name': region_name,
        'profile_name': profile_name,
    }
    return boto3.Session(**session_options).client('ecs')

# Example: build the shared client for the us-west-2 region
ecs = get_ecs_client('us-west-2')

Essential Cluster Operations

# List clusters
# list_clusters returns results one page at a time, so use the paginator to
# walk every page instead of reading only the first response.
cluster_pages = ecs.get_paginator('list_clusters').paginate()
for page in cluster_pages:
    for cluster_arn in page['clusterArns']:
        print(cluster_arn)

# Describe cluster
cluster = ecs.describe_clusters(
    clusters=['my-cluster'],
    include=['ATTACHMENTS', 'SETTINGS', 'STATISTICS']
)
# An unknown cluster does not raise: it comes back as an empty 'clusters'
# list with the problem reported under 'failures', so check before indexing.
if cluster['clusters']:
    cluster_info = cluster['clusters'][0]
    print(f"Status: {cluster_info['status']}")
    print(f"Running Tasks: {cluster_info['runningTasksCount']}")
else:
    for failure in cluster.get('failures', []):
        print(f"Cluster lookup failed: {failure.get('arn')} ({failure.get('reason')})")

# Create a cluster that uses the FARGATE and FARGATE_SPOT capacity providers
fargate_providers = ['FARGATE', 'FARGATE_SPOT']
default_strategy = [
    {'capacityProvider': 'FARGATE', 'weight': 1, 'base': 1},
    {'capacityProvider': 'FARGATE_SPOT', 'weight': 1},
]
response = ecs.create_cluster(
    clusterName='my-cluster',
    capacityProviders=fargate_providers,
    defaultCapacityProviderStrategy=default_strategy,
    settings=[{'name': 'containerInsights', 'value': 'enabled'}],
)

Task Definition Management

# Register a Fargate task definition with a single container.
# The container is described first so the register call itself stays short.
app_log_config = {
    'logDriver': 'awslogs',
    'options': {
        'awslogs-group': '/ecs/my-app',
        'awslogs-region': 'us-east-1',
        'awslogs-stream-prefix': 'ecs',
    },
}
app_container = {
    'name': 'my-app',
    'image': '123456789.dkr.ecr.us-east-1.amazonaws.com/my-app:latest',
    'essential': True,
    'portMappings': [{'containerPort': 8080, 'protocol': 'tcp'}],
    'logConfiguration': app_log_config,
    # Plain configuration goes in 'environment' ...
    'environment': [{'name': 'ENV', 'value': 'production'}],
    # ... while sensitive values are referenced by ARN under 'secrets'.
    'secrets': [
        {
            'name': 'DB_PASSWORD',
            'valueFrom': 'arn:aws:secretsmanager:us-east-1:123456789:secret:db-password',
        },
    ],
}
response = ecs.register_task_definition(
    family='my-app',
    networkMode='awsvpc',
    requiresCompatibilities=['FARGATE'],
    cpu='256',
    memory='512',
    executionRoleArn='arn:aws:iam::123456789:role/ecsTaskExecutionRole',
    taskRoleArn='arn:aws:iam::123456789:role/myAppTaskRole',
    containerDefinitions=[app_container],
)
# Keep the ARN of the revision that was just registered
registered = response['taskDefinition']
task_def_arn = registered['taskDefinitionArn']

# Fetch revision 1 of the 'my-app' task definition, including its tags
task_def = ecs.describe_task_definition(include=['TAGS'], taskDefinition='my-app:1')

# Enumerate the task definition families that are ACTIVE
families = ecs.list_task_definition_families(status='ACTIVE')

# Enumerate ACTIVE revisions of the 'my-app' family, sorted descending
revision_query = {
    'familyPrefix': 'my-app',
    'status': 'ACTIVE',
    'sort': 'DESC',
}
revisions = ecs.list_task_definitions(**revision_query)

Service Operations

# Create a Fargate service behind a load balancer target group.
# Each nested structure is named up front to keep the call readable.
service_network = {
    'awsvpcConfiguration': {
        'subnets': ['subnet-12345', 'subnet-67890'],
        'securityGroups': ['sg-12345'],
        'assignPublicIp': 'DISABLED',
    },
}
service_target_group = {
    'targetGroupArn': 'arn:aws:elasticloadbalancing:...:targetgroup/my-tg/...',
    'containerName': 'my-app',
    'containerPort': 8080,
}
service_deployment = {
    'maximumPercent': 200,
    'minimumHealthyPercent': 100,
    # Circuit breaker with rollback enabled
    'deploymentCircuitBreaker': {'enable': True, 'rollback': True},
}
response = ecs.create_service(
    cluster='my-cluster',
    serviceName='my-service',
    taskDefinition='my-app:1',
    desiredCount=3,
    launchType='FARGATE',
    platformVersion='1.4.0',
    networkConfiguration=service_network,
    loadBalancers=[service_target_group],
    deploymentConfiguration=service_deployment,
    enableExecuteCommand=True,
)

# Point the service at revision 2, scale it to five tasks, and force a new deployment
update_request = {
    'cluster': 'my-cluster',
    'service': 'my-service',
    'taskDefinition': 'my-app:2',
    'desiredCount': 5,
    'forceNewDeployment': True,
}
response = ecs.update_service(**update_request)

# Describe services
services = ecs.describe_services(
    cluster='my-cluster',
    services=['my-service'],
    include=['TAGS']
)
for svc in services['services']:
    print(f"{svc['serviceName']}: {svc['status']}")
    print(f"  Running: {svc['runningCount']}/{svc['desiredCount']}")
# Services that could not be described are listed under 'failures' instead of
# 'services'; report them so a missing service is not silently skipped.
for failure in services.get('failures', []):
    print(f"Could not describe {failure.get('arn')}: {failure.get('reason')}")

# Delete service: scale it down to zero tasks, then remove it
service_ref = {'cluster': 'my-cluster', 'service': 'my-service'}
ecs.update_service(desiredCount=0, **service_ref)
ecs.delete_service(**service_ref)

Running Tasks

# Run one-off task
response = ecs.run_task(
    cluster='my-cluster',
    taskDefinition='my-app:1',
    launchType='FARGATE',
    platformVersion='1.4.0',
    count=1,
    networkConfiguration={
        'awsvpcConfiguration': {
            'subnets': ['subnet-12345'],
            'securityGroups': ['sg-12345'],
            'assignPublicIp': 'ENABLED'
        }
    },
    overrides={
        'containerOverrides': [
            {
                # Must match the container name in the task definition
                'name': 'my-app',
                'command': ['python', 'migrate.py'],
                'environment': [
                    {'name': 'MIGRATION_MODE', 'value': 'true'}
                ]
            }
        ]
    }
)
# run_task can return successfully without starting anything: placement
# problems are reported under 'failures' and 'tasks' is then empty, so check
# before indexing instead of failing with an opaque IndexError.
if not response['tasks']:
    reasons = ', '.join(
        str(failure.get('reason', 'Unknown'))
        for failure in response.get('failures', [])
    ) or 'Unknown'
    raise RuntimeError(f"run_task did not start a task: {reasons}")
task_arn = response['tasks'][0]['taskArn']

# List tasks
tasks = ecs.list_tasks(
    cluster='my-cluster',
    serviceName='my-service',
    desiredStatus='RUNNING'
)

# Describe tasks
# describe_tasks rejects an empty 'tasks' list, so only call it when
# list_tasks actually found something.
if tasks['taskArns']:
    task_details = ecs.describe_tasks(
        cluster='my-cluster',
        tasks=tasks['taskArns'],
        include=['TAGS']
    )
    for task in task_details['tasks']:
        print(f"Task: {task['taskArn']}")
        print(f"  Status: {task['lastStatus']}")
        # healthStatus is read with a fallback in case it is absent
        print(f"  Health: {task.get('healthStatus', 'N/A')}")
else:
    print("No running tasks found")

# Stop a task by ARN and record the reason for stopping it
stop_request = {
    'cluster': 'my-cluster',
    'task': task_arn,
    'reason': 'Manual stop for maintenance',
}
ecs.stop_task(**stop_request)

Capacity Providers

# Create the service with a capacity provider strategy rather than a launchType
spot_heavy_strategy = [
    {'capacityProvider': 'FARGATE', 'weight': 1, 'base': 1},
    {'capacityProvider': 'FARGATE_SPOT', 'weight': 3},
]
private_network = {
    'awsvpcConfiguration': {
        'subnets': ['subnet-12345'],
        'securityGroups': ['sg-12345'],
        'assignPublicIp': 'DISABLED',
    },
}
response = ecs.create_service(
    cluster='my-cluster',
    serviceName='my-service',
    taskDefinition='my-app:1',
    desiredCount=3,
    capacityProviderStrategy=spot_heavy_strategy,
    networkConfiguration=private_network,
)

Common Patterns

Error Handling

from botocore.exceptions import ClientError, BotoCoreError

try:
    response = ecs.describe_services(
        cluster='my-cluster',
        services=['my-service']
    )
except ecs.exceptions.ClusterNotFoundException:
    print("Cluster not found")
except ecs.exceptions.ServiceNotFoundException:
    # Kept for parity with calls that do raise it (e.g. update_service);
    # NOTE(review): it is not among DescribeServices' documented errors.
    print("Service not found")
except ClientError as e:
    # Any other service-side error: branch on the AWS error code
    error_code = e.response['Error']['Code']
    if error_code == 'AccessDeniedException':
        print("Insufficient permissions")
    else:
        print(f"AWS Error: {error_code}")
except BotoCoreError as e:
    # Client-side problems raised before a service response exists
    print(f"Connection error: {e}")
else:
    # A successful call can still carry per-service errors: services that
    # could not be described are listed under 'failures', not raised.
    for failure in response.get('failures', []):
        print(f"Service lookup failed: {failure.get('arn')} ({failure.get('reason')})")

Wait for Service Stable

# Block until the service is stable, checking every 15 seconds for up to 40 attempts
stable_waiter_config = {'Delay': 15, 'MaxAttempts': 40}
ecs.get_waiter('services_stable').wait(
    cluster='my-cluster',
    services=['my-service'],
    WaiterConfig=stable_waiter_config,
)
print("Service is stable")

# Block until the given task is running (waiter defaults apply)
waiter = ecs.get_waiter('tasks_running')
waiter.wait(tasks=[task_arn], cluster='my-cluster')

Deployment Monitoring

def monitor_deployment(cluster: str, service: str, timeout: int = 600,
                       poll_interval: float = 15, client=None):
    """Monitor ECS deployment progress until it completes, fails or times out.

    Only the PRIMARY deployment decides success. During a rollout the
    previous deployment is still listed and may already report COMPLETED,
    so treating any COMPLETED deployment as success would return too early.

    Args:
        cluster: Cluster name or ARN that hosts the service.
        service: Service name or ARN to watch.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.
        client: ECS client to use; defaults to the module-level ``ecs``.

    Returns:
        True when the PRIMARY deployment reports COMPLETED. False when any
        deployment reports FAILED, the service cannot be found, or the
        timeout elapses first.
    """
    import time

    ecs_client = ecs if client is None else client
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        response = ecs_client.describe_services(
            cluster=cluster,
            services=[service]
        )

        # A missing service is reported under 'failures' with an empty
        # 'services' list, so guard before indexing.
        found = response.get('services') or []
        if not found:
            failures = response.get('failures') or []
            reason = failures[0].get('reason', 'Unknown') if failures else 'Unknown'
            print(f"Service not found: {reason}")
            return False

        deployments = found[0].get('deployments', [])
        primary_state = None

        for deployment in deployments:
            # rolloutState is read defensively because it may be absent
            status = deployment.get('rolloutState')
            print(f"Deployment {deployment['id'][:8]}: {status}")
            print(f"  Running: {deployment['runningCount']}/{deployment['desiredCount']}")

            if status == 'FAILED':
                print(f"Deployment failed: {deployment.get('rolloutStateReason', 'Unknown')}")
                return False
            if deployment.get('status') == 'PRIMARY':
                primary_state = status

        if primary_state == 'COMPLETED':
            print("Deployment complete!")
            return True

        time.sleep(poll_interval)

    print("Deployment timed out")
    return False

Progressive Disclosure

Quick Start (This File)

  • Client initialization
  • Cluster operations
  • Task definition management
  • Service CRUD operations
  • Running tasks
  • Basic error handling

Detailed References

When to Use This Skill

Use this skill when:

  • Managing ECS clusters programmatically
  • Creating or updating task definitions
  • Deploying and scaling services
  • Running one-off tasks (migrations, batch jobs)
  • Monitoring deployments
  • Integrating ECS with Python applications

Dependencies

pip install boto3 botocore

Related Skills

  • terraform-ecs: Infrastructure as Code for ECS
  • ecs-fargate: Fargate-specific patterns
  • ecs-deployment: Deployment strategies
  • ecs-troubleshooting: Debugging guide

Best Practices

  1. Use capacity providers instead of launchType for flexibility
  2. Enable deployment circuit breaker for automatic rollback
  3. Store sensitive data in AWS Secrets Manager and reference it through the container's `secrets` field; never put it in plain environment variables
  4. Enable execute command for debugging access
  5. Always specify platform version (e.g., '1.4.0') explicitly
  6. Use awsvpc network mode for Fargate (required) and EC2 (recommended)
  7. Implement proper error handling for all API calls
  8. Use waiters for async operations instead of polling

Weekly Installs: 2

Installed on:

  • claude-code: 2
  • kilo: 1
  • windsurf: 1
  • zencoder: 1
  • cline: 1
  • pi: 1