AWS Batch - Hands-On Demo

Agenda
In this demo, we will:
- Create an IAM Role
- Create ECR Repository
- Create S3 Bucket and Upload Sample Data
- Create and Upload Docker Container
- Configure AWS Batch Compute Environment
- Create Job Queue
- Create Job Definition
- Submit Job
Demo Overview

Create an IAM Role
- Role name: ECSTaskExecutionRole
- Attached managed policies: AmazonECSTaskExecutionRolePolicy, AmazonS3FullAccess
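The demo creates the role in the console; if you prefer the CLI, a minimal sketch of the same role follows. It assumes ecs-tasks.amazonaws.com as the trusted service, which is the standard trust policy for an ECS task execution role.
# Trust policy allowing ECS tasks to assume the role
cat << 'EOF' > trust-policy.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": { "Service": "ecs-tasks.amazonaws.com" },
      "Action": "sts:AssumeRole"
    }
  ]
}
EOF
# Create the role
aws iam create-role \
  --role-name ECSTaskExecutionRole \
  --assume-role-policy-document file://trust-policy.json
# Attach the two managed policies used in this demo
aws iam attach-role-policy \
  --role-name ECSTaskExecutionRole \
  --policy-arn arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
aws iam attach-role-policy \
  --role-name ECSTaskExecutionRole \
  --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess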


Create ECR Repository
- Repository name: batch-demo-app
- Tip: the repository's "View push commands" page in the ECR console lists the login, build, tag, and push commands in case you need them later.
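The same repository can also be created from the CLI:
aws ecr create-repository \
  --repository-name batch-demo-app \
  --region us-east-1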
Create S3 Bucket and Upload Sample Data
# Generate a random suffix (using timestamp)
TIMESTAMP=$(date +%Y%m%d%H%M%S)
BUCKET_NAME="batch-demo-${TIMESTAMP}"
# Create the bucket
aws s3api create-bucket \
--bucket ${BUCKET_NAME} \
--region us-east-1
echo "Created bucket: ${BUCKET_NAME}"
# Create input and output folders
# (by creating empty objects with trailing slashes)
aws s3api put-object --bucket ${BUCKET_NAME} --key input/
aws s3api put-object --bucket ${BUCKET_NAME} --key output/
Create S3 Bucket and Folder Structure
# First create the data.csv file
cat << 'EOF' > data.csv
month,revenue,costs,units_sold,customer_count,avg_order_value
Jan_2024,120500.50,85000.75,1250,850,96.40
Feb_2024,135750.25,92500.50,1380,920,98.35
Mar_2024,142800.75,95750.25,1450,975,98.50
Apr_2024,128900.25,88250.50,1320,890,97.65
May_2024,155200.50,98500.75,1580,1050,98.25
Jun_2024,168500.75,102750.50,1720,1150,98.00
Jul_2024,172500.50,105250.25,1750,1180,98.55
Aug_2024,180250.25,108500.50,1820,1250,99.05
Sep_2024,165750.75,101250.25,1680,1120,98.65
Oct_2024,158900.50,99750.75,1620,1080,98.10
EOF
Sample data.csv
# Upload data.csv to the input folder
aws s3 cp data.csv s3://${BUCKET_NAME}/input/data.csv
# Verify the setup
echo "Listing contents of bucket ${BUCKET_NAME}:"
aws s3 ls s3://${BUCKET_NAME}/ --recursive
# Save bucket name for later use
echo ${BUCKET_NAME} > bucket_name.txt
echo "Bucket name saved to bucket_name.txt"
Upload data.csv to S3 Bucket
Create and Upload Docker Container
cat << 'EOF' > process_data.py
import boto3
import pandas as pd
import os
import sys
import logging
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def process_file(input_bucket, input_key, output_bucket, output_key):
    """
    Process data file from S3 and upload results back to S3.
    Includes error handling and logging.
    """
    s3 = boto3.client('s3')
    try:
        logger.info(f"Starting processing of s3://{input_bucket}/{input_key}")

        # Download input file
        local_input = f'/tmp/input_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        s3.download_file(input_bucket, input_key, local_input)

        # Process the data
        df = pd.read_csv(local_input)
        logger.info(f"Loaded data with shape: {df.shape}")

        # Get numeric columns only
        numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns

        # Calculate statistics for numeric columns only
        result = pd.DataFrame({
            'column': numeric_columns,
            'mean': df[numeric_columns].mean(),
            'median': df[numeric_columns].median(),
            'std': df[numeric_columns].std()
        })

        # Save and upload results
        local_output = f'/tmp/output_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        result.to_csv(local_output, index=False)
        s3.upload_file(local_output, output_bucket, output_key)
        logger.info(f"Successfully uploaded results to s3://{output_bucket}/{output_key}")

        # Cleanup temporary files
        os.remove(local_input)
        os.remove(local_output)
    except Exception as e:
        logger.error(f"Error processing file: {str(e)}")
        raise

def main():
    if len(sys.argv) != 5:
        logger.error("Required arguments: input_bucket input_key output_bucket output_key")
        sys.exit(1)
    process_file(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])

if __name__ == "__main__":
    main()
EOF
Sample Application Code
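For a quick sanity check before containerizing, the script can be run directly against the bucket created earlier (a sketch; it assumes AWS credentials are configured locally and boto3/pandas are installed):
pip install boto3 pandas
BUCKET_NAME=$(cat bucket_name.txt)
python process_data.py ${BUCKET_NAME} input/data.csv ${BUCKET_NAME} output/results.csv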
cat << 'EOF' > Dockerfile
FROM python:3.9-slim
# Install dependencies
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY process_data.py .
# Set permissions
RUN chmod +x process_data.py
# Set entry point
ENTRYPOINT ["python", "process_data.py"]
EOF
Dockerfile
cat << 'EOF' > requirements.txt
boto3==1.26.137
pandas==2.0.0
numpy==1.24.3
EOF
requirements.txt
# Export environment variables
export AWS_ACCOUNT_ID=651623850282
export AWS_REGION=us-east-1
# ECR repository URL
export ECR_REPO="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
# Get ECR login token
aws ecr get-login-password --region ${AWS_REGION} \
| docker login --username AWS --password-stdin ${ECR_REPO}
# Build image
docker build -t batch-demo-app .
# Tag image
docker tag batch-demo-app:latest ${ECR_REPO}/batch-demo-app:latest
# Push to ECR
docker push ${ECR_REPO}/batch-demo-app:latest
Push Docker Image to ECR
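To confirm the push succeeded, list the images in the repository:
aws ecr describe-images --repository-name batch-demo-app --region ${AWS_REGION}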
Configure AWS Batch Compute Environment
- Compute environment name: batch-demo-compute
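The console walk-through is the demo's path; an equivalent CLI sketch is below. Fargate and maxvCpus=4 are assumptions here, and the subnet and security group IDs are placeholders to replace with values from your own VPC.
aws batch create-compute-environment \
  --compute-environment-name batch-demo-compute \
  --type MANAGED \
  --state ENABLED \
  --compute-resources '{
    "type": "FARGATE",
    "maxvCpus": 4,
    "subnets": ["subnet-xxxxxxxx"],
    "securityGroupIds": ["sg-xxxxxxxx"]
  }'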




Create Job Queue
- Job queue name: batch-demo-queue
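CLI equivalent, attaching the queue to the compute environment created in the previous step:
aws batch create-job-queue \
  --job-queue-name batch-demo-queue \
  --state ENABLED \
  --priority 1 \
  --compute-environment-order order=1,computeEnvironment=batch-demo-compute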


Create Job Definition
- Container image: 651623850282.dkr.ecr.us-east-1.amazonaws.com/batch-demo-app:latest
- Environment variable: AWS_DEFAULT_REGION = us-east-1
- Command: ["Ref::input_bucket", "Ref::input_key", "Ref::output_bucket", "Ref::output_key"]






Submit Job
- Job name: batch-demo-job-1
- Parameter values (filled into the Ref:: placeholders; your bucket name will carry its own timestamp suffix):
[
  "batch-demo-20241114140922",
  "input/data.csv",
  "batch-demo-20241114140922",
  "output/results.csv"
]
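The same submission from the CLI (a sketch; batch-demo-job-def is the job definition name assumed earlier):
aws batch submit-job \
  --job-name batch-demo-job-1 \
  --job-queue batch-demo-queue \
  --job-definition batch-demo-job-def \
  --parameters input_bucket=batch-demo-20241114140922,input_key=input/data.csv,output_bucket=batch-demo-20241114140922,output_key=output/results.csv
# Once the job succeeds, the results land in the output folder
aws s3 cp s3://batch-demo-20241114140922/output/results.csv .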






Clean Up

Disable Job Queue

Delete Job Queue

Disable Compute Environment

Delete Compute Environment

Deregister Job Definition

Empty S3 Bucket

Delete S3 Bucket

Delete IAM Role

Delete Repository Images

Delete ECR Repository
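A CLI sketch of the cleanup steps above (batch-demo-job-def:1 is the job definition name and revision assumed earlier; wait for each disable/delete to finish before running the next command):
BUCKET_NAME=$(cat bucket_name.txt)
# Job queue first, then the compute environment it uses
aws batch update-job-queue --job-queue batch-demo-queue --state DISABLED
aws batch delete-job-queue --job-queue batch-demo-queue
aws batch update-compute-environment --compute-environment batch-demo-compute --state DISABLED
aws batch delete-compute-environment --compute-environment batch-demo-compute
aws batch deregister-job-definition --job-definition batch-demo-job-def:1
# S3: empty, then delete the bucket
aws s3 rm s3://${BUCKET_NAME} --recursive
aws s3api delete-bucket --bucket ${BUCKET_NAME}
# IAM: detach policies, then delete the role
aws iam detach-role-policy --role-name ECSTaskExecutionRole \
  --policy-arn arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
aws iam detach-role-policy --role-name ECSTaskExecutionRole \
  --policy-arn arn:aws:iam::aws:policy/AmazonS3FullAccess
aws iam delete-role --role-name ECSTaskExecutionRole
# ECR: --force deletes the repository together with its images
aws ecr delete-repository --repository-name batch-demo-app --force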
🙏 Thanks for Watching
AWS Batch - Hands-On Demo
By Deepak Dubey