Using Autoscaling on AWS EMR, you can scale out and scale in nodes on a cluster. Scale out action can be triggered using Cloudwatch metrics(YARNMemoryAvailablePercentage and ContainerPendingRatio). Sample Policy below
"AutoScalingPolicy":
{
"Constraints":
{
"MinCapacity": 10,
"MaxCapacity": 50
},
"Rules":
[
{"Name": "Compute-scale-up",
"Description": "Scale out based on ContainerPending Mterics",
"Action":
{
"SimpleScalingPolicyConfiguration":
{"AdjustmentType": "CHANGE_IN_CAPACITY",
"ScalingAdjustment": 1,
"CoolDown":0}
},
"Trigger":
{"CloudWatchAlarmDefinition":
{"AlarmNamePrefix": "compute-scale-up",
"ComparisonOperator": "GREATER_THAN_OR_EQUAL",
"EvaluationPeriods": 3,
"MetricName": "ContainerPending",
"Namespace": "AWS/ElasticMapReduce",
"Period": 300,
"Statistic": "AVERAGE",
"Threshold": 10,
"Unit": "COUNT",
"Dimensions":
[
{"Key": "JobFlowId",
"Value": "${emr:cluster_id}"}
]
}
}
},
{"Name": "Compute-scale-down",
"Description": "Scale in",
"Action":
{
"SimpleScalingPolicyConfiguration":
{"AdjustmentType": "CHANGE_IN_CAPACITY",
"ScalingAdjustment": -1,
"CoolDown":300}
},
"Trigger":
{"CloudWatchAlarmDefinition":
{"AlarmNamePrefix": "compute-scale-down",
"ComparisonOperator": "GREATER_THAN_OR_EQUAL",
"EvaluationPeriods": 3,
"MetricName": "MemoryAvailableMB",
"Namespace": "AWS/ElasticMapReduce",
"Period": 300,
"Statistic": "AVERAGE",
"Threshold": 24000,
"Unit": "COUNT",
"Dimensions":
[
{"Key": "JobFlowId",
"Value": "${emr:cluster_id}"}
]
}
}
}
]
}
You can refer this blog for more details https://aws.amazon.com/blogs/big-data/dynamically-scale-applications-on-amazon-emr-with-auto-scaling/