Skip to content

Commit

Permalink
ctx/feat(providers): add aws terraform minimal examples (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
ctxswitch authored Feb 25, 2025
1 parent 4f94c8d commit 25eb32e
Show file tree
Hide file tree
Showing 11 changed files with 493 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ build/
*.tfstate*
*.lock.hcl
.oci
.aws
.terraform
148 changes: 148 additions & 0 deletions providers/aws/eks.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Availability zones whose state is "available"; consumed by local.azs.
data "aws_availability_zones" "available" {
  state = "available"
}

# Region lookup; its name is passed to the cluster-autoscaler Helm release.
data "aws_region" "current" {
}

# EKS cluster plus one EKS managed node group per entry in var.node_groups.
#
# Each node group entry is expected to provide: min_size, max_size,
# gpu_count, gpu_accelerator, root_disk_size_gb, spot, instance_type and
# dedicated_node_role (nullable).
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 20.31"

  cluster_name    = local.name_prefix
  cluster_version = "1.32"

  # The API endpoint is exposed both inside the VPC and publicly.
  cluster_endpoint_private_access = true
  cluster_endpoint_public_access  = true

  # The OIDC provider ARN exported by this module is consumed by the IRSA
  # role modules in this configuration.
  enable_irsa = true

  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  eks_managed_node_groups = {
    for k, v in var.node_groups : k => {
      # Every group starts at its minimum size.
      desired_size = v.min_size
      max_size     = v.max_size
      min_size     = v.min_size

      # GPU groups need the GPU AMI; all other groups keep the default (null).
      ami_type = v.gpu_count == 0 ? null : "AL2_x86_64_GPU"

      # Size the root volume from the node group definition.
      block_device_mappings = {
        xvda = {
          device_name = "/dev/xvda"
          ebs = {
            volume_size = v.root_disk_size_gb
          }
        }
      }

      capacity_type  = v.spot ? "SPOT" : "ON_DEMAND"
      instance_types = [v.instance_type]

      # Node labels; these must stay in sync with the node-template label
      # tags built in local.nodegroup_asg_tags.
      labels = merge(
        v.gpu_count == 0 ? {} : {
          "k8s.amazonaws.com/accelerator" = v.gpu_accelerator
        },
        v.dedicated_node_role == null ? {} : {
          "flyte.org/node-role" = v.dedicated_node_role
        }
      )

      subnet_ids = module.vpc.private_subnets

      # Tags used by cluster-autoscaler auto-discovery.
      tags = {
        "k8s.io/cluster-autoscaler/enabled"              = true
        "k8s.io/cluster-autoscaler/${local.name_prefix}" = true
      }

      # Node taints. local.nodegroup_asg_tags advertises BOTH the GPU taint and
      # the dedicated-role taint to the cluster autoscaler via node-template
      # tags. The autoscaler never applies taints itself, so the node group has
      # to carry the same taints or the scale-from-zero simulation will not
      # match the nodes that actually join the cluster.
      taints = concat(
        v.gpu_count == 0 ? [] : [
          {
            key    = "nvidia.com/gpu"
            value  = "present"
            effect = "NO_SCHEDULE"
          }
        ],
        v.dedicated_node_role == null ? [] : [
          {
            key    = "flyte.org/node-role"
            value  = v.dedicated_node_role
            effect = "NO_SCHEDULE"
          }
        ]
      )

      iam_role_additional_policies = {
        "CloudWatchAgentPolicy" = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
      }
    }
  }
}

# Applies every tag from local.nodegroup_asg_tags to the autoscaling group
# behind the corresponding EKS managed node group.
resource "aws_autoscaling_group_tag" "eks_managed_node_group_asg_tag" {
  # Flatten { node group => { tag key => tag value } } into a single map with
  # one entry per (node group, tag) pair. The map key is the node group name
  # followed by the tag key with the common node-template prefix removed.
  for_each = merge([
    for group_name, group_tags in local.nodegroup_asg_tags : {
      for k, v in group_tags :
      "${group_name}-${replace(k, "k8s.io/cluster-autoscaler/node-template/", "")}" => {
        node_group = group_name
        tag_key    = k
        tag_value  = v
      }
    }
  ]...)

  # one() is applied to the node group's list of autoscaling group names, so
  # at most a single ASG per node group is expected here.
  autoscaling_group_name = one(
    module.eks.eks_managed_node_groups[each.value.node_group].node_group_autoscaling_group_names
  )

  tag {
    key                 = each.value.tag_key
    value               = each.value.tag_value
    propagate_at_launch = false
  }

  depends_on = [module.eks]
}

# Authentication data for the cluster created by module.eks.
# NOTE(review): presumably consumed by the kubernetes/helm provider
# configuration, which is not visible in this file - confirm.
data "aws_eks_cluster_auth" "default" {
  name = module.eks.cluster_name
}

# IRSA role that the kube-system/aws-load-balancer-controller service account
# is allowed to assume.
#
# NOTE(review): attach_load_balancer_controller_policy is false, so this module
# attaches no load-balancer-controller policy to the role. Confirm that is
# intentional (e.g. the controller is not deployed by this example).
module "aws_load_balancer_controller_irsa_role" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
  version = "5.11.2"

  role_name = "${local.name_prefix}-aws-load-balancer-controller"

  attach_load_balancer_controller_policy = false

  oidc_providers = {
    ex = {
      provider_arn = module.eks.oidc_provider_arn
      namespace_service_accounts = [
        "kube-system:aws-load-balancer-controller",
      ]
    }
  }
}

# IRSA role for the cluster autoscaler: attaches the module's cluster
# autoscaler policy for this cluster and trusts the
# kube-system/aws-cluster-autoscaler service account.
module "cluster_autoscaler_irsa_role" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
  version = "5.11.2"

  role_name = "${local.name_prefix}-cluster-autoscaler"

  attach_cluster_autoscaler_policy = true
  cluster_autoscaler_cluster_ids   = [module.eks.cluster_name]

  oidc_providers = {
    default = {
      provider_arn = module.eks.oidc_provider_arn
      namespace_service_accounts = [
        "kube-system:aws-cluster-autoscaler",
      ]
    }
  }
}

# Installs the Kubernetes cluster-autoscaler chart into kube-system.
#
# NOTE(review): the IRSA trust policy in module.cluster_autoscaler_irsa_role is
# scoped to kube-system:aws-cluster-autoscaler; presumably the chart names its
# service account after this release - verify.
# NOTE(review): confirm that chart version 9.24.0 ships an autoscaler build
# compatible with the cluster_version configured on module.eks.
resource "helm_release" "aws_cluster_autoscaler" {
  name      = "aws-cluster-autoscaler"
  namespace = "kube-system"

  repository = "https://kubernetes.github.io/autoscaler"
  chart      = "cluster-autoscaler"
  version    = "9.24.0"

  # Block until the release is ready, for at most 600 seconds.
  wait    = true
  timeout = 600

  # Chart value overrides, one `set` block per entry. The dots inside the
  # annotation key are escaped so Helm treats it as a single key.
  dynamic "set" {
    for_each = {
      "autoDiscovery.clusterName"                                      = module.eks.cluster_name
      "awsRegion"                                                      = data.aws_region.current.name
      "rbac.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" = module.cluster_autoscaler_irsa_role.iam_role_arn
    }

    content {
      name  = set.key
      value = set.value
    }
  }

  depends_on = [module.eks]
}
62 changes: 62 additions & 0 deletions providers/aws/eks_auth.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# One IAM role lookup per pattern in var.admin_role_regexes; each instance is
# keyed by the pattern itself.
data "aws_iam_roles" "admin_regexes" {
  for_each = toset(var.admin_role_regexes)

  name_regex = each.key
}

# Builds the contents of the aws-auth ConfigMap (mapRoles / mapUsers).
locals {
  # ARN of the role matched by each admin regex. one() requires every regex to
  # match at most one role: it fails on several matches and yields null on none.
  admin_regex_role_arns = [
    for lookup in data.aws_iam_roles.admin_regexes : one(lookup.arns)
  ]

  # Same ARNs with any IAM path removed: keep everything before the first "/"
  # and the final "/"-separated segment (the role name).
  pathless_admin_regex_role_arns = [
    for arn in local.admin_regex_role_arns :
    join("/", [split("/", arn)[0], reverse(split("/", arn))[0]])
  ]

  # Regex-discovered admin roles followed by the explicitly listed ones.
  admin_role_arns = concat(
    local.pathless_admin_regex_role_arns,
    var.admin_role_arns,
  )

  # Admin roles are mapped to the system:masters group.
  admin_role_configmap_data = [
    for arn in local.admin_role_arns : {
      rolearn  = arn
      username = "union-admin"
      groups   = ["system:masters"]
    }
  ]

  # Node roles are mapped to the node bootstrap groups.
  node_role_configmap_data = [
    for arn in var.node_role_arns : {
      rolearn  = arn
      username = "system:node:{{EC2PrivateDNSName}}"
      groups   = ["system:bootstrappers", "system:nodes"]
    }
  ]

  # IAM users mapped to the system:masters group.
  admin_user_arns = [
    for user_arn in var.admin_user_arns : {
      userarn = user_arn
      groups  = ["system:masters"]
    }
  ]

  # yamlencode() output with every double quote removed.
  aws_auth_configmap_data = {
    mapRoles = replace(
      yamlencode(concat(
        local.admin_role_configmap_data,
        local.node_role_configmap_data,
      )),
      "\"",
      "",
    )
    mapUsers = replace(
      yamlencode(local.admin_user_arns),
      "\"",
      "",
    )
  }
}

# Writes the generated role/user mappings into the existing
# kube-system/aws-auth ConfigMap.
resource "kubernetes_config_map_v1_data" "aws_auth" {
  metadata {
    name      = "aws-auth"
    namespace = "kube-system"
  }

  data = local.aws_auth_configmap_data

  # NOTE(review): force presumably lets Terraform take over data already
  # managed by another field manager - confirm against the provider docs.
  force = true
}
84 changes: 84 additions & 0 deletions providers/aws/iam.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
locals {
  # NOTE(review): union_backend_ksas is not referenced anywhere in the visible
  # source; confirm whether it is still needed.
  union_backend_ksas = ["flytepropeller"]

  # The KSA that Task Pods will use
  union_ksas = ["default"]

  # Every "<project>-<domain>:<ksa>" combination, i.e. the
  # namespace:service-account pairs used by the worker IRSA role.
  union_worker_wi_members = toset(flatten([
    for project in local.union_projects : [
      for domain in local.union_domains : [
        for ksa in local.union_ksas : "${project}-${domain}:${ksa}"
      ]
    ]
  ]))
}

# Object read/write/delete and bucket listing on the union-data S3 bucket.
data "aws_iam_policy_document" "union_data_bucket_policy" {
  statement {
    sid    = ""
    effect = "Allow"

    actions = [
      "s3:DeleteObject*",
      "s3:GetObject*",
      "s3:ListBucket",
      "s3:PutObject*",
    ]

    # The bucket itself and every object inside it.
    resources = [
      "arn:aws:s3:::${module.union-data.s3_bucket_id}",
      "arn:aws:s3:::${module.union-data.s3_bucket_id}/*",
    ]
  }
}

# Backend policy document, currently composed of the data bucket policy only.
data "aws_iam_policy_document" "union_backend_iam_policy" {
  source_policy_documents = compact([
    data.aws_iam_policy_document.union_data_bucket_policy.json,
  ])
}

# Managed IAM policy created from the backend policy document.
resource "aws_iam_policy" "union_backend_iam_policy" {
  name   = "${local.name_prefix}-flyte-backend-iam-policy"
  policy = data.aws_iam_policy_document.union_backend_iam_policy.json
}

# IRSA role carrying the backend policy, assumable by the flytepropeller,
# flyteadmin and datacatalog service accounts in the flyte namespace.
module "union_backend_irsa_role" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
  version = "5.11.2"

  role_name                  = "${local.name_prefix}-backend-role"
  assume_role_condition_test = "StringEquals"

  role_policy_arns = {
    default = aws_iam_policy.union_backend_iam_policy.arn
  }

  oidc_providers = {
    default = {
      provider_arn = module.eks.oidc_provider_arn
      namespace_service_accounts = [
        "flyte:flytepropeller",
        "flyte:flyteadmin",
        "flyte:datacatalog",
      ]
    }
  }
}

# Worker policy document, currently composed of the data bucket policy only.
data "aws_iam_policy_document" "union_worker_iam_policy" {
  source_policy_documents = compact([
    data.aws_iam_policy_document.union_data_bucket_policy.json,
  ])
}

# Managed IAM policy created from the worker policy document.
resource "aws_iam_policy" "flyte_worker_iam_policy" {
  name   = "${local.name_prefix}-flyte-worker-iam-policy"
  policy = data.aws_iam_policy_document.union_worker_iam_policy.json
}

# IRSA role carrying the worker policy, assumable by every
# namespace:service-account pair in local.union_worker_wi_members.
module "flyte_worker_irsa_role" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
  version = "5.11.2"

  role_name                  = "${local.name_prefix}-flyte-worker"
  assume_role_condition_test = "StringEquals"

  role_policy_arns = {
    default = aws_iam_policy.flyte_worker_iam_policy.arn
  }

  oidc_providers = {
    default = {
      provider_arn               = module.eks.oidc_provider_arn
      namespace_service_accounts = local.union_worker_wi_members
    }
  }
}
52 changes: 52 additions & 0 deletions providers/aws/locals.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
locals {
  # TODO: move to vars
  project     = "union"
  environment = "terraform"
  name_prefix = join("-", [local.project, local.environment])
  account_id  = data.aws_caller_identity.current.account_id

  union_projects = ["flytesnacks"]
  union_domains  = ["development", "staging", "production"]

  # TODO: move to vars as well
  azs             = data.aws_availability_zones.available.zone_ids
  main_cidr_block = "10.0.0.0/16"

  # One CIDR per availability zone, numbered from the zone's list position.
  # NOTE(review): the private /16 ranges (10.1.0.0/16, 10.2.0.0/16, ...) lie
  # outside main_cidr_block; presumably the VPC module attaches them as
  # secondary CIDR blocks - confirm against the VPC definition.
  private_subnets = [
    for i in range(length(local.azs)) : format("10.%d.0.0/16", i + 1)
  ]
  public_subnets = [
    for i in range(length(local.azs)) : format("10.0.%d.0/24", i + 1)
  ]
  database_subnets = [
    for i in range(length(local.azs)) : format("10.0.%d.0/24", i + 10)
  ]

  # Per node group, the cluster-autoscaler node-template tags describing the
  # labels, taints and resources of nodes in that group.
  nodegroup_asg_tags = {
    for name, ng in var.node_groups : name => merge(
      # Capacity type (spot vs. on-demand)
      {
        "k8s.io/cluster-autoscaler/node-template/label/eks.amazonaws.com/capacityType" = ng.spot ? "SPOT" : "ON_DEMAND"
      },
      # Ephemeral storage, taken from the root disk size
      {
        "k8s.io/cluster-autoscaler/node-template/resources/ephemeral-storage" = "${ng.root_disk_size_gb}G"
      },
      # GPUs: accelerator label, GPU count and GPU taint
      ng.gpu_count == 0 ? {} : {
        "k8s.io/cluster-autoscaler/node-template/label/k8s.amazonaws.com/accelerator" = ng.gpu_accelerator
        "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu"            = tostring(ng.gpu_count)
        "k8s.io/cluster-autoscaler/node-template/taint/nvidia.com/gpu"                = "present:NoSchedule"
      },
      # Dedicated node role: label and taint
      ng.dedicated_node_role == null ? {} : {
        "k8s.io/cluster-autoscaler/node-template/label/flyte.org/node-role" = ng.dedicated_node_role
        "k8s.io/cluster-autoscaler/node-template/taint/flyte.org/node-role" = "${ng.dedicated_node_role}:NoSchedule"
      }
    )
  }
}
Empty file added providers/aws/output.tf
Empty file.
Loading

0 comments on commit 25eb32e

Please sign in to comment.