diff --git a/.github/workflows/deploy-daily-snapshot.yml b/.github/workflows/deploy-daily-snapshot.yml index 224be8b77..c79e4fa51 100644 --- a/.github/workflows/deploy-daily-snapshot.yml +++ b/.github/workflows/deploy-daily-snapshot.yml @@ -1,13 +1,14 @@ name: Snapshot Service -concurrency: ci-${{ github.ref }} +concurrency: ci-${{ github.ref }}-snapshot-service on: pull_request: branches: - main paths: - - 'terraform/daily_snapshot/**' - - 'terraform/modules/daily_snapshot/**' + - 'tf-managed/modules/daily-snapshot/**' + - 'tf-managed/scripts/**' + - 'tf-managed/live/environments/prod/applications/snapshot-service/**' # This needs to be declared explicitly so that the job is actually # run when moved out of draft. types: [opened, synchronize, reopened, ready_for_review] @@ -15,31 +16,33 @@ on: branches: - main paths: - - 'terraform/daily_snapshot/**' - - 'terraform/modules/daily_snapshot/**' + - 'tf-managed/modules/daily-snapshot/**' + - 'tf-managed/scripts/**' + - 'tf-managed/live/environments/prod/applications/snapshot-service/**' workflow_dispatch: jobs: - deploy-daily-snapshot-calibnet: - name: Deploy + deploy-daily-snapshot: + env: + TF_VAR_monitoring: "{ \"enable\": true,\"slack_enable\":true,\"slack_destination_id\":\"${{ secrets.SLACK_DESTINATION_ID }}\",\"slack_channel_id\":\"${{ secrets.SLACK_CHANNEL_ID }}\"}" runs-on: ubuntu-latest permissions: write-all steps: - name: Checkout the code uses: actions/checkout@v4 - # Using Custom Composite action in ./composite-action/terraform folder - - name: Composite Action for Deploying Terraform Resources - uses: ./composite-action/terraform + # Using Custom Composite action in ./composite-action/terragrunt folder + - name: Composite Action for Deploying Terragrunt Resources + uses: ./composite-action/terragrunt with: do_token: ${{ secrets.DO_TOKEN }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} 
r2_access_key: ${{ secrets.R2_ACCESS_KEY }} r2_secret_key: ${{ secrets.R2_SECRET_KEY }} slack_token: ${{ secrets.SLACK_TOKEN }} - working_directory: terraform/daily_snapshot/prod - environment: Snapshot Service + working_directory: tf-managed/live/environments/prod/applications/snapshot-service + service_name: Snapshot Service new_relic_account_id: ${{ secrets.NEW_RELIC_ACCOUNT_ID }} new_relic_api_key: ${{ secrets.NEW_RELIC_API_KEY }} + ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} diff --git a/.github/workflows/deploy-sync-check.yml b/.github/workflows/deploy-sync-check.yml index 9f1b1723c..3c4f86aa2 100644 --- a/.github/workflows/deploy-sync-check.yml +++ b/.github/workflows/deploy-sync-check.yml @@ -1,40 +1,46 @@ name: Sync Check Service -concurrency: ci-${{ github.ref }} +concurrency: ci-${{ github.ref }}-sync-check on: pull_request: branches: - main paths: - - 'terraform/sync_check/**' - - 'terraform/modules/sync_check/**' + - 'tf-managed/modules/sync-check/**' + - 'tf-managed/scripts/**' + - 'tf-managed/live/environments/prod/applications/sync-check/**' + # This needs to be declared explicitly so that the job is actually + # run when moved out of draft. 
+ types: [opened, synchronize, reopened, ready_for_review] push: branches: - main paths: - - 'terraform/sync_check/**' - - 'terraform/modules/sync_check/**' + - 'tf-managed/modules/sync-check/**' + - 'tf-managed/scripts/**' + - 'tf-managed/live/environments/prod/applications/sync-check/**' workflow_dispatch: jobs: - sync-check: - name: Deploy + deploy-sync-check: runs-on: ubuntu-latest permissions: write-all steps: - name: Checkout the code uses: actions/checkout@v4 - # Using Custom Composite action in ./composite-action/terraform folder - - name: Composite Action for Deploying Terraform Resources - uses: ./composite-action/terraform + # Using Custom Composite action in ./composite-action/terragrunt folder + - name: Composite Action for Deploying Terragrunt Resources + uses: ./composite-action/terragrunt with: do_token: ${{ secrets.DO_TOKEN }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} + r2_access_key: ${{ secrets.R2_ACCESS_KEY }} + r2_secret_key: ${{ secrets.R2_SECRET_KEY }} slack_token: ${{ secrets.SLACK_TOKEN }} - working_directory: terraform/sync_check - environment: Sync Check Service + working_directory: tf-managed/live/environments/prod/applications/sync-check + service_name: Sync Check Service new_relic_account_id: ${{ secrets.NEW_RELIC_ACCOUNT_ID }} new_relic_api_key: ${{ secrets.NEW_RELIC_API_KEY }} + ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }} diff --git a/.github/workflows/scripts-lint.yml b/.github/workflows/scripts-lint.yml index fec9d298f..3898e1902 100644 --- a/.github/workflows/scripts-lint.yml +++ b/.github/workflows/scripts-lint.yml @@ -29,7 +29,7 @@ jobs: - name: Run rubocop run: | gem install rubocop --no-document - rubocop scripts/ # TODO: Apply rubocop to terraform/modules/ + rubocop tf-managed/scripts/ # TODO: Apply rubocop to tf-managed/modules/ run-js-linters: runs-on: ubuntu-latest steps: diff --git a/.tflint.hcl 
b/.tflint.hcl index f8979ce8f..18e65e468 100644 --- a/.tflint.hcl +++ b/.tflint.hcl @@ -1,5 +1,5 @@ plugin "terraform" { - enabled = true - version = "0.2.2" - source = "github.com/terraform-linters/tflint-ruleset-terraform" + enabled = true + version = "0.5.0" + source = "github.com/terraform-linters/tflint-ruleset-terraform" } diff --git a/composite-action/terragrunt/action.yml b/composite-action/terragrunt/action.yml new file mode 100644 index 000000000..d0ee8beb3 --- /dev/null +++ b/composite-action/terragrunt/action.yml @@ -0,0 +1,190 @@ +name: Custom Composite action to deploy terragrunt resources + +description: | + This action deploys the Forest infrastructure with Terragrunt + +inputs: + service_name: + description: 'Human-readable name of the service' + required: true + do_token: + description: 'The DigitalOcean access token to use for deploying the infrastructure' + required: true + aws_access_key_id: + description: 'S3 access keys id used by terraform and service like sync check, Deploy Snapshot Service etc' + required: true + aws_secret_access_key: + description: 'S3 secret access keys used by terraform and service like sync check, Deploy Snapshot Service etc' + required: true + working_directory: + description: 'The working Directory' + required: true + ssh_private_key: + description: 'The SSH private key to use for connecting to Droplets via SSH' + slack_token: + description: 'The slack token secret used to connect the Infrastructure to Slack' + new_relic_api_key: + description: 'The New Relic API KEY' + nr_license_key: + description: 'The New Relic Access Token' + new_relic_account_id: + description: 'The New Relic Platform Region' + r2_access_key: + description: 'CloudFlare R2 access key id' + r2_secret_key: + description: 'CloudFlare R2 private access key' + +runs: + using: "composite" + steps: + # Workaround for https://github.com/orgs/community/discussions/51280 + - name: Set TF/TG versions + shell: bash + run: | + echo "tf_version=1.6.6" >> 
$GITHUB_ENV + echo "tg_version=0.53.2" >> $GITHUB_ENV + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v2 + with: + terraform_version: v${{ env.tf_version }} + # This is required for Terragrunt to parse Terraform outputs. + terraform_wrapper: false + + - name: Setup Terragrunt + shell: bash + run: | + sudo wget -q -O /bin/terragrunt "https://github.com/gruntwork-io/terragrunt/releases/download/v${{ env.tg_version }}/terragrunt_linux_amd64" + sudo chmod +x /bin/terragrunt + terragrunt -v + + - name: Check terragrunt HCL + shell: bash + working-directory: ${{ inputs.working_directory }} + run: | + terragrunt hclfmt --terragrunt-check --terragrunt-diff + + - name: Validate + shell: bash + working-directory: ${{ inputs.working_directory }} + run: | + terragrunt validate + env: + AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + + - name: Plan + if: github.event_name == 'pull_request' + id: plan + shell: bash + working-directory: ${{ inputs.working_directory }} + continue-on-error: true + env: + AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + TF_VAR_digitalocean_token: ${{ inputs.do_token }} + TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} + TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} + TF_VAR_slack_token: ${{ inputs.slack_token }} + TF_VAR_new_relic_api_key: ${{ inputs.new_relic_api_key }} + TF_VAR_new_relic_account_id: ${{ inputs.new_relic_account_id }} + run: | + terragrunt plan -no-color --terragrunt-non-interactive -out ${{ github.workspace }}/tfplan | tee output + echo 'stdout<<EOF' >> $GITHUB_OUTPUT + cat output >> $GITHUB_OUTPUT + echo 'EOF' >> $GITHUB_OUTPUT + + - name: Find Comment + if: github.event.pull_request.draft == false && + github.event_name == 'pull_request' + uses: 
peter-evans/find-comment@v2 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-regex: "^### Forest: ${{ inputs.service_name }} Infrastructure Plan" + + + - name: Create or Update Comment + if: github.event.pull_request.draft == false && + github.event_name == 'pull_request' && + !contains(steps.plan.outputs.stdout, 'No changes. Your infrastructure matches the configuration.') + uses: peter-evans/create-or-update-comment@v2 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body: | + ### Forest: ${{ inputs.service_name }} Infrastructure Plan: ${{ steps.plan.outcome }} + +
<details><summary>Show Plan</summary> + + ``` + ${{ steps.plan.outputs.stdout }} + ``` + + </details>
+ edit-mode: replace + + - name: Delete Comment + uses: detomarco/delete-comments@v1.0.4 + if: github.event.pull_request.draft == false && + github.event_name == 'pull_request' && + contains(steps.plan.outputs.stdout, 'No changes. Your infrastructure matches the configuration.') + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + + - name: Terragrunt Plan Status + if: steps.plan.outcome == 'failure' + shell: bash + run: exit 1 + + - name: Configure ssh-agent + if: github.ref == 'refs/heads/main' && ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) + uses: webfactory/ssh-agent@v0.8.0 + with: + ssh-private-key: ${{ inputs.ssh_private_key }} + + - name: Terragrunt Apply + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + shell: bash + run: | + if grep -q 'No changes.' ${{ github.workspace }}/tfplan; then + echo "No changes detected." + else + echo "Changes detected. Redeploying everything..." + terragrunt destroy -auto-approve --terragrunt-non-interactive + terragrunt apply -auto-approve --terragrunt-non-interactive + fi + working-directory: ${{ inputs.working_directory }} + env: + AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + TF_VAR_digitalocean_token: ${{ inputs.do_token }} + TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} + TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} + TF_VAR_slack_token: ${{ inputs.slack_token }} + TF_VAR_new_relic_api_key: ${{ inputs.new_relic_api_key }} + TF_VAR_new_relic_account_id: ${{ inputs.new_relic_account_id }} + + - name: Terragrunt Force Apply + if: github.ref == 'refs/heads/main' && github.event_name == 'workflow_dispatch' + working-directory: ${{ inputs.working_directory }} + env: + AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + AWS_SECRET_ACCESS_KEY: ${{ 
inputs.aws_secret_access_key }} + TF_VAR_digitalocean_token: ${{ inputs.do_token }} + TF_VAR_AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }} + TF_VAR_AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }} + TF_VAR_R2_ACCESS_KEY: ${{ inputs.r2_access_key }} + TF_VAR_R2_SECRET_KEY: ${{ inputs.r2_secret_key }} + TF_VAR_slack_token: ${{ inputs.slack_token }} + TF_VAR_new_relic_api_key: ${{ inputs.new_relic_api_key }} + TF_VAR_new_relic_account_id: ${{ inputs.new_relic_account_id }} + shell: bash + run: | + terragrunt destroy -auto-approve --terragrunt-non-interactive + terragrunt apply -auto-approve --terragrunt-non-interactive diff --git a/scripts/Gemfile b/scripts/Gemfile deleted file mode 100644 index c90bdd858..000000000 --- a/scripts/Gemfile +++ /dev/null @@ -1,7 +0,0 @@ -# frozen_string_literal: true - -source 'https://rubygems.org' - -gem 'docker-api', '>= 2.2.0' -gem 'slack-ruby-client', '>= 2.1.0' -gem 'sys-filesystem', '>= 1.4.3' diff --git a/scripts/Gemfile.lock b/scripts/Gemfile.lock deleted file mode 100644 index 7c8b69809..000000000 --- a/scripts/Gemfile.lock +++ /dev/null @@ -1,42 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - did_you_mean (1.6.3) - docker-api (2.2.0) - excon (>= 0.47.0) - multi_json - excon (0.99.0) - faraday (2.7.4) - faraday-net_http (>= 2.0, < 3.1) - ruby2_keywords (>= 0.0.4) - faraday-mashify (0.1.1) - faraday (~> 2.0) - hashie - faraday-multipart (1.0.4) - multipart-post (~> 2) - faraday-net_http (3.0.2) - ffi (1.15.5) - gli (2.21.0) - hashie (5.0.0) - multi_json (1.15.0) - multipart-post (2.3.0) - ruby2_keywords (0.0.5) - slack-ruby-client (2.1.0) - faraday (>= 2.0) - faraday-mashify - faraday-multipart - gli - hashie - sys-filesystem (1.4.3) - ffi (~> 1.1) - -PLATFORMS - x86_64-linux - -DEPENDENCIES - docker-api (>= 2.2.0) - slack-ruby-client (>= 2.1.0) - sys-filesystem (>= 1.4.3) - -BUNDLED WITH - 2.3.4 diff --git a/scripts/install-new-relic.sh b/scripts/install-new-relic.sh deleted file mode 100755 index 
2d2884156..000000000 --- a/scripts/install-new-relic.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# This script offers an easy way to install the New Relic infrastructure agent for -# basic monitoring on Ubuntu systems, without needing administrative privileges. -# To get started, simply set your New Relic license key with the command export NR_LICENSE_KEY=your_license_key_here. - -set -euo pipefail - -# Setting DEBIAN_FRONTEND to ensure non-interactive operations for APT -export DEBIAN_FRONTEND=noninteractive - -# Add New Relic's apt repository -curl -fsSL https://download.newrelic.com/infrastructure_agent/gpg/newrelic-infra.gpg | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/newrelic-infra.gpg -echo "deb https://download.newrelic.com/infrastructure_agent/linux/apt focal main" | sudo tee -a /etc/apt/sources.list.d/newrelic-infra.list - -# Check if NR_LICENSE_KEY is set, if not ask for it -if [[ -z "${NR_LICENSE_KEY:-}" ]]; then - read -rp "Please enter your NR_LICENSE_KEY: " NR_LICENSE_KEY -fi - -# Update the package list -sudo apt-get update - -# The provided configurations are specific to New Relic. To gain a deeper understanding of these configuration details, you can visit: -# https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent/configuration/infrastructure-agent-configuration-settings/#offline-time-to-reset -cat >> /etc/newrelic-infra.yml < -``` - -To ensure the production Snapshot service remains intact, modify certain variables in the `Main.tf` file: - -- Change `key = "sync_check.tfstate"` to `key = ".tfstate"`. -- Replace `name = "forest-sync-check"` with `name = ""`. -- Replace ` slack_channel = "#forest-notifications"` with `slack_channel = "#forest-dump"` - -Remember to replace ``, ``, and `` with appropriate values. 
- -To prepare terraform for other commands: -```bash -$ terraform init -``` - -To inspect a new deployment plan (it'll tell you which servers will be removed, -added, etc.): -```bash -$ terraform plan -``` -For Mac users, if you encounter the `Error: External Program Execution Failed`, you'll need to adjust the `prep_sources.sh` file located in the `../modules/sync_check` directory. Make the following changes: - -- Replace `--archive` with `-Rp`. -- Install `gnu-tar` using the command `brew install gnu-tar`. Afterward, switch `tar cf ../sources.tar` to `gtar cf ../sources.tar` - -To deploy the service: -```bash -$ terraform apply -``` - -To shutdown the service: -```bash -$ terraform destroy -``` diff --git a/terraform/sync_check/main.tf b/terraform/sync_check/main.tf deleted file mode 100644 index 9fb85069d..000000000 --- a/terraform/sync_check/main.tf +++ /dev/null @@ -1,49 +0,0 @@ -terraform { - required_version = "~> 1.3" - - backend "s3" { - # Note: This is the bucket for the internal terraform state. This bucket is - # completely independent from the bucket that contains snapshots. - bucket = "forest-iac" - # This key uniquely identifies the service. To create a new service (instead - # of modifying this one), use a new key. Unfortunately, variables may not be - # used here. - key = "sync_check.tfstate" - - # This value is completely unused by DO but _must_ be a known AWS region. - region = "us-west-1" - # The S3 region is determined by the endpoint. fra1 = Frankfurt. - # This region does not have to be shared by the droplet. - endpoints = { - s3 = "https://fra1.digitaloceanspaces.com" - } - - # Credentially can be validated through the Security Token Service (STS). - # Unfortunately, DigitalOcean does not support STS so we have to skip the - # validation. 
- skip_credentials_validation = "true" - skip_requesting_account_id = "true" - skip_s3_checksum = "true" - } -} - -module "sync_check" { - # Import the sync_check module - source = "../modules/sync_check" - - # Configure service: - name = "forest-sync-check" # droplet name - size = "s-4vcpu-16gb-amd" # droplet size - slack_channel = "#forest-notifications" # slack channel for notifications - - # Variable passthrough: - slack_token = var.slack_token - digitalocean_token = var.do_token - NEW_RELIC_API_KEY = var.NEW_RELIC_API_KEY - NEW_RELIC_ACCOUNT_ID = var.NEW_RELIC_ACCOUNT_ID -} - -# This ip address may be used in the future by monitoring software -output "ip" { - value = [module.sync_check.ip] -} diff --git a/terraform/sync_check/variable.tf b/terraform/sync_check/variable.tf deleted file mode 100644 index 2e5b27545..000000000 --- a/terraform/sync_check/variable.tf +++ /dev/null @@ -1,23 +0,0 @@ -variable "do_token" { - description = "Token for authentication." - type = string - sensitive = true -} - -variable "slack_token" { - description = "slack access token" - type = string - sensitive = true -} - -variable "NEW_RELIC_API_KEY" { - description = "New Relic API KEY" - type = string - sensitive = true -} - -variable "NEW_RELIC_ACCOUNT_ID" { - description = "The New Relic Account ID" - type = string - sensitive = true -} diff --git a/tf-managed/.gitignore b/tf-managed/.gitignore new file mode 100644 index 000000000..6304eb3c1 --- /dev/null +++ b/tf-managed/.gitignore @@ -0,0 +1,34 @@ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. 
+*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc diff --git a/tf-managed/README.md b/tf-managed/README.md new file mode 100644 index 000000000..3c57cf427 --- /dev/null +++ b/tf-managed/README.md @@ -0,0 +1,47 @@ +# Terraform-managed + +This directory contains services and assets managed via Terraform/Terragrunt. + +# Structure + +``` +├── scripts # common code, shared between all modules +├── live # actual environment definitions, managed by Terragrunt +└── modules # Terraform modules, from which the environment is built +``` + +# Requirements + +### Software + +* [terraform](https://developer.hashicorp.com/terraform/install), +* [terragrunt](https://terragrunt.gruntwork.io/docs/getting-started/install/) + +For recommended versions, please refer to the [workflow file](../composite-action/terragrunt/action.yml). + +### Secrets + +Refer to [environment README](./live/README.md) or module-specific README. + +# Adding new services + +1. Create a Terraform module in [modules](./modules). A suggested structure of such a module is: + * `main.tf` - the core resources around the service. + * `variable.tf` - inputs to the module, e.g., enable Slack notifications. + * `outputs.tf` - outputs of the module, e.g., created VPS IP. + * `provider.tf` - `terraform` and `provider` blocks to keep the versioning in one place. + * `service/` - directory with the actual service implementation. + * Other files and directories based on needs, e.g., `monitoring` to generate monitoring resources. 
+Ensure that names in the module, when needed, contain the environment. This provides a basic level of separation. + +2. Create a Terragrunt service in your own development environment and assert that it works correctly: + * inside [live](./live), execute `make create-environment`. Go to that directory. + * inside the `applications/`, create your `fancy-app` directory and a `terragrunt.hcl` file. There, you will invoke the created module with input variables. + * run `terragrunt plan` to assert that all variables are set correctly and that the plan output matches your expectations, + * run `terragrunt apply` to apply the plan. + * perform necessary assertions (the resources are created, the server responds to requests, and monitoring outputs make sense). + * if all is good, tear down the service with `terragrunt destroy`. + +3. Copy the tested service to [dev](./live/environments/dev/applications) and to [prod](./live/environments/prod/applications). Remove your environment directory. + +4. Make a PR! diff --git a/tf-managed/live/.gitignore b/tf-managed/live/.gitignore new file mode 100644 index 000000000..8a151cc06 --- /dev/null +++ b/tf-managed/live/.gitignore @@ -0,0 +1,19 @@ +*.tf +.*.sw? +.idea +terragrunt.iml +vendor +.terraform +.vscode +*.tfstate +*.tfstate.backup +*.out +.terragrunt-cache +.bundle +.ruby-version +.terraform.lock.hcl +.DS_Store + +# Personal development environments +environments/dev-* +.dev_environment diff --git a/tf-managed/live/Makefile b/tf-managed/live/Makefile new file mode 100644 index 000000000..0a2a200a8 --- /dev/null +++ b/tf-managed/live/Makefile @@ -0,0 +1,41 @@ +# General-purpose Makefile for managing the environments. + +DEV_ENVIRONMENT_FILE=.dev_environment + +# Creates a new environment. The name of the environment is generated +# randomly and stored in the .dev_environment file. 
+${DEV_ENVIRONMENT_FILE}: + $(eval export ENVIRONMENT=dev-$(shell cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 8 | head -n 1)) + @cp -r environments/dev environments/$(ENVIRONMENT) + @echo "Environment: $(ENVIRONMENT). Happy hacking!" + @echo $(ENVIRONMENT) > ${DEV_ENVIRONMENT_FILE} + +# Creates a new environment. +create-environment: ${DEV_ENVIRONMENT_FILE} + +# Deploys the entire development environment. Feel free to remove the +# services you don't need. +deploy-dev: ${DEV_ENVIRONMENT_FILE} + $(eval export ENVIRONMENT=$(shell cat ${DEV_ENVIRONMENT_FILE})) + @test -n "$(ENVIRONMENT)" || (echo "ENVIRONMENT is not set" && exit 1) + @echo "Deploying $(ENVIRONMENT)..." + @cd environments/$(ENVIRONMENT) && terragrunt run-all apply + @echo "Environment $(ENVIRONMENT) deployed." + +# Tears down the entire development environment and removes the +# environment directory. +destroy-dev: ${DEV_ENVIRONMENT_FILE} + $(eval export ENVIRONMENT=$(shell cat ${DEV_ENVIRONMENT_FILE})) + @test -n "$(ENVIRONMENT)" || (echo "ENVIRONMENT is not set" && exit 1) + @echo "Destroying $(ENVIRONMENT)..." + @cd environments/$(ENVIRONMENT) && terragrunt run-all destroy + @rm -rf environments/$(ENVIRONMENT) + @rm ${DEV_ENVIRONMENT_FILE} + @echo "Environment $(ENVIRONMENT) destroyed." + +# Deploys the entire production environment. +deploy-prod: + @echo "Deploying to production..." + cd environments/prod && terragrunt run-all apply + +.PHONY: create-environment deploy-dev destroy-dev deploy-prod diff --git a/tf-managed/live/README.md b/tf-managed/live/README.md new file mode 100644 index 000000000..0c059def1 --- /dev/null +++ b/tf-managed/live/README.md @@ -0,0 +1,94 @@ +All Terragrunt configurations live here. To edit Terraform files, go to `../modules`. + +# Summary +The Terragrunt configurations manage the actual environments and, in principle, should reflect the current state of the given environment. 
+ +# Development +As a developer, you should create your own environment, separated from the others. In this directory, execute `make create-environment`, which will create one for you. Do not work on the `dev` environment directly, as others may also be working on it. + +``` +❯ make create-environment +Environment: dev-7zryf85r. Happy hacking! +``` + +Inside the specific application in the environment, run: +``` +❯ terragrunt plan +``` +This command will show you the resources to be changed/created/destroyed. + +``` +❯ terragrunt apply +``` + +After ensuring the changes work correctly, merge the changes from your development environment to the base one and, possibly, `prod`. + +Remember to clean up your environment. Use `terragrunt destroy` or use `make destroy-dev`. Refer to the [Makefile](./Makefile) for details. + + +# Conventions + +## Environments + +There is no notion of a `staging` environment, though one may be introduced in the future. + +``` +. +├── dev # Development environment template for custom environments. +├── dev-<name> # Personal development environment +└── prod # Production environment. It should reflect reality. +``` + +The `prod` environment should be deployed only by GH workers and not manually. + +Each environment contains its respective `applications/`. A `base-infrastructure` may be created to denote resources shared between applications. Each application should include a single `terragrunt.hcl` file which only sets its configuration and, optionally, defines dependencies. The application code itself should be defined in `../modules`. + + +``` +└── applications + ├── snapshot-monitoring + │   └── terragrunt.hcl + ├── snapshot-service + │   └── terragrunt.hcl + └── sync-check + └── terragrunt.hcl +``` + +The difference between a `prod` and a `dev` application should be minimal. This would include a different Slack notification channel (already handled by the root `terragrunt.hcl`) or using larger instances for the `prod` environment. 
+ +## Tags + +Wherever applicable, the resources should include the following tags: +- `iac` - indicates the resource is governed by Terraform and should not be mutated outside of the infrastructure code, +- `<environment>` - indicates the environment name. + +# Secrets + +Several secrets need to be defined and provided for the services to work. You can find them in the team's password manager. Each service defines its own set of required variables, though all need access to DigitalOcean. See the modules' documentation for more details. + +``` +################################# +### Required for all services ### +################################# +# DigitalOcean personal access token: https://cloud.digitalocean.com/account/api/tokens +export TF_VAR_digitalocean_token= + +# S3 access keys used by Terraform for the remote state. +export AWS_ACCESS_KEY_ID= +export AWS_SECRET_ACCESS_KEY= + +################################# +####### Service-specific ######## +################################# + +# Required for services with Slack notifications +export TF_VAR_slack_token= + +# Required for access to Cloudflare R2 +export TF_VAR_R2_ACCESS_KEY= +export TF_VAR_R2_SECRET_KEY= + +# Required if New Relic monitoring/alerting is enabled. 
+export TF_VAR_new_relic_api_key= +export TF_VAR_new_relic_account_id= +``` diff --git a/tf-managed/live/environments/dev/applications/snapshot-monitoring/terragrunt.hcl b/tf-managed/live/environments/dev/applications/snapshot-monitoring/terragrunt.hcl new file mode 100644 index 000000000..a92cb9f18 --- /dev/null +++ b/tf-managed/live/environments/dev/applications/snapshot-monitoring/terragrunt.hcl @@ -0,0 +1,10 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/snapshot-monitoring", get_parent_terragrunt_dir()) +} diff --git a/tf-managed/live/environments/dev/applications/snapshot-service/terragrunt.hcl b/tf-managed/live/environments/dev/applications/snapshot-service/terragrunt.hcl new file mode 100644 index 000000000..75c7d00a3 --- /dev/null +++ b/tf-managed/live/environments/dev/applications/snapshot-service/terragrunt.hcl @@ -0,0 +1,22 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/daily-snapshot", get_parent_terragrunt_dir()) +} + +inputs = { + name = "forest-snapshot" + size = "s-4vcpu-16gb-amd" + r2_endpoint = "https://2238a825c5aca59233eab1f221f7aefb.r2.cloudflarestorage.com/" + forest_tag = "latest" + snapshot_bucket = "forest-archive-dev" + + monitoring = { + enable = true, + } +} diff --git a/tf-managed/live/environments/dev/applications/sync-check/terragrunt.hcl b/tf-managed/live/environments/dev/applications/sync-check/terragrunt.hcl new file mode 100644 index 000000000..d8961db58 --- /dev/null +++ b/tf-managed/live/environments/dev/applications/sync-check/terragrunt.hcl @@ -0,0 +1,15 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = 
find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/sync-check", get_parent_terragrunt_dir()) +} + +inputs = { + name = "sync-check" + size = "s-4vcpu-16gb-amd" +} diff --git a/tf-managed/live/environments/prod/applications/snapshot-monitoring/terragrunt.hcl b/tf-managed/live/environments/prod/applications/snapshot-monitoring/terragrunt.hcl new file mode 100644 index 000000000..a92cb9f18 --- /dev/null +++ b/tf-managed/live/environments/prod/applications/snapshot-monitoring/terragrunt.hcl @@ -0,0 +1,10 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/snapshot-monitoring", get_parent_terragrunt_dir()) +} diff --git a/tf-managed/live/environments/prod/applications/snapshot-service/terragrunt.hcl b/tf-managed/live/environments/prod/applications/snapshot-service/terragrunt.hcl new file mode 100644 index 000000000..aa384a3f4 --- /dev/null +++ b/tf-managed/live/environments/prod/applications/snapshot-service/terragrunt.hcl @@ -0,0 +1,22 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/daily-snapshot", get_parent_terragrunt_dir()) +} + +inputs = { + name = "forest-snapshot" + size = "s-4vcpu-16gb-amd" + r2_endpoint = "https://2238a825c5aca59233eab1f221f7aefb.r2.cloudflarestorage.com/" + forest_tag = "v0.16.4" + snapshot_bucket = "forest-archive" + + monitoring = { + enable = true, + } +} diff --git a/tf-managed/live/environments/prod/applications/sync-check/terragrunt.hcl b/tf-managed/live/environments/prod/applications/sync-check/terragrunt.hcl new file mode 100644 index 000000000..d8961db58 --- /dev/null +++ 
b/tf-managed/live/environments/prod/applications/sync-check/terragrunt.hcl @@ -0,0 +1,15 @@ +# Automatically find the root terragrunt.hcl and inherit its +# configuration +include "root" { + path = find_in_parent_folders() +} + +# Load the actual Terraform module +terraform { + source = format("%s/../modules/sync-check", get_parent_terragrunt_dir()) +} + +inputs = { + name = "sync-check" + size = "s-4vcpu-16gb-amd" +} diff --git a/tf-managed/live/terragrunt.hcl b/tf-managed/live/terragrunt.hcl new file mode 100644 index 000000000..56ac84314 --- /dev/null +++ b/tf-managed/live/terragrunt.hcl @@ -0,0 +1,51 @@ +# This is the root terragrunt file. It is used to define the remote state +# and the common inputs for all the services. + +locals { + # Parse the file path we're in to read the env name: e.g., env + # will be "dev" in the dev folder, "stage" in the stage folder, + # etc. + parsed = regex(".*/environments/(?P<env>.*?)/.*", get_terragrunt_dir()) + env = local.parsed.env +} + +# Remote state, separate for each environment and service. +remote_state { + backend = "s3" + generate = { + path = "backend.tf" + if_exists = "overwrite_terragrunt" + } + config = { + // Provide some basic separation between development and production environments. + // Ideally, we'd use separate accounts for each environment, but that's not + // feasible at the moment. + bucket = (local.env == "prod" + ? "forest-iac-bucket-prod" + : "forest-iac-bucket-dev" + ) + key = "${path_relative_to_include()}/terraform.tfstate" + region = "eu-west-1" + endpoint = "https://fra1.digitaloceanspaces.com" + skip_bucket_versioning = true + skip_bucket_ssencryption = true + skip_bucket_root_access = true + skip_bucket_public_access_blocking = true + skip_bucket_enforced_tls = true + skip_credentials_validation = true + skip_metadata_api_check = true + skip_requesting_account_id = true + skip_s3_checksum = true + skip_region_validation = true + } +} + +# Common inputs for all the services.
+inputs = { + # The common resources dir contains common code that we want to share across all services. + # This is a legacy from the previous version of the infrastructure, and will be removed + # in the future. + common_resources_dir = format("%s/../scripts", get_parent_terragrunt_dir()) + slack_channel = (local.env == "prod" ? "#forest-notifications" : "#forest-dump") + environment = local.env +} diff --git a/tf-managed/modules/daily-snapshot/firewall.tf b/tf-managed/modules/daily-snapshot/firewall.tf new file mode 100644 index 000000000..73c324bfb --- /dev/null +++ b/tf-managed/modules/daily-snapshot/firewall.tf @@ -0,0 +1,41 @@ +resource "digitalocean_firewall" "forest-firewall" { + name = format("%s-%s", var.environment, var.name) + + inbound_rule { + protocol = "tcp" + port_range = "22" + source_addresses = var.source_addresses + } + + inbound_rule { + protocol = "tcp" + port_range = "2345" + source_addresses = var.source_addresses + } + + inbound_rule { + protocol = "tcp" + port_range = "80" + source_addresses = var.source_addresses + } + + inbound_rule { + protocol = "udp" + port_range = "53" + source_addresses = var.source_addresses + } + + outbound_rule { + protocol = "tcp" + port_range = "all" + destination_addresses = var.destination_addresses + } + + outbound_rule { + protocol = "udp" + port_range = "53" + destination_addresses = var.destination_addresses + } + + droplet_ids = [digitalocean_droplet.forest.id] +} diff --git a/terraform/modules/daily_snapshot/main.tf b/tf-managed/modules/daily-snapshot/main.tf similarity index 61% rename from terraform/modules/daily_snapshot/main.tf rename to tf-managed/modules/daily-snapshot/main.tf index 5896116d4..c931fb43b 100644 --- a/terraform/modules/daily_snapshot/main.tf +++ b/tf-managed/modules/daily-snapshot/main.tf @@ -5,33 +5,9 @@ # - Copy over the zip file # - Run the init.sh script in the background -terraform { - required_version = "~> 1.3" - - required_providers { - digitalocean = { - source = 
"digitalocean/digitalocean" - version = "~> 2.0" - } - external = { - source = "hashicorp/external" - version = "~> 2.1" - } - local = { - source = "hashicorp/local" - version = "~> 2.1" - } - - } -} - -provider "digitalocean" { - token = var.digitalocean_token -} - // Ugly hack because 'archive_file' cannot mix files and folders. data "external" "sources_tar" { - program = ["sh", "${path.module}/prep_sources.sh", path.module] + program = ["bash", "${path.module}/prep_sources.sh", path.module, var.common_resources_dir] } @@ -42,8 +18,7 @@ data "local_file" "sources" { // Note: The init.sh file is also included in the sources.zip such that the hash // of the archive captures the entire state of the machine. // This is a workaround, and because of this, we need to suppress the tflint warning here -// for unused declarations related to the 'init.sh' file. -// tflint-ignore: terraform_unused_declarations +// for unused declarations related to the 'init.sh' file. tflint-ignore: terraform_unused_declarations data "local_file" "init" { filename = "${path.module}/service/init.sh" } @@ -65,9 +40,9 @@ locals { slack_channel = var.slack_channel, snapshot_bucket = var.snapshot_bucket, snapshot_endpoint = var.snapshot_endpoint, - NEW_RELIC_API_KEY = var.NEW_RELIC_API_KEY, - NEW_RELIC_ACCOUNT_ID = var.NEW_RELIC_ACCOUNT_ID, - NEW_RELIC_REGION = var.NEW_RELIC_REGION, + NEW_RELIC_API_KEY = var.new_relic_api_key, + NEW_RELIC_ACCOUNT_ID = var.new_relic_account_id, + NEW_RELIC_REGION = var.new_relic_region, BASE_FOLDER = "/root", forest_tag = var.forest_tag }) @@ -84,16 +59,18 @@ locals { # Exiting without a sleep sometimes kills the script :-/ "sleep 60s" ] + + service_name = format("%s-%s", var.environment, var.name) } resource "digitalocean_droplet" "forest" { image = var.image - name = var.name + name = local.service_name region = var.region size = var.size # Re-initialize resource if this hash changes: user_data = join("-", [data.local_file.sources.content_sha256, sha256(join("", 
local.init_commands))]) - tags = ["iac"] + tags = ["iac", var.environment] ssh_keys = data.digitalocean_ssh_keys.keys.ssh_keys[*].fingerprint monitoring = true @@ -128,49 +105,12 @@ resource "digitalocean_project_resources" "connect_forest_project" { resources = [digitalocean_droplet.forest.urn] } -resource "digitalocean_firewall" "forest-firewall" { - name = var.name - - inbound_rule { - protocol = "tcp" - port_range = "22" - source_addresses = var.source_addresses - } - - inbound_rule { - protocol = "tcp" - port_range = "2345" - source_addresses = var.source_addresses - } - - inbound_rule { - protocol = "tcp" - port_range = "80" - source_addresses = var.source_addresses - } - - inbound_rule { - protocol = "udp" - port_range = "53" - source_addresses = var.source_addresses - } - - outbound_rule { - protocol = "tcp" - port_range = "all" - destination_addresses = var.destination_addresses - } - - outbound_rule { - protocol = "udp" - port_range = "53" - destination_addresses = var.destination_addresses - } - - droplet_ids = [digitalocean_droplet.forest.id] -} - -# This ip address may be used in the future by monitoring software -output "ip" { - value = [digitalocean_droplet.forest.ipv4_address] +module "monitoring" { + count = var.monitoring.enable ? 
1 : 0 + source = "./monitoring" + service_name = local.service_name + alert_email = var.monitoring.alert_email + slack_enable = var.monitoring.slack_enable + slack_destination_id = var.monitoring.slack_destination_id + slack_channel_id = var.monitoring.slack_channel_id } diff --git a/tf-managed/modules/daily-snapshot/monitoring/main.tf b/tf-managed/modules/daily-snapshot/monitoring/main.tf new file mode 100644 index 000000000..19079d5ca --- /dev/null +++ b/tf-managed/modules/daily-snapshot/monitoring/main.tf @@ -0,0 +1,120 @@ +resource "newrelic_alert_policy" "alert" { + name = format("%s alert policy", var.service_name) +} + +locals { + enable_email = var.alert_email != "" +} + +resource "newrelic_nrql_alert_condition" "disk_space" { + policy_id = newrelic_alert_policy.alert.id + type = "static" + name = "High Disk Utilization" + description = "Alert when disk space usage is high on an the service host" + enabled = true + + nrql { + query = "SELECT latest(diskUsedPercent) FROM StorageSample where entityName = '${var.service_name}'" + } + + critical { + operator = "above" + threshold = 95.0 + threshold_duration = 300 + threshold_occurrences = "ALL" + } + + warning { + operator = "above" + threshold = 85.0 + threshold_duration = 300 + threshold_occurrences = "ALL" + } +} + +resource "newrelic_notification_destination" "email" { + count = local.enable_email ? 1 : 0 + name = format("%s email", var.service_name) + type = "EMAIL" + + property { + key = "email" + value = var.alert_email + } +} + +resource "newrelic_notification_channel" "email-channel" { + count = local.enable_email ? 1 : 0 + name = format("%s email", var.service_name) + type = "EMAIL" + product = "IINT" + destination_id = newrelic_notification_destination.email[0].id + + property { + key = "subject" + value = format("%s alert", var.service_name) + } +} + +resource "newrelic_notification_channel" "slack-channel" { + count = var.slack_enable ? 
1 : 0 + name = format("%s slack", var.service_name) + type = "SLACK" + destination_id = var.slack_destination_id + product = "IINT" + + property { + key = "channelId" + value = var.slack_channel_id + } + + property { + key = "customDetailsSlack" + value = "issue id - {{issueId}}" + } +} + + +resource "newrelic_workflow" "alerting-workflow-mails" { + count = local.enable_email ? 1 : 0 + name = format("%s mail alerting workflow", var.service_name) + muting_rules_handling = "NOTIFY_ALL_ISSUES" + + issues_filter { + name = format("%s alerting workflow filter", var.service_name) + type = "FILTER" + + predicate { + attribute = "labels.policyIds" + operator = "EXACTLY_MATCHES" + values = [newrelic_alert_policy.alert.id] + } + } + + destination { + channel_id = newrelic_notification_channel.email-channel[0].id + } +} + +# Limitation of NR provider - only one workflow can be created per channel. Might be resolved in the future. +# https://registry.terraform.io/providers/newrelic/newrelic/latest/docs/resources/workflow#nested-destination-blocks +resource "newrelic_workflow" "alerting-workflow-slack" { + count = var.slack_enable ? 
1 : 0 + name = format("%s slack alerting workflow", var.service_name) + muting_rules_handling = "NOTIFY_ALL_ISSUES" + + issues_filter { + name = format("%s alerting workflow filter", var.service_name) + type = "FILTER" + + predicate { + attribute = "labels.policyIds" + operator = "EXACTLY_MATCHES" + values = [newrelic_alert_policy.alert.id] + } + } + + destination { + channel_id = newrelic_notification_channel.slack-channel[0].id + } +} diff --git a/tf-managed/modules/daily-snapshot/monitoring/provider.tf b/tf-managed/modules/daily-snapshot/monitoring/provider.tf new file mode 100644 index 000000000..992d8225f --- /dev/null +++ b/tf-managed/modules/daily-snapshot/monitoring/provider.tf @@ -0,0 +1,9 @@ +terraform { + required_version = "~> 1.6" + required_providers { + newrelic = { + source = "newrelic/newrelic" + version = "~> 3.0" + } + } +} diff --git a/tf-managed/modules/daily-snapshot/monitoring/variable.tf b/tf-managed/modules/daily-snapshot/monitoring/variable.tf new file mode 100644 index 000000000..76499aa60 --- /dev/null +++ b/tf-managed/modules/daily-snapshot/monitoring/variable.tf @@ -0,0 +1,26 @@ +variable "service_name" { + description = "The name of the service" + type = string +} + +variable "alert_email" { + description = "Email address to send alerts to" + type = string + default = "" +} + +variable "slack_enable" { + description = "Enable Slack notifications" + type = bool + default = false +} + +variable "slack_destination_id" { + description = "Slack destination id" + type = string +} + +variable "slack_channel_id" { + description = "Slack channel id" + type = string +} diff --git a/tf-managed/modules/daily-snapshot/outputs.tf b/tf-managed/modules/daily-snapshot/outputs.tf new file mode 100644 index 000000000..240c103f1 --- /dev/null +++ b/tf-managed/modules/daily-snapshot/outputs.tf @@ -0,0 +1,4 @@ +# This ip address may be used in the future by monitoring software +output "ip" { + value = [digitalocean_droplet.forest.ipv4_address] +} diff 
--git a/terraform/modules/sync_check/prep_sources.sh b/tf-managed/modules/daily-snapshot/prep_sources.sh similarity index 71% rename from terraform/modules/sync_check/prep_sources.sh rename to tf-managed/modules/daily-snapshot/prep_sources.sh index 50bd009d6..05d9a16df 100755 --- a/terraform/modules/sync_check/prep_sources.sh +++ b/tf-managed/modules/daily-snapshot/prep_sources.sh @@ -1,14 +1,14 @@ #!/bin/bash # Enable strict error handling and command tracing -set -ex +set -euxo pipefail # Copy local source files in a folder together with ruby_common and create a zip archive. -cd "$1" || exit -cp --archive ../../../scripts/ruby_common service/ || exit +cd "$1" +cp --archive "$2"/ruby_common service/ rm -f sources.tar -(cd service && tar cf ../sources.tar --sort=name --mtime='UTC 2019-01-01' ./* > /dev/null 2>&1) || exit +(cd service && tar cf ../sources.tar --sort=name --mtime='UTC 2019-01-01' ./* > /dev/null 2>&1) rm -fr service/ruby_common echo "{ \"path\": \"$1/sources.tar\" }" diff --git a/tf-managed/modules/daily-snapshot/provider.tf b/tf-managed/modules/daily-snapshot/provider.tf new file mode 100644 index 000000000..c553a31e0 --- /dev/null +++ b/tf-managed/modules/daily-snapshot/provider.tf @@ -0,0 +1,32 @@ +terraform { + required_version = "~> 1.6" + + required_providers { + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } + external = { + source = "hashicorp/external" + version = "~> 2.1" + } + local = { + source = "hashicorp/local" + version = "~> 2.1" + } + newrelic = { + source = "newrelic/newrelic" + version = "~> 3.0" + } + } +} + +provider "digitalocean" { + token = var.digitalocean_token +} + +provider "newrelic" { + account_id = var.new_relic_account_id + api_key = var.new_relic_api_key + region = var.new_relic_region +} diff --git a/terraform/modules/daily_snapshot/service/calibnet_cron_job b/tf-managed/modules/daily-snapshot/service/calibnet_cron_job similarity index 72% rename from 
terraform/modules/daily_snapshot/service/calibnet_cron_job rename to tf-managed/modules/daily-snapshot/service/calibnet_cron_job index a492ad45d..e1b5fdfa0 100755 --- a/terraform/modules/daily_snapshot/service/calibnet_cron_job +++ b/tf-managed/modules/daily-snapshot/service/calibnet_cron_job @@ -3,5 +3,5 @@ # shellcheck source=/dev/null source ~/.forest_env cd "$BASE_FOLDER" || exit -flock -n /tmp/calibnet.lock -c "ruby daily_snapshot.rb calibnet > logs/calibnet_log.txt 2>&1" -flock -n /tmp/calibnet_filops.lock -c "./upload_filops_snapshot.sh calibnet > logs/filops_calibnet_log.txt 2>&1" +flock -n /tmp/calibnet.lock -c "ruby daily_snapshot.rb calibnet >> logs/calibnet_log.txt 2>&1" +flock -n /tmp/calibnet_filops.lock -c "./upload_filops_snapshot.sh calibnet >> logs/filops_calibnet_log.txt 2>&1" diff --git a/terraform/modules/daily_snapshot/service/daily_snapshot.rb b/tf-managed/modules/daily-snapshot/service/daily_snapshot.rb similarity index 63% rename from terraform/modules/daily_snapshot/service/daily_snapshot.rb rename to tf-managed/modules/daily-snapshot/service/daily_snapshot.rb index 732906914..e9daa08ed 100644 --- a/terraform/modules/daily_snapshot/service/daily_snapshot.rb +++ b/tf-managed/modules/daily-snapshot/service/daily_snapshot.rb @@ -7,24 +7,11 @@ require 'date' require 'logger' require 'fileutils' -require 'active_support/time' BASE_FOLDER = get_and_assert_env_variable 'BASE_FOLDER' SLACK_TOKEN = get_and_assert_env_variable 'SLACK_API_TOKEN' CHANNEL = get_and_assert_env_variable 'SLACK_NOTIF_CHANNEL' -# Query the date of the most recent snapshot. -def latest_snapshot_date(chain_name = 'calibnet') - # We do not support HEAD requests but we _do_ support empty ranges. - filename = `curl --remote-name --remote-header-name --location --write-out "%{filename_effective}" --silent https://forest-archive.chainsafe.dev/latest/#{chain_name}/ -H "Range: bytes=0-0"` - # Curl will create a file with a single byte in it. Let's clean it up. 
- File.delete(filename) - snapshot_format = /^([^_]+?)_snapshot_(?[^_]+?)_(?\d{4}-\d{2}-\d{2})_height_(?\d+)(\.forest)?\.car.zst$/ - filename.match(snapshot_format) do |m| - m[:date].to_date - end -end - # Prune logs files(txt) older than 2 weeks def prune_logs(logs_folder = "logs") cutoff_date = Date.today - 14 # set the cutoff date to 14 days ago @@ -46,10 +33,7 @@ def prune_logs(logs_folder = "logs") LOG_EXPORT_METRICS = "logs/#{CHAIN_NAME}_#{DATE}_metrics.txt" client = SlackClient.new CHANNEL, SLACK_TOKEN - -# Query the date of the most recent snapshot. This is used to limit the number -# of victory messages to 1/day even if we upload multiple snapshots per day. -date_before_export = latest_snapshot_date(CHAIN_NAME) +logger = Logger.new($stdout) # conditionally add timestamps to logs without timestamps add_timestamps_cmd = %q[awk '{ if ($0 !~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z/) print strftime("[%Y-%m-%d %H:%M:%S]"), $0; else print $0; fflush(); }'] @@ -57,16 +41,16 @@ def prune_logs(logs_folder = "logs") timeout --signal=KILL 8h ./upload_snapshot.sh #{CHAIN_NAME} #{LOG_EXPORT_DAEMON} #{LOG_EXPORT_METRICS} | #{add_timestamps_cmd}" # The command needs to be run indirectly to avoid syntax errors in the shell. +logger.info "Running snapshot export script for #{CHAIN_NAME}..." snapshot_uploaded = system('bash', '-c', upload_cmd, %i[out err] => LOG_EXPORT_SCRIPT_RUN) +logger.info "Snapshot export script finished for #{CHAIN_NAME}." if snapshot_uploaded - date_after_export = latest_snapshot_date(CHAIN_NAME) - - # If this is the first new snapshot of the day, send a victory message to slack - unless date_before_export == date_after_export - client.post_message "✅ Snapshot uploaded for #{CHAIN_NAME}. 🌲🌳🌲🌳🌲" - end + # This log message is important, as it is used by the monitoring tools to determine whether the snapshot was + # successfully uploaded. + logger.info "Snapshot uploaded for #{CHAIN_NAME}." 
else + logger.error "Snapshot upload failed for #{CHAIN_NAME}." client.post_message "⛔ Snapshot failed for #{CHAIN_NAME}. 🔥🌲🔥 " # attach the log file and print the contents to STDOUT [LOG_EXPORT_SCRIPT_RUN, LOG_EXPORT_DAEMON, LOG_EXPORT_METRICS].each do |log_file| @@ -75,7 +59,7 @@ def prune_logs(logs_folder = "logs") end [LOG_EXPORT_SCRIPT_RUN, LOG_EXPORT_DAEMON, LOG_EXPORT_METRICS].each do |log_file| - puts "Snapshot export log:\n#{File.read(log_file)}\n\n" if File.exist?(log_file) + logger.info "Snapshot export log:\n#{File.read(log_file)}\n\n" if File.exist?(log_file) end # Prune logs files(txt) in the logs directory older than 2 weeks diff --git a/terraform/modules/daily_snapshot/service/forest-env.tpl b/tf-managed/modules/daily-snapshot/service/forest-env.tpl similarity index 100% rename from terraform/modules/daily_snapshot/service/forest-env.tpl rename to tf-managed/modules/daily-snapshot/service/forest-env.tpl diff --git a/terraform/modules/daily_snapshot/service/init.sh b/tf-managed/modules/daily-snapshot/service/init.sh similarity index 97% rename from terraform/modules/daily_snapshot/service/init.sh rename to tf-managed/modules/daily-snapshot/service/init.sh index 2ad8d923a..3d19b6dfb 100755 --- a/terraform/modules/daily_snapshot/service/init.sh +++ b/tf-managed/modules/daily-snapshot/service/init.sh @@ -35,7 +35,7 @@ mkdir --parents -- "$BASE_FOLDER/forest_db/filops" chmod +x ./upload_filops_snapshot.sh # Run new_relic and fail2ban scripts -bash newrelic_fail2ban.sh & +bash newrelic_fail2ban.sh # Setup cron jobs cp calibnet_cron_job mainnet_cron_job /etc/cron.hourly/ diff --git a/terraform/modules/daily_snapshot/service/mainnet_cron_job b/tf-managed/modules/daily-snapshot/service/mainnet_cron_job similarity index 100% rename from terraform/modules/daily_snapshot/service/mainnet_cron_job rename to tf-managed/modules/daily-snapshot/service/mainnet_cron_job diff --git a/terraform/modules/daily_snapshot/service/newrelic_fail2ban.sh 
b/tf-managed/modules/daily-snapshot/service/newrelic_fail2ban.sh similarity index 89% rename from terraform/modules/daily_snapshot/service/newrelic_fail2ban.sh rename to tf-managed/modules/daily-snapshot/service/newrelic_fail2ban.sh index 7e608ff65..0e62ed350 100644 --- a/terraform/modules/daily_snapshot/service/newrelic_fail2ban.sh +++ b/tf-managed/modules/daily-snapshot/service/newrelic_fail2ban.sh @@ -17,11 +17,11 @@ if [ -n "$NEW_RELIC_API_KEY" ] ; then # https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent/configuration/infrastructure-agent-configuration-settings/#offline-time-to-reset cat >> /etc/newrelic-infra.yml < /dev/null 2>&1) || exit +(cd service && tar cf ../sources.tar --sort=name --mtime='UTC 2019-01-01' ./* > /dev/null 2>&1) rm -fr service/ruby_common echo "{ \"path\": \"$1/sources.tar\" }" diff --git a/tf-managed/modules/sync-check/provider.tf b/tf-managed/modules/sync-check/provider.tf new file mode 100644 index 000000000..1f8588272 --- /dev/null +++ b/tf-managed/modules/sync-check/provider.tf @@ -0,0 +1,23 @@ +terraform { + required_version = "~> 1.6" + + required_providers { + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } + external = { + source = "hashicorp/external" + version = "~> 2.1" + } + local = { + source = "hashicorp/local" + version = "~> 2.1" + } + + } +} + +provider "digitalocean" { + token = var.digitalocean_token +} diff --git a/terraform/modules/sync_check/service/Dockerfile-tester b/tf-managed/modules/sync-check/service/Dockerfile-tester similarity index 100% rename from terraform/modules/sync_check/service/Dockerfile-tester rename to tf-managed/modules/sync-check/service/Dockerfile-tester diff --git a/terraform/modules/sync_check/service/Gemfile b/tf-managed/modules/sync-check/service/Gemfile similarity index 100% rename from terraform/modules/sync_check/service/Gemfile rename to tf-managed/modules/sync-check/service/Gemfile diff --git 
a/terraform/modules/sync_check/service/Gemfile.lock b/tf-managed/modules/sync-check/service/Gemfile.lock similarity index 100% rename from terraform/modules/sync_check/service/Gemfile.lock rename to tf-managed/modules/sync-check/service/Gemfile.lock diff --git a/terraform/modules/sync_check/service/docker-compose.yml b/tf-managed/modules/sync-check/service/docker-compose.yml similarity index 100% rename from terraform/modules/sync_check/service/docker-compose.yml rename to tf-managed/modules/sync-check/service/docker-compose.yml diff --git a/terraform/modules/sync_check/service/forest-env.tpl b/tf-managed/modules/sync-check/service/forest-env.tpl similarity index 100% rename from terraform/modules/sync_check/service/forest-env.tpl rename to tf-managed/modules/sync-check/service/forest-env.tpl diff --git a/terraform/modules/sync_check/service/health_check.sh b/tf-managed/modules/sync-check/service/health_check.sh similarity index 100% rename from terraform/modules/sync_check/service/health_check.sh rename to tf-managed/modules/sync-check/service/health_check.sh diff --git a/terraform/modules/sync_check/service/init.sh b/tf-managed/modules/sync-check/service/init.sh similarity index 100% rename from terraform/modules/sync_check/service/init.sh rename to tf-managed/modules/sync-check/service/init.sh diff --git a/terraform/modules/sync_check/service/restart.service b/tf-managed/modules/sync-check/service/restart.service similarity index 100% rename from terraform/modules/sync_check/service/restart.service rename to tf-managed/modules/sync-check/service/restart.service diff --git a/terraform/modules/sync_check/service/restart.sh b/tf-managed/modules/sync-check/service/restart.sh similarity index 100% rename from terraform/modules/sync_check/service/restart.sh rename to tf-managed/modules/sync-check/service/restart.sh diff --git a/terraform/modules/sync_check/service/run_service.sh b/tf-managed/modules/sync-check/service/run_service.sh similarity index 100% rename from 
terraform/modules/sync_check/service/run_service.sh rename to tf-managed/modules/sync-check/service/run_service.sh diff --git a/terraform/modules/sync_check/service/sync_check.rb b/tf-managed/modules/sync-check/service/sync_check.rb similarity index 100% rename from terraform/modules/sync_check/service/sync_check.rb rename to tf-managed/modules/sync-check/service/sync_check.rb diff --git a/terraform/modules/sync_check/service/sync_check.toml b/tf-managed/modules/sync-check/service/sync_check.toml similarity index 100% rename from terraform/modules/sync_check/service/sync_check.toml rename to tf-managed/modules/sync-check/service/sync_check.toml diff --git a/terraform/modules/sync_check/service/sync_check_process.rb b/tf-managed/modules/sync-check/service/sync_check_process.rb similarity index 100% rename from terraform/modules/sync_check/service/sync_check_process.rb rename to tf-managed/modules/sync-check/service/sync_check_process.rb diff --git a/terraform/modules/sync_check/variable.tf b/tf-managed/modules/sync-check/variables.tf similarity index 90% rename from terraform/modules/sync_check/variable.tf rename to tf-managed/modules/sync-check/variables.tf index 0d5760e21..a2f439fcd 100644 --- a/terraform/modules/sync_check/variable.tf +++ b/tf-managed/modules/sync-check/variables.tf @@ -62,3 +62,12 @@ variable "NEW_RELIC_ACCOUNT_ID" { type = string sensitive = true } + +variable "common_resources_dir" { + type = string +} + +variable "environment" { + description = "The environment name" + type = string +} diff --git a/scripts/ruby_common/docker_utils.rb b/tf-managed/scripts/ruby_common/docker_utils.rb similarity index 100% rename from scripts/ruby_common/docker_utils.rb rename to tf-managed/scripts/ruby_common/docker_utils.rb diff --git a/scripts/ruby_common/slack_client.rb b/tf-managed/scripts/ruby_common/slack_client.rb similarity index 100% rename from scripts/ruby_common/slack_client.rb rename to tf-managed/scripts/ruby_common/slack_client.rb diff --git 
a/scripts/ruby_common/utils.rb b/tf-managed/scripts/ruby_common/utils.rb similarity index 100% rename from scripts/ruby_common/utils.rb rename to tf-managed/scripts/ruby_common/utils.rb