Skip to content

Portal | Level: L2: Operations | Topics: Infrastructure Testing | Domain: DevOps & Tooling

Infrastructure Testing — Primer

Why This Matters

terraform plan shows what Terraform intends to do. It does not verify that the resulting infrastructure actually works. A subnet with the wrong CIDR, a security group that blocks the wrong port, an IAM role with insufficient permissions — none of these show up as plan errors. Infra testing closes that gap: you apply, validate behavior, then destroy. This is also how you enforce policy ("no S3 buckets should be public") and prove compliance ("all resources are tagged with cost-center").


Terratest

Who made it: Terratest was created by Gruntwork (Yevgeniy Brikman and team), the same company behind the "Terraform: Up & Running" book and the Terragrunt wrapper. They built it because they maintained hundreds of Terraform modules and needed automated testing that went beyond terraform validate.

Terratest is a Go library for testing Terraform (and Packer, Helm, Kubernetes, AWS, GCP) modules. The pattern: apply real infrastructure, run assertions against it, destroy.

Project Layout

modules/
  vpc/
    main.tf
    variables.tf
    outputs.tf
    test/
      vpc_test.go
      go.mod
      go.sum

Basic Test

package test

import (
    "testing"
    "time"

    "github.com/gruntwork-io/terratest/modules/aws"
    "github.com/gruntwork-io/terratest/modules/terraform"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)

// TestVpcModule applies the VPC module in the parent directory with test
// variables, validates the created VPC through the AWS API, and destroys the
// infrastructure when the test returns.
func TestVpcModule(t *testing.T) {
    t.Parallel() // run multiple tests concurrently

    awsRegion := "us-east-1"
    // Single source of truth for the CIDR: passed to the module and asserted below.
    vpcCIDR := "10.100.0.0/16"

    terraformOptions := &terraform.Options{
        TerraformDir: "../",
        Vars: map[string]interface{}{
            "aws_region":  awsRegion,
            "environment": "test",
            "vpc_cidr":    vpcCIDR,
        },
        // Retry on known transient errors (AWS eventually consistent)
        RetryableTerraformErrors: map[string]string{
            "RequestError: send request failed": "Transient AWS API error",
            "NoSuchBucket":                      "S3 state bucket may not be ready yet",
        },
        MaxRetries:         3,
        TimeBetweenRetries: 5 * time.Second,
    }

    // Always destroy at the end, even if test fails
    defer terraform.Destroy(t, terraformOptions)

    // Init and apply
    terraform.InitAndApply(t, terraformOptions)

    // Read outputs
    vpcID := terraform.Output(t, terraformOptions, "vpc_id")
    privateSubnetIDs := terraform.OutputList(t, terraformOptions, "private_subnet_ids")

    // Validate via AWS API
    // NOTE(review): confirm aws.GetVpcCidr and aws.GetVpcState exist in the
    // pinned Terratest version; if not, read these values from the aws.Vpc
    // struct or the EC2 API instead.
    vpc := aws.GetVpcById(t, vpcID, awsRegion)
    assert.Equal(t, vpcCIDR, aws.GetVpcCidr(t, vpcID, awsRegion))
    assert.Equal(t, "available", aws.GetVpcState(t, vpcID, awsRegion))
    assert.Len(t, privateSubnetIDs, 3)

    // Validate tags. aws.Vpc.Tags is a map[string]string, so the value is
    // compared directly; it is not a pointer and must not be dereferenced.
    tags := vpc.Tags
    require.Contains(t, tags, "Environment")
    assert.Equal(t, "test", tags["Environment"])
}

HTTP Endpoint Validation

// TestECSServiceEndpoint applies the ecs-service module, reads the ALB DNS
// name from its outputs, and polls the /health endpoint until it answers as
// expected. The infrastructure is destroyed when the test returns.
func TestECSServiceEndpoint(t *testing.T) {
    t.Parallel()

    opts := &terraform.Options{TerraformDir: "../ecs-service/"}
    defer terraform.Destroy(t, opts)
    terraform.InitAndApply(t, opts)

    albDNS := terraform.Output(t, opts, "alb_dns_name")
    url := fmt.Sprintf("http://%s/health", albDNS)

    // Retry until healthy — infrastructure often needs time to stabilize.
    // The call fails the test if no attempt returns the expected status and body.
    http_helper.HttpGetWithRetry(
        t,
        url,
        nil,   // TLS config (none supplied; the URL above is plain HTTP)
        200,   // expected status
        "OK",  // expected body: compared against the whole response body, not as a substring
        30,    // max retries
        10*time.Second, // sleep between retries
    )
}

SSH Validation

func TestEC2BootstrapScript(t *testing.T) {
    t.Parallel()

    opts := &terraform.Options{TerraformDir: "../ec2/"}
    defer terraform.Destroy(t, opts)
    terraform.InitAndApply(t, opts)

    publicIP := terraform.Output(t, opts, "public_ip")
    keyPair := aws.CreateAndImportEC2KeyPair(t, "us-east-1", "test-key")
    defer aws.DeleteEC2KeyPair(t, keyPair)

    sshOpts := &ssh.Options{
        Hostname:    publicIP,
        Port:        22,
        Username:    "ec2-user",
        KeyPair:     keyPair,
    }

    // Verify bootstrap script ran
    output := ssh.CheckSshCommand(t, sshOpts, "systemctl is-active nginx")
    assert.Equal(t, "active", strings.TrimSpace(output))
}

Parallel Test Runs

Terratest recommends t.Parallel(), but tests that run in parallel must not share resource names or Terraform state. Give each test a unique name prefix and its own backend state key:

// TestVpc applies the module in the parent directory under a randomly
// generated name prefix and state key, so that concurrent runs do not collide,
// and destroys it when the test returns.
func TestVpc(t *testing.T) {
    t.Parallel()

    // One random suffix drives both the resource names and the state path.
    suffix := random.UniqueId()
    namePrefix := fmt.Sprintf("test-%s", suffix)
    stateKey := fmt.Sprintf("test/%s/terraform.tfstate", suffix)

    tfOptions := &terraform.Options{
        TerraformDir: "../",
        // Unique names avoid conflicts across parallel runs.
        Vars: map[string]interface{}{"name_prefix": namePrefix},
        // A dedicated backend key gives each test its own state.
        BackendConfig: map[string]interface{}{"key": stateKey},
    }

    defer terraform.Destroy(t, tfOptions)
    terraform.InitAndApply(t, tfOptions)
    // ...
}

Run tests: go test -v -timeout 30m ./test/


InSpec

InSpec is a compliance and configuration testing framework by Chef. Write human-readable controls, run them against servers, containers, or cloud APIs.

Profile Structure

profiles/
  my-baseline/
    inspec.yml
    controls/
      ssh.rb
      os_hardening.rb
    libraries/
      custom_resource.rb

inspec.yml:

name: my-baseline
title: Security Baseline Profile
maintainer: Platform Team
version: 1.0.0
supports:
  - platform: linux
depends:
  - name: linux-baseline
    url: https://github.com/dev-sec/linux-baseline/archive/master.tar.gz

Writing Controls

# controls/ssh.rb
# Control ssh-01: checks the SSH daemon's configuration values and that
# something is listening on TCP port 22.
control 'ssh-01' do
  impact 1.0  # severity score from 0.0 (lowest) to 1.0 (most critical)
  title 'SSH daemon configuration'
  desc 'Ensure SSH is configured securely'

  # Each `its` reads one sshd_config setting and compares it to the required value.
  describe sshd_config do
    its('PermitRootLogin') { should eq 'no' }
    its('PasswordAuthentication') { should eq 'no' }
    its('Protocol') { should eq '2' }
    its('MaxAuthTries') { should cmp <= 4 }
  end

  # The daemon must be reachable: port 22 is listening and speaks TCP.
  describe port(22) do
    it { should be_listening }
    its('protocols') { should include 'tcp' }
  end
end

control 'packages-01' do
  impact 0.7
  title 'Ensure unattended-upgrades is installed'

  describe package('unattended-upgrades') do
    it { should be_installed }
  end

  describe service('unattended-upgrades') do
    it { should be_enabled }
    it { should be_running }
  end
end

AWS Compliance Scanning

# controls/s3.rb
control 'aws-s3-01' do
  impact 1.0
  title 'S3 buckets must not be publicly accessible'

  aws_s3_buckets.bucket_names.each do |bucket|
    describe aws_s3_bucket(bucket) do
      it { should_not be_public }
      it { should have_default_encryption_enabled }
      it { should have_versioning_enabled }
    end
  end
end

control 'aws-iam-01' do
  impact 1.0
  title 'Root account should not have access keys'

  describe aws_iam_root_user do
    it { should_not have_access_key }
    it { should have_mfa_enabled }
  end
end
# Run against local machine
inspec exec my-baseline/

# Run against remote host
inspec exec my-baseline/ -t ssh://user@host --key-files ~/.ssh/id_rsa

# Run against AWS
inspec exec my-baseline/ -t aws://us-east-1

# Run against Docker container
inspec exec my-baseline/ -t docker://container-id

# Output formats
inspec exec my-baseline/ --reporter cli json:results.json html:report.html

Conftest

Name origin: Conftest combines "conf" (configuration) + "test." It was created by Gareth Rushgrove at Snyk. It is a thin CLI wrapper around OPA's Rego engine, purpose-built for testing static config files in CI pipelines rather than runtime policy enforcement.

Conftest uses OPA (Open Policy Agent) Rego policies to test configuration files — Terraform plan JSON, Kubernetes manifests, Dockerfiles, Helm charts, CI configs.

Rego Policy Basics

# policy/terraform/deny_public_s3.rego
package main

import input.resource_changes

# Deny any S3 bucket with public ACL.
# The resource is bound once so that the type check, the ACL check and the
# reported address all refer to the same entry. Separate `resource_changes[_]`
# references would each iterate independently and could mix fields from
# different resources.
deny[msg] {
    resource := resource_changes[_]
    resource.type == "aws_s3_bucket"
    resource.change.after.acl == "public-read"
    msg := sprintf(
        "S3 bucket '%s' must not have public-read ACL",
        [resource.address]
    )
}

# Deny S3 buckets without encryption
deny[msg] {
    resource := resource_changes[_]
    resource.type == "aws_s3_bucket"
    not has_encryption(resource)
    msg := sprintf(
        "S3 bucket '%s' must have server-side encryption",
        [resource.address]
    )
}

# has_encryption holds when the planned bucket declares at least one
# server-side-encryption rule with a non-empty algorithm.
# The reference is kept on a single line: whitespace (including a newline)
# ends a Rego reference, so it cannot be continued with a leading `.`.
has_encryption(resource) {
    resource.change.after.server_side_encryption_configuration[_].rule[_].apply_server_side_encryption_by_default[_].sse_algorithm != ""
}

Kubernetes Manifest Policies

# policy/k8s/deny_latest_tag.rego
package main

# Deny Deployment containers whose image reference ends in ":latest".
# Produces one message per offending container.
# Note: only an explicit ":latest" suffix is matched; an image reference with
# no tag at all is not caught by this rule.
deny[msg] {
    input.kind == "Deployment"
    container := input.spec.template.spec.containers[_]
    endswith(container.image, ":latest")
    msg := sprintf(
        "Container '%s' uses ':latest' tag — pin to a specific version",
        [container.name]
    )
}

deny[msg] {
    input.kind == "Deployment"
    container := input.spec.template.spec.containers[_]
    not container.resources.limits.memory
    msg := sprintf(
        "Container '%s' has no memory limit",
        [container.name]
    )
}

warn[msg] {
    input.kind == "Deployment"
    not input.spec.template.spec.securityContext.runAsNonRoot
    msg := "Deployment does not set runAsNonRoot: true"
}

Running Conftest

# Test Terraform plan JSON
terraform plan -out=plan.tfplan
terraform show -json plan.tfplan > plan.json
conftest test plan.json --policy policy/terraform/

# Test Kubernetes manifests
conftest test k8s/deployment.yaml --policy policy/k8s/

# Test all manifests in directory
conftest test k8s/ --policy policy/k8s/

# Test with multiple policy directories
conftest test plan.json --policy policy/terraform/ --policy policy/shared/

# Output formats
conftest test plan.json --policy policy/ --output json
conftest test plan.json --policy policy/ --output tap

# Treat warnings as failures (non-zero exit code when any warn rule fires)
conftest test plan.json --policy policy/ --fail-on-warn

# Rego strict mode (stricter policy compilation checks; does not change how warn results are handled)
conftest test plan.json --policy policy/ --strict

CI Pipeline Integration (Terraform)

# .github/workflows/terraform.yml
jobs:
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Terraform init
        run: terraform init

      - name: Terraform plan
        run: terraform plan -out=plan.tfplan

      - name: Convert plan to JSON
        run: terraform show -json plan.tfplan > plan.json

      - name: Run Conftest policies
        uses: docker://openpolicyagent/conftest:latest
        with:
          args: test plan.json --policy policy/terraform/ --output json

      - name: Terraform apply (only on main)
        if: github.ref == 'refs/heads/main'
        run: terraform apply plan.tfplan

Testing Kubernetes Manifests

kubeval / kubeconform

# kubeconform (faster, actively maintained replacement for kubeval)
# Validate against Kubernetes schemas
kubeconform -strict -summary k8s/

# Validate against specific Kubernetes version
kubeconform -kubernetes-version 1.29.0 k8s/

# Include CRDs (cert-manager, Argo, etc.)
kubeconform \
  -schema-location default \
  -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
  k8s/

# JSON output for CI
kubeconform -output json k8s/ | jq '.summary'

Kyverno Policies (Runtime Enforcement + Testing)

# policy/require-labels.yaml
# ClusterPolicy that rejects Deployments, StatefulSets and DaemonSets
# that do not carry a non-empty `team` label.
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: require-labels
spec:
  # Use the capitalized value; the lowercase `enforce`/`audit` spellings are deprecated.
  validationFailureAction: Enforce
  rules:
    - name: check-team-label
      match:
        any:
          - resources:
              kinds: [Deployment, StatefulSet, DaemonSet]
      validate:
        message: "The label 'team' is required"
        pattern:
          metadata:
            labels:
              # "?*" matches any value of at least one character
              team: "?*"
# Test policies with kyverno CLI
kyverno test .

# Apply policy check against manifests
kyverno apply policy/require-labels.yaml --resource k8s/deployment.yaml

Module Output Testing

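Test a Terraform module's outputs in isolation: apply only that module (rather than the full stack) and assert on the shape and value of each output. (terraform-docs can document the outputs a module declares, but it does not test them.)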

// TestModuleOutputs applies the iam-role module on its own and checks that the
// outputs it exposes have the expected structure: role_arn must look like an
// IAM role ARN for the requested role, and role_name must echo the input.
func TestModuleOutputs(t *testing.T) {
    t.Parallel()

    moduleVars := map[string]interface{}{
        "role_name":   "test-role",
        "service":     "lambda.amazonaws.com",
        "policy_arns": []string{"arn:aws:iam::aws:policy/ReadOnlyAccess"},
    }
    tfOptions := &terraform.Options{
        TerraformDir: "../modules/iam-role",
        Vars:         moduleVars,
    }

    // Registered before apply so cleanup runs even if apply fails part-way.
    defer terraform.Destroy(t, tfOptions)
    terraform.InitAndApply(t, tfOptions)

    // The account ID is not known ahead of time, so the ARN is matched by pattern.
    gotARN := terraform.Output(t, tfOptions, "role_arn")
    assert.Regexp(t, `^arn:aws:iam::\d+:role/test-role$`, gotARN)

    gotName := terraform.Output(t, tfOptions, "role_name")
    assert.Equal(t, "test-role", gotName)
}

Quick Reference

Terratest Test Helpers

// AWS
aws.GetVpcById(t, vpcID, region)
aws.GetSubnetsForVpc(t, vpcID, region)
aws.GetS3BucketVersioning(t, region, bucket)
aws.GetTagsForEc2Instances(t, region, filter, values)
aws.GetRdsEndpoint(t, dbID, region)

// HTTP
http_helper.HttpGet(t, url, nil)
http_helper.HttpGetWithRetry(t, url, nil, 200, "OK", 30, 10*time.Second)

// SSH
ssh.CheckSshCommand(t, sshOpts, "command")
ssh.CheckSshConnectionWithRetry(t, sshOpts, 10, 10*time.Second)

// Kubernetes
k8s.WaitUntilDeploymentAvailable(t, k8sOpts, "my-deployment", 10, 10*time.Second)
k8s.GetService(t, k8sOpts, "my-service")
k8s.GetNodes(t, k8sOpts)

// Retry helpers
retry.DoWithRetry(t, "description", 10, 5*time.Second, func() (string, error) {
    // something that might fail transiently:
    // return a non-nil error to trigger another attempt,
    // or a result string and nil once it succeeds
    return "", nil
})

Conftest Policy Structure

package main

# deny[msg] { ... }   — failures block deployment
# warn[msg] { ... }   — warnings logged but don't block
# violation[msg] { ... }  — evaluated like deny, but may return a structured result (an object with a "msg" field) instead of a plain string

Wiki Navigation

Prerequisites