# jq - Street-Level Ops
Real-world workflows for processing JSON from APIs, kubectl, and cloud CLIs.
## Kubernetes JSON Processing
# Print the name of every pod whose phase is anything other than "Running"
kubectl get pods --output=json \
  | jq --raw-output '.items | map(select(.status.phase != "Running")) | .[].metadata.name'
# Get pod names, statuses, and restart counts.
# Restarts are summed over every container in the pod rather than read from
# the first container only, so a crash-looping sidecar is not hidden.
# `[]?` yields nothing when containerStatuses is absent, and `add // 0`
# turns the resulting empty sum (null) into 0.
kubectl get pods -o json | jq -r '.items[] | {
  name: .metadata.name,
  status: .status.phase,
  restarts: ([.status.containerStatuses[]?.restartCount] | add // 0)
}'
# Find pods with high restart counts.
# any(generator; condition) makes select emit each pod at most once; a bare
# `.status.containerStatuses[]?.restartCount > 5` inside select would print
# the pod once for every container over the threshold.
# The count shown is the highest restart count among the pod's containers,
# i.e. a value that actually tripped the filter (not just container 0's).
# @tsv stringifies the number and joins the columns with tabs.
kubectl get pods -A -o json | jq -r '.items[] |
  select(any(.status.containerStatuses[]?; .restartCount > 5)) |
  [.metadata.namespace, .metadata.name, ([.status.containerStatuses[].restartCount] | max)] |
  @tsv'
# Output:
# production api-7d4b8c-x2k9l 12
# staging worker-5f6a7b-m3n7 8
# List each distinct container image referenced by any pod, across all namespaces
kubectl get pods --all-namespaces --output=json \
  | jq --raw-output '.items | map(.spec.containers[].image) | unique | .[]'
# Print the names of nodes whose DiskPressure condition is currently "True"
kubectl get nodes --output=json \
  | jq --raw-output '
      .items[]
      | select(.status.conditions[] | select(.type == "DiskPressure") | select(.status == "True"))
      | .metadata.name'
## AWS CLI JSON Processing
# Tab-separated name, instance id and private IP for each running EC2 instance.
# Instances without a "Name" tag are shown as "unnamed".
aws ec2 describe-instances \
  | jq --raw-output '
      .Reservations
      | map(.Instances[])
      | .[]
      | select(.State.Name == "running")
      | [((.Tags[]? | select(.Key == "Name") | .Value) // "unnamed"), .InstanceId, .PrivateIpAddress]
      | @tsv'
# Output:
# web-prod i-0abc123def 10.0.1.50
# api-prod i-0def456ghi 10.0.1.51
# Find unencrypted S3 buckets.
# When get-bucket-encryption fails (as it does for a bucket with no
# encryption configuration) nothing is written to stdout, so jq receives
# empty input and prints nothing: the captured value is "", never "null".
# `// empty` folds an explicit null into the same empty string so a single
# -z test covers both cases.
# NOTE: stderr is discarded, so any other failure (e.g. AccessDenied) is
# also reported as UNENCRYPTED; remove 2>/dev/null to tell them apart.
aws s3api list-buckets | jq -r '.Buckets[].Name' | while IFS= read -r bucket; do
  enc=$(aws s3api get-bucket-encryption --bucket "${bucket}" 2>/dev/null \
    | jq -r '.ServerSideEncryptionConfiguration // empty')
  if [[ -z "${enc}" ]]; then
    echo "UNENCRYPTED: ${bucket}"
  fi
done
# Find security groups with 0.0.0.0/0 ingress.
# any(generator; condition) limits select to one result per group; comparing
# the bare generator `.IpPermissions[].IpRanges[].CidrIp` inside select emits
# the group once per matching range, duplicating groups that expose several
# rules. The same applies to the per-permission filter that builds OpenPorts.
# `[]?` tolerates permissions or groups where the arrays are missing.
aws ec2 describe-security-groups | jq -r '.SecurityGroups[] |
  select(any(.IpPermissions[]?.IpRanges[]?; .CidrIp == "0.0.0.0/0")) |
  {GroupId, GroupName, OpenPorts: [.IpPermissions[] | select(any(.IpRanges[]?; .CidrIp == "0.0.0.0/0")) | .FromPort]}'
## API Response Processing
# GitHub: one tab-separated line per open PR: "#<number>", author login, title
curl --silent 'https://api.github.com/repos/owner/repo/pulls' \
  | jq --raw-output '.[] | [("#" + (.number | tostring)), .user.login, .title] | @tsv'
# Output:
# #42 dev1 Fix connection pool leak
# #41 dev2 Add health check endpoint
# Pull id, name and status out of each item nested under .data in a paginated response
curl --silent 'https://api.example.com/items?page=1' \
  | jq '.data[] | {id: .id, name: .name, status: .status}'
# Parse JSON logs (one JSON object per line).
# jq reads the file named on its command line, so no `cat` is needed.
jq -r 'select(.level == "ERROR") | [.timestamp, .message] | @tsv' app.log
# Count errors by type from JSON logs, most frequent first.
# -s (slurp) gathers the stream of log objects into a single array so that
# it can be grouped; sort_by(-.count) orders the groups by descending count.
jq -s 'map(select(.level == "ERROR")) | group_by(.error_type) |
  map({type: .[0].error_type, count: length}) | sort_by(-.count)' app.log
## Data Transformation
# Turn an array of objects into CSV rows (strings quoted, numbers bare)
jq --raw-output '.[] | [.name, .port] | @csv' \
  <<<'[{"name":"web","port":80},{"name":"api","port":8080}]'
# Output:
# "web",80
# "api",8080
# Combine several JSON files into one value by slurping them and adding left to right
jq --slurp 'reduce .[] as $doc (null; . + $doc)' file1.json file2.json file3.json
# Turn a map of name -> settings into one {name, port} object per entry
jq '.servers | keys_unsorted[] as $k | {name: $k, port: .[$k].port}' \
  <<<'{"servers":{"web":{"port":80},"api":{"port":8080}}}'
# Set the same extra fields on each element of an array
jq '[.[] | .env = "production" | .region = "us-east-1"]' \
  <<<'[{"host":"web1"},{"host":"web2"}]'
# Per service: number of records and how many of them have status >= 500
jq 'group_by(.svc) | map({
      service: .[0].svc,
      total: length,
      errors: ([.[] | select(.status >= 500)] | length)
    })' \
  <<<'[{"svc":"api","status":200},{"svc":"api","status":500},{"svc":"web","status":200}]'
## Shell Variable Integration
# Pass shell variables into jq.
# A lowercase name is used on purpose: HOSTNAME is a variable bash sets
# itself, so assigning to it overwrites the real host name for the rest of
# the shell session. --arg binds the value as a jq string ($h), so it is
# never interpreted as part of the jq program.
target_host="web-prod"
echo '{}' | jq --arg h "${target_host}" '. + {host: $h}'
# Branch in the shell on a value extracted by jq
STATUS="$(curl --silent localhost:8080/health | jq --raw-output '.status')"
case "${STATUS}" in
  healthy) ;;
  *) echo "ALERT: service unhealthy" ;;
esac
# Feed each pod name printed by jq to a shell loop, one name per line
kubectl get pods --output=json \
  | jq --raw-output '.items[] | .metadata | .name' \
  | while read -r pod; do
      printf 'Processing: %s\n' "${pod}"
      kubectl logs "${pod}" --tail=5
    done
# With --exit-status jq's exit code reflects the last output value,
# which lets the shell branch on it directly
if jq --exit-status '.ready' <<<'{"ready":true}' >/dev/null; then
  echo "Service is ready"
fi
## Common One-Liners
# Pretty-print (and, on a terminal, colorize) a JSON response
curl --silent localhost:9200/_cluster/health | jq '.'
# List only the key names of an object
jq 'keys' <<<'{"name":"web","port":80,"env":"prod"}'
# Count the elements of an array
kubectl get pods --output=json | jq '.items | length'
# Pick the array element with the largest .cpu
jq 'max_by(.cpu)' <<<'[{"cpu":45},{"cpu":82},{"cpu":31}]'
# Remove duplicate entries from an array
jq 'unique' <<<'["a","b","a","c","b"]'
# Fall back to a default when a field is missing
jq '.missing // "default_value"' <<<'{}'
# Report the JSON type of a value
jq 'type' <<<'"hello"'
# "string"
## Debugging jq Expressions
# Grow the filter one step at a time, starting from the identity filter
kubectl get pods --output=json | jq '.'                                    # Full output
kubectl get pods --output=json | jq '.items'                               # Just the array
kubectl get pods --output=json | jq '.items | .[0]'                        # First item
kubectl get pods --output=json | jq '.items | .[0] | .metadata | .name'    # The field you want
# Insert debug mid-pipeline to print each intermediate value
jq '.[] | debug | . * 2' <<<'[1,2,3]'
# Ask jq what type the current value is
jq '.a | type' <<<'{"a":[1,2]}'
# "array"