Database Internals - Street-Level Ops¶
Real-world workflows for managing replication, diagnosing locks, fixing slow queries, and operating read replicas. PostgreSQL-focused with MySQL notes where relevant.
Replication Health¶
# Check replication status on the primary
psql -c "SELECT client_addr, state, sent_lsn, replay_lsn,
pg_wal_lsn_diff(sent_lsn, replay_lsn) AS lag_bytes,
replay_lag
FROM pg_stat_replication;"
# client_addr | state | lag_bytes | replay_lag
# 10.0.1.2 | streaming | 4096 | 00:00:00.012
# 10.0.1.3 | streaming | 81920 | 00:00:02.341
# Check lag in seconds on a replica
psql -c "SELECT CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()
THEN 0
ELSE EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp())
END AS lag_seconds;"
# Is this instance a replica?
psql -c "SELECT pg_is_in_recovery();"
# t = replica, f = primary
Find and Kill Long-Running Queries¶
# Queries running longer than 5 minutes
psql -c "SELECT pid, now() - xact_start AS duration, state, left(query, 80) AS query
FROM pg_stat_activity
WHERE state != 'idle'
AND xact_start < now() - interval '5 minutes'
ORDER BY duration DESC;"
# Terminate a specific backend
psql -c "SELECT pg_terminate_backend(12345);"
# Cancel a query without killing the connection
psql -c "SELECT pg_cancel_backend(12345);"
# Set a statement timeout to prevent future runaways
psql -c "ALTER DATABASE mydb SET statement_timeout = '30s';"
Lock Diagnosis¶
# Find blocking locks
psql -c "SELECT blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
left(blocked_activity.query, 60) AS blocked_query,
blocking_locks.pid AS blocking_pid,
left(blocking_activity.query, 60) AS blocking_query
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;"
# Check for deadlocks in logs
grep -i "deadlock detected" /var/log/postgresql/postgresql-*.log | tail -5
# Set lock timeout to fail fast during migrations
psql -c "SET lock_timeout = '5s'; ALTER TABLE orders ADD COLUMN new_col TEXT;"
Indexing Operations¶
# Find missing indexes: tables with high sequential scans
psql -c "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan,
CASE WHEN seq_scan > 0 THEN seq_tup_read / seq_scan ELSE 0 END AS avg_seq_tup
FROM pg_stat_user_tables
WHERE seq_scan > 100
ORDER BY seq_tup_read DESC LIMIT 10;"
# Check index sizes and usage
psql -c "SELECT indexrelname, pg_size_pretty(pg_relation_size(indexrelid)) AS size,
idx_scan AS times_used
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexrelid) DESC LIMIT 15;"
# Find unused indexes (candidates for removal)
psql -c "SELECT indexrelname, pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND indexrelname NOT LIKE '%_pkey'
ORDER BY pg_relation_size(indexrelid) DESC;"
# Create an index without locking the table
psql -c "CREATE INDEX CONCURRENTLY idx_orders_customer ON orders (customer_id);"
# Reindex without downtime (PostgreSQL 12+)
psql -c "REINDEX INDEX CONCURRENTLY idx_orders_customer;"
Query Performance¶
# Analyze a slow query (always use ANALYZE for real timings)
psql -c "EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT * FROM orders WHERE customer_id = 42 AND created_at > '2025-01-01';"
# Look for: Seq Scan on large tables, high actual vs planned rows, low buffer hits
# Update statistics for the query planner
psql -c "ANALYZE orders;"
# Top queries by total execution time (requires pg_stat_statements)
psql -c "SELECT left(query, 80) AS query, calls, total_exec_time::int AS total_ms,
mean_exec_time::int AS avg_ms, rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC LIMIT 10;"
Transaction Management¶
# Check current isolation level
psql -c "SHOW transaction_isolation;"
# read committed (PostgreSQL default)
# Find open transactions holding locks
psql -c "SELECT pid, now() - xact_start AS tx_duration, state, left(query, 60) AS query
FROM pg_stat_activity
WHERE xact_start IS NOT NULL AND state != 'idle'
ORDER BY tx_duration DESC;"
# Check for idle-in-transaction connections (holding locks but doing nothing)
psql -c "SELECT pid, now() - state_change AS idle_duration, left(query, 60) AS last_query
FROM pg_stat_activity
WHERE state = 'idle in transaction'
ORDER BY idle_duration DESC;"
# Set idle-in-transaction timeout
psql -c "ALTER DATABASE mydb SET idle_in_transaction_session_timeout = '60s';"
Read Replica Operations¶
# Check replica lag from the primary
psql -c "SELECT application_name,
pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS lag_bytes,
replay_lag
FROM pg_stat_replication;"
# Promote a replica to primary (emergency failover)
pg_ctl promote -D /var/lib/postgresql/data
# Or via SQL:
psql -c "SELECT pg_promote();"
# Check how many replicas are connected
psql -c "SELECT count(*) AS replica_count FROM pg_stat_replication;"
# Verify a replica is in recovery mode
psql -h replica-host -c "SELECT pg_is_in_recovery();"
# t
VACUUM and Maintenance¶
# Check tables that need vacuuming (dead tuple ratio)
psql -c "SELECT schemaname, relname, n_dead_tup, n_live_tup,
ROUND(n_dead_tup::numeric / GREATEST(n_live_tup, 1) * 100, 1) AS dead_pct,
last_autovacuum
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY n_dead_tup DESC LIMIT 10;"
# Manual vacuum on a specific table
psql -c "VACUUM (VERBOSE, ANALYZE) orders;"
# Check autovacuum is running
psql -c "SELECT pid, query FROM pg_stat_activity WHERE query LIKE 'autovacuum%';"
Connection Monitoring¶
# Current connection count vs max
psql -c "SELECT count(*) AS current, setting AS max
FROM pg_stat_activity, pg_settings
WHERE pg_settings.name = 'max_connections'
GROUP BY setting;"
# Connections by state
psql -c "SELECT state, count(*) FROM pg_stat_activity GROUP BY state ORDER BY count DESC;"
# active 12
# idle 45
# idle in transaction 3