Skip to content

Database Internals - Street-Level Ops

Real-world workflows for managing replication, diagnosing locks, fixing slow queries, and operating read replicas. PostgreSQL-focused with MySQL notes where relevant.

Replication Health

# Check replication status on the primary
psql -c "SELECT client_addr, state, sent_lsn, replay_lsn,
         pg_wal_lsn_diff(sent_lsn, replay_lsn) AS lag_bytes,
         replay_lag
  FROM pg_stat_replication;"
# client_addr  | state     | lag_bytes | replay_lag
# 10.0.1.2     | streaming |     4096  | 00:00:00.012
# 10.0.1.3     | streaming |    81920  | 00:00:02.341

# Check lag in seconds on a replica
psql -c "SELECT CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()
         THEN 0
         ELSE EXTRACT(EPOCH FROM now() - pg_last_xact_replay_timestamp())
         END AS lag_seconds;"

# Is this instance a replica?
psql -c "SELECT pg_is_in_recovery();"
# t = replica, f = primary

Find and Kill Long-Running Queries

# Queries running longer than 5 minutes
psql -c "SELECT pid, now() - xact_start AS duration, state, left(query, 80) AS query
  FROM pg_stat_activity
  WHERE state != 'idle'
    AND xact_start < now() - interval '5 minutes'
  ORDER BY duration DESC;"

# Terminate a specific backend
psql -c "SELECT pg_terminate_backend(12345);"

# Cancel a query without killing the connection
psql -c "SELECT pg_cancel_backend(12345);"

# Set a statement timeout to prevent future runaways
psql -c "ALTER DATABASE mydb SET statement_timeout = '30s';"

Lock Diagnosis

# Find blocking locks
psql -c "SELECT blocked_locks.pid AS blocked_pid,
         blocked_activity.usename AS blocked_user,
         left(blocked_activity.query, 60) AS blocked_query,
         blocking_locks.pid AS blocking_pid,
         left(blocking_activity.query, 60) AS blocking_query
  FROM pg_catalog.pg_locks blocked_locks
  JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
  JOIN pg_catalog.pg_locks blocking_locks
    ON blocking_locks.locktype = blocked_locks.locktype
   AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
   AND blocking_locks.pid != blocked_locks.pid
  JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
  WHERE NOT blocked_locks.granted;"

# Check for deadlocks in logs
grep -i "deadlock detected" /var/log/postgresql/postgresql-*.log | tail -5

# Set lock timeout to fail fast during migrations
psql -c "SET lock_timeout = '5s'; ALTER TABLE orders ADD COLUMN new_col TEXT;"

Indexing Operations

# Find missing indexes: tables with high sequential scans
psql -c "SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan,
         CASE WHEN seq_scan > 0 THEN seq_tup_read / seq_scan ELSE 0 END AS avg_seq_tup
  FROM pg_stat_user_tables
  WHERE seq_scan > 100
  ORDER BY seq_tup_read DESC LIMIT 10;"

# Check index sizes and usage
psql -c "SELECT indexrelname, pg_size_pretty(pg_relation_size(indexrelid)) AS size,
         idx_scan AS times_used
  FROM pg_stat_user_indexes
  WHERE schemaname = 'public'
  ORDER BY pg_relation_size(indexrelid) DESC LIMIT 15;"

# Find unused indexes (candidates for removal)
psql -c "SELECT indexrelname, pg_size_pretty(pg_relation_size(indexrelid)) AS size
  FROM pg_stat_user_indexes
  WHERE idx_scan = 0 AND indexrelname NOT LIKE '%_pkey'
  ORDER BY pg_relation_size(indexrelid) DESC;"

# Create an index without locking the table
psql -c "CREATE INDEX CONCURRENTLY idx_orders_customer ON orders (customer_id);"

# Reindex without downtime (PostgreSQL 12+)
psql -c "REINDEX INDEX CONCURRENTLY idx_orders_customer;"

Query Performance

# Analyze a slow query (always use ANALYZE for real timings)
psql -c "EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
  SELECT * FROM orders WHERE customer_id = 42 AND created_at > '2025-01-01';"

# Look for: Seq Scan on large tables, high actual vs planned rows, low buffer hits

# Update statistics for the query planner
psql -c "ANALYZE orders;"

# Top queries by total execution time (requires pg_stat_statements)
psql -c "SELECT left(query, 80) AS query, calls, total_exec_time::int AS total_ms,
         mean_exec_time::int AS avg_ms, rows
  FROM pg_stat_statements
  ORDER BY total_exec_time DESC LIMIT 10;"

Transaction Management

# Check current isolation level
psql -c "SHOW transaction_isolation;"
# read committed (PostgreSQL default)

# Find open transactions holding locks
psql -c "SELECT pid, now() - xact_start AS tx_duration, state, left(query, 60) AS query
  FROM pg_stat_activity
  WHERE xact_start IS NOT NULL AND state != 'idle'
  ORDER BY tx_duration DESC;"

# Check for idle-in-transaction connections (holding locks but doing nothing)
psql -c "SELECT pid, now() - state_change AS idle_duration, left(query, 60) AS last_query
  FROM pg_stat_activity
  WHERE state = 'idle in transaction'
  ORDER BY idle_duration DESC;"

# Set idle-in-transaction timeout
psql -c "ALTER DATABASE mydb SET idle_in_transaction_session_timeout = '60s';"

Read Replica Operations

# Check replica lag from the primary
psql -c "SELECT application_name,
         pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS lag_bytes,
         replay_lag
  FROM pg_stat_replication;"

# Promote a replica to primary (emergency failover)
pg_ctl promote -D /var/lib/postgresql/data
# Or via SQL:
psql -c "SELECT pg_promote();"

# Check how many replicas are connected
psql -c "SELECT count(*) AS replica_count FROM pg_stat_replication;"

# Verify a replica is in recovery mode
psql -h replica-host -c "SELECT pg_is_in_recovery();"
# t

VACUUM and Maintenance

# Check tables that need vacuuming (dead tuple ratio)
psql -c "SELECT schemaname, relname, n_dead_tup, n_live_tup,
         ROUND(n_dead_tup::numeric / GREATEST(n_live_tup, 1) * 100, 1) AS dead_pct,
         last_autovacuum
  FROM pg_stat_user_tables
  WHERE n_dead_tup > 1000
  ORDER BY n_dead_tup DESC LIMIT 10;"

# Manual vacuum on a specific table
psql -c "VACUUM (VERBOSE, ANALYZE) orders;"

# Check autovacuum is running
psql -c "SELECT pid, query FROM pg_stat_activity WHERE query LIKE 'autovacuum%';"

Connection Monitoring

# Current connection count vs max
psql -c "SELECT count(*) AS current, setting AS max
  FROM pg_stat_activity, pg_settings
  WHERE pg_settings.name = 'max_connections'
  GROUP BY setting;"

# Connections by state
psql -c "SELECT state, count(*) FROM pg_stat_activity GROUP BY state ORDER BY count DESC;"
# active              12
# idle                45
# idle in transaction  3