Skip to content

find - Street-Level Ops

Real-world find patterns for production diagnosis, cleanup, security audits, and operational automation.


Quick Diagnosis Commands

Finding Large Files

# Top 20 largest files on the root filesystem (stay on one mount)
# %s = size in bytes, %p = path; sort -rn orders on the leading byte count
find / -xdev -type f -printf "%s\t%p\n" 2>/dev/null | sort -rn | head -20

# Files over 1G, human-readable
find / -xdev -type f -size +1G -exec ls -lh {} + 2>/dev/null

# Large files modified in the last 24 hours (the likely cause of a sudden disk alert)
find / -xdev -type f -size +100M -mtime -1 -printf "%s\t%T+\t%p\n" 2>/dev/null | sort -rn

# Directories under /var holding the most large (>50M) files.
# This counts files per directory (%h = containing directory); it does not
# sum their sizes.
find /var -xdev -type f -size +50M -printf "%h\n" | sort | uniq -c | sort -rn | head -10

Finding Recently Modified Files

# Files changed in the last 15 minutes (incident triage — what just changed?)
find /etc /app /srv -type f -mmin -15 -ls 2>/dev/null

# Files changed in the last hour, newest first
# (%T@ = mtime as epoch seconds, used as the numeric sort key; %Tc = readable form)
find /app -type f -mmin -60 -printf "%T@ %Tc %p\n" | sort -rn

# Config files changed since the last deploy marker
# (the marker's mtime is set to the deploy time; -newer compares against it)
touch -d "2026-03-19 02:00:00" /tmp/deploy_marker
find /etc -type f \( -name "*.conf" -o -name "*.yaml" -o -name "*.json" \) \
  -newer /tmp/deploy_marker -ls

# What changed in /etc in the last 24 hours? (change-time, catches permission changes too)
# %C+ prints YYYY-MM-DD+HH:MM:SS..., so a plain sort is chronological.
# %Cc is locale-formatted (in the C locale it starts with the weekday name)
# and would sort alphabetically instead.
find /etc -type f -ctime -1 -printf "%C+  %p\n" | sort

Finding Orphaned Files

Gotcha: Orphaned files appear after deleting a user with userdel but not userdel -r. The UID in the inode no longer maps to /etc/passwd, so ls -l shows a raw number instead of a username. These files retain whatever permissions the deleted user had.

# Orphaned by owner: the file's UID has no matching user (user deleted, files remain)
find / -xdev -nouser -ls 2>/dev/null

# Orphaned by group: the file's GID has no matching group
find / -xdev -nogroup -ls 2>/dev/null

# Either kind of orphan in one pass, pruning the virtual filesystems
find / \( -path /proc -o -path /sys -o -path /dev \) -prune -o \
  \( -nouser -o -nogroup \) -print 2>/dev/null

Finding World-Writable Files

# Files any user can write to, skipping the virtual filesystems and the temp
# directories where world-writable is expected.
# Output: octal mode, owner:group, path
find / \( -path /proc -o -path /sys -o -path /dev -o -path /tmp -o -path /var/tmp \) -prune -o \
  -type f -perm /002 -printf '%m %u:%g %p\n' 2>/dev/null

# Directories that are world-writable but lack the sticky bit
# (anyone can delete others' files inside them)
find / \( -path /proc -o -path /sys \) -prune -o \
  -type d -perm /002 -not -perm -1000 -ls 2>/dev/null

Common Scenarios

Disk Space Cleanup

# Find and compress logs older than 7 days
find /var/log/app -name "*.log" -type f -mtime +7 -exec gzip -v {} +

# Remove compressed logs older than 90 days
find /var/log -name "*.log.gz" -type f -mtime +90 -delete

# Remove core dumps
# A bare "core.*" pattern also matches source files such as core.py or core.js,
# so only "core" and "core.<digits>" names are considered, and a file is removed
# only when file(1) identifies it as a core file.
find / -xdev -type f \( -name "core" -o -name "core.[0-9]*" \) -exec sh -c '
  for f; do
    if file -b -- "$f" | grep -q "core file"; then
      rm -f -- "$f"
    fi
  done
' _ {} + 2>/dev/null

# Clean up old package manager caches
find /var/cache/apt/archives -name "*.deb" -mtime +30 -delete
find /var/cache/yum -type f -mtime +30 -delete 2>/dev/null

# Remove stale pip wheel caches
# -prune: do not descend into a directory that is about to be removed
find /tmp -maxdepth 2 -name "pip-*" -type d -mtime +7 -prune -exec rm -rf {} +

# Report before deleting — show what would be cleaned and total size
# (same tests as the 90-day delete above, so the report matches what it removes)
find /var/log -name "*.log.gz" -type f -mtime +90 -printf "%s\n" | \
  awk '{total += $1} END {printf "Files: %d, Total: %.1f MB\n", NR, total/1048576}'

Security Audits: SUID and SGID Binaries

# SUID binaries (run as the file owner, usually root)
find / -type f -perm -4000 -ls 2>/dev/null

# SGID binaries (run with the file group's privileges)
find / -type f -perm -2000 -ls 2>/dev/null

# Either bit set (-perm /6000 matches when any of the SUID/SGID bits is set)
find / -type f -perm /6000 -ls 2>/dev/null

# Record a sorted baseline: octal mode, owner, group, path
find / -type f -perm /6000 -printf '%m %u %g %p\n' 2>/dev/null \
  | sort > /var/lib/suid_baseline.txt

# Later audit: regenerate the same listing and diff it against the baseline
find / -type f -perm /6000 -printf '%m %u %g %p\n' 2>/dev/null \
  | sort \
  | diff /var/lib/suid_baseline.txt - \
  || echo "SUID/SGID binaries changed!"

# SUID binaries whose owner is not root (unusual — investigate)
find / -type f -perm -4000 -not -user root -ls 2>/dev/null

Log Rotation and Management

# Rotate application logs: compress anything older than 1 day, remove older than 30 days
# NOTE: find ignores fractional days, so -mtime +1 only matches files last
# modified at least two full days ago (and -mtime +30 at least 31 days ago).
find /var/log/myapp -name "*.log" -mtime +1 -exec gzip {} +
find /var/log/myapp -name "*.log.gz" -mtime +30 -delete

# Find log files that are open but deleted (consuming disk but invisible to ls)
# A deleted-but-open file has no directory entry, so a normal find cannot see it.
# It is still reachable through the per-process /proc/<pid>/fd symlinks, whose
# targets are listed with a "(deleted)" suffix; lsof reports the same files.
find /proc/*/fd -ls 2>/dev/null | grep '(deleted)' | grep '\.log'

# Find log files over 500M that may need rotation
# (awk converts the byte count printed by %s into whole megabytes)
find /var/log -type f -size +500M -printf "%s\t%p\n" | sort -rn | \
  awk -F'\t' '{printf "%.0f MB\t%s\n", $1/1048576, $2}'

# Truncate (not delete) large active log files — preserves the file handle
find /var/log -name "*.log" -size +1G -exec truncate -s 0 {} \;

Finding Stale Temp Files

# Temp files older than 7 days
find /tmp -type f -mtime +7 -ls

# Stale session files (not modified for more than 1440 minutes = 24 hours)
find /var/lib/php/sessions -type f -mmin +1440 -delete

# Old build artifacts in /tmp
find /tmp -maxdepth 1 -name "tmp.*" -type d -mtime +3 -exec rm -rf {} +
find /tmp -maxdepth 1 -name "pip-*" -type d -mtime +3 -exec rm -rf {} +

# Stale lock files (PID files for processes that no longer exist)
# - Only the first line of each file is used and it must be purely numeric;
#   anything else is skipped rather than handed to kill.
# - kill -0 also fails with "permission denied" for a live process owned by
#   another user, so a PID is reported stale only when /proc/<pid> is gone too.
find /var/run -name "*.pid" -type f -exec sh -c '
  for f; do
    pid=$(head -n 1 "$f" 2>/dev/null | tr -d "[:space:]")
    case $pid in
      "" | *[!0-9]*) continue ;;
    esac
    if ! kill -0 "$pid" 2>/dev/null && [ ! -d "/proc/$pid" ]; then
      echo "STALE: $f (pid=$pid)"
    fi
  done
' _ {} +

Permission Remediation

# Fix web content: directories 755, files 644
# (-type d / -type f never match symlinks, so chmod is not applied through links)
find /var/www -type d -exec chmod 755 {} +
find /var/www -type f -exec chmod 644 {} +

# Fix ownership for deployed app
# -h changes a symlink itself instead of following it, so a link that points
# outside /app cannot cause chown to re-own files elsewhere on the system.
find /app -not -user appuser -exec chown -h appuser:appgroup {} +

# Make all shell scripts executable
find /opt/scripts -name "*.sh" -type f -exec chmod 755 {} +

# Remove execute bit from non-script files (data files that got +x somehow)
# -perm /111 matches when any of the user/group/other execute bits is set
find /data -type f -perm /111 \
  ! -name "*.sh" ! -name "*.py" ! -name "*.pl" ! -name "*.rb" \
  -exec chmod -x {} +

# Fix a common Docker mistake: files owned by root inside container volumes
# (-h for the same reason as above: never chown through a symlink)
find /srv/app/data -uid 0 -exec chown -h 1000:1000 {} +

Under the hood (finding broken symlinks): find -type l matches all symlinks, but -exec test -e {} follows the link and tests the target. When the target is gone, test -e fails. The ! inverts the result, giving you only broken links. This two-step logic is why there is no single -broken flag in POSIX find.

# Find symlinks whose targets do not exist
# (-exec ... \; acts as a test here: it is true when the command exits 0,
#  and the leading ! negates it)
find /usr/local -type l ! -exec test -e {} \; -print

# Verbose: show what each broken link points to
# (%p = path of the link, %l = the target text stored in the link)
find /usr/local -type l ! -exec test -e {} \; -printf "%p -> %l\n"

# Find and remove broken symlinks
# (-delete comes last, so it only runs for links that failed the test -e check)
find /usr/local/bin -type l ! -exec test -e {} \; -delete

# Check an entire system (excluding virtual filesystems)
find / -path /proc -prune -o -path /sys -prune -o \
  -type l ! -exec test -e {} \; -printf "BROKEN: %p -> %l\n" 2>/dev/null

Docker Layer Optimization

# Inside a container or build context: find the largest files
# Useful for reducing Docker image size
find / -xdev -type f -printf "%s\t%p\n" 2>/dev/null | sort -rn | head -30

# Find package manager caches left behind in the image
# (directories named apt/yum/pip/npm that sit somewhere below a "cache" directory)
find / -xdev -type d \( \
  -name "apt" -path "*/cache/*" -o \
  -name "yum" -path "*/cache/*" -o \
  -name "pip" -path "*/cache/*" -o \
  -name "npm" -path "*/cache/*" \
  \) -exec du -sh {} + 2>/dev/null

# Find documentation and man pages (often removable in prod images)
find /usr/share/doc /usr/share/man /usr/share/info -type f 2>/dev/null | wc -l

# Find locale data (often bloating images)
find /usr/share/locale -type f 2>/dev/null | wc -l

# Find .pyc files and __pycache__ dirs in a Python image
# Each name carries its own type test: a single trailing -type f would
# silently exclude every __pycache__ entry, because those are directories.
find /usr/lib/python* /usr/local/lib/python* \
  \( -name "*.pyc" -type f -o -name "__pycache__" -type d \) 2>/dev/null | wc -l

# Multi-stage build audit: compare what is in the final stage
# Run from host against an exported image filesystem
# The temporary container is kept in a variable so it can be removed afterwards;
# "docker create" would otherwise leave a stopped container behind on every run.
cid=$(docker create myimage:latest)
docker export "$cid" | tar tf - | \
  sort > /tmp/image_files.txt
docker rm "$cid" > /dev/null
grep -cE '\.(pyc|pyo|cache)' /tmp/image_files.txt

Operational Patterns

Combining find with tar for Backup

# Back up all config files changed in the last 24 hours
# (-print0 with tar --null -T - passes NUL-separated names, safe for any filename)
find /etc -type f -mtime -1 -print0 | tar czf /backup/etc-changes-$(date +%F).tar.gz --null -T -

# Incremental backup using a timestamp file
# First run only: seed the marker at the epoch so every file counts as newer.
# (Touching the marker unconditionally before the scan would reset it to "now"
# and leave nothing newer to back up.)
[ -e /backup/.last_backup ] || touch -d @0 /backup/.last_backup
# Stamp the start time BEFORE scanning: files modified while the backup runs
# are then newer than the marker and get picked up by the next run.
touch /backup/.last_backup.next
find /data -newer /backup/.last_backup -type f -print0 | \
  tar czf /backup/incremental-$(date +%F_%H%M).tar.gz --null -T - && \
  mv /backup/.last_backup.next /backup/.last_backup   # advance the marker only if tar succeeded

# Back up specific file types from a project
find /app -type f \( -name "*.py" -o -name "*.yaml" -o -name "*.conf" \) -print0 | \
  tar czf /backup/app-config-$(date +%F).tar.gz --null -T -

# Create a tarball of files larger than 100M (for offloading to cold storage)
find /data/archive -type f -size +100M -print0 | \
  tar czf /cold-storage/large-files-$(date +%F).tar.gz --null -T -

find + rsync

# Sync only recently changed files to a remote host
# (%P = path relative to the starting point, which is what --files-from expects
#  when the source argument is that same directory)
find /app/uploads -type f -mtime -1 -printf "%P\n" > /tmp/sync_list.txt
rsync -av --files-from=/tmp/sync_list.txt /app/uploads/ remote:/app/uploads/

# Rsync with exclude patterns derived from find
# Patterns go one per line into an --exclude-from file instead of being expanded
# onto the command line, so paths containing spaces are not word-split.
# The leading "/" anchors each pattern at the transfer root and the trailing "/"
# restricts it to directories; -prune stops find from descending into (and
# listing) node_modules nested inside an already-excluded node_modules.
find /app -name "node_modules" -type d -prune -printf "/%P/\n" > /tmp/excludes.txt
rsync -av --exclude-from=/tmp/excludes.txt /app/ remote:/app/

find-Based Cron Cleanup Jobs

# /etc/cron.daily/cleanup-tmp
#!/bin/bash
# Daily housekeeping: prune old temp files and empty temp directories, then
# compress and expire application logs. Errors from find are discarded
# (2>/dev/null), so failures are silent.
# Remove temp files older than 7 days
find /tmp -type f -mtime +7 -delete 2>/dev/null
# Remove empty temp directories older than 3 days
# (-mindepth 1 keeps /tmp itself out of the match)
find /tmp -mindepth 1 -type d -empty -mtime +3 -delete 2>/dev/null
# Compress application logs older than 1 day
find /var/log/app -name "*.log" -mtime +1 -exec gzip {} + 2>/dev/null
# Remove compressed logs older than 60 days
find /var/log/app -name "*.log.gz" -mtime +60 -delete 2>/dev/null
# Log what we did (appends one timestamped line per run)
echo "$(date): cleanup completed" >> /var/log/cleanup-cron.log
# /etc/cron.hourly/cleanup-sessions
#!/bin/bash
# Hourly housekeeping for web session and upload scratch files.
# Remove stale PHP sessions older than 24 hours (1440 minutes)
find /var/lib/php/sessions -type f -mmin +1440 -delete 2>/dev/null
# Remove stale upload chunks (not modified for more than 360 minutes = 6 hours)
find /var/www/uploads/tmp -type f -mmin +360 -delete 2>/dev/null
# /etc/cron.weekly/audit-permissions
#!/bin/bash
# Weekly permission audit: snapshot SUID/SGID binaries, mail an alert when the
# snapshot differs from a stored baseline, and list world-writable files.
AUDIT_DIR="/var/lib/security-audit"
baseline="$AUDIT_DIR/suid_sgid_baseline.txt"
snapshot="$AUDIT_DIR/suid_sgid_current.txt"
changes="$AUDIT_DIR/suid_diff.txt"
mkdir -p "$AUDIT_DIR"

# Current SUID/SGID inventory (-perm /6000 = either bit set), sorted so it can
# be diffed line by line
find / \( -path /proc -o -path /sys \) -prune -o \
  -type f -perm /6000 -printf '%m %u %g %p\n' 2>/dev/null \
  | sort > "$snapshot"

# Compare with the baseline only when one exists; alert on any difference
if [[ -f "$baseline" ]]; then
  diff "$baseline" "$snapshot" > "$changes"
  if [[ -s "$changes" ]]; then
    mail -s "Security Audit Alert" ops@example.com <<< "SUID/SGID binary changes detected"
  fi
fi

# World-writable files, ignoring virtual filesystems and the temp directories
find / \( -path /proc -o -path /sys -o -path /dev -o -path /tmp -o -path /var/tmp \) -prune -o \
  -type f -perm /002 -printf '%m %u %p\n' > "$AUDIT_DIR/world_writable.txt" 2>/dev/null

Counting and Reporting

# Count files by extension in a project
# -name "?*.?*" keeps only names with at least one character on each side of a
# dot, so extension-less files (Makefile, README) and plain dotfiles
# (.gitignore) are not reported as if their whole name were an extension.
find /app -type f -name "?*.?*" -printf "%f\n" | sed 's/.*\.//' | sort | uniq -c | sort -rn | head -20

# Count files by directory (find hot spots)
find /var/log -type f -printf "%h\n" | sort | uniq -c | sort -rn | head -10

# Sum total size of files matching a pattern
# (%s = size in bytes; 1073741824 = bytes per GiB)
find /data -name "*.csv" -type f -printf "%s\n" | \
  awk '{t+=$1} END {printf "%.2f GB in %d files\n", t/1073741824, NR}'

# File age distribution: how many files per age bracket
# %T@ is the mtime in epoch seconds; awk turns it into an age in days and
# tallies each file into one bucket. Output is ordered by count, largest first.
find /data -type f -printf "%T@\n" | awk -v now="$(date +%s)" '{
  age = (now - $1) / 86400
  if (age < 1) bucket="< 1 day"
  else if (age < 7) bucket="1-7 days"
  else if (age < 30) bucket="7-30 days"
  else if (age < 90) bucket="30-90 days"
  else if (age < 365) bucket="90-365 days"
  else bucket="> 1 year"
  counts[bucket]++
} END {
  for (b in counts) printf "%6d  %s\n", counts[b], b
}' | sort -rn

Emergency: Find What Is Filling the Disk Right Now

# Continuous watch: files modified in the last 2 minutes, newest first
# (re-runs every 10 seconds; output columns are mtime, size in bytes, path —
#  a file that keeps reappearing with a larger size is the one that is growing)
watch -n 10 'find / -xdev -type f -mmin -2 -printf "%T+ %s %p\n" 2>/dev/null | sort -r | head -20'

Integration: find + inotifywait

# Watch for new files and process them (inotify for real-time, find as fallback)
# IFS= and -r make read return each line verbatim (no backslash processing, no
# trimming of leading/trailing whitespace). The loop variable is deliberately
# not called "path": in zsh that name is tied to $PATH.
inotifywait -m -r -e create /data/incoming --format '%w%f' | while IFS= read -r new_file; do
  echo "New file: $new_file"
done

# Periodically catch anything inotify missed
# (files modified within the last 5 minutes, handed to the processor in batches)
find /data/incoming -type f -mmin -5 -exec process_file.sh {} +