Skip to content

Network Automation — Street-Level Ops

Quick Diagnosis Commands

# Test NAPALM connectivity to a device
# The driver is used as a context manager so the session is closed even if get_facts() raises.
python3 -c "
from napalm import get_network_driver
with get_network_driver('eos')('192.168.1.1', 'netops', 'pass') as d:
    print(d.get_facts())
"

# Test Netmiko connectivity
# The context manager disconnects the session even if send_command() raises.
python3 -c "
from netmiko import ConnectHandler
with ConnectHandler(device_type='cisco_ios', host='10.0.0.1', username='netops', password='pass') as c:
    print(c.send_command('show version | include Software'))
"

# Verify NETCONF is enabled and reachable
nmap -p 830 10.0.0.1
# -s requests the "netconf" SSH subsystem; without it, "netconf" is executed as a
# remote shell command. A working server answers with its <hello> capabilities.
ssh -p 830 -o StrictHostKeyChecking=no -s netops@10.0.0.1 netconf

# Check gNMI capabilities
gnmic capabilities --address spine1:6030 -u netops -p pass --insecure

# Run Nornir task against a single host for testing
python3 -c "
from nornir import InitNornir
from nornir_utils.plugins.functions import print_result
from nornir_napalm.plugins.tasks import napalm_get
nr = InitNornir(config_file='config.yaml')
r = nr.filter(name='leaf1').run(task=napalm_get, getters=['facts'])
print_result(r)
"

Gotcha: NAPALM compare_config() Returns Empty but Config Differs

Rule: compare_config() diffs the loaded candidate config against the running config; it never looks at your intent file on disk. If you call compare_config() before loading a candidate, there is no candidate to compare, so the diff is empty — even if the running config is different from your intent.

# Wrong — will always return empty diff
device.open()
diff = device.compare_config()  # No candidate loaded yet
print(diff)  # ""

# Correct — load first, then diff
device.load_merge_candidate(filename="changes/leaf1.conf")
diff = device.compare_config()
if diff:
    print(diff)
    device.commit_config()
else:
    device.discard_config()
    print("No changes needed")

One-liner: NAPALM's compare_config() diffing only works after load_merge_candidate() or load_replace_candidate(). No load, no diff. This is the single most common NAPALM mistake -- people call compare before load and assume the running config matches their intent because the diff is empty.

Gotcha: Netmiko send_config_set Does Not Save

Commands pushed via send_config_set are in the running config but not saved to NVRAM/startup-config. A device reload will lose all changes.

# Always save after pushing config
conn.send_config_set(config_commands)
conn.save_config()  # sends 'write memory' or 'copy running-config startup-config'

Gotcha: Nornir Results Are Not Exceptions

When a Nornir task fails, it does not raise an exception in the calling code — it stores the exception in the result object. Your script appears to succeed even if 50% of devices failed.

results = nr.run(task=backup_configs)

# WRONG — no error visibility
print("Done")

# CORRECT — always check for failures
failed = [host for host, result in results.items() if result.failed]
if failed:
    print(f"FAILED on {len(failed)} hosts: {failed}")
    for host in failed:
        print(f"  {host}: {results[host].exception}")
    sys.exit(1)

Gotcha: NETCONF Filter Without a Namespace Returns Empty

If your NETCONF XML filter omits the namespace of the YANG module it targets, the device matches nothing and silently returns an empty result instead of an error:

# Wrong — missing namespace, returns empty
filter_xml = "<filter><interfaces/></filter>"

# Correct — include the YANG module namespace
filter_xml = """
<filter type="subtree">
  <interfaces xmlns="urn:ietf:params:xml:ns:yang:ietf-interfaces"/>
</filter>
"""
result = m.get_config(source="running", filter=filter_xml)

Gotcha: NETCONF returns empty results (not errors) when the XML namespace is missing or wrong. This is the silent-failure equivalent of a typo in an API call returning 200 with an empty body. Always check that your filter XML includes the correct xmlns for the YANG module you are querying.

Pattern: Pre/Post Validation Workflow

Always capture state before and after a change. Automate the comparison:

from napalm import get_network_driver
import json, sys
from datetime import datetime

def _capture_state(device):
    """Snapshot the device state that must survive the change.

    Returns a dict with:
      bgp_peers      -- set of keys of the "global" peers table from get_bgp_neighbors()
      up_interfaces  -- set of interface names whose "is_up" flag is true
      route_count    -- number of entries get_route_to() returns for 0.0.0.0/0
    """
    peers = device.get_bgp_neighbors().get("global", {}).get("peers", {})
    return {
        "bgp_peers": set(peers),
        "up_interfaces": {
            name for name, intf in device.get_interfaces().items() if intf["is_up"]
        },
        "route_count": len(device.get_route_to("0.0.0.0/0")),
    }


def main():
    """Merge change.conf into spine1.lab with pre/post validation.

    Workflow: snapshot state, load the candidate, show the diff, wait for
    operator confirmation, commit, wait for convergence, snapshot again, and
    roll back if any BGP peer, up interface, or default-route entry was lost.

    Exits with status 1 after a rollback. The device session is always closed,
    and a candidate that is never committed is always discarded.
    """
    import time  # local import: the snippet's module-level imports do not include it

    driver = get_network_driver("eos")
    device = driver("spine1.lab", "netops", "pass")
    device.open()
    try:
        # PRE state
        pre = _capture_state(device)
        print(f"PRE: {len(pre['bgp_peers'])} BGP peers, "
              f"{len(pre['up_interfaces'])} up interfaces")

        # CHANGE
        device.load_merge_candidate(filename="change.conf")
        diff = device.compare_config()
        if not diff:
            # Nothing to apply: drop the candidate instead of committing a no-op.
            device.discard_config()
            print("No changes needed")
            return

        print("Diff:\n", diff)
        try:
            input("Press enter to commit, Ctrl-C to abort...")
        except KeyboardInterrupt:
            # Do not leave a pending candidate behind on the device.
            device.discard_config()
            raise
        device.commit_config()

        time.sleep(10)  # wait for convergence

        # POST state
        post = _capture_state(device)

        lost_peers = pre["bgp_peers"] - post["bgp_peers"]
        lost_intfs = pre["up_interfaces"] - post["up_interfaces"]
        lost_default_route = post["route_count"] < pre["route_count"]

        if lost_peers or lost_intfs or lost_default_route:
            print(f"REGRESSION: lost BGP peers={lost_peers}, "
                  f"lost interfaces={lost_intfs}, "
                  f"lost default route={lost_default_route}")
            print("Rolling back...")
            device.rollback()
            sys.exit(1)

        print("POST: All BGP peers and interfaces stable. Change accepted.")
    finally:
        # Runs on success, on sys.exit() after a rollback, and on any exception.
        device.close()

Pattern: Nornir Inventory from Existing Sources

If your inventory already exists in Netbox or another CMDB, use a custom inventory plugin instead of maintaining hosts.yaml manually:

# Custom inventory plugin that reads from Netbox API
from nornir.core.inventory import Inventory, Host, Group, Groups, Hosts, Defaults

class NetboxInventory:
    """Nornir inventory plugin that builds hosts from the NetBox DCIM devices API.

    Args:
        url: Device-list endpoint, including any query filters.
        token: NetBox API token. The default is a placeholder; supply the real
            token from a secret store rather than committing it.
        timeout: Per-request timeout in seconds.
    """

    def __init__(
        self,
        url: str = "https://netbox.corp/api/dcim/devices/?status=active&tag=automation",
        token: str = "abc123",
        timeout: float = 30,
    ) -> None:
        self.url = url
        self.token = token
        self.timeout = timeout

    def load(self) -> Inventory:
        """Fetch every matching device and return it as a Nornir Inventory.

        Raises:
            requests.HTTPError: if NetBox answers with a non-2xx status.
        """
        import requests

        headers = {"Authorization": f"Token {self.token}"}
        hosts = Hosts()

        # The list endpoint is paginated: keep following "next" until it is null,
        # otherwise only the first page of devices ends up in the inventory.
        next_url = self.url
        while next_url:
            r = requests.get(next_url, headers=headers, timeout=self.timeout)
            r.raise_for_status()
            page = r.json()

            for d in page["results"]:
                name = d.get("name") or str(d["id"])
                # primary_ip and platform are nullable in NetBox.
                primary_ip = d.get("primary_ip")
                platform = d.get("platform")
                # Newer NetBox releases expose the role as "role"; older ones
                # use "device_role".
                role = d.get("role") or d.get("device_role") or {}

                hosts[name] = Host(
                    name=name,
                    # Without a primary IP, fall back to the device name.
                    hostname=(
                        primary_ip["address"].split("/")[0] if primary_ip else name
                    ),
                    platform=(
                        platform["slug"].replace("-", "_") if platform else None
                    ),
                    data={
                        "site": d["site"]["slug"],
                        "role": role.get("slug"),
                    },
                )

            next_url = page.get("next")

        return Inventory(hosts=hosts, groups=Groups(), defaults=Defaults())

Scenario: Push Config to 200 Switches During Maintenance Window

from nornir import InitNornir
from nornir.core.filter import F
from nornir_napalm.plugins.tasks import napalm_get
from nornir_netmiko.tasks import netmiko_save_config, netmiko_send_config
from nornir_utils.plugins.functions import print_result
import sys

nr = InitNornir(config_file="config.yaml")

# Filter to target devices only
target = nr.filter(F(groups__contains="access-layer") & F(data__site="dc-east"))
print(f"Targeting {len(target.inventory.hosts)} devices")

# Canary: confirm the first 3 devices are reachable and capture their current
# config before touching anything. name__in is an F-object operator; passed as
# a plain keyword to nr.filter() it is compared literally and matches no hosts.
sample = nr.filter(F(name__in=["access-sw-01", "access-sw-02", "access-sw-03"]))
pre_state = sample.run(task=napalm_get, getters=["config"])
if pre_state.failed:
    print_result(pre_state)
    sys.exit("Canary check failed; nothing was pushed")

# Push config
results = target.run(
    task=netmiko_send_config,
    config_commands=[
        "ip domain-name corp.example.com",
        "ntp server 10.0.0.1 prefer",
        "ntp server 10.0.0.2",
        "logging host 10.100.0.50",
    ],
)

# Report
failed = {h: r for h, r in results.items() if r.failed}
succeeded = {h: r for h, r in results.items() if not r.failed}
print(f"Succeeded: {len(succeeded)}  Failed: {len(failed)}")
for host, result in failed.items():
    print(f"  FAILED {host}: {result.exception}")

# Save configs on all succeeded devices. This runs before the exit check so a
# failure on one switch does not leave the others with unsaved running config.
save_failed = {}
if succeeded:
    save_results = nr.filter(F(name__in=list(succeeded))).run(task=netmiko_save_config)
    save_failed = {h: r for h, r in save_results.items() if r.failed}
    for host, result in save_failed.items():
        print(f"  SAVE FAILED {host}: {result.exception}")

if failed or save_failed:
    sys.exit(1)

Emergency: Device Locked After Partial Config Push

If automation fails mid-push and leaves the device in config mode or with a pending candidate config:

NX-OS:

# Check if config session is locked
show configuration session
# Abort any pending session
configure session test123
  abort

# Or kill the config lock
show system internal cfg-mgr lock
debug logfile cfgmgr_lock 10000
# Reload is the nuclear option — warn on-call first

IOS:

# Check for config lock (IOS config replace lock)
show configuration lock
# Release it (privileged EXEC; the session that held the lock loses config mode)
clear configuration lock

NAPALM — discard uncommitted candidate:

device.open()
device.discard_config()  # throws away any pending candidate
device.close()

Junos — rollback:

# From CLI
configure
# Pending (uncommitted) candidate left by a failed push: discard it.
rollback 0    # reload the active config into the candidate; no commit needed
# Only if the bad change was already committed: revert it.
rollback 1    # load the config that was active before the last commit
commit

Emergency: Wrong Config Pushed to Production — Rollback

# NAPALM rollback (if device supports it)
device.open()
device.rollback()
device.close()

# Netmiko — manually push previous config
# NOTE: the backup is replayed line by line in config mode, so it is merged into
# the running config. Lines added by the bad push that are absent from the
# backup are NOT removed; negate those explicitly or use a config-replace.
with ConnectHandler(**device_params) as conn:
    conn.send_config_from_file(config_file="backups/spine1_20260317.txt")
    conn.save_config()

Useful One-Liners

# Get IOS version from all routers in parallel with Nornir
# Failed hosts are reported explicitly instead of printing their traceback text.
python3 -c "
from nornir import InitNornir
from nornir_netmiko.tasks import netmiko_send_command
nr = InitNornir(config_file='config.yaml')
r = nr.run(task=netmiko_send_command, command_string='show version | include Software')
for h, result in r.items():
    if result.failed:
        print(f'{h}: FAILED - {result.exception}')
    else:
        print(f'{h}: {result[0].result.strip()}')
"

# Check BGP summary on all spines
# A failed host carries no getter data, so it is reported and skipped rather
# than indexed (which would abort the loop for every remaining host).
python3 -c "
from nornir import InitNornir; from nornir.core.filter import F
from nornir_napalm.plugins.tasks import napalm_get
nr = InitNornir(config_file='config.yaml')
r = nr.filter(F(groups__contains='spine')).run(task=napalm_get, getters=['bgp_neighbors'])
for h, result in r.items():
    if result.failed:
        print(f'{h}: FAILED - {result.exception}')
        continue
    peers = result[0].result['bgp_neighbors'].get('global', {}).get('peers', {})
    up = sum(1 for p in peers.values() if p['is_up'])
    print(f'{h}: {up}/{len(peers)} BGP peers up')
"

# Backup all device configs with Nornir
# Creates backups/ if it is missing and lists every host whose backup failed.
python3 -c "
from nornir import InitNornir
from nornir_napalm.plugins.tasks import napalm_get
from pathlib import Path; from datetime import datetime
nr = InitNornir(config_file='config.yaml')
Path('backups').mkdir(parents=True, exist_ok=True)
def backup(task):
    r = task.run(task=napalm_get, getters=['config'])
    cfg = r[0].result['config']['running']
    Path(f'backups/{task.host.name}_{datetime.now():%Y%m%d}.txt').write_text(cfg)
res = nr.run(task=backup)
for h, failed in res.failed_hosts.items():
    print(f'FAILED {h}: {failed.exception}')
"

# Find all Netmiko-supported device types
python3 -c "from netmiko import platforms; print('\n'.join(sorted(platforms)))"

# Parse show output with TextFSM manually
# Both the template and the captured output are opened with context managers.
python3 -c "
import textfsm
with open('/usr/lib/python3/dist-packages/ntc_templates/templates/cisco_ios_show_ip_interface_brief.textfsm') as f:
    parser = textfsm.TextFSM(f)
with open('show_ip_int_brief.txt') as f:
    output = f.read()
for row in parser.ParseText(output):
    print(dict(zip(parser.header, row)))
"