Network Automation — Street-Level Ops¶
Quick Diagnosis Commands¶
# Test NAPALM connectivity to a device.
# The driver is used as a context manager so the session is opened on entry
# and closed on exit, even if get_facts() raises.
python3 -c "
from napalm import get_network_driver
with get_network_driver('eos')('192.168.1.1', 'netops', 'pass') as d:
    print(d.get_facts())
"
# Test Netmiko connectivity.
# The with-block disconnects on exit, even if send_command() raises.
python3 -c "
from netmiko import ConnectHandler
with ConnectHandler(device_type='cisco_ios', host='10.0.0.1', username='netops', password='pass') as c:
    print(c.send_command('show version | include Software'))
"
# Verify NETCONF is enabled and reachable
nmap -p 830 10.0.0.1
# NETCONF over SSH runs as an SSH *subsystem*, so it must be requested with -s.
# Without -s, 'netconf' is sent to the device as a remote shell command.
# A working server answers with its <hello> capabilities message.
ssh -s -p 830 -o StrictHostKeyChecking=no netops@10.0.0.1 netconf
# Ask the target for its gNMI capabilities (TLS verification disabled)
gnmic capabilities --address spine1:6030 --username netops --password pass --insecure
# Exercise a Nornir task against one host before running it fleet-wide
python3 -c "
from nornir import InitNornir
from nornir_napalm.plugins.tasks import napalm_get
from nornir_utils.plugins.functions import print_result
single_host = InitNornir(config_file='config.yaml').filter(name='leaf1')
print_result(single_host.run(task=napalm_get, getters=['facts']))
"
Gotcha: NAPALM compare_config() Returns Empty but Config Differs¶
Rule: compare_config() diffs the loaded candidate config against the running config; it knows nothing about your intended config until you load it as a candidate. If you call compare_config() before loading a candidate, there is nothing to compare, so the diff is empty — even if the running config is different from your intent.
# Wrong — will always return empty diff
device.open()
diff = device.compare_config()  # No candidate loaded yet
print(diff)  # ""

# Correct — load first, then diff
device.load_merge_candidate(filename="changes/leaf1.conf")
diff = device.compare_config()
if diff:
    print(diff)
    device.commit_config()
else:
    # Nothing to apply: drop the loaded candidate instead of leaving it pending
    device.discard_config()
    print("No changes needed")
One-liner: NAPALM's compare_config() diffing only works after load_merge_candidate() or load_replace_candidate(). No load, no diff. This is the single most common NAPALM mistake -- people call compare before load and assume the running config matches their intent because the diff is empty.
Gotcha: Netmiko send_config_set Does Not Save¶
Commands pushed via send_config_set are in the running config but not saved to NVRAM/startup-config. A device reload will lose all changes.
# Always save after pushing config: send_config_set() only changes the
# running config, so the change is lost on reload unless it is saved.
conn.send_config_set(config_commands)
conn.save_config() # sends 'write memory' or 'copy running-config startup-config'
Gotcha: Nornir Results Are Not Exceptions¶
When a Nornir task fails, it does not raise an exception in the calling code — it stores the exception in the result object. Your script appears to succeed even if 50% of devices failed.
import sys

results = nr.run(task=backup_configs)

# WRONG — no error visibility
print("Done")

# CORRECT — always check for failures
# Each per-host result carries .failed and the stored .exception; nothing is
# raised in the calling code, so the script must inspect them itself.
failed = [host for host, result in results.items() if result.failed]
if failed:
    print(f"FAILED on {len(failed)} hosts: {failed}")
    for host in failed:
        print(f" {host}: {results[host].exception}")
    # Exit non-zero so schedulers and CI see the run as failed
    sys.exit(1)
Gotcha: NETCONF Filter Without a Namespace Returns Empty¶
If your NETCONF XML filter omits the YANG module namespace, the device returns an empty result silently instead of an error:
# Wrong — missing namespace, returns empty
filter_xml = "<filter><interfaces/></filter>"
# Correct — include the YANG module namespace
# (the xmlns below is the ietf-interfaces module; use the namespace of the
# module you are actually querying)
filter_xml = """
<filter type="subtree">
<interfaces xmlns="urn:ietf:params:xml:ns:yang:ietf-interfaces"/>
</filter>
"""
# NOTE(review): `m` is defined outside this snippet — presumably an open
# NETCONF client session; confirm against the surrounding script.
result = m.get_config(source="running", filter=filter_xml)
Gotcha: NETCONF returns empty results (not errors) when the XML namespace is missing or wrong. This is the silent-failure equivalent of a typo in an API call returning 200 with an empty body. Always check that your filter XML includes the correct xmlns for the YANG module you are querying.
Pattern: Pre/Post Validation Workflow¶
Always capture state before and after a change. Automate the comparison:
from napalm import get_network_driver
import json, sys
from datetime import datetime
def _capture_state(device):
    """Return the established BGP peers and up interfaces seen on *device*.

    Only peers whose ``is_up`` flag is true are included, so a session that
    drops after the change shows up as "lost" even though the neighbor is
    still configured.
    """
    peers = device.get_bgp_neighbors().get("global", {}).get("peers", {})
    return {
        "bgp_peers": {addr for addr, peer in peers.items() if peer["is_up"]},
        "up_interfaces": {
            name for name, attrs in device.get_interfaces().items() if attrs["is_up"]
        },
    }


def main():
    """Apply change.conf to spine1.lab with pre/post validation.

    Snapshots BGP peers and up interfaces, shows the candidate diff, commits
    after operator confirmation, waits for convergence, and snapshots again.
    If any peer or interface was lost, the change is rolled back and the
    process exits with status 1. The device session is always closed.
    """
    import time  # only needed for the convergence wait below

    driver = get_network_driver("eos")
    device = driver("spine1.lab", "netops", "pass")
    device.open()
    try:
        # PRE state
        pre = _capture_state(device)
        print(f"PRE: {len(pre['bgp_peers'])} BGP peers, "
              f"{len(pre['up_interfaces'])} up interfaces")

        # CHANGE
        device.load_merge_candidate(filename="change.conf")
        diff = device.compare_config()
        if not diff:
            # Nothing to commit: do not leave an empty candidate pending
            device.discard_config()
            print("No changes needed")
            return
        print("Diff:\n", diff)
        try:
            input("Press enter to commit, Ctrl-C to abort...")
        except KeyboardInterrupt:
            # Operator aborted: drop the candidate, then let the interrupt
            # propagate exactly as it did before.
            device.discard_config()
            raise
        device.commit_config()
        time.sleep(10)  # wait for convergence

        # POST state
        post = _capture_state(device)
        lost_peers = pre["bgp_peers"] - post["bgp_peers"]
        lost_intfs = pre["up_interfaces"] - post["up_interfaces"]
        if lost_peers or lost_intfs:
            print(f"REGRESSION: lost BGP peers={lost_peers}, lost interfaces={lost_intfs}")
            print("Rolling back...")
            device.rollback()
            sys.exit(1)  # SystemExit still runs the finally block below
        print("POST: All BGP peers and interfaces stable. Change accepted.")
    finally:
        device.close()
Pattern: Nornir Inventory from Existing Sources¶
If your inventory already exists in Netbox or another CMDB, use a custom inventory plugin instead of maintaining hosts.yaml manually:
# Custom inventory plugin that reads from Netbox API
from nornir.core.inventory import Inventory, Host, Group, Groups, Hosts, Defaults
class NetboxInventory:
    """Nornir inventory plugin that builds hosts from the NetBox device API.

    Args:
        nb_url: Device-list endpoint, including any query filters.
        nb_token: NetBox API token. The default is a placeholder; supply a
            real token from your secret store rather than committing one.
        timeout: Per-request timeout in seconds.
    """

    def __init__(
        self,
        nb_url="https://netbox.corp/api/dcim/devices/?status=active&tag=automation",
        nb_token="abc123",
        timeout=30,
    ):
        self.nb_url = nb_url
        self.nb_token = nb_token
        self.timeout = timeout

    def load(self) -> Inventory:
        """Fetch every page of matching devices and return them as an Inventory.

        Devices without a primary IP are skipped with a warning because there
        is no address to connect to.

        Raises:
            requests.HTTPError: if NetBox answers with an error status.
        """
        import logging

        import requests

        log = logging.getLogger(__name__)
        headers = {"Authorization": f"Token {self.nb_token}"}
        hosts = Hosts()

        # NetBox paginates list endpoints; follow "next" until it is null.
        url = self.nb_url
        while url:
            r = requests.get(url, headers=headers, timeout=self.timeout)
            r.raise_for_status()
            page = r.json()
            for d in page["results"]:
                if not d.get("primary_ip"):
                    log.warning("Skipping %s: no primary IP in NetBox", d["name"])
                    continue
                platform = d.get("platform")
                # Newer NetBox releases expose the role as "role" instead of
                # "device_role"; accept either.
                role = d.get("role") or d.get("device_role") or {}
                hosts[d["name"]] = Host(
                    name=d["name"],
                    hostname=d["primary_ip"]["address"].split("/")[0],
                    platform=platform["slug"].replace("-", "_") if platform else None,
                    data={
                        "site": d["site"]["slug"],
                        "role": role.get("slug"),
                    },
                )
            url = page.get("next")

        return Inventory(hosts=hosts, groups=Groups(), defaults=Defaults())
Scenario: Push Config to 200 Switches During Maintenance Window¶
import sys

from nornir import InitNornir
from nornir.core.filter import F
from nornir_napalm.plugins.tasks import napalm_get
from nornir_netmiko.tasks import netmiko_save_config, netmiko_send_config
from nornir_utils.plugins.functions import print_result

nr = InitNornir(config_file="config.yaml")

# Filter to target devices only
target = nr.filter(F(groups__contains="access-layer") & F(data__site="dc-east"))
print(f"Targeting {len(target.inventory.hosts)} devices")

# Baseline first: capture the current config of three sample devices.
# Operators such as name__in are only understood inside an F() object; passed
# as a plain keyword they are compared literally and match no hosts.
sample = nr.filter(F(name__in=["access-sw-01", "access-sw-02", "access-sw-03"]))
baseline = sample.run(task=napalm_get, getters=["config"])  # capture current state
if baseline.failed:
    print(f"Baseline capture failed on: {sorted(baseline.failed_hosts)}")
    sys.exit(1)

# Push config
results = target.run(
    task=netmiko_send_config,
    config_commands=[
        "ip domain-name corp.example.com",
        "ntp server 10.0.0.1 prefer",
        "ntp server 10.0.0.2",
        "logging host 10.100.0.50",
    ],
)

# Report
failed = {h: r for h, r in results.items() if r.failed}
succeeded = {h: r for h, r in results.items() if not r.failed}
print(f"Succeeded: {len(succeeded)} Failed: {len(failed)}")
for host, result in failed.items():
    print(f" FAILED {host}: {result.exception}")

# Save configs on all succeeded devices. This runs even when some hosts
# failed, so devices that took the change do not lose it on reload.
save_failed = {}
if succeeded:
    save_results = nr.filter(F(name__in=list(succeeded))).run(task=netmiko_save_config)
    save_failed = {h: r for h, r in save_results.items() if r.failed}
    for host, result in save_failed.items():
        print(f" SAVE FAILED {host}: {result.exception}")

# Exit non-zero if anything went wrong, after the save has been attempted
if failed or save_failed:
    sys.exit(1)
Emergency: Device Locked After Partial Config Push¶
If automation fails mid-push and leaves the device in config mode or with a pending candidate config:
NX-OS:
# Check if config session is locked
show configuration session
# Abort any pending session
# ('test123' is a placeholder — use the session name reported above)
configure session test123
abort
# Or kill the config lock
# NOTE(review): the two commands below look like lock inspection and debug
# logging only, not a lock release — confirm the actual unlock procedure for
# your NX-OS release before relying on this step.
show system internal cfg-mgr lock
debug logfile cfgmgr_lock 10000
# Reload is the nuclear option — warn on-call first
IOS:
# Check for config lock (shows which session holds the exclusive lock)
show configuration lock
# Release it — run from privileged EXEC; this forcibly clears the lock held
# by the stuck automation session
clear configuration lock
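NAPALM — discard uncommitted candidate:
device.discard_config()  # drops the loaded candidate; the running config is untouched
Junos — rollback:
# In configuration mode: throw away uncommitted candidate changes
rollback 0
# Or revert to the configuration before the last commit
rollback 1
commit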
Emergency: Wrong Config Pushed to Production — Rollback¶
# NAPALM rollback (if device supports it)
device.open()
try:
    device.rollback()
finally:
    # Close the session even if the rollback raises
    device.close()

# Netmiko — manually push previous config.
# NOTE: send_config_from_file() replays the backup as config-mode commands, so
# it merges into the running config. Lines the bad change added that are not
# in the backup stay in place and must be removed explicitly.
with ConnectHandler(**device_params) as conn:
    conn.send_config_from_file(config_file="backups/spine1_20260317.txt")
    conn.save_config()
Useful One-Liners¶
# Get IOS version from all routers in parallel with Nornir
python3 -c "
from nornir import InitNornir
from nornir_netmiko.tasks import netmiko_send_command
nr = InitNornir(config_file='config.yaml')
r = nr.run(task=netmiko_send_command, command_string='show version | include Software')
for h, result in r.items():
    # Failed hosts carry an exception instead of command output
    if result.failed:
        print(f'{h}: FAILED ({result.exception})')
    else:
        print(f'{h}: {result[0].result.strip()}')
"
# Check BGP summary on all spines
python3 -c "
from nornir import InitNornir
from nornir.core.filter import F
from nornir_napalm.plugins.tasks import napalm_get
nr = InitNornir(config_file='config.yaml')
r = nr.filter(F(groups__contains='spine')).run(task=napalm_get, getters=['bgp_neighbors'])
for h, result in r.items():
    # A failed host has no getter dict to index into
    if result.failed:
        print(f'{h}: FAILED ({result.exception})')
        continue
    peers = result[0].result['bgp_neighbors'].get('global', {}).get('peers', {})
    up = sum(1 for p in peers.values() if p['is_up'])
    print(f'{h}: {up}/{len(peers)} BGP peers up')
"
# Backup all device configs with Nornir
python3 -c "
from datetime import datetime
from pathlib import Path
from nornir import InitNornir
from nornir_napalm.plugins.tasks import napalm_get
nr = InitNornir(config_file='config.yaml')
# write_text() does not create parent directories
Path('backups').mkdir(exist_ok=True)
def backup(task):
    r = task.run(task=napalm_get, getters=['config'])
    cfg = r[0].result['config']['running']
    Path(f'backups/{task.host.name}_{datetime.now():%Y%m%d}.txt').write_text(cfg)
results = nr.run(task=backup)
# Nornir stores failures in the result instead of raising
for h, result in results.items():
    if result.failed:
        print(f'{h}: backup FAILED ({result.exception})')
"
# List every device_type string Netmiko supports, one per line
python3 -c "from netmiko import platforms; print(*sorted(platforms), sep='\n')"
# Parse show output with TextFSM manually
# (adjust the template path to wherever ntc_templates is installed)
python3 -c "
import textfsm
template = '/usr/lib/python3/dist-packages/ntc_templates/templates/cisco_ios_show_ip_interface_brief.textfsm'
with open(template) as f:
    parser = textfsm.TextFSM(f)
with open('show_ip_int_brief.txt') as f:
    output = f.read()
for row in parser.ParseText(output):
    print(dict(zip(parser.header, row)))
"