Python Automation Scripts Every Developer Should Know
Discover essential Python automation scripts that can save hours of manual work. From file management and data processing to API integration and web scraping, these practical examples will boost your productivity.
Why Python is ideal for automation
Automation is essentially "turn repeated manual steps into reliable code." Python is a favorite for this because it's readable, fast to iterate, and has libraries for almost every kind of automation: file operations, data pipelines, APIs, browsers, and system tooling.
The scripts below are designed to be practical. Copy them into a `scripts/` folder, parameterize them, then schedule them (Task Scheduler/cron) to run on a cadence.
File management scripts
File automation pays off instantly: renaming, organizing, cleaning old build artifacts, and creating backups before batch changes.
1) Batch rename files by pattern
Rename files safely using a preview step and a dry-run flag.
from __future__ import annotations
import re
from pathlib import Path
def batch_rename(directory: str, pattern: str, replacement: str, dry_run: bool = True) -> None:
    """Rename files in *directory* by regex substitution, with a preview step.

    Every match of *pattern* in a file name is replaced with *replacement*.
    With ``dry_run=True`` (the default) the plan is only printed, so you can
    review it before committing.

    Args:
        directory: Folder whose immediate files are considered (not recursive).
        pattern: Regular expression applied to each file name.
        replacement: Replacement text (backreferences like ``\\1`` allowed).
        dry_run: When True, print the planned renames but change nothing.
    """
    root = Path(directory)
    rx = re.compile(pattern)
    candidates: list[tuple[Path, Path]] = []
    for p in root.iterdir():
        if not p.is_file():
            continue
        new_name = rx.sub(replacement, p.name)
        if new_name != p.name:
            candidates.append((p, p.with_name(new_name)))
    for src, dst in candidates:
        print(f"{src.name} -> {dst.name}")
    if dry_run:
        print("Dry-run enabled. No files were renamed.")
        return
    for src, dst in candidates:
        # Never clobber an existing file — two sources can also map to the
        # same target name, so check right before each rename.
        if dst.exists():
            print(f"Skipping {src.name}: {dst.name} already exists")
            continue
        src.rename(dst)
if __name__ == "__main__":
    batch_rename("./downloads", r"\s+", "_", dry_run=True)

2) Organize a folder by file extension
Great for Downloads/screenshots folders that get messy fast.
from pathlib import Path
import shutil
def organize_by_extension(directory: str) -> None:
    """Sort the files in *directory* into subfolders named after their extension.

    ``report.pdf`` goes to ``pdf/``, extensionless files to ``no-extension/``.
    A name clash inside the target folder is resolved by appending ``_1``,
    ``_2``, ... instead of overwriting the existing file.
    """
    root = Path(directory)
    # Snapshot the listing first: we create subfolders and move files while
    # processing, and iterdir() is lazy.
    files = [p for p in root.iterdir() if p.is_file()]
    for p in files:
        ext = p.suffix.lower().lstrip(".") or "no-extension"
        target_dir = root / ext
        target_dir.mkdir(exist_ok=True)
        dest = target_dir / p.name
        counter = 1
        while dest.exists():
            dest = target_dir / f"{p.stem}_{counter}{p.suffix}"
            counter += 1
        shutil.move(str(p), str(dest))
if __name__ == "__main__":
    organize_by_extension("./downloads")

3) Clean up old files
Delete files older than N days (and optionally empty directories).
import time
from pathlib import Path
def cleanup_old_files(directory: str, days_old: int = 30, remove_empty_dirs: bool = True) -> None:
    """Delete files under *directory* last modified more than *days_old* days ago.

    Optionally prunes directories left empty afterwards, deepest paths first,
    so a parent emptied by its child's removal is removed too.
    """
    root = Path(directory)
    threshold = time.time() - days_old * 86400
    for entry in root.rglob("*"):
        if not entry.is_file():
            continue
        if entry.stat().st_mtime < threshold:
            entry.unlink(missing_ok=True)
    if not remove_empty_dirs:
        return
    # Reverse-lexicographic order guarantees every child path sorts after
    # (and is therefore visited before) its parent.
    subdirs = [d for d in root.rglob("*") if d.is_dir()]
    for d in sorted(subdirs, reverse=True):
        if not any(d.iterdir()):
            d.rmdir()
if __name__ == "__main__":
    cleanup_old_files("./temp", days_old=7)

Data processing scripts
Data processing automation is where Python shines: validating CSVs, normalizing columns, generating reports, and preparing datasets for dashboards.
4) Validate and normalize CSV files
Enforce required columns, trim whitespace, and write out a clean version.
import csv
from pathlib import Path
REQUIRED = {"email", "created_at"}


def normalize_csv(input_file: str, output_file: str) -> None:
    """Validate and clean a CSV, writing the result to *output_file*.

    Ensures the header contains every column in ``REQUIRED``, strips
    whitespace from header names and string cell values, and writes the
    cleaned rows out with the normalized header.

    Raises:
        ValueError: If the CSV has no header row or a required column is missing.
    """
    src = Path(input_file)
    dst = Path(output_file)
    with src.open("r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        if not reader.fieldnames:
            raise ValueError("CSV has no header")
        # Normalize the header once and reuse it for BOTH validation and the
        # writer. Using the raw (unstripped) fieldnames for DictWriter while
        # the rows carry stripped keys would make writerows() raise.
        fieldnames = [c.strip() for c in reader.fieldnames]
        missing = REQUIRED - set(fieldnames)
        if missing:
            raise ValueError(f"Missing required columns: {sorted(missing)}")
        rows = []
        for row in reader:
            cleaned = {k.strip(): (v.strip() if isinstance(v, str) else v) for k, v in row.items()}
            rows.append(cleaned)
    with dst.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
if __name__ == "__main__":
    normalize_csv("./raw/users.csv", "./clean/users.clean.csv")

5) Generate a quick HTML report
Turn a CSV into a simple HTML summary to share internally.
import csv
from pathlib import Path
def html_report(csv_file: str, out_file: str, max_rows: int = 25) -> None:
    """Render the first *max_rows* data rows of a CSV as a simple HTML table.

    Cell and header text is HTML-escaped so markup characters in the data
    cannot break (or inject into) the generated page.

    Raises:
        ValueError: If the CSV is empty (no header row).
    """
    from html import escape  # stdlib; local import keeps the snippet self-contained

    src = Path(csv_file)
    dst = Path(out_file)
    with src.open("r", newline="", encoding="utf-8") as f:
        rows = list(csv.reader(f))
    if not rows:
        raise ValueError(f"{src} is empty: nothing to report")
    header, data = rows[0], rows[1:max_rows + 1]
    parts = ["<html><body>", f"<h2>Report: {escape(src.name)}</h2>", "<table border='1' cellpadding='6'>"]
    parts.append("<tr>" + "".join(f"<th>{escape(h)}</th>" for h in header) + "</tr>")
    for r in data:
        parts.append("<tr>" + "".join(f"<td>{escape(c)}</td>" for c in r) + "</tr>")
    parts += ["</table>", "</body></html>"]
    dst.write_text("\n".join(parts), encoding="utf-8")
if __name__ == "__main__":
    html_report("./clean/users.clean.csv", "./reports/users.html")

API integration scripts
APIs are automation gold. They let you sync systems, generate tickets, trigger workflows, or build internal bots. Below is a small, reusable client pattern you can expand.
6) Simple REST client with retries
Good defaults: timeouts, retries, and centralized error handling.
from __future__ import annotations
import time
import requests
class RestClient:
    """Minimal JSON REST client with sane defaults.

    Attaches a bearer-token header when *token* is given, applies a request
    timeout, and retries failed GETs with exponential backoff (1s, 2s, 4s, ...).
    """

    def __init__(self, base_url: str, token: str | None = None, timeout_s: int = 20):
        self.base_url = base_url.rstrip("/")
        self.timeout_s = timeout_s
        self.session = requests.Session()
        if token:
            self.session.headers.update({"Authorization": f"Bearer {token}"})

    def get(self, path: str, retries: int = 3):
        """GET *path* relative to the base URL and return the decoded JSON.

        Re-raises the last ``requests.RequestException`` once *retries*
        attempts have been exhausted.
        """
        url = f"{self.base_url}/{path.lstrip('/')}"
        last_attempt = retries - 1
        for attempt in range(retries):
            try:
                response = self.session.get(url, timeout=self.timeout_s)
                response.raise_for_status()
                return response.json()
            except requests.RequestException:
                if attempt == last_attempt:
                    raise
                time.sleep(2 ** attempt)
if __name__ == "__main__":
api = RestClient("https://api.github.com")
data = api.get("repos/python/cpython")
    print(data["full_name"], data["stargazers_count"])

7) Sync API data to a local JSON file
Useful for caching responses for reports and dashboards.
import json
from datetime import datetime
from pathlib import Path
def write_snapshot(payload: dict, out_dir: str) -> Path:
    """Dump *payload* as pretty-printed JSON to a timestamped file.

    Creates *out_dir* (and parents) if needed and returns the path written.
    File names embed a UTC timestamp, e.g. ``snapshot_20250101T120000Z.json``.
    """
    from datetime import timezone  # aware "now"; datetime.utcnow() is deprecated

    root = Path(out_dir)
    root.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    out = root / f"snapshot_{stamp}.json"
    out.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    return out
if __name__ == "__main__":
payload = {"status": "ok", "items": [1, 2, 3]}
    print(write_snapshot(payload, "./snapshots"))

Web scraping scripts
Scraping is powerful, but be responsible: respect `robots.txt`, terms of service, and rate limits. When possible, prefer an official API.
8) Extract titles from a page (BeautifulSoup)
Basic pattern: fetch HTML, parse, extract, store results.
import requests
from bs4 import BeautifulSoup
def extract_titles(url: str) -> list[str]:
    """Fetch *url* and return up to 25 h1/h2 heading texts, whitespace-trimmed."""
    headers = {"User-Agent": "CyberGleanBot/1.0"}
    resp = requests.get(url, timeout=20, headers=headers)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    titles = []
    for node in soup.select("h1, h2"):
        titles.append(node.get_text(strip=True))
    return titles[:25]
if __name__ == "__main__":
for t in extract_titles("https://www.python.org/"):
        print(t)

9) Price-check skeleton (alert hook)
Keep selectors isolated; websites change frequently.
import re
import requests
from bs4 import BeautifulSoup
def parse_price(text: str) -> float | None:
m = re.search(r"(\d+[\.,]?\d*)", text.replace(",", ""))
return float(m.group(1)) if m else None
def get_price(url: str, selector: str) -> float | None:
    """Fetch *url*, find the first element matching *selector*, and parse a price.

    Returns None when nothing matches. The CSS selector is passed in rather
    than hard-coded because site markup changes frequently.
    """
    resp = requests.get(url, timeout=20, headers={"User-Agent": "CyberGleanBot/1.0"})
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    node = soup.select_one(selector)
    if node is None:
        return None
    return parse_price(node.get_text(" ", strip=True))
if __name__ == "__main__":
price = get_price("https://example.com", ".price")
    print("Price:", price)

Dev/ops automation scripts
Developer automation often means making local workflows repeatable: cleaning build outputs, running checks, generating changelogs, or orchestrating commands.
10) Run commands and capture output
Useful for glue scripts (format + lint + tests) with clean logs.
import subprocess
def run(cmd: list[str]) -> None:
    """Echo and execute *cmd*, printing its captured output.

    stdout is always shown; stderr is shown only on failure, and a nonzero
    exit status is propagated to the caller via SystemExit.
    """
    print("$", " ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.stdout:
        print(result.stdout)
    if result.returncode == 0:
        return
    if result.stderr:
        print(result.stderr)
    raise SystemExit(result.returncode)
if __name__ == "__main__":
    run(["python", "--version"])

11) Schedule tasks (Windows Task Scheduler / cron)
On Windows, use Task Scheduler; on Linux/macOS, use cron. Inside Python, you can also use a scheduler loop for simple cases.
import time
from datetime import datetime
def job() -> None:
    """Print a timestamped marker so each scheduled run is visible in logs."""
    timestamp = datetime.now().isoformat(timespec="seconds")
    print("Job ran at", timestamp)
if __name__ == "__main__":
while True:
job()
        time.sleep(60 * 60)  # hourly

| Category | Best libraries | Where it shines | Risk level |
|---|---|---|---|
| File management | pathlib, shutil, re | Renames, cleanups, organizing assets | Low (add dry-run + backups) |
| Data processing | csv, pandas | Transforms, normalization, reporting | Medium (validate + version outputs) |
| API integration | requests | Sync systems, automate workflows | Medium (timeouts + retries) |
| Web scraping | beautifulsoup4, playwright/selenium | Extract data when no API exists | High (TOS + site changes) |
| Dev/ops scripting | subprocess, pathlib | Repeatable local workflows | Low (log everything) |
Best practices + hardening
Automation scripts become production systems faster than you expect. A little structure makes them safer and easier to share.
Make scripts safe by default
12) Minimal logging setup
import logging
def setup_logging() -> None:
    """Configure root logging to both the console and ``automation.log``.

    Uses a timestamped, level-tagged format so scheduled runs leave a
    readable audit trail.
    """
    console = logging.StreamHandler()
    logfile = logging.FileHandler("automation.log")
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        handlers=[console, logfile],
    )
if __name__ == "__main__":
setup_logging()
    logging.info("Automation started")

Want more automation ideas?
If you want help turning a recurring manual workflow into a robust internal tool, CyberGlean can help you design, implement, and operationalize it.
Talk to CyberGlean

Suggested Articles
Latest Trends in Web Automation 2025: AI-Driven Revolution
Discover how artificial intelligence is transforming web automation with intelligent data extraction.
Read Article

Why Custom Software is Essential for Business Growth
Explore the critical advantages of tailored software solutions over off-the-shelf products.
Read Article

Web Development Trends 2025: Complete Guide to Modern Web Technologies
Discover cutting-edge technologies, frameworks, and best practices shaping the future of web development.
Read Article