Add home-security-agent with PostgreSQL persistence for dashboard.

The autonomous agent writes all observations to agent.* tables consumed by Homelab Command on port 8765.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
mo
2026-05-17 21:57:16 +02:00
parent 43c4ed7a6d
commit 02b1d155d4
17 changed files with 1024 additions and 0 deletions
@@ -0,0 +1 @@
# EL-KADI Home Security Agent
+243
View File
@@ -0,0 +1,243 @@
"""Agentisch brein — LLM met tools, of regel-fallback zonder API."""
import json
import os
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional
from agent.observer import (
ObservationReport,
load_yaml,
probe_http,
probe_proxmox,
probe_tcp,
)
from agent.state import recent_incidents
@dataclass
class AgentDecision:
alert: bool
severity: str
title: str
body: str
fingerprint: str
actions: List[str]
TOOLS_SCHEMA = [
{
"type": "function",
"function": {
"name": "probe_tcp",
"description": "Test of een TCP-poort open is op een host",
"parameters": {
"type": "object",
"properties": {
"host": {"type": "string"},
"port": {"type": "integer"},
},
"required": ["host", "port"],
},
},
},
{
"type": "function",
"function": {
"name": "probe_http",
"description": "HTTP GET check op een URL",
"parameters": {
"type": "object",
"properties": {
"url": {"type": "string"},
"insecure_tls": {"type": "boolean"},
},
"required": ["url"],
},
},
},
{
"type": "function",
"function": {
"name": "probe_proxmox",
"description": "Check Proxmox web UI bereikbaarheid",
"parameters": {
"type": "object",
"properties": {"host": {"type": "string"}},
"required": ["host"],
},
},
},
]
def _run_tool(name: str, args: dict) -> str:
if name == "probe_tcp":
ok, detail = probe_tcp(args["host"], int(args["port"]))
return json.dumps({"ok": ok, "detail": detail})
if name == "probe_http":
ok, detail = probe_http(args["url"], insecure=bool(args.get("insecure_tls")))
return json.dumps({"ok": ok, "detail": detail})
if name == "probe_proxmox":
ok, detail = probe_proxmox(args["host"])
return json.dumps({"ok": ok, "detail": detail})
return json.dumps({"error": "unknown tool"})
def _rule_based_decide(report: ObservationReport, policies: dict) -> AgentDecision:
rules = policies.get("rules") or {}
failed = [f for f in report.findings if not f.ok]
if not failed:
return AgentDecision(
alert=False,
severity="info",
title="Alles OK",
body=f"{len(report.findings)} checks geslaagd.",
fingerprint="all_ok",
actions=[],
)
worst = "low"
lines = []
for f in failed:
lines.append(f"{f.kind}/{f.name}: {f.detail}")
if f.kind == "proxmox" and not f.ok:
worst = rules.get("proxmox_unreachable", "critical")
elif f.kind == "nas" and not f.ok:
worst = max_sev(worst, rules.get("nas_unreachable", "critical"))
elif f.kind == "service" and not f.ok:
worst = max_sev(worst, rules.get("any_service_down", "high"))
elif f.kind == "docker" and not f.ok:
worst = max_sev(worst, "high")
fp = "fail:" + "|".join(sorted(f"{f.kind}:{f.name}" for f in failed))[:200]
return AgentDecision(
alert=True,
severity=worst,
title=f"{len(failed)} probleem(en) gedetecteerd",
body="\n".join(lines),
fingerprint=fp,
actions=["Herhaal check over 5 min", "Controleer host handmatig"],
)
def max_sev(a: str, b: str) -> str:
order = ["info", "low", "medium", "high", "critical"]
return a if order.index(a) >= order.index(b) else b
def _in_quiet_hours(policies: dict) -> bool:
qh = policies.get("quiet_hours") or {}
if not qh:
return False
try:
from zoneinfo import ZoneInfo
tz = ZoneInfo(qh.get("timezone", "UTC"))
except Exception:
tz = None
now = datetime.now(tz) if tz else datetime.now()
start = qh.get("start", "23:00")
end = qh.get("end", "07:00")
h, m = map(int, now.strftime("%H %M").split())
cur = h * 60 + m
sh, sm = map(int, start.split(":"))
eh, em = map(int, end.split(":"))
s, e = sh * 60 + sm, eh * 60 + em
if s <= e:
return s <= cur < e
return cur >= s or cur < e
return False
def decide(report: ObservationReport) -> AgentDecision:
policies = load_yaml("policies.yaml")
history = recent_incidents(8)
api_key = os.environ.get("OPENAI_API_KEY", "").strip()
model = os.environ.get("AGENT_MODEL", "gpt-4o-mini")
if not api_key:
return _rule_based_decide(report, policies)
try:
from openai import OpenAI
client = OpenAI(api_key=api_key)
system = """Je bent de autonome security agent voor het EL-KADI homelab thuis.
Je krijgt ruwe observaties (eigen probes, geen Wazuh/Uptime Kuma).
Beslis of de gebruiker een Telegram-melding moet krijgen.
Wees conservatief: alleen alert bij echt problemen of verdachte wijzigingen.
Antwoord uiteindelijk ALTIJD met JSON:
{"alert":bool,"severity":"info|low|medium|high|critical","title":"...","body":"...","fingerprint":"korte_sleutel","actions":["..."]}
Je mag tools aanroepen om te verifiëren voordat je alert=true zet."""
user_msg = json.dumps(
{
"observations": report.to_dict(),
"recent_incidents": history,
"policies": {
"quiet_hours": policies.get("quiet_hours"),
"dedupe_minutes": policies.get("dedupe_minutes"),
},
},
indent=2,
default=str,
)
messages = [
{"role": "system", "content": system},
{"role": "user", "content": user_msg},
]
for _ in range(5):
resp = client.chat.completions.create(
model=model,
messages=messages,
tools=TOOLS_SCHEMA,
tool_choice="auto",
)
msg = resp.choices[0].message
if msg.tool_calls:
messages.append(msg)
for tc in msg.tool_calls:
result = _run_tool(tc.function.name, json.loads(tc.function.arguments))
messages.append(
{
"role": "tool",
"tool_call_id": tc.id,
"content": result,
}
)
continue
text = (msg.content or "").strip()
if "```" in text:
text = text.split("```")[1].replace("json", "").strip()
data = json.loads(text)
return AgentDecision(
alert=bool(data.get("alert")),
severity=str(data.get("severity", "medium")),
title=str(data.get("title", "Security")),
body=str(data.get("body", "")),
fingerprint=str(data.get("fingerprint", "llm")),
actions=list(data.get("actions") or []),
)
except Exception as e:
dec = _rule_based_decide(report, policies)
dec.body += f"\n\n(LLM fallback: {e})"
return dec
return _rule_based_decide(report, policies)
def should_notify(decision: AgentDecision, policies: dict) -> bool:
if not decision.alert:
return False
allowed = policies.get("severity_telegram") or ["critical", "high"]
if decision.severity not in allowed:
return False
if _in_quiet_hours(policies):
return decision.severity == (policies.get("quiet_hours") or {}).get(
"allow_severity", "critical"
)
return True
+91
View File
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""EL-KADI Home Security Agent — autonome loop."""
import os
import sys
import time
from pathlib import Path
# Package root on path
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))
from dotenv import load_dotenv
load_dotenv(ROOT / ".env")
from agent.brain import decide, should_notify
from agent.observer import load_yaml, run_observation
from agent.pg_store import (
persist_incident as pg_record_incident,
persist_observation,
was_notified_recently_pg,
)
from agent.state import log_run, record_incident, was_notified_recently
from agent.telegram_notify import format_alert, send_message
def _record_incident(
run_id,
fingerprint: str,
severity: str,
title: str,
body: str,
notified: bool,
) -> None:
record_incident(fingerprint, severity, title, body, notified)
pg_record_incident(run_id, fingerprint, severity, title, body, notified)
def run_once() -> int:
policies = load_yaml("policies.yaml")
report = run_observation()
decision = decide(report)
log_run(decision.title, report.to_dict())
run_id = persist_observation(report, decision)
print(f"[{report.timestamp}] {decision.severity}: {decision.title} (alert={decision.alert})")
if run_id:
print(f" → PostgreSQL run #{run_id}")
if not should_notify(decision, policies):
if decision.alert:
_record_incident(run_id, decision.fingerprint, decision.severity, decision.title, decision.body, False)
return 0
dedupe = int(policies.get("dedupe_minutes", 30))
if was_notified_recently(decision.fingerprint, dedupe) or was_notified_recently_pg(
decision.fingerprint, dedupe
):
print(f" dedupe skip ({decision.fingerprint})")
return 0
text = format_alert(decision.severity, decision.title, decision.body, decision.actions)
if send_message(text):
_record_incident(run_id, decision.fingerprint, decision.severity, decision.title, decision.body, True)
print(" → Telegram verzonden")
return 0
print(" → Telegram mislukt (check TELEGRAM_BOT_TOKEN / TELEGRAM_CHAT_ID)")
_record_incident(run_id, decision.fingerprint, decision.severity, decision.title, decision.body, False)
return 1
def main():
if len(sys.argv) > 1 and sys.argv[1] == "once":
sys.exit(run_once())
policies = load_yaml("policies.yaml")
interval = int(policies.get("interval_seconds", 300))
print(f"EL-KADI Security Agent — loop elke {interval}s (Ctrl+C stop)")
while True:
try:
run_once()
except KeyboardInterrupt:
raise
except Exception as e:
print(f"run error: {e}")
time.sleep(interval)
if __name__ == "__main__":
main()
+159
View File
@@ -0,0 +1,159 @@
"""Directe observatie — geen Wazuh/Uptime Kuma/n8n."""
import socket
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
import httpx
import yaml
CONFIG_DIR = __import__("pathlib").Path(__file__).resolve().parent.parent / "config"
@dataclass
class Finding:
kind: str
name: str
ok: bool
detail: str
meta: dict = field(default_factory=dict)
@dataclass
class ObservationReport:
timestamp: str
findings: list[Finding]
def to_dict(self) -> dict:
return {
"timestamp": self.timestamp,
"findings": [
{"kind": f.kind, "name": f.name, "ok": f.ok, "detail": f.detail, "meta": f.meta}
for f in self.findings
],
"summary": {
"total": len(self.findings),
"failed": sum(1 for f in self.findings if not f.ok),
},
}
def load_yaml(name: str) -> dict:
with open(CONFIG_DIR / name, encoding="utf-8") as f:
return yaml.safe_load(f) or {}
def probe_tcp(host: str, port: int, timeout: float = 4) -> tuple[bool, str]:
try:
with socket.create_connection((host, port), timeout=timeout):
return True, "open"
except Exception as e:
return False, str(e)
def probe_http(url: str, insecure: bool = False, timeout: float = 8) -> tuple[bool, str]:
try:
r = httpx.get(url, timeout=timeout, verify=not insecure, follow_redirects=True)
if r.status_code < 500:
return True, f"HTTP {r.status_code}"
return False, f"HTTP {r.status_code}"
except Exception as e:
return False, str(e)
def probe_proxmox(host: str, port: int = 8006) -> tuple[bool, str]:
ok, detail = probe_tcp(host, port)
if not ok:
return False, detail
try:
r = httpx.get(f"https://{host}:{port}/", timeout=6, verify=False)
return r.status_code in (200, 301, 302, 401, 403), f"HTTPS {r.status_code}"
except Exception as e:
return False, str(e)
def docker_container_states() -> list[Finding]:
"""Leest lokale Docker socket (NAS)."""
findings = []
try:
out = subprocess.run(
["docker", "ps", "-a", "--format", "{{.Names}}|{{.Status}}"],
capture_output=True,
text=True,
timeout=30,
)
if out.returncode != 0:
return [Finding("docker", "docker", False, out.stderr.strip() or "docker ps failed")]
for line in out.stdout.strip().splitlines():
if not line or "|" not in line:
continue
name, status = line.split("|", 1)
up = status.lower().startswith("up")
findings.append(
Finding(
"docker",
name,
up,
status,
{"exited": "exited" in status.lower()},
)
)
except FileNotFoundError:
findings.append(Finding("docker", "docker", False, "docker CLI niet beschikbaar"))
return findings
def lan_ping_watch(subnet: str, sample_hosts: list[str] | None = None) -> list[Finding]:
"""Lightweight: ping gateway + optioneel lijst — geen mass scan standaard."""
findings = []
gateway = ".".join(subnet.split(".")[:3]) + ".1"
ok, detail = _ping_once(gateway)
findings.append(Finding("lan", "gateway", ok, detail, {"ip": gateway}))
return findings
def _ping_once(ip: str) -> tuple[bool, str]:
try:
out = subprocess.run(
["ping", "-c", "1", "-W", "2", ip],
capture_output=True,
text=True,
timeout=5,
)
return out.returncode == 0, "reachable" if out.returncode == 0 else "no reply"
except Exception as e:
return False, str(e)
def run_observation() -> ObservationReport:
targets = load_yaml("targets.yaml")
findings: list[Finding] = []
nas = targets.get("nas") or {}
host = nas.get("host", "192.168.1.211")
for chk in nas.get("checks") or []:
name = chk.get("name", "check")
if chk.get("type") == "tcp":
ok, detail = probe_tcp(host, int(chk["port"]))
elif chk.get("type") == "http":
ok, detail = probe_http(chk["url"])
else:
ok, detail = False, "unknown check type"
findings.append(Finding("nas", name, ok, detail))
for px in targets.get("proxmox_hosts") or []:
ok, detail = probe_proxmox(px["host"], int(px.get("port", 8006)))
findings.append(Finding("proxmox", px.get("name", px["host"]), ok, detail))
for svc in targets.get("services") or []:
ok, detail = probe_http(svc["url"], insecure=bool(svc.get("insecure_tls")))
findings.append(Finding("service", svc.get("name", svc["url"]), ok, detail))
findings.extend(docker_container_states())
lan = targets.get("lan_watch") or {}
if lan.get("enabled"):
findings.extend(lan_ping_watch(lan.get("subnet", "192.168.1.0/24")))
return ObservationReport(timestamp=datetime.utcnow().isoformat() + "Z", findings=findings)
+152
View File
@@ -0,0 +1,152 @@
"""PostgreSQL — alle observaties voor dashboard :8765."""
from __future__ import annotations
import json
import logging
import os
from datetime import datetime, timezone
from typing import Any, Optional
import psycopg2
import psycopg2.extras
from agent.brain import AgentDecision
from agent.observer import ObservationReport
logger = logging.getLogger(__name__)
def _pg_enabled() -> bool:
return os.getenv("PG_DISABLED", "").lower() not in ("1", "true", "yes")
def _connect():
url = os.getenv("DATABASE_URL", "").strip()
if url:
return psycopg2.connect(url)
return psycopg2.connect(
host=os.getenv("PG_HOST", "192.168.1.211"),
port=int(os.getenv("PG_PORT", "5433")),
user=os.getenv("PG_USER", "mo"),
password=os.getenv("PG_PASSWORD", ""),
dbname=os.getenv("PG_DATABASE", "homelab"),
)
def _parse_ts(ts: str) -> datetime:
if ts.endswith("Z"):
ts = ts[:-1] + "+00:00"
return datetime.fromisoformat(ts)
def persist_observation(report: ObservationReport, decision: AgentDecision) -> Optional[int]:
"""Schrijf run + findings naar agent.*; retourneert run_id."""
if not _pg_enabled():
return None
summary = report.to_dict().get("summary") or {}
decision_json = {
"alert": decision.alert,
"severity": decision.severity,
"title": decision.title,
"body": decision.body,
"fingerprint": decision.fingerprint,
"actions": decision.actions,
}
try:
conn = _connect()
conn.autocommit = False
with conn.cursor() as cur:
cur.execute("SET search_path TO agent, public;")
cur.execute(
"""
INSERT INTO observation_runs
(observed_at, total_checks, failed_checks, summary, decision)
VALUES (%s, %s, %s, %s::jsonb, %s::jsonb)
RETURNING id
""",
(
_parse_ts(report.timestamp),
int(summary.get("total", len(report.findings))),
int(summary.get("failed", sum(1 for f in report.findings if not f.ok))),
json.dumps(report.to_dict(), default=str),
json.dumps(decision_json, default=str),
),
)
run_id = cur.fetchone()[0]
rows = [
(
run_id,
f.kind,
f.name,
f.ok,
f.detail,
json.dumps(f.meta or {}, default=str),
)
for f in report.findings
]
if rows:
psycopg2.extras.execute_batch(
cur,
"""
INSERT INTO findings (run_id, kind, name, ok, detail, meta)
VALUES (%s, %s, %s, %s, %s, %s::jsonb)
""",
rows,
)
conn.commit()
conn.close()
logger.info("PostgreSQL run %s: %d findings", run_id, len(rows))
return int(run_id)
except Exception as e:
logger.warning("PostgreSQL observatie mislukt: %s", e)
return None
def persist_incident(
run_id: Optional[int],
fingerprint: str,
severity: str,
title: str,
body: str,
notified: bool,
) -> None:
if not _pg_enabled():
return
try:
conn = _connect()
conn.autocommit = True
with conn.cursor() as cur:
cur.execute("SET search_path TO agent, public;")
cur.execute(
"""
INSERT INTO incidents (run_id, fingerprint, severity, title, body, notified)
VALUES (%s, %s, %s, %s, %s, %s)
""",
(run_id, fingerprint, severity, title, body, notified),
)
conn.close()
except Exception as e:
logger.warning("PostgreSQL incident mislukt: %s", e)
def was_notified_recently_pg(fingerprint: str, minutes: int) -> bool:
if not _pg_enabled():
return False
try:
conn = _connect()
with conn.cursor() as cur:
cur.execute("SET search_path TO agent, public;")
cur.execute(
"""
SELECT 1 FROM incidents
WHERE fingerprint = %s AND notified = true
AND created_at > NOW() - (%s || ' minutes')::interval
LIMIT 1
""",
(fingerprint, str(int(minutes))),
)
row = cur.fetchone()
conn.close()
return row is not None
except Exception:
return False
+75
View File
@@ -0,0 +1,75 @@
"""SQLite — geheugen van de agent (dedupe, incidenten)."""
import json
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
DB_PATH = Path(__file__).resolve().parent.parent / "data" / "agent.db"
def connect():
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
conn.executescript(
"""
CREATE TABLE IF NOT EXISTS incidents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
fingerprint TEXT NOT NULL,
severity TEXT NOT NULL,
title TEXT NOT NULL,
body TEXT NOT NULL,
created_at TEXT NOT NULL,
notified INTEGER DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_fp ON incidents(fingerprint);
CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
started_at TEXT NOT NULL,
summary TEXT,
raw_observations TEXT
);
"""
)
return conn
def was_notified_recently(fingerprint: str, minutes: int) -> bool:
since = (datetime.utcnow() - timedelta(minutes=minutes)).isoformat()
conn = connect()
row = conn.execute(
"SELECT 1 FROM incidents WHERE fingerprint=? AND notified=1 AND created_at>?",
(fingerprint, since),
).fetchone()
conn.close()
return row is not None
def record_incident(fingerprint: str, severity: str, title: str, body: str, notified: bool):
conn = connect()
conn.execute(
"INSERT INTO incidents (fingerprint, severity, title, body, created_at, notified) VALUES (?,?,?,?,?,?)",
(fingerprint, severity, title, body, datetime.utcnow().isoformat(), 1 if notified else 0),
)
conn.commit()
conn.close()
def recent_incidents(limit: int = 10) -> list[dict]:
conn = connect()
rows = conn.execute(
"SELECT severity, title, body, created_at FROM incidents ORDER BY id DESC LIMIT ?",
(limit,),
).fetchall()
conn.close()
return [dict(r) for r in rows]
def log_run(summary: str, observations: dict):
conn = connect()
conn.execute(
"INSERT INTO runs (started_at, summary, raw_observations) VALUES (?,?,?)",
(datetime.utcnow().isoformat(), summary, json.dumps(observations, default=str)),
)
conn.commit()
conn.close()
@@ -0,0 +1,42 @@
"""Telegram — enige externe meldkanaal."""
import os
import httpx
def send_message(text: str, parse_mode: str = "HTML") -> bool:
token = os.environ.get("TELEGRAM_BOT_TOKEN", "").strip()
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "").strip()
if not token or not chat_id:
return False
url = f"https://api.telegram.org/bot{token}/sendMessage"
# Telegram max ~4096
if len(text) > 4000:
text = text[:3990] + ""
r = httpx.post(
url,
data={"chat_id": chat_id, "text": text, "parse_mode": parse_mode},
timeout=30,
)
return r.status_code == 200
def format_alert(severity: str, title: str, body: str, actions=None) -> str:
icon = {"critical": "🔴", "high": "🟠", "medium": "🟡", "low": "🔵", "info": ""}.get(
severity, ""
)
lines = [
f"{icon} <b>EL-KADI SECURITY</b> · {severity.upper()}",
f"<b>{_esc(title)}</b>",
"",
_esc(body),
]
if actions:
lines.append("")
lines.append("<b>Agent acties:</b>")
for a in actions:
lines.append(f"{_esc(a)}")
return "\n".join(lines)
def _esc(s: str) -> str:
return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")