diff --git a/doc/hmdl.schema.json b/doc/hmdl.schema.json index f606b5a..2804d2b 100644 --- a/doc/hmdl.schema.json +++ b/doc/hmdl.schema.json @@ -205,6 +205,14 @@ }, "required": [] }, + "includes": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task" + }, "checks": { "type": "array", "items": { @@ -613,18 +621,29 @@ "type": "object", "additionalProperties": false, "properties": { - "host": { + "ssh_host": { "type": "string" }, - "port": { - "type": "integer", - "default": 22 + "ssh_port": { + "type": [ + "null", + "integer" + ], + "default": null }, - "user": { - "type": "string" + "ssh_user": { + "type": [ + "null", + "string" + ], + "default": null }, "ssh_key": { - "type": "string" + "type": [ + "null", + "string" + ], + "default": null }, "mount_point": { "type": "string", @@ -641,9 +660,7 @@ } }, "required": [ - "host", - "user", - "ssh_key" + "ssh_host" ] } }, @@ -658,8 +675,5 @@ } } }, - "required": [ - "defaults", - "checks" - ] + "required": [] } diff --git a/examples/test-1.hmdl.json b/examples/test-1.hmdl.json new file mode 100644 index 0000000..7d4b159 --- /dev/null +++ b/examples/test-1.hmdl.json @@ -0,0 +1,14 @@ +{ + "checks": [ + { + "name": "test", + "kind": "file_state", + "parameters": { + "path": "/tmp/test", + "exist": true, + "age_threshold": 60, + "size_threshold": 1 + } + } + ] +} diff --git a/examples/test-2.hmdl.json b/examples/test-2.hmdl.json new file mode 100644 index 0000000..60fc349 --- /dev/null +++ b/examples/test-2.hmdl.json @@ -0,0 +1,12 @@ +{ + "checks": [ + { + "name": "test", + "kind": "script", + "parameters": { + "path": "/tmp/script", + "arguments": [] + } + } + ] +} diff --git a/examples/test-3.hmdl.json b/examples/test-3.hmdl.json new file mode 100644 index 0000000..64edf2d --- /dev/null +++ b/examples/test-3.hmdl.json @@ -0,0 +1,28 @@ +{ + "defaults": { + "threshold": 1, + "schedule": { + "regular_interval": 10, + "attentive_interval": 1 + }, + "notifications": [ + { + "kind": "console", + "parameters": { + } + } + ] + }, + "checks": [ + { + "name": "test", + "kind": "generic_remote", + "parameters": { + "ssh_host": "bragi.pool.greenscale.de", + "ssh_user": "fenris", + "ssh_port": 8192, + "ssh_key": "/home/fenris/.ssh/keypairs/gs-bragi" + } + } + ] +} diff --git a/examples/test.hmdl.json b/examples/test.hmdl.json index 37ac4e4..b1eea19 100644 --- a/examples/test.hmdl.json +++ b/examples/test.hmdl.json @@ -1,31 +1,20 @@ { "defaults": { - }, - "checks": [ - { - "name": "test", - "threshold": 3, - "annoy": false, - "schedule": { - "regular_interval": 15, - "attentive_interval": 1 - }, - "notifications": [ - { - "kind": "console", - "parameters": { - } + "schedule": { + "regular_interval": 10, + "attentive_interval": 1 + }, + "notifications": [ + { + "kind": "console", + "parameters": { } - ], - "kind": "generic_remote", - "parameters": { - "host" : "iks-vvd-cn.greenscale.lan", - "user" : "root", - "ssh_key" : "/home/christiann/.ssh/id_ed25519.pub", - "mount_point" : "/", - "threshold" : 95, - "strict" : false } - } + ] + }, + "includes": [ + "test-1.hmdl.json", + "test-2.hmdl.json", + "test-3.hmdl.json" ] } diff --git a/source/logic/checks/generic_remote.py b/source/logic/checks/generic_remote.py index ad33581..abb0df2 100644 --- a/source/logic/checks/generic_remote.py +++ b/source/logic/checks/generic_remote.py @@ -8,18 +8,20 @@ class implementation_check_kind_generic_remote(interface_check_kind): "type": "object", "additionalProperties": False, "properties": { - "host" : { + "ssh_host" : { "type" : "string" }, - "port": { - "type": "integer", - "default": 22 + "ssh_port": { + "type": ["null", "integer"], + "default": None }, - "user" : { - "type" : "string" + "ssh_user" : { + "type" : ["null", "string"], + "default": None, }, "ssh_key" : { - "type" : "string" + "type" : ["null", "string"], + "default": None, }, "mount_point" : { "type" : "string", @@ -36,7 +38,7 @@ class implementation_check_kind_generic_remote(interface_check_kind): } }, "required": [ - "host", "user", "ssh_key" + "ssh_host" ] } @@ -45,46 +47,68 @@ class implementation_check_kind_generic_remote(interface_check_kind): [implementation] ''' def normalize_conf_node(self, node): - if not "host" in node \ - or not "user" in node \ - or not "ssh_key" in node: - raise ValueError("MISSING STUFF!") - if not "port" in node: - node["port"] = 22 - if not "mount_point" in node: - node["mount_point"] = "/" - if not "threshold" in node: - node["threshold"] = 95 - if not "strict" in node: - node["strict"] = "/" - - return node - + if (not "ssh_host" in node): + raise ValueError("mandatory parameter \"ssh_host\" missing") + else: + return dict_merge( + { + "ssh_port": None, + "ssh_user": None, + "ssh_key": None, + "mount_point": "/", + "threshold": 95, + "strict": False, + }, + node + ) + + ''' [implementation] ''' def run(self, parameters): - SSH_COMMAND = string_coin("ssh -i {{ssh_key}} -p {{port}} {{user}}@{{host}} \"df {{mount_point}} | tr -s ' '\"", parameters) + inner_command = string_coin( + "df {{mount_point}} | tr -s \" \"", + { + "mount_point": parameters["mount_point"], + } + ) - retval=shell_command(SSH_COMMAND) - - if retval["return_code"] > 0: + outer_command_parts = [] + if True: + outer_command_parts.append("ssh"); + if True: + outer_command_parts.append(string_coin("{{host}}", {"host": parameters["ssh_host"]})); + if (parameters["ssh_port"] is not None): + outer_command_parts.append(string_coin("-p {{port}}", {"port": ("%u" % parameters["ssh_port"])})); + if (parameters["ssh_user"] is not None): + outer_command_parts.append(string_coin("-l {{user}}", {"user": parameters["ssh_user"]})); + if (parameters["ssh_key"] is not None): + outer_command_parts.append(string_coin("-i {{key}}", {"key": parameters["ssh_key"]})); + if True: + outer_command_parts.append(string_coin("-o BatchMode=yes", {})) + if True: + outer_command_parts.append(string_coin("'{{inner_command}}'", {"inner_command": inner_command})) + outer_command = " ".join(outer_command_parts) + + result = shell_command(outer_command) + + if (result["return_code"] > 0): return { - "condition" : enum_condition.unknown, - "info" : { - "error" : retval["stderr"] + "condition": enum_condition.unknown, + "info": { + "error": result["stderr"], } } else: - parts=retval["stdout"].split("\n")[-2].split(" ") - ret={ - "device" : parts[0], - "used" : parts[2], - "avail" : parts[3], - "perc" : int(parts[4][:-1]) + stuff = result["stdout"].split("\n")[-2].split(" ") + data = { + "device": stuff[0], + "used": stuff[2], + "avail": stuff[3], + "perc": int(stuff[4][:-1]), } - - if ret["perc"] > parameters["threshold"]: + if (data["perc"] > parameters["threshold"]): return { "condition": ( enum_condition.critical @@ -92,20 +116,20 @@ class implementation_check_kind_generic_remote(interface_check_kind): enum_condition.warning ), "info": { + "ssh_host": parameters["ssh_host"], "mount_point": parameters["mount_point"], - "device": ret["device"], - "used": ret["used"], # ToDo: Humanlesbarkeit herstellen - "available": ret["avail"], # ToDo: Humanlesbarkeit herstellen - "percentage": str(ret["perc"]) + "%", - "host" : parameters["host"], + "device": data["device"], + "used": data["used"], # ToDo: Humanlesbarkeit herstellen + "available": data["avail"], # ToDo: Humanlesbarkeit herstellen + "percentage": (str(data["perc"]) + "%"), "faults": [ translation_get("checks.generic_remote.overflow") - ] + ], } } - else: return { "condition": enum_condition.ok, - "info" : {} + "info": {} } + diff --git a/source/logic/conf.py b/source/logic/conf.py index 61dd9d8..f587f4c 100644 --- a/source/logic/conf.py +++ b/source/logic/conf.py @@ -114,6 +114,14 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa "required": [ ], }, + "includes": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task" + }, "checks": { "type": "array", "items": { @@ -167,8 +175,6 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa } }, "required": [ - "defaults", - "checks", ] } @@ -284,9 +290,18 @@ def conf_normalize_check(check_kind_implementations, notification_channel_implem } -def conf_normalize_root(check_kind_implementations, notification_channel_implementations, node): +def conf_normalize_root( + check_kind_implementations, + notification_channel_implementations, + node +): counts = {} - for node_ in node["checks"]: + checks_raw = ( + node["checks"] + if ("checks" in node) else + [] + ) + for node_ in checks_raw: if (node_["name"] not in counts): counts[node_["name"]] = 0 counts[node_["name"]] += 1 @@ -301,9 +316,22 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme ) ) else: - defaults = conf_normalize_defaults(notification_channel_implementations, node["defaults"]) + defaults = conf_normalize_defaults( + notification_channel_implementations, + ( + node["defaults"] + if ("defaults" in node) else + {} + ) + ) + includes = ( + node["includes"] + if ("includes" in node) else + [] + ) return { "defaults": defaults, + "includes": includes, "checks": list( map( lambda node_: conf_normalize_check( @@ -312,8 +340,67 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme defaults, node_ ), - node["checks"] + checks_raw ) ) } + +def conf_load( + check_kind_implementations, + notification_channel_implementations, + path, + already_included = None +): + if (already_included is None): + already_included = set([]) + if (path in already_included): + raise ValueError("circular dependency detected") + else: + already_included.add(path) + conf_raw = _json.loads(file_read(path)) + includes = ( + conf_raw["includes"] + if ("includes" in conf_raw) else + [] + ) + for index in range(len(includes)): + path_ = includes[index] + sub_conf = conf_load( + check_kind_implementations, + notification_channel_implementations, + ( + path_ + if _os.path.isabs(path_) else + _os.path.join(_os.path.dirname(path), path_) + ), + already_included + ) + if (not "checks" in conf_raw): + conf_raw["checks"] = [] + conf_raw["checks"].extend( + list( + map( + lambda check: dict_merge( + check, + { + "name": string_coin( + "x{{number}}.{{original_name}}", + { + "number": ("%u" % (index + 1)), + "original_name": check["name"], + } + ), + } + ), + sub_conf["checks"] + ) + ) + ) + conf_raw["includes"] = [] + return conf_normalize_root( + check_kind_implementations, + notification_channel_implementations, + conf_raw + ) + diff --git a/source/logic/lib.py b/source/logic/lib.py index 9854a3f..fb686c6 100644 --- a/source/logic/lib.py +++ b/source/logic/lib.py @@ -14,7 +14,7 @@ def file_write(path, content): def string_coin(template, arguments): result = template for (key, value, ) in arguments.items(): - result = result.replace("{{%s}}" % key, str(value)) + result = result.replace("{{%s}}" % key, value) return result diff --git a/source/logic/main.py b/source/logic/main.py index ac44b92..51bed75 100644 --- a/source/logic/main.py +++ b/source/logic/main.py @@ -141,13 +141,13 @@ def main(): ) ### get configuration data - conf = conf_normalize_root( + conf = conf_load( check_kind_implementations, notification_channel_implementations, - _json.loads(file_read(args.conf_path)) + _os.path.abspath(args.conf_path) ) if (args.expose_full_conf): - _sys.stdout.write(_json.dumps(checks, indent = "\t") + "\n") + _sys.stdout.write(_json.dumps(conf, indent = "\t") + "\n") _sys.exit(1) else: ### get state data diff --git a/todo.md b/todo.md index 22de17d..0766f5e 100644 --- a/todo.md +++ b/todo.md @@ -1,10 +1,12 @@ - parallele Zugriffe auf die Zustands-Datei verhindern -- fehlertolerantere Implementierung +- Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war) +- erneute Benachrichtigung über nicht-OK-Zustand nach einer Weile (siehe https://gitlab.greenscale.de/tools/heimdall/-/issues/3) +- längere Statistiken über Metriken führen um auch Anstiege/Abfälle auszuwerten (z.B. "Speicherplatzverbrauch innerhalb einer Woche um 5GB gestiegen") - Selbst-Test - Benachrichtigungs-Kanäle: - Matrix + - evtl. die Kanäle ganz auslagern und nur als Library anbinden - Möglichkeit dauerhaft laufen zulassen (evtl. als systemd-Dienst) - Versionierung - Test-Routinen -- neu schreiben in TypeScript (und plankton dafür nutzen)? -- Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war) +- neu schreiben in TypeScript (und plankton dafür nutzen?)