[issue-1] [mod] update

This commit is contained in:
Christian Fraß 2023-03-03 19:38:04 +01:00
commit 89d29646bc
10 changed files with 268 additions and 98 deletions

View file

@ -205,6 +205,14 @@
}, },
"required": [] "required": []
}, },
"includes": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task"
},
"checks": { "checks": {
"type": "array", "type": "array",
"items": { "items": {
@ -613,18 +621,29 @@
"type": "object", "type": "object",
"additionalProperties": false, "additionalProperties": false,
"properties": { "properties": {
"host": { "ssh_host": {
"type": "string" "type": "string"
}, },
"port": { "ssh_port": {
"type": "integer", "type": [
"default": 22 "null",
"integer"
],
"default": null
}, },
"user": { "ssh_user": {
"type": "string" "type": [
"null",
"string"
],
"default": null
}, },
"ssh_key": { "ssh_key": {
"type": "string" "type": [
"null",
"string"
],
"default": null
}, },
"mount_point": { "mount_point": {
"type": "string", "type": "string",
@ -641,9 +660,7 @@
} }
}, },
"required": [ "required": [
"host", "ssh_host"
"user",
"ssh_key"
] ]
} }
}, },
@ -658,8 +675,5 @@
} }
} }
}, },
"required": [ "required": []
"defaults",
"checks"
]
} }

14
examples/test-1.hmdl.json Normal file
View file

@ -0,0 +1,14 @@
{
"checks": [
{
"name": "test",
"kind": "file_state",
"parameters": {
"path": "/tmp/test",
"exist": true,
"age_threshold": 60,
"size_threshold": 1
}
}
]
}

12
examples/test-2.hmdl.json Normal file
View file

@ -0,0 +1,12 @@
{
"checks": [
{
"name": "test",
"kind": "script",
"parameters": {
"path": "/tmp/script",
"arguments": []
}
}
]
}

28
examples/test-3.hmdl.json Normal file
View file

@ -0,0 +1,28 @@
{
"defaults": {
"threshold": 1,
"schedule": {
"regular_interval": 10,
"attentive_interval": 1
},
"notifications": [
{
"kind": "console",
"parameters": {
}
}
]
},
"checks": [
{
"name": "test",
"kind": "generic_remote",
"parameters": {
"ssh_host": "bragi.pool.greenscale.de",
"ssh_user": "fenris",
"ssh_port": 8192,
"ssh_key": "/home/fenris/.ssh/keypairs/gs-bragi"
}
}
]
}

View file

@ -1,31 +1,20 @@
{ {
"defaults": { "defaults": {
}, "schedule": {
"checks": [ "regular_interval": 10,
{ "attentive_interval": 1
"name": "test", },
"threshold": 3, "notifications": [
"annoy": false, {
"schedule": { "kind": "console",
"regular_interval": 15, "parameters": {
"attentive_interval": 1
},
"notifications": [
{
"kind": "console",
"parameters": {
}
} }
],
"kind": "generic_remote",
"parameters": {
"host" : "iks-vvd-cn.greenscale.lan",
"user" : "root",
"ssh_key" : "/home/christiann/.ssh/id_ed25519.pub",
"mount_point" : "/",
"threshold" : 95,
"strict" : false
} }
} ]
},
"includes": [
"test-1.hmdl.json",
"test-2.hmdl.json",
"test-3.hmdl.json"
] ]
} }

View file

@ -8,18 +8,20 @@ class implementation_check_kind_generic_remote(interface_check_kind):
"type": "object", "type": "object",
"additionalProperties": False, "additionalProperties": False,
"properties": { "properties": {
"host" : { "ssh_host" : {
"type" : "string" "type" : "string"
}, },
"port": { "ssh_port": {
"type": "integer", "type": ["null", "integer"],
"default": 22 "default": None
}, },
"user" : { "ssh_user" : {
"type" : "string" "type" : ["null", "string"],
"default": None,
}, },
"ssh_key" : { "ssh_key" : {
"type" : "string" "type" : ["null", "string"],
"default": None,
}, },
"mount_point" : { "mount_point" : {
"type" : "string", "type" : "string",
@ -36,7 +38,7 @@ class implementation_check_kind_generic_remote(interface_check_kind):
} }
}, },
"required": [ "required": [
"host", "user", "ssh_key" "ssh_host"
] ]
} }
@ -45,46 +47,68 @@ class implementation_check_kind_generic_remote(interface_check_kind):
[implementation] [implementation]
''' '''
def normalize_conf_node(self, node):
    """
    Validate the parameter node of a "generic_remote" check and fill in
    fallback values for the optional parameters.

    :param node: raw parameter mapping taken from the configuration
    :returns: the result of `dict_merge(fallbacks, node)`; presumably values
        given in `node` take precedence over the fallbacks (depends on
        `dict_merge`, which is defined elsewhere -- TODO confirm)
    :raises ValueError: if the mandatory key "ssh_host" is absent
    """
    # the host is the only parameter without a sensible fallback
    if ("ssh_host" not in node):
        raise ValueError("mandatory parameter \"ssh_host\" missing")
    fallbacks = {
        "ssh_port": None,
        "ssh_user": None,
        "ssh_key": None,
        "mount_point": "/",
        "threshold": 95,
        "strict": False,
    }
    return dict_merge(fallbacks, node)
''' '''
[implementation] [implementation]
''' '''
def run(self, parameters): def run(self, parameters):
SSH_COMMAND = string_coin("ssh -i {{ssh_key}} -p {{port}} {{user}}@{{host}} \"df {{mount_point}} | tr -s ' '\"", parameters) inner_command = string_coin(
"df {{mount_point}} | tr -s \" \"",
{
"mount_point": parameters["mount_point"],
}
)
retval=shell_command(SSH_COMMAND) outer_command_parts = []
if True:
outer_command_parts.append("ssh");
if True:
outer_command_parts.append(string_coin("{{host}}", {"host": parameters["ssh_host"]}));
if (parameters["ssh_port"] is not None):
outer_command_parts.append(string_coin("-p {{port}}", {"port": ("%u" % parameters["ssh_port"])}));
if (parameters["ssh_user"] is not None):
outer_command_parts.append(string_coin("-l {{user}}", {"user": parameters["ssh_user"]}));
if (parameters["ssh_key"] is not None):
outer_command_parts.append(string_coin("-i {{key}}", {"key": parameters["ssh_key"]}));
if True:
outer_command_parts.append(string_coin("-o BatchMode=yes", {}))
if True:
outer_command_parts.append(string_coin("'{{inner_command}}'", {"inner_command": inner_command}))
outer_command = " ".join(outer_command_parts)
if retval["return_code"] > 0: result = shell_command(outer_command)
if (result["return_code"] > 0):
return { return {
"condition" : enum_condition.unknown, "condition": enum_condition.unknown,
"info" : { "info": {
"error" : retval["stderr"] "error": result["stderr"],
} }
} }
else: else:
parts=retval["stdout"].split("\n")[-2].split(" ") stuff = result["stdout"].split("\n")[-2].split(" ")
ret={ data = {
"device" : parts[0], "device": stuff[0],
"used" : parts[2], "used": stuff[2],
"avail" : parts[3], "avail": stuff[3],
"perc" : int(parts[4][:-1]) "perc": int(stuff[4][:-1]),
} }
if (data["perc"] > parameters["threshold"]):
if ret["perc"] > parameters["threshold"]:
return { return {
"condition": ( "condition": (
enum_condition.critical enum_condition.critical
@ -92,20 +116,20 @@ class implementation_check_kind_generic_remote(interface_check_kind):
enum_condition.warning enum_condition.warning
), ),
"info": { "info": {
"ssh_host": parameters["ssh_host"],
"mount_point": parameters["mount_point"], "mount_point": parameters["mount_point"],
"device": ret["device"], "device": data["device"],
"used": ret["used"], # ToDo: Humanlesbarkeit herstellen "used": data["used"], # ToDo: Humanlesbarkeit herstellen
"available": ret["avail"], # ToDo: Humanlesbarkeit herstellen "available": data["avail"], # ToDo: Humanlesbarkeit herstellen
"percentage": str(ret["perc"]) + "%", "percentage": (str(data["perc"]) + "%"),
"host" : parameters["host"],
"faults": [ "faults": [
translation_get("checks.generic_remote.overflow") translation_get("checks.generic_remote.overflow")
] ],
} }
} }
else: else:
return { return {
"condition": enum_condition.ok, "condition": enum_condition.ok,
"info" : {} "info": {}
} }

View file

@ -114,6 +114,14 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa
"required": [ "required": [
], ],
}, },
"includes": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task"
},
"checks": { "checks": {
"type": "array", "type": "array",
"items": { "items": {
@ -167,8 +175,6 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa
} }
}, },
"required": [ "required": [
"defaults",
"checks",
] ]
} }
@ -284,9 +290,18 @@ def conf_normalize_check(check_kind_implementations, notification_channel_implem
} }
def conf_normalize_root(check_kind_implementations, notification_channel_implementations, node): def conf_normalize_root(
check_kind_implementations,
notification_channel_implementations,
node
):
counts = {} counts = {}
for node_ in node["checks"]: checks_raw = (
node["checks"]
if ("checks" in node) else
[]
)
for node_ in checks_raw:
if (node_["name"] not in counts): if (node_["name"] not in counts):
counts[node_["name"]] = 0 counts[node_["name"]] = 0
counts[node_["name"]] += 1 counts[node_["name"]] += 1
@ -301,9 +316,22 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme
) )
) )
else: else:
defaults = conf_normalize_defaults(notification_channel_implementations, node["defaults"]) defaults = conf_normalize_defaults(
notification_channel_implementations,
(
node["defaults"]
if ("defaults" in node) else
{}
)
)
includes = (
node["includes"]
if ("includes" in node) else
[]
)
return { return {
"defaults": defaults, "defaults": defaults,
"includes": includes,
"checks": list( "checks": list(
map( map(
lambda node_: conf_normalize_check( lambda node_: conf_normalize_check(
@ -312,8 +340,67 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme
defaults, defaults,
node_ node_
), ),
node["checks"] checks_raw
) )
) )
} }
def conf_load(
    check_kind_implementations,
    notification_channel_implementations,
    path,
    already_included = None
):
    """
    Load the configuration file at `path`, recursively pull in the checks of
    every file listed under its "includes" key and return the normalized
    configuration.

    Checks taken from the n-th include (1-based) are renamed to
    "x<n>.<original name>" before they are appended to the checks of the
    including file.

    :param check_kind_implementations: passed through to `conf_normalize_root`
    :param notification_channel_implementations: passed through to
        `conf_normalize_root`
    :param path: path of the configuration file; relative include paths are
        resolved against the directory of this path
    :param already_included: paths of the files that are currently being
        loaded further up the include chain; callers normally omit it. The
        passed collection is not modified.
    :returns: the value of `conf_normalize_root` for the merged configuration
    :raises ValueError: if a file (transitively) includes itself
    """
    # Track only the chain of ancestors, in a private copy per call: a file
    # that is reached twice via different branches (or listed twice) is not a
    # cycle and must not be rejected.
    # Paths are canonicalized so that "./a.json" and "a.json" are recognized
    # as the same file; otherwise a real cycle could recurse without bound.
    ancestors = set(
        _os.path.realpath(path_)
        for path_ in (already_included or [])
    )
    path_key = _os.path.realpath(path)
    if (path_key in ancestors):
        raise ValueError("circular dependency detected")
    ancestors.add(path_key)

    conf_raw = _json.loads(file_read(path))
    includes = (
        conf_raw["includes"]
        if ("includes" in conf_raw) else
        []
    )
    base_directory = _os.path.dirname(path)
    for (number, path_, ) in enumerate(includes, start = 1):
        sub_conf = conf_load(
            check_kind_implementations,
            notification_channel_implementations,
            (
                path_
                if _os.path.isabs(path_) else
                _os.path.join(base_directory, path_)
            ),
            ancestors
        )
        # NOTE(review): the checks in sub_conf are already normalized and get
        # normalized once more below -- assumes conf_normalize_check is
        # idempotent; confirm
        conf_raw.setdefault("checks", []).extend(
            dict_merge(
                check,
                {
                    "name": string_coin(
                        "x{{number}}.{{original_name}}",
                        {
                            "number": ("%u" % number),
                            "original_name": check["name"],
                        }
                    ),
                }
            )
            for check in sub_conf["checks"]
        )
    # the includes have been merged into "checks"; nothing is left to resolve
    conf_raw["includes"] = []
    return conf_normalize_root(
        check_kind_implementations,
        notification_channel_implementations,
        conf_raw
    )

View file

@ -14,7 +14,7 @@ def file_write(path, content):
def string_coin(template, arguments):
    """
    Fill the "{{key}}" placeholders of `template` with the values given in
    `arguments` and return the resulting string.

    All placeholders are substituted in a single pass, so a value that itself
    contains text looking like a placeholder is inserted verbatim and is not
    expanded again (the result does not depend on the order of `arguments`).
    Placeholders without a matching key are left untouched. Values that are
    not strings are converted with `str`.

    :param template: string containing "{{key}}" placeholders
    :param arguments: mapping from placeholder key to replacement value
    :returns: the filled-in string
    """
    # imported locally because the file's import block is not part of this view
    import re as _re
    replacements = {
        ("{{%s}}" % key): str(value)
        for (key, value, ) in arguments.items()
    }
    if (not replacements):
        # an empty alternation would match everywhere; nothing to do anyway
        return template
    # longest token first, so that a token never loses against one that
    # happens to be its prefix
    pattern = "|".join(
        _re.escape(token)
        for token in sorted(replacements, key = len, reverse = True)
    )
    # a function as replacement keeps backslashes in values literal
    return _re.sub(
        pattern,
        lambda match: replacements[match.group(0)],
        template
    )

View file

@ -141,13 +141,13 @@ def main():
) )
### get configuration data ### get configuration data
conf = conf_normalize_root( conf = conf_load(
check_kind_implementations, check_kind_implementations,
notification_channel_implementations, notification_channel_implementations,
_json.loads(file_read(args.conf_path)) _os.path.abspath(args.conf_path)
) )
if (args.expose_full_conf): if (args.expose_full_conf):
_sys.stdout.write(_json.dumps(checks, indent = "\t") + "\n") _sys.stdout.write(_json.dumps(conf, indent = "\t") + "\n")
_sys.exit(1) _sys.exit(1)
else: else:
### get state data ### get state data

View file

@ -1,10 +1,12 @@
- parallele Zugriffe auf die Zustands-Datei verhindern - parallele Zugriffe auf die Zustands-Datei verhindern
- fehlertolerantere Implementierung - Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war)
- erneute Benachrichtigung über nicht-OK-Zustand nach einer Weile (siehe https://gitlab.greenscale.de/tools/heimdall/-/issues/3)
- längere Statistiken über Metriken führen um auch Anstiege/Abfälle auszuwerten (z.B. "Speicherplatzverbrauch innerhalb einer Woche um 5GB gestiegen")
- Selbst-Test - Selbst-Test
- Benachrichtigungs-Kanäle: - Benachrichtigungs-Kanäle:
- Matrix - Matrix
- evtl. die Kanäle ganz auslagern und nur als Library anbinden
- Möglichkeit dauerhaft laufen zu lassen (evtl. als systemd-Dienst) - Möglichkeit dauerhaft laufen zu lassen (evtl. als systemd-Dienst)
- Versionierung - Versionierung
- Test-Routinen - Test-Routinen
- neu schreiben in TypeScript (und plankton dafür nutzen)? - neu schreiben in TypeScript (und plankton dafür nutzen?)
- Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war)