[issue-1] [mod] update

This commit is contained in:
Christian Fraß 2023-03-03 19:38:04 +01:00
commit 89d29646bc
10 changed files with 268 additions and 98 deletions

View file

@ -205,6 +205,14 @@
},
"required": []
},
"includes": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task"
},
"checks": {
"type": "array",
"items": {
@ -613,18 +621,29 @@
"type": "object",
"additionalProperties": false,
"properties": {
"host": {
"ssh_host": {
"type": "string"
},
"port": {
"type": "integer",
"default": 22
"ssh_port": {
"type": [
"null",
"integer"
],
"default": null
},
"user": {
"type": "string"
"ssh_user": {
"type": [
"null",
"string"
],
"default": null
},
"ssh_key": {
"type": "string"
"type": [
"null",
"string"
],
"default": null
},
"mount_point": {
"type": "string",
@ -641,9 +660,7 @@
}
},
"required": [
"host",
"user",
"ssh_key"
"ssh_host"
]
}
},
@ -658,8 +675,5 @@
}
}
},
"required": [
"defaults",
"checks"
]
"required": []
}

14
examples/test-1.hmdl.json Normal file
View file

@ -0,0 +1,14 @@
{
"checks": [
{
"name": "test",
"kind": "file_state",
"parameters": {
"path": "/tmp/test",
"exist": true,
"age_threshold": 60,
"size_threshold": 1
}
}
]
}

12
examples/test-2.hmdl.json Normal file
View file

@ -0,0 +1,12 @@
{
"checks": [
{
"name": "test",
"kind": "script",
"parameters": {
"path": "/tmp/script",
"arguments": []
}
}
]
}

28
examples/test-3.hmdl.json Normal file
View file

@ -0,0 +1,28 @@
{
"defaults": {
"threshold": 1,
"schedule": {
"regular_interval": 10,
"attentive_interval": 1
},
"notifications": [
{
"kind": "console",
"parameters": {
}
}
]
},
"checks": [
{
"name": "test",
"kind": "generic_remote",
"parameters": {
"ssh_host": "bragi.pool.greenscale.de",
"ssh_user": "fenris",
"ssh_port": 8192,
"ssh_key": "/home/fenris/.ssh/keypairs/gs-bragi"
}
}
]
}

View file

@ -1,31 +1,20 @@
{
"defaults": {
},
"checks": [
{
"name": "test",
"threshold": 3,
"annoy": false,
"schedule": {
"regular_interval": 15,
"attentive_interval": 1
},
"notifications": [
{
"kind": "console",
"parameters": {
}
"schedule": {
"regular_interval": 10,
"attentive_interval": 1
},
"notifications": [
{
"kind": "console",
"parameters": {
}
],
"kind": "generic_remote",
"parameters": {
"host" : "iks-vvd-cn.greenscale.lan",
"user" : "root",
"ssh_key" : "/home/christiann/.ssh/id_ed25519.pub",
"mount_point" : "/",
"threshold" : 95,
"strict" : false
}
}
]
},
"includes": [
"test-1.hmdl.json",
"test-2.hmdl.json",
"test-3.hmdl.json"
]
}

View file

@ -8,18 +8,20 @@ class implementation_check_kind_generic_remote(interface_check_kind):
"type": "object",
"additionalProperties": False,
"properties": {
"host" : {
"ssh_host" : {
"type" : "string"
},
"port": {
"type": "integer",
"default": 22
"ssh_port": {
"type": ["null", "integer"],
"default": None
},
"user" : {
"type" : "string"
"ssh_user" : {
"type" : ["null", "string"],
"default": None,
},
"ssh_key" : {
"type" : "string"
"type" : ["null", "string"],
"default": None,
},
"mount_point" : {
"type" : "string",
@ -36,7 +38,7 @@ class implementation_check_kind_generic_remote(interface_check_kind):
}
},
"required": [
"host", "user", "ssh_key"
"ssh_host"
]
}
@ -45,46 +47,68 @@ class implementation_check_kind_generic_remote(interface_check_kind):
[implementation]
'''
def normalize_conf_node(self, node):
if not "host" in node \
or not "user" in node \
or not "ssh_key" in node:
raise ValueError("MISSING STUFF!")
if not "port" in node:
node["port"] = 22
if not "mount_point" in node:
node["mount_point"] = "/"
if not "threshold" in node:
node["threshold"] = 95
if not "strict" in node:
node["strict"] = "/"
if (not "ssh_host" in node):
raise ValueError("mandatory parameter \"ssh_host\" missing")
else:
return dict_merge(
{
"ssh_port": None,
"ssh_user": None,
"ssh_key": None,
"mount_point": "/",
"threshold": 95,
"strict": False,
},
node
)
return node
'''
[implementation]
'''
def run(self, parameters):
SSH_COMMAND = string_coin("ssh -i {{ssh_key}} -p {{port}} {{user}}@{{host}} \"df {{mount_point}} | tr -s ' '\"", parameters)
inner_command = string_coin(
"df {{mount_point}} | tr -s \" \"",
{
"mount_point": parameters["mount_point"],
}
)
retval=shell_command(SSH_COMMAND)
outer_command_parts = []
if True:
outer_command_parts.append("ssh");
if True:
outer_command_parts.append(string_coin("{{host}}", {"host": parameters["ssh_host"]}));
if (parameters["ssh_port"] is not None):
outer_command_parts.append(string_coin("-p {{port}}", {"port": ("%u" % parameters["ssh_port"])}));
if (parameters["ssh_user"] is not None):
outer_command_parts.append(string_coin("-l {{user}}", {"user": parameters["ssh_user"]}));
if (parameters["ssh_key"] is not None):
outer_command_parts.append(string_coin("-i {{key}}", {"key": parameters["ssh_key"]}));
if True:
outer_command_parts.append(string_coin("-o BatchMode=yes", {}))
if True:
outer_command_parts.append(string_coin("'{{inner_command}}'", {"inner_command": inner_command}))
outer_command = " ".join(outer_command_parts)
if retval["return_code"] > 0:
result = shell_command(outer_command)
if (result["return_code"] > 0):
return {
"condition" : enum_condition.unknown,
"info" : {
"error" : retval["stderr"]
"condition": enum_condition.unknown,
"info": {
"error": result["stderr"],
}
}
else:
parts=retval["stdout"].split("\n")[-2].split(" ")
ret={
"device" : parts[0],
"used" : parts[2],
"avail" : parts[3],
"perc" : int(parts[4][:-1])
stuff = result["stdout"].split("\n")[-2].split(" ")
data = {
"device": stuff[0],
"used": stuff[2],
"avail": stuff[3],
"perc": int(stuff[4][:-1]),
}
if ret["perc"] > parameters["threshold"]:
if (data["perc"] > parameters["threshold"]):
return {
"condition": (
enum_condition.critical
@ -92,20 +116,20 @@ class implementation_check_kind_generic_remote(interface_check_kind):
enum_condition.warning
),
"info": {
"ssh_host": parameters["ssh_host"],
"mount_point": parameters["mount_point"],
"device": ret["device"],
"used": ret["used"], # ToDo: Humanlesbarkeit herstellen
"available": ret["avail"], # ToDo: Humanlesbarkeit herstellen
"percentage": str(ret["perc"]) + "%",
"host" : parameters["host"],
"device": data["device"],
"used": data["used"], # ToDo: Humanlesbarkeit herstellen
"available": data["avail"], # ToDo: Humanlesbarkeit herstellen
"percentage": (str(data["perc"]) + "%"),
"faults": [
translation_get("checks.generic_remote.overflow")
]
],
}
}
else:
return {
"condition": enum_condition.ok,
"info" : {}
"info": {}
}

View file

@ -114,6 +114,14 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa
"required": [
],
},
"includes": {
"type": "array",
"items": {
"type": "string"
},
"default": [],
"description": "list of relative or absolute paths to other hmdl files on the local machine, which shall be subsumed in the overall monitoring task"
},
"checks": {
"type": "array",
"items": {
@ -167,8 +175,6 @@ def conf_schema_root(check_kind_implementations, notification_channel_implementa
}
},
"required": [
"defaults",
"checks",
]
}
@ -284,9 +290,18 @@ def conf_normalize_check(check_kind_implementations, notification_channel_implem
}
def conf_normalize_root(check_kind_implementations, notification_channel_implementations, node):
def conf_normalize_root(
check_kind_implementations,
notification_channel_implementations,
node
):
counts = {}
for node_ in node["checks"]:
checks_raw = (
node["checks"]
if ("checks" in node) else
[]
)
for node_ in checks_raw:
if (node_["name"] not in counts):
counts[node_["name"]] = 0
counts[node_["name"]] += 1
@ -301,9 +316,22 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme
)
)
else:
defaults = conf_normalize_defaults(notification_channel_implementations, node["defaults"])
defaults = conf_normalize_defaults(
notification_channel_implementations,
(
node["defaults"]
if ("defaults" in node) else
{}
)
)
includes = (
node["includes"]
if ("includes" in node) else
[]
)
return {
"defaults": defaults,
"includes": includes,
"checks": list(
map(
lambda node_: conf_normalize_check(
@ -312,8 +340,67 @@ def conf_normalize_root(check_kind_implementations, notification_channel_impleme
defaults,
node_
),
node["checks"]
checks_raw
)
)
}
def conf_load(
	check_kind_implementations,
	notification_channel_implementations,
	path,
	already_included = None
):
	'''
	Loads the hmdl file at `path`, recursively pulls in the checks of every
	file listed under its "includes" entry and returns the normalized
	configuration (as produced by `conf_normalize_root`).

	Relative include paths are resolved against the directory of the
	including file. The checks of the n-th include (counted from 1) get their
	name prefixed with "x<n>." in order to keep them apart from the checks of
	the including file and of the other includes.

	@param check_kind_implementations passed through to `conf_normalize_root`
	@param notification_channel_implementations passed through to `conf_normalize_root`
	@param path path of the hmdl file to load
	@param already_included files which are currently being loaded further up
		in the include chain; callers from outside normally leave this unset
	@return the normalized configuration; its "includes" list is empty, since
		all includes have been merged into "checks"
	@raise ValueError if a file (directly or indirectly) includes itself
	'''
	# the resolved path is only used as identity for the cycle detection, so
	# that different spellings of the same file (e.g. "a/../a/x.json") are
	# recognized as one; reading and include resolution still use `path`
	identity = _os.path.realpath(path)
	if (already_included is None):
		already_included = set()
	if (identity in already_included):
		raise ValueError("circular dependency detected")
	# build a new set instead of mutating the given one: only the current
	# include chain counts, otherwise two sibling files including the same
	# third file would wrongly be reported as a cycle
	include_chain = (set(already_included) | {identity})
	conf_raw = _json.loads(file_read(path))
	includes = (
		conf_raw["includes"]
		if ("includes" in conf_raw) else
		[]
	)
	checks = (
		list(conf_raw["checks"])
		if ("checks" in conf_raw) else
		[]
	)
	for (index, path_, ) in enumerate(includes):
		sub_conf = conf_load(
			check_kind_implementations,
			notification_channel_implementations,
			(
				path_
				if _os.path.isabs(path_) else
				_os.path.join(_os.path.dirname(path), path_)
			),
			include_chain
		)
		number = ("%u" % (index + 1))
		checks.extend(
			dict_merge(
				check,
				{
					"name": string_coin(
						"x{{number}}.{{original_name}}",
						{
							"number": number,
							"original_name": check["name"],
						}
					),
				}
			)
			for check in sub_conf["checks"]
		)
	conf_raw["checks"] = checks
	# everything has been merged, so nothing is left to include
	conf_raw["includes"] = []
	return conf_normalize_root(
		check_kind_implementations,
		notification_channel_implementations,
		conf_raw
	)

View file

@ -14,7 +14,7 @@ def file_write(path, content):
def string_coin(template, arguments):
	'''
	Fills a template string by replacing every occurrence of a placeholder of
	the form "{{key}}" with the corresponding value from `arguments`.

	@param template string containing "{{key}}" placeholders
	@param arguments mapping from placeholder key to the value to insert
	@return the template with all known placeholders replaced; placeholders
		without a matching key are left untouched
	'''
	result = template
	for (key, value, ) in arguments.items():
		# convert explicitly, since `str.replace` raises a TypeError for
		# non-string values such as an integer port number
		result = result.replace("{{%s}}" % key, str(value))
	return result

View file

@ -141,13 +141,13 @@ def main():
)
### get configuration data
conf = conf_normalize_root(
conf = conf_load(
check_kind_implementations,
notification_channel_implementations,
_json.loads(file_read(args.conf_path))
_os.path.abspath(args.conf_path)
)
if (args.expose_full_conf):
_sys.stdout.write(_json.dumps(checks, indent = "\t") + "\n")
_sys.stdout.write(_json.dumps(conf, indent = "\t") + "\n")
_sys.exit(1)
else:
### get state data

View file

@ -1,10 +1,12 @@
- parallele Zugriffe auf die Zustands-Datei verhindern
- fehlertolerantere Implementierung
- Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war)
- erneute Benachrichtigung über nicht-OK-Zustand nach einer Weile (siehe https://gitlab.greenscale.de/tools/heimdall/-/issues/3)
- längere Statistiken über Metriken führen um auch Anstiege/Abfälle auszuwerten (z.B. "Speicherplatzverbrauch innerhalb einer Woche um 5GB gestiegen")
- Selbst-Test
- Benachrichtigungs-Kanäle:
- Matrix
- evtl. die Kanäle ganz auslagern und nur als Library anbinden
- Möglichkeit, dauerhaft laufen zu lassen (evtl. als systemd-Dienst)
- Versionierung
- Test-Routinen
- neu schreiben in TypeScript (und plankton dafür nutzen)?
- Benachrichtigungen versenden, wenn ein Zustand sich wieder normalisiert hat (aber vorher über dem Schwellwert oft nicht OK war)
- neu schreiben in TypeScript (und plankton dafür nutzen?)