This commit is contained in:
Christian Fraß 2022-11-29 23:53:14 +01:00
commit 82b47ffa14
23 changed files with 1224 additions and 0 deletions

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
.geany
build/

299
hmdl.schema.json Normal file
View file

@ -0,0 +1,299 @@
{
"$defs": {
"active": {
"type": "boolean"
},
"schedule": {
"type": "object",
"additionalProperties": false,
"properties": {
"kind": {
"type": "string",
"enum": [
"minutely",
"hourly",
"daily"
]
}
},
"required": [
"kind"
]
},
"notifications": {
"type": "array",
"items": {
"anyOf": [
{
"type": "object",
"additionalProperties": false,
"properties": {
"kind": {
"type": "string",
"const": "console"
},
"parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
},
"required": [
]
}
},
"required": [
"kind",
"parameters"
]
},
{
"type": "object",
"additionalProperties": false,
"properties": {
"kind": {
"type": "string",
"const": "email"
},
"parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
"access": {
"type": "object",
"additionalProperties": false,
"properties": {
"host": {
"type": "string"
},
"port": {
"type": "integer"
},
"username": {
"type": "string"
},
"password": {
"type": "string"
}
},
"required": [
"host",
"port",
"username",
"password"
]
},
"sender": {
"type": "string"
},
"receivers": {
"type": "array",
"items": {
"type": "string"
}
},
"tags": {
"description": "list of strings, which will be placed in the e-mail subject",
"type": "array",
"items": {
"type": "string"
},
"default": []
}
},
"required": [
"access",
"sender",
"receivers"
]
}
},
"required": [
"kind",
"parameters"
]
}
]
},
"default": [
{
"kind": "console",
"parameters": {
}
}
]
}
},
"type": "object",
"additionalProperties": false,
"properties": {
"defaults": {
"description": "default values for checks",
"type": "object",
"additionalProperties": false,
"properties": {
"active": {
"$ref": "#/$defs/active"
},
"schedule": {
"$ref": "#/$defs/schedule"
},
"notifications": {
"$ref": "#/$defs/notifications"
}
},
"required": [
]
},
"checks": {
"type": "object",
"additionalProperties": {
"allOf": [
{
"description": "should represent a specific check",
"type": "object",
"additionalProperties": false,
"properties": {
"title": {
"type": "string"
},
"active": {
"$ref": "#/$defs/active"
},
"schedule": {
"$ref": "#/$defs/schedule"
},
"notifications": {
"$ref": "#/$defs/notifications"
}
},
"required": [
]
},
{
"anyOf": [
{
"type": "object",
"additionalProperties": false,
"properties": {
"kind": {
"type": "string",
"const": "script"
},
"parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
"path": {
"type": "string"
},
"arguments": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"path"
]
}
},
"required": [
"kind",
"parameters"
]
},
{
"type": "object",
"additionalProperties": false,
"properties": {
"kind": {
"type": "string",
"const": "http_request"
},
"parameters": {
"type": "object",
"additionalProperties": false,
"properties": {
"request": {
"type": "object",
"additionalProperties": false,
"properties": {
"target": {
"description": "URL",
"type": "string"
},
"method": {
"type": "string",
"enum": [
"GET",
"POST"
],
"default": "GET"
}
},
"required": [
"target"
]
},
"response": {
"type": "object",
"additionalProperties": false,
"properties": {
"status_code": {
"description": "checks whether the response status code is this",
"type": ["null", "integer"],
"default": 200
},
"headers": {
"description": "conjunctively checks header key-value pairs",
"type": "object",
"additionalProperties": {
"description": "header value",
"type": "string"
},
"properties": {
},
"required": [
],
"default": {}
},
"body_part": {
"description": "checks whether the response body contains this string",
"type": "string"
}
},
"required": [
]
},
"as_warning": {
"description": "whether a violation of this check shall be exposed as warning instead of critical; default: false",
"type": "boolean",
"default": false
}
},
"required": [
"request"
]
}
},
"required": [
"kind",
"parameters"
]
}
]
}
]
},
"properties": {
},
"required": [
]
}
},
"required": [
"defaults",
"checks"
]
}

56
readme.md Normal file
View file

@ -0,0 +1,56 @@
# Heimdall
## Beschreibung
- führt Prüfungen durch um den Zustand von Systemen zu überwachen und meldet Unstimmigkeiten
## Technologien
- python
## Erstellung
- `tools/build`
## Ausführung
- siehe `build/heimdall -h`
- sollte als minütlich laufender Cronjob aufgerufen werden
### Eigene Skripte
Mittels der Prüfungs-Art `script` kann man selbst definierte Prüf-Funktionen schreiben. Diese Skripte sollen durch exit-Codes das Ergebnis der Prüfung kommunizieren:
- `0`: alles prima
- `1`: Warnung
- `2`: kritisch
- alles andere: Fehler bei Ausführung (unbekannter Status)
Infos (z.B. was genau schief gelaufen ist) sollen zu `stdout` geschrieben werden.
## Testen
TODO
## Ausrollen
TODO
## Überwachung
(nicht relevant)
## Sicherung
(nicht relevant)

Binary file not shown.

270
source/heimdall.py Executable file
View file

@ -0,0 +1,270 @@
#!/usr/bin/env python3
import sys as _sys
import os as _os
import json as _json
import argparse as _argparse
from lib import *
from implementation_check_kind_script import *
from implementation_check_kind_http_request import *
from implementation_notification_channel_console import *
from implementation_notification_channel_email import *
def state_encode(state):
    """
    Convert an in-memory check state into its JSON-serializable form.

    The condition is turned into its string representation by
    `condition_encode`; timestamp and count are passed through unchanged.
    """
    encoded = dict()
    encoded["timestamp"] = state["timestamp"]
    encoded["condition"] = condition_encode(state["condition"])
    encoded["count"] = state["count"]
    return encoded
def state_decode(state_encoded):
    """
    Convert a check state as stored in the state file back into its in-memory
    form.

    The condition string is turned into the corresponding condition value by
    `condition_decode`; timestamp and count are passed through unchanged.
    """
    decoded = dict()
    decoded["timestamp"] = state_encoded["timestamp"]
    decoded["condition"] = condition_decode(state_encoded["condition"])
    decoded["count"] = state_encoded["count"]
    return decoded
def conf_normalize_check(check_kind_implementations, defaults, name, node):
    """
    Fill in all optional fields of a single check configuration node.

    Parameters:
        check_kind_implementations: mapping from check kind name to the
            implementation object; its `normalize_conf_node` method is used
            to normalize the kind specific parameters
        defaults: default values for checks; read for the keys "active"
            (optional), "schedule" and "notifications"
        name: name of the check; used as title if the node specifies none
        node: raw configuration node of the check

    Returns:
        dict with the keys "title", "active", "schedule", "notifications",
        "kind" and "parameters"

    Raises:
        ValueError: if the node has no "kind" or its kind is not handled
    """
    if "kind" not in node:
        raise ValueError("missing mandatory 'member' field 'kind'")
    if node["kind"] not in check_kind_implementations:
        raise ValueError("unhandled kind: %s" % node["kind"])
    node_ = {
        "title": name,
        # honour the configured default instead of unconditionally enabling
        # the check; defaults without an "active" entry still mean "enabled"
        "active": defaults.get("active", True),
        "schedule": defaults["schedule"],
        "notifications": defaults["notifications"],
        "parameters": {},
        # values given in the check node itself take precedence
        **node,
    }
    implementation = check_kind_implementations[node_["kind"]]
    return {
        "title": node_["title"],
        "active": node_["active"],
        "schedule": node_["schedule"],
        "notifications": node_["notifications"],
        "kind": node_["kind"],
        "parameters": implementation.normalize_conf_node(node_["parameters"]),
    }
def conf_normalize_defaults(node):
    """
    Complete the "defaults" configuration node.

    Entries missing in the node are filled in with: active, hourly schedule,
    no notifications. Entries present in the node win.
    """
    fallback = {
        "active": True,
        "schedule": {"kind": "hourly"},
        "notifications": [],
    }
    return dict_merge(fallback, node)
def conf_normalize_root(check_kind_implementations, node):
    """
    Normalize the whole configuration.

    Parameters:
        check_kind_implementations: mapping from check kind name to the
            implementation object, handed through to `conf_normalize_check`
        node: root node of the configuration; its "checks" member maps check
            names to check nodes, its optional "defaults" member holds the
            default values for checks

    Returns:
        dict mapping each check name to its normalized check node
    """
    # the defaults are the same for every check, so normalize them only once;
    # a configuration without "defaults" node falls back to the built-in ones
    defaults = conf_normalize_defaults(node.get("defaults", {}))
    return {
        check_name: conf_normalize_check(
            check_kind_implementations,
            defaults,
            check_name,
            check_node
        )
        for (check_name, check_node) in node["checks"].items()
    }
# Seconds that have to pass between two executions of a check, per schedule
# kind.
_SCHEDULE_INTERVALS = {
    "minutely": 60,
    "hourly": (60 * 60),
    "daily": (60 * 60 * 24),
}

# Seconds after which a check is repeated as long as its state still carries
# a counter, i.e. as long as its condition has not outlasted the threshold.
_RECHECK_INTERVAL = (1 * 5)


def _check_is_due(schedule, old_item_state, timestamp):
    """
    Tell whether a check has to be executed now.

    A check is due if it never ran before, if its counter is still running
    and the recheck interval has passed, or if the interval of its schedule
    kind has passed. An unknown schedule kind never makes a check due.
    """
    if old_item_state is None:
        return True
    elapsed = (timestamp - old_item_state["timestamp"])
    if (old_item_state["count"] is not None) and (elapsed >= _RECHECK_INTERVAL):
        return True
    interval = _SCHEDULE_INTERVALS.get(schedule["kind"])
    return ((interval is not None) and (elapsed >= interval))


def _next_count(old_item_state, condition, threshold):
    """
    Compute the counter of the new state.

    The counter restarts at 1 when the condition changed (or no old state
    exists), is incremented while it stays within the threshold and becomes
    None once the condition has been seen more often than the threshold.
    """
    if (old_item_state is None) or (old_item_state["condition"] != condition):
        return 1
    old_count = old_item_state["count"]
    if (old_count is not None) and ((old_count + 1) <= threshold):
        return (old_count + 1)
    return None


def _shall_notify(count, threshold, keep_notifying):
    """
    Tell whether notifications have to be sent for a new state: exactly when
    the threshold is reached, and afterwards only if requested.
    """
    if count is None:
        return keep_notifying
    return (count == threshold)


def main():
    """
    Entry point: parse the command line arguments, load configuration and
    state, execute all due checks, store the new states and send
    notifications.
    """
    ## args
    argumentparser = _argparse.ArgumentParser(
        description = "monitoring processor",
        formatter_class = _argparse.ArgumentDefaultsHelpFormatter
    )
    argumentparser.add_argument(
        "-c",
        "--conf-path",
        type = str,
        default = "conf.json",
        dest = "conf_path",
        metavar = "<conf-path>",
        help = "path to the configuration file"
    )
    argumentparser.add_argument(
        "-s",
        "--state-path",
        type = str,
        default = "/tmp/monitoring-state.json",
        dest = "state_path",
        metavar = "<state-path>",
        help = "path to the state file, which contains information about the recent checks"
    )
    argumentparser.add_argument(
        "-t",
        "--threshold",
        type = int,
        default = 3,
        dest = "threshold",
        metavar = "<threshold>",
        help = "how often a condition has to occur in order to be reported"
    )
    argumentparser.add_argument(
        "-k",
        "--keep-notifying",
        action = "store_true",
        default = False,
        dest = "keep_notifying",
        help = "whether notifications shall be kept sending after the threshold has been surpassed"
    )
    argumentparser.add_argument(
        "-x",
        "--expose-full-conf",
        action = "store_true",
        default = False,
        dest = "expose_full_conf",
        help = "only print the extended configuration to stdout and exit (useful for debug purposes)"
    )
    args = argumentparser.parse_args()

    ## exec

    ### load check kind implementations
    check_kind_implementations = {
        "script": implementation_check_kind_script(),
        "http_request": implementation_check_kind_http_request(),
    }

    ### load notification channel implementations
    notification_channel_implementations = {
        "console": implementation_notification_channel_console(),
        "email": implementation_notification_channel_email(),
    }

    ### get configuration data
    checks = conf_normalize_root(check_kind_implementations, _json.loads(file_read(args.conf_path)))
    if args.expose_full_conf:
        _sys.stdout.write(_json.dumps(checks, indent = "\t") + "\n")
        # printing the configuration is a successful run, so report success
        _sys.exit(0)

    ### get state data
    if not _os.path.exists(args.state_path):
        state_data = {}
        file_write(args.state_path, _json.dumps(state_data, indent = "\t"))
    else:
        state_data = _json.loads(file_read(args.state_path))

    ### iterate through checks
    for (check_name, check_data) in checks.items():
        if not check_data["active"]:
            continue

        ### get old state and examine whether the check shall be executed
        old_item_state = (
            state_decode(state_data[check_name])
            if (check_name in state_data) else
            None
        )
        timestamp = get_current_timestamp()
        if not _check_is_due(check_data["schedule"], old_item_state, timestamp):
            continue
        _sys.stderr.write(
            string_coin(
                "-- {{check_name}}\n",
                {
                    "check_name": check_name,
                }
            )
        )

        ### execute check and set new state
        result = check_kind_implementations[check_data["kind"]].run(check_data)
        new_item_state = {
            "timestamp": timestamp,
            "condition": result["condition"],
            "count": _next_count(old_item_state, result["condition"], args.threshold),
        }
        state_data[check_name] = state_encode(new_item_state)
        # the state file is rewritten after every check, so that results
        # survive a failure of a later check or notification
        file_write(args.state_path, _json.dumps(state_data, indent = "\t"))

        ### send notifications
        if not _shall_notify(new_item_state["count"], args.threshold, args.keep_notifying):
            continue
        for notification in check_data["notifications"]:
            if notification["kind"] not in notification_channel_implementations:
                raise ValueError("invalid notification kind: %s" % notification["kind"])
            notification_channel_implementations[notification["kind"]].notify(
                notification["parameters"],
                check_name,
                check_data,
                new_item_state,
                result["output"]
            )


main()

View file

@ -0,0 +1,120 @@
class implementation_check_kind_http_request(interface_check_kind):
    """
    Check kind "http_request": sends an HTTP request and compares the
    response against the configured expectations.
    """

    def normalize_conf_node(self, node):
        """
        [implementation]

        Fill in the defaults of the parameters node: request method "GET",
        expected status code 200 and violations reported as critical.
        """
        return dict_merge(
            {
                "request": {
                    "method": "GET"
                },
                "response": {
                    "status_code": 200
                },
                "as_warning": False,
            },
            node,
            True
        )

    def run(self, check_data):
        """
        [implementation]

        Execute the request given by check_data["parameters"]["request"] and
        examine the response according to check_data["parameters"]["response"]
        (status code, header values, body part).

        Returns:
            dict with "condition" and "output"; the condition is "unknown"
            for an unsupported request method, "ok" if all expectations are
            met and otherwise "warning" or "critical", depending on the
            "as_warning" parameter; the output lists the violations

        Raises:
            ValueError: if the response node contains an unsupported key
        """
        parameters = check_data["parameters"]

        def violation_condition():
            # evaluated lazily, so "as_warning" is only read when a violation
            # has actually been found
            return (
                enum_condition.warning
                if parameters["as_warning"] else
                enum_condition.critical
            )

        method = parameters["request"]["method"]
        senders = {
            "GET": _requests.get,
            "POST": _requests.post,
        }
        if method not in senders:
            return {
                "condition": enum_condition.unknown,
                "output": ("invalid HTTP request method: %s" % method)
            }
        try:
            response = senders[method](parameters["request"]["target"])
        except Exception:
            # deliberately broad: whatever keeps the request from succeeding
            # is a result of the check and must not abort the other checks
            response = None
        if response is None:
            return {
                "condition": violation_condition(),
                "output": "HTTP request failed",
            }

        lines = []
        for (key, value) in parameters["response"].items():
            if key == "status_code":
                # None disables the status code check
                if (value is not None) and (response.status_code != value):
                    lines.append(
                        string_coin(
                            "actual status code {{status_code_actual}} does not match expected value {{status_code_expected}}",
                            {
                                "status_code_actual": ("%u" % response.status_code),
                                "status_code_expected": ("%u" % value),
                            }
                        )
                    )
            elif key == "headers":
                for (header_key, header_value) in value.items():
                    # a header missing in the response is a violation to be
                    # reported, not a reason to fail with a KeyError
                    header_actual = response.headers.get(header_key)
                    if header_actual != header_value:
                        lines.append(
                            string_coin(
                                "actual header value for key {{key}} is {{value_actual}} and does not match the expected value {{value_expected}}",
                                {
                                    "key": header_key,
                                    "value_actual": ("(not set)" if (header_actual is None) else header_actual),
                                    "value_expected": header_value,
                                }
                            )
                        )
            elif key == "body_part":
                if value not in response.text:
                    lines.append(
                        string_coin(
                            "body does not contain the expected part '{{part}}'",
                            {
                                "part": value,
                            }
                        )
                    )
            else:
                raise ValueError("unhandled response check: %s" % key)
        return {
            "condition": (enum_condition.ok if (not lines) else violation_condition()),
            "output": "\n".join(lines),
        }

View file

@ -0,0 +1,37 @@
class implementation_check_kind_script(interface_check_kind):
    """
    Check kind "script": runs an executable and derives the condition from
    its exit code.
    """

    def normalize_conf_node(self, node):
        """
        [implementation]

        Fill in the defaults of the parameters node; "arguments" is optional
        and defaults to an empty list.
        """
        return dict_merge(
            {
                "arguments": [],
            },
            node
        )

    def run(self, check_data):
        """
        [implementation]

        Run the executable check_data["parameters"]["path"] with the
        configured arguments and capture its output.

        Returns:
            dict with "condition" (derived from the exit code) and "output"
            (what the executable wrote to stdout)
        """
        parameters = check_data["parameters"]
        result = _subprocess.run(
            # "arguments" is optional, so do not fail on nodes without it
            [parameters["path"]] + list(parameters.get("arguments", [])),
            capture_output = True
        )
        # NOTE(review): the project readme documents 1 = warning and
        # 2 = critical; confirm which mapping is the intended one
        conditions = {
            0: enum_condition.ok,
            1: enum_condition.unknown,
            2: enum_condition.warning,
            3: enum_condition.critical,
        }
        # undecodable bytes must not make the whole monitoring run fail
        output = result.stdout.decode(errors = "replace")
        if result.returncode in conditions:
            condition = conditions[result.returncode]
        else:
            # any other exit code (including termination by a signal) means
            # that the state could not be determined; report it instead of
            # aborting the remaining checks
            condition = enum_condition.unknown
            output += ("invalid exit code: %i\n" % result.returncode)
        return {
            "condition": condition,
            "output": output,
        }

View file

@ -0,0 +1,16 @@
class implementation_notification_channel_console(interface_notification_channel):
    '''
    [implementation]
    '''

    def notify(self, parameters, name, data, state, output):
        """
        Write one line with title, encoded condition and output of the check
        to stdout; a missing output is shown as "(no infos)".
        """
        stream = _sys.stdout
        replacements = {
            "title": data["title"],
            "condition": condition_encode(state["condition"]),
            "output": (output if (output is not None) else "(no infos)"),
        }
        line = string_coin("[{{title}}] <{{condition}}> {{output}}\n", replacements)
        stream.write(line)

View file

@ -0,0 +1,44 @@
class implementation_notification_channel_email(interface_notification_channel):
    """
    Notification channel "email": sends the result of a check via SMTP.
    """

    def notify(self, parameters, name, data, state, output):
        """
        [implementation]

        Send one e-mail from parameters["sender"] to all of
        parameters["receivers"]. The subject consists of the configured tags
        and the encoded condition (each upper-cased and put in brackets),
        followed by the check title; the body is the check output, or
        "(no infos)" if there is none.
        """
        access = parameters["access"]
        message = MIMEText("(no infos)" if (output is None) else output)
        # "tags" is optional, so do not fail on nodes without it
        tags = list(parameters.get("tags", [])) + [condition_encode(state["condition"])]
        message["Subject"] = string_coin(
            "{{tags}} {{title}}",
            {
                "tags": " ".join(("[%s]" % tag.upper()) for tag in tags),
                "title": data["title"],
            }
        )
        message["From"] = parameters["sender"]
        message["To"] = ",".join(parameters["receivers"])
        # connect to the configured port (0 lets smtplib pick its default);
        # the context manager closes the connection even if login or sending
        # fails
        with _smtplib.SMTP(access["host"], access.get("port", 0)) as smtp_connection:
            smtp_connection.login(
                access["username"],
                access["password"]
            )
            smtp_connection.sendmail(
                parameters["sender"],
                parameters["receivers"],
                message.as_string()
            )

View file

@ -0,0 +1,9 @@
class interface_check_kind(object):
    """
    Interface every check kind has to implement.
    """

    def normalize_conf_node(self, node):
        """
        Shall return the kind specific parameters node with all optional
        fields filled in.
        """
        raise NotImplementedError()

    def run(self, check_data):
        """
        Shall execute the check described by check_data and return its
        result.
        """
        raise NotImplementedError()

View file

@ -0,0 +1,5 @@
class interface_notification_channel(object):
    """
    Interface every notification channel has to implement.
    """

    def notify(self, parameters, name, data, state, output):
        """
        Shall deliver a notification about the given check state through the
        channel.
        """
        raise NotImplementedError()

77
source/lib.py Normal file
View file

@ -0,0 +1,77 @@
import enum as _enum
import time as _time
def file_read(path):
    """
    Return the whole content of the text file at the given path.

    The file is closed even if reading fails.
    """
    with open(path, "r") as handle:
        return handle.read()
def file_write(path, content):
    """
    Write the given text to the file at the given path, replacing any
    previous content.

    The file is closed even if writing fails.
    """
    with open(path, "w") as handle:
        handle.write(content)
def string_coin(template, arguments):
    """
    Fill a template string.

    Every occurrence of "{{key}}" in the template is replaced with the value
    stored under that key in `arguments`. Values which are not strings (e.g.
    numbers) are converted with `str` instead of raising a TypeError.
    Placeholders without a matching key are left untouched.
    """
    result = template
    for (key, value) in arguments.items():
        result = result.replace("{{%s}}" % key, str(value))
    return result
def get_current_timestamp():
    """
    Return the current time of `time.time()`, rounded to whole seconds, as
    integer.
    """
    seconds = _time.time()
    return round(seconds)
def dict_merge(core_dict, mantle_dict, recursive = False):
    """
    Merge two dicts into a new one; neither input is modified.

    Parameters:
        core_dict: the base values
        mantle_dict: the overriding values
        recursive: if False, a key present in both dicts gets the value of
            `mantle_dict`; if True, values which are dicts in both inputs
            are merged on every nesting level and values which are lists in
            both inputs are concatenated (core first)

    Returns:
        the merged dict; keys keep the order of their first appearance
    """
    result_dict = {}
    for current_dict in [core_dict, mantle_dict]:
        for (key, value) in current_dict.items():
            if key not in result_dict:
                result_dict[key] = value
            elif (recursive and isinstance(result_dict[key], dict) and isinstance(value, dict)):
                # hand the flag down, otherwise merging stops after one level
                result_dict[key] = dict_merge(result_dict[key], value, recursive)
            elif (recursive and isinstance(result_dict[key], list) and isinstance(value, list)):
                result_dict[key] = (result_dict[key] + value)
            else:
                result_dict[key] = value
    return result_dict
@_enum.unique
class enum_condition(_enum.Enum):
    """
    Possible results of a check.
    """
    unknown = 0
    ok = 1
    warning = 2
    critical = 3
def condition_encode(condition):
    """
    Return the string representation ("ok", "unknown", "warning" or
    "critical") of a condition.

    Raises:
        ValueError: if the given value is none of the known conditions
    """
    pairs = (
        (enum_condition.ok, "ok"),
        (enum_condition.unknown, "unknown"),
        (enum_condition.warning, "warning"),
        (enum_condition.critical, "critical"),
    )
    for (candidate, encoded) in pairs:
        if condition == candidate:
            return encoded
    raise ValueError("unhandled condition: %s" % str(condition))
def condition_decode(condition_encoded):
    """
    Return the condition which belongs to a string representation ("ok",
    "unknown", "warning" or "critical").

    Raises:
        ValueError: if the given value is none of the known representations
    """
    pairs = (
        ("ok", enum_condition.ok),
        ("unknown", enum_condition.unknown),
        ("warning", enum_condition.warning),
        ("critical", enum_condition.critical),
    )
    for (candidate, condition) in pairs:
        if condition_encoded == candidate:
            return condition
    raise ValueError("unhandled encoded condition: %s" % condition_encoded)

256
source/main.py Normal file
View file

@ -0,0 +1,256 @@
def state_encode(state):
    """
    Convert an in-memory check state into its JSON-serializable form.

    The condition is turned into its string representation by
    `condition_encode`; timestamp and count are passed through unchanged.
    """
    encoded = dict()
    encoded["timestamp"] = state["timestamp"]
    encoded["condition"] = condition_encode(state["condition"])
    encoded["count"] = state["count"]
    return encoded
def state_decode(state_encoded):
    """
    Convert a check state as stored in the state file back into its in-memory
    form.

    The condition string is turned into the corresponding condition value by
    `condition_decode`; timestamp and count are passed through unchanged.
    """
    decoded = dict()
    decoded["timestamp"] = state_encoded["timestamp"]
    decoded["condition"] = condition_decode(state_encoded["condition"])
    decoded["count"] = state_encoded["count"]
    return decoded
def conf_normalize_check(check_kind_implementations, defaults, name, node):
    """
    Fill in all optional fields of a single check configuration node.

    Parameters:
        check_kind_implementations: mapping from check kind name to the
            implementation object; its `normalize_conf_node` method is used
            to normalize the kind specific parameters
        defaults: default values for checks; read for the keys "active"
            (optional), "schedule" and "notifications"
        name: name of the check; used as title if the node specifies none
        node: raw configuration node of the check

    Returns:
        dict with the keys "title", "active", "schedule", "notifications",
        "kind" and "parameters"

    Raises:
        ValueError: if the node has no "kind" or its kind is not handled
    """
    if "kind" not in node:
        raise ValueError("missing mandatory 'member' field 'kind'")
    if node["kind"] not in check_kind_implementations:
        raise ValueError("unhandled kind: %s" % node["kind"])
    node_ = {
        "title": name,
        # honour the configured default instead of unconditionally enabling
        # the check; defaults without an "active" entry still mean "enabled"
        "active": defaults.get("active", True),
        "schedule": defaults["schedule"],
        "notifications": defaults["notifications"],
        "parameters": {},
        # values given in the check node itself take precedence
        **node,
    }
    implementation = check_kind_implementations[node_["kind"]]
    return {
        "title": node_["title"],
        "active": node_["active"],
        "schedule": node_["schedule"],
        "notifications": node_["notifications"],
        "kind": node_["kind"],
        "parameters": implementation.normalize_conf_node(node_["parameters"]),
    }
def conf_normalize_defaults(node):
    """
    Complete the "defaults" configuration node.

    Entries missing in the node are filled in with: active, hourly schedule,
    no notifications. Entries present in the node win.
    """
    fallback = {
        "active": True,
        "schedule": {"kind": "hourly"},
        "notifications": [],
    }
    return dict_merge(fallback, node)
def conf_normalize_root(check_kind_implementations, node):
    """
    Normalize the whole configuration.

    Parameters:
        check_kind_implementations: mapping from check kind name to the
            implementation object, handed through to `conf_normalize_check`
        node: root node of the configuration; its "checks" member maps check
            names to check nodes, its optional "defaults" member holds the
            default values for checks

    Returns:
        dict mapping each check name to its normalized check node
    """
    # the defaults are the same for every check, so normalize them only once;
    # a configuration without "defaults" node falls back to the built-in ones
    defaults = conf_normalize_defaults(node.get("defaults", {}))
    return {
        check_name: conf_normalize_check(
            check_kind_implementations,
            defaults,
            check_name,
            check_node
        )
        for (check_name, check_node) in node["checks"].items()
    }
# Seconds that have to pass between two executions of a check, per schedule
# kind.
_SCHEDULE_INTERVALS = {
    "minutely": 60,
    "hourly": (60 * 60),
    "daily": (60 * 60 * 24),
}

# Seconds after which a check is repeated as long as its state still carries
# a counter, i.e. as long as its condition has not outlasted the threshold.
_RECHECK_INTERVAL = (1 * 5)


def _check_is_due(schedule, old_item_state, timestamp):
    """
    Tell whether a check has to be executed now.

    A check is due if it never ran before, if its counter is still running
    and the recheck interval has passed, or if the interval of its schedule
    kind has passed. An unknown schedule kind never makes a check due.
    """
    if old_item_state is None:
        return True
    elapsed = (timestamp - old_item_state["timestamp"])
    if (old_item_state["count"] is not None) and (elapsed >= _RECHECK_INTERVAL):
        return True
    interval = _SCHEDULE_INTERVALS.get(schedule["kind"])
    return ((interval is not None) and (elapsed >= interval))


def _next_count(old_item_state, condition, threshold):
    """
    Compute the counter of the new state.

    The counter restarts at 1 when the condition changed (or no old state
    exists), is incremented while it stays within the threshold and becomes
    None once the condition has been seen more often than the threshold.
    """
    if (old_item_state is None) or (old_item_state["condition"] != condition):
        return 1
    old_count = old_item_state["count"]
    if (old_count is not None) and ((old_count + 1) <= threshold):
        return (old_count + 1)
    return None


def _shall_notify(count, threshold, keep_notifying):
    """
    Tell whether notifications have to be sent for a new state: exactly when
    the threshold is reached, and afterwards only if requested.
    """
    if count is None:
        return keep_notifying
    return (count == threshold)


def main():
    """
    Entry point: parse the command line arguments, load configuration and
    state, execute all due checks, store the new states and send
    notifications.
    """
    ## args
    argumentparser = _argparse.ArgumentParser(
        description = "monitoring processor",
        formatter_class = _argparse.ArgumentDefaultsHelpFormatter
    )
    argumentparser.add_argument(
        "-c",
        "--conf-path",
        type = str,
        default = "conf.json",
        dest = "conf_path",
        metavar = "<conf-path>",
        help = "path to the configuration file"
    )
    argumentparser.add_argument(
        "-s",
        "--state-path",
        type = str,
        default = "/tmp/monitoring-state.json",
        dest = "state_path",
        metavar = "<state-path>",
        help = "path to the state file, which contains information about the recent checks"
    )
    argumentparser.add_argument(
        "-t",
        "--threshold",
        type = int,
        default = 3,
        dest = "threshold",
        metavar = "<threshold>",
        help = "how often a condition has to occur in order to be reported"
    )
    argumentparser.add_argument(
        "-k",
        "--keep-notifying",
        action = "store_true",
        default = False,
        dest = "keep_notifying",
        help = "whether notifications shall be kept sending after the threshold has been surpassed"
    )
    argumentparser.add_argument(
        "-x",
        "--expose-full-conf",
        action = "store_true",
        default = False,
        dest = "expose_full_conf",
        help = "only print the extended configuration to stdout and exit (useful for debug purposes)"
    )
    args = argumentparser.parse_args()

    ## exec

    ### load check kind implementations
    check_kind_implementations = {
        "script": implementation_check_kind_script(),
        "http_request": implementation_check_kind_http_request(),
    }

    ### load notification channel implementations
    notification_channel_implementations = {
        "console": implementation_notification_channel_console(),
        "email": implementation_notification_channel_email(),
    }

    ### get configuration data
    checks = conf_normalize_root(check_kind_implementations, _json.loads(file_read(args.conf_path)))
    if args.expose_full_conf:
        _sys.stdout.write(_json.dumps(checks, indent = "\t") + "\n")
        # printing the configuration is a successful run, so report success
        _sys.exit(0)

    ### get state data
    if not _os.path.exists(args.state_path):
        state_data = {}
        file_write(args.state_path, _json.dumps(state_data, indent = "\t"))
    else:
        state_data = _json.loads(file_read(args.state_path))

    ### iterate through checks
    for (check_name, check_data) in checks.items():
        if not check_data["active"]:
            continue

        ### get old state and examine whether the check shall be executed
        old_item_state = (
            state_decode(state_data[check_name])
            if (check_name in state_data) else
            None
        )
        timestamp = get_current_timestamp()
        if not _check_is_due(check_data["schedule"], old_item_state, timestamp):
            continue
        _sys.stderr.write(
            string_coin(
                "-- {{check_name}}\n",
                {
                    "check_name": check_name,
                }
            )
        )

        ### execute check and set new state
        result = check_kind_implementations[check_data["kind"]].run(check_data)
        new_item_state = {
            "timestamp": timestamp,
            "condition": result["condition"],
            "count": _next_count(old_item_state, result["condition"], args.threshold),
        }
        state_data[check_name] = state_encode(new_item_state)
        # the state file is rewritten after every check, so that results
        # survive a failure of a later check or notification
        file_write(args.state_path, _json.dumps(state_data, indent = "\t"))

        ### send notifications
        if not _shall_notify(new_item_state["count"], args.threshold, args.keep_notifying):
            continue
        for notification in check_data["notifications"]:
            if notification["kind"] not in notification_channel_implementations:
                raise ValueError("invalid notification kind: %s" % notification["kind"])
            notification_channel_implementations[notification["kind"]].notify(
                notification["parameters"],
                check_name,
                check_data,
                new_item_state,
                result["output"]
            )


main()

9
source/packages.py Normal file
View file

@ -0,0 +1,9 @@
import sys as _sys
import os as _os
import subprocess as _subprocess
import argparse as _argparse
import json as _json
import requests as _requests
import smtplib as _smtplib
from email.mime.text import MIMEText

4
todo.md Normal file
View file

@ -0,0 +1,4 @@
- prevent parallel access to state file?
- more resilient checks
- self check
- notification channel "Matrix"

16
tools/build Executable file
View file

@ -0,0 +1,16 @@
#!/usr/bin/env sh

# Assembles the single-file executable build/heimdall: a python3 shebang line
# followed by the concatenated source files (imports first, entry point last).

# abort on the first failing command, so that e.g. a missing source file does
# not leave a truncated build/heimdall behind that is marked as executable
set -e

mkdir -p build
echo '#!/usr/bin/env python3' > build/heimdall
cat \
    source/packages.py \
    source/lib.py \
    source/interface_check_kind.py \
    source/implementation_check_kind_script.py \
    source/implementation_check_kind_http_request.py \
    source/interface_notification_channel.py \
    source/implementation_notification_channel_console.py \
    source/implementation_notification_channel_email.py \
    source/main.py \
    >> build/heimdall
chmod +x build/heimdall

3
tools/install Executable file
View file

@ -0,0 +1,3 @@
#!/usr/bin/env sh
# Copies the built executable into /usr/local/bin/; build/heimdall has to
# exist already (it is produced by tools/build).
cp build/heimdall /usr/local/bin/