diff --git a/roles/services/files/99-local.cfg b/roles/services/files/99-local.cfg
new file mode 100644
index 0000000..6e9204b
--- /dev/null
+++ b/roles/services/files/99-local.cfg
@@ -0,0 +1 @@
+command[check_chrony]=/usr/lib/nagios/plugins/check_chrony 1 2
diff --git a/roles/services/files/check_chrony b/roles/services/files/check_chrony
new file mode 100644
index 0000000..2ddc6e2
--- /dev/null
+++ b/roles/services/files/check_chrony
@@ -0,0 +1,183 @@
+#!/usr/bin/python3
+
+# Monitor chronyd
+#
+# Copyright 2019 Bernd Zeimetz
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from subprocess import Popen, PIPE
+import optparse
+import sys
+
+# Sample "chronyc tracking" output of a daemon that is not synchronised,
+# kept for reference. Its all-zero Reference ID is what the check below
+# reports as a failed connection.
+broken_example = """Reference ID    : 00000000 ()
+Stratum         : 0
+Ref time (UTC)  : Thu Jan 01 00:00:00 1970
+System time     : 0.000000000 seconds fast of NTP time
+Last offset     : +0.000000000 seconds
+RMS offset      : 0.000000000 seconds
+Frequency       : 8.769 ppm fast
+Residual freq   : +0.000 ppm
+Skew            : 0.000 ppm
+Root delay      : 1.000000000 seconds
+Root dispersion : 1.000000000 seconds
+Update interval : 0.0 seconds
+Leap status     : Not synchronised
+"""
+
+NAGIOS_STATUS = {
+    "OK": 0,
+    "WARNING": 1,
+    "CRITICAL": 2,
+    "UNKNOWN": 3
+}
+
+# Keys of the parsed "chronyc tracking" output that the checks depend on.
+REQUIRED_KEYS = ('reference_id', 'stratum', 'system_time')
+
+
+def parse_tracking(text):
+    """Turn "chronyc tracking" output into a dict.
+
+    Lines without a colon are skipped. Keys are normalised by dropping
+    parentheses, replacing spaces with underscores and lowercasing, e.g.
+    "Ref time (UTC)" becomes "ref_time_utc". Values keep any further
+    colons (timestamps) and are stripped of surrounding whitespace.
+    """
+    parsed = {}
+    for line in text.split('\n'):
+        if ':' not in line:
+            continue
+        key, _, value = line.partition(':')
+        key = key.strip().replace('(', '').replace(')', '')
+        parsed[key.replace(' ', '_').lower()] = value.strip()
+    return parsed
+
+
+def stratum_problem(stratum, limit):
+    """Return a message if stratum exceeds limit, otherwise None."""
+    if stratum > limit:
+        return "chrony stratum too high: {} > {}".format(stratum, limit)
+    return None
+
+
+def offset_problem(diff_ms, desc, limit_ms):
+    """Return a message if the offset exceeds limit_ms, otherwise None."""
+    if diff_ms > limit_ms:
+        return "chrony system time {}ms {}. ({}ms > {}ms)".format(
+            diff_ms, desc, diff_ms, limit_ms
+        )
+    return None
+
+
+def finish(status, message):
+    """Print the one-line plugin result and exit with the Nagios code."""
+    print(message)
+    sys.exit(NAGIOS_STATUS[status])
+
+
+parser = optparse.OptionParser()
+parser.set_usage("%prog [options]")
+parser.add_option(
+    "-w", "--warning", dest="warning", metavar="WARNING",
+    type="int", default="1000",
+    help="time differences in ms (warning)"
+)
+parser.add_option(
+    "-c", "--critical", dest="critical", metavar="CRITICAL",
+    type="int", default="3000",
+    help="time differences in ms (critical)"
+)
+parser.add_option(
+    "-W", "--stratum-warning", dest="stratum_warning",
+    metavar="STRATUM_WARNING",
+    type="int", default="3",
+    help="maximum stratum level (warning)"
+)
+parser.add_option(
+    "-C", "--stratum-critical", dest="stratum_critical",
+    metavar="STRATUM_CRITICAL",
+    type="int", default="5",
+    help="maximum stratum level (critical)"
+)
+# Positional arguments are accepted but ignored: thresholds come only from
+# the options above.
+(options, args) = parser.parse_args()
+
+try:
+    chronyc = Popen(
+        ['chronyc', 'tracking'],
+        stdin=PIPE, stdout=PIPE, stderr=PIPE,
+        bufsize=-1
+    )
+    output, error = chronyc.communicate()
+except OSError as exc:
+    # Without this an uncaught exception would end the plugin with exit
+    # status 1, which Nagios reads as WARNING.
+    finish('UNKNOWN', "chronyc could not be executed: {}".format(exc))
+
+output = output.decode(errors='replace')
+
+# Non-zero also covers negative codes (chronyc terminated by a signal).
+if chronyc.returncode != 0:
+    details = output + ' ' + error.decode(errors='replace')
+    # Collapse stdout and stderr to a single line of status text.
+    finish('CRITICAL', "chronyc failed: {}".format(' '.join(details.split())))
+
+parsed_output = parse_tracking(output)
+
+missing = [key for key in REQUIRED_KEYS if key not in parsed_output]
+if missing:
+    finish('UNKNOWN', "chronyc output lacks: {}".format(', '.join(missing)))
+
+if '00000000' in parsed_output['reference_id']:
+    finish('CRITICAL', "chrony failed to connect to NTP server.")
+
+system_time = parsed_output['system_time'].split(' ')
+try:
+    stratum = int(parsed_output['stratum'])
+    system_time_diff_ms = float(system_time[0]) * 1000
+except ValueError:
+    finish('UNKNOWN', "chronyc output could not be parsed.")
+system_time_desc = ' '.join(system_time[1:])
+
+# Every CRITICAL threshold is evaluated before any WARNING one, so that a
+# stratum warning cannot mask a critical clock offset.
+for status, problem in (
+    ('CRITICAL', stratum_problem(stratum, options.stratum_critical)),
+    ('CRITICAL', offset_problem(
+        system_time_diff_ms, system_time_desc, options.critical)),
+    ('WARNING', stratum_problem(stratum, options.stratum_warning)),
+    ('WARNING', offset_problem(
+        system_time_diff_ms, system_time_desc, options.warning)),
+):
+    if problem:
+        finish(status, problem)
+
+finish(
+    'OK',
+    "chrony OK: System Time {}, Stratum {}".format(
+        parsed_output['system_time'],
+        parsed_output['stratum'],
+    )
+)
diff --git a/roles/services/handlers/main.yml b/roles/services/handlers/main.yml
index 2727cca..da05443 100644
--- a/roles/services/handlers/main.yml
+++ b/roles/services/handlers/main.yml
@@ -5,3 +5,7 @@
 - name: reload-systemd
   command:
     cmd: systemctl daemon-reload
+- name: restart-nrpe
+  systemd:
+    name: nagios-nrpe-server
+    state: restarted
diff --git a/roles/services/tasks/timeserver.yml b/roles/services/tasks/timeserver.yml
index 5f3d850..f943b9c 100644
--- a/roles/services/tasks/timeserver.yml
+++ b/roles/services/tasks/timeserver.yml
@@ -11,5 +11,15 @@
     group: root
     mode: u=rw,g=r,o=r
   notify: restart-chrony
-
+- name: Copy NRPE configuration and plugins
+  copy:
+    src: "{{ item.src }}"
+    dest: "{{ item.dest }}"
+    owner: "{{ item.owner }}"
+    group: "{{ item.group }}"
+    mode: "{{ item.mode }}"
+  loop:
+    - { src: "99-local.cfg", dest: "/etc/nagios/nrpe.d/99-local.cfg", owner: "nagios", group: "nagios", mode: "0640" }
+    - { src: "check_chrony", dest: "/usr/lib/nagios/plugins/check_chrony", owner: "root", group: "root", mode: "0755" }
+  notify: restart-nrpe
 