From 8d5d5ed434f99c9dc6f8be54a10bda18cbba0f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCrkan=20G=C3=BCr?= Date: Thu, 16 Oct 2025 09:00:18 +0200 Subject: [PATCH 1/3] check_systemd_units: Ignore non-actionable scope failures --- src/check_systemd_units.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/check_systemd_units.py b/src/check_systemd_units.py index 32956135..4b7bdfde 100755 --- a/src/check_systemd_units.py +++ b/src/check_systemd_units.py @@ -33,6 +33,7 @@ import collections import datetime import logging +import re import subprocess import sys import time @@ -282,7 +283,18 @@ def check_unit(self, unit_id: str, timer: bool = False) -> CheckResult: ) ) - # Fast state checks first + # Ignore transient user/session units that are managed by logind. + # These come and go with user logins and are arguably harmless: + # * session-*.scope + # * user@.service + # * user-.slice + if re.match( + r'^(session-.*\.scope|user[@-].*\.(service|slice))$', unit_id + ): + logger.debug(f'Ignoring transient user/session unit {unit_id}') + return CheckResult(Codes.OK, '') + + # Fast state checks if unit['LoadState'] != 'loaded' and unit['ActiveState'] != 'inactive': return CheckResult( Codes.CRITICAL, From c56c837b73ac76e89f1a0fbf4a8c32417384eff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCrkan=20G=C3=BCr?= Date: Thu, 5 Feb 2026 11:54:55 +0100 Subject: [PATCH 2/3] check_systemd_units: Fix formatting --- src/check_systemd_units.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/check_systemd_units.py b/src/check_systemd_units.py index 4b7bdfde..a0f76a41 100755 --- a/src/check_systemd_units.py +++ b/src/check_systemd_units.py @@ -62,7 +62,7 @@ def parse_args(): action='append', default=[], help='Units to return critical when failed. Checking a timer unit' - ' will implicitly check the related service unit as well', + ' will implicitly check the related service unit as well', ) parser.add_argument( '-i', @@ -74,14 +74,14 @@ def parse_args(): parser.add_argument( '-w', '--timer-warn', - default=3., + default=3.0, type=float, help='Warning threshold of timer (inactivity/min_monotonic_interval)', ) parser.add_argument( '-c', '--timer-crit', - default=7., + default=7.0, type=float, help='Critical threshold of timer (inactivity/min_monotonic_interval)', ) @@ -119,8 +119,8 @@ def run_checks(args: argparse.Namespace) -> CheckResult: units = parse_units(raw_units) results = process(args, units) - logger.info('Criticals are: {}'.format(results[2])) - logger.info('Warnings are: {}'.format(results[1])) + logger.info(f'Criticals are: {results[2]}') + logger.info(f'Warnings are: {results[1]}') return gen_output(results) @@ -148,9 +148,7 @@ def process( if res_code == Codes.OK: continue - logger.info('Problem for {} is: {} - {}'.format( - unit_id, res_code, res.msg, - )) + logger.info(f'Problem for {unit_id} is: {res_code} - {res.msg}') results[res_code].append((unit_id, res.msg)) return results @@ -176,7 +174,7 @@ def gen_output( else: problems[problem] = unit - logger.info('Problems are: {}'.format(problems)) + logger.info(f'Problems are: {problems}') message += '; '.join([': '.join(i) for i in problems.items()]) return exit_code, message @@ -300,15 +298,15 @@ def check_unit(self, unit_id: str, timer: bool = False) -> CheckResult: Codes.CRITICAL, 'the unit is not loaded but not inactive', ) - elif unit['ActiveState'] == 'failed': + if unit['ActiveState'] == 'failed': return CheckResult(Codes.CRITICAL, 'the unit is failed') - elif unit['LoadState'] != 'loaded': + if unit['LoadState'] != 'loaded': return CheckResult(Codes.WARNING, 'the unit is not loaded') # Check the specifics about the different unit types if unit_id.endswith('.timer'): return self.check_timer(unit_id) - elif unit_id.endswith('.service'): + if unit_id.endswith('.service'): return self.check_service(unit_id, timer) return CheckResult(Codes.OK, '') @@ -409,10 +407,11 @@ def check_intervals(self, unit_id: str) -> CheckResult: # We can check only monotonic triggers for regular execution checked_intervals = ['OnUnitActiveUSec', 'OnUnitInactiveUSec'] intervals = [ - (p[0], p[1]) for p in unit['TimersMonotonic'] + (p[0], p[1]) + for p in unit['TimersMonotonic'] if p[0] in checked_intervals ] - logger.debug('Monotonic timers are: {}'.format(intervals)) + logger.debug(f'Monotonic timers are: {intervals}') if not intervals: return CheckResult(Codes.OK, '') @@ -474,7 +473,7 @@ def _check_interval( return CheckResult( code, f"the timer hasn't been launched since {last_trigger_human}, " - f"look at {service_unit['Id']}" + f'look at {service_unit["Id"]}', ) return CheckResult(Codes.OK, '') From dd5ab6a68d4d1de7c8263b141e8bedadfc3b343b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCrkan=20G=C3=BCr?= Date: Thu, 5 Feb 2026 15:45:38 +0100 Subject: [PATCH 3/3] check_systemd_units: Compile regex --- src/check_systemd_units.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/check_systemd_units.py b/src/check_systemd_units.py index a0f76a41..b08991f9 100755 --- a/src/check_systemd_units.py +++ b/src/check_systemd_units.py @@ -39,6 +39,11 @@ import time import typing +# Compiled regex for transient user/session units +TRANSIENT_UNIT_RE = re.compile( + r'^(session-.*\.scope|user[@-].*\.(service|slice))$' +) + CheckResult = collections.namedtuple('CheckResult', ['code', 'msg']) logging.basicConfig( @@ -286,9 +291,7 @@ def check_unit(self, unit_id: str, timer: bool = False) -> CheckResult: # * session-*.scope # * user@.service # * user-.slice - if re.match( - r'^(session-.*\.scope|user[@-].*\.(service|slice))$', unit_id - ): + if TRANSIENT_UNIT_RE.match(unit_id): logger.debug(f'Ignoring transient user/session unit {unit_id}') return CheckResult(Codes.OK, '')