From e050a5f4832b178ada29a5ac2dca25071eceb5dd Mon Sep 17 00:00:00 2001 From: Jonas Bardino Date: Tue, 27 Jan 2026 13:59:21 +0100 Subject: [PATCH 1/3] Retire the python2 workarounds for inefficient walk and listdir in `os` module. There is quite a bit of related calling code to update accordingly to completely eliminate the construct. --- mig/shared/fileio.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/mig/shared/fileio.py b/mig/shared/fileio.py index a8a295ab5..809bf4573 100644 --- a/mig/shared/fileio.py +++ b/mig/shared/fileio.py @@ -42,24 +42,10 @@ import zipfile # NOTE: We expose optimized walk function directly for ease and efficiency. -# Requires stand-alone scandir module on python 2 whereas the native os -# functions are built-in and optimized similarly on python 3+ +# The functions are built-in and optimized since python 3 but several +# modules need to adjust before we can eliminate this old workaround. slow_walk, slow_listdir = False, False -if sys.version_info[0] > 2: - from os import walk, listdir -else: - try: - from distutils.version import StrictVersion - from scandir import walk, listdir, __version__ as scandir_version - if StrictVersion(scandir_version) < StrictVersion("1.3"): - # Important os.walk compatibility utf8 fixes were not added until 1.3 - raise ImportError( - "scandir version is too old: fall back to os.walk") - except ImportError as err: - # print("DEBUG: not using scandir: %s" % err) - slow_walk = slow_listdir = True - walk = os.walk - listdir = os.listdir +walk, listdir = os.walk, os.listdir try: from mig.shared.base import force_utf8, force_utf8_rec, force_native_str @@ -571,8 +557,6 @@ def remove_rec(dir_path, configuration): Returns Boolean to indicate success, writes messages to log. 
""" _logger = configuration.logger - if slow_walk: - _logger.warning("no optimized walk available - using old os.walk") try: if not os.path.isdir(dir_path): raise Exception("Directory %r does not exist" % dir_path) From 4b5c5c01c78cd116617d2f2dae72004e0de7755e Mon Sep 17 00:00:00 2001 From: Jonas Bardino Date: Tue, 27 Jan 2026 14:18:42 +0100 Subject: [PATCH 2/3] Retire external scandir module use and replace it with the built-in `os` function available since python 3. --- mig/server/grid_cron.py | 20 ++------------- mig/server/grid_events.py | 19 +-------------- mig/shared/fileio.py | 2 +- mig/shared/functionality/docs.py | 7 +----- mig/shared/resource.py | 42 ++++++++------------------------ mig/shared/user.py | 36 ++++++--------------------- 6 files changed, 22 insertions(+), 104 deletions(-) diff --git a/mig/server/grid_cron.py b/mig/server/grid_cron.py index 854e289f5..ec0fe33a3 100755 --- a/mig/server/grid_cron.py +++ b/mig/server/grid_cron.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # grid_cron - daemon to monitor user crontabs and trigger actions -# Copyright (C) 2003-2024 The MiG Project lead by Brian Vinter +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. 
# @@ -57,22 +57,6 @@ print('ERROR: the python watchdog module is required for this daemon') sys.exit(1) -# Use the scandir module version if available: -# https://github.com/benhoyt/scandir -# Otherwise fail - -try: - from distutils.version import StrictVersion - from scandir import scandir, walk, __version__ as scandir_version - if StrictVersion(scandir_version) < StrictVersion("1.3"): - - # Important os.walk compatibility utf8 fixes were not added until 1.3 - - raise ImportError('scandir version is too old >= 1.3 required') -except ImportError as exc: - print('ERROR: %s' % exc) - sys.exit(1) - from mig.shared.base import force_utf8, client_dir_id, client_id_dir from mig.shared.cmdapi import parse_command_args from mig.shared.conf import get_configuration_object @@ -80,7 +64,7 @@ cron_log_name, cron_log_size, cron_log_cnt, csrf_field from mig.shared.events import get_time_expand_map, parse_crontab, cron_match, \ parse_atjobs, at_remain -from mig.shared.fileio import makedirs_rec +from mig.shared.fileio import makedirs_rec, scandir, walk from mig.shared.handlers import get_csrf_limit, make_csrf_token from mig.shared.job import fill_mrsl_template, new_job from mig.shared.logger import daemon_logger, register_hangup_handler diff --git a/mig/server/grid_events.py b/mig/server/grid_events.py index d9c476fc5..70c987624 100755 --- a/mig/server/grid_events.py +++ b/mig/server/grid_events.py @@ -59,23 +59,6 @@ print('ERROR: the python watchdog module is required for this daemon') sys.exit(1) -# Use the native os.scandir function on python 3+ or rely on similar function -# from the stand-alone module of the same name when on python 2. 
-if sys.version_info[0] >= 3: - from os import scandir -else: - try: - from distutils.version import StrictVersion - from scandir import scandir, __version__ as scandir_version - if StrictVersion(scandir_version) < StrictVersion("1.3"): - - # Important os.walk compatibility utf8 fixes were not added until 1.3 - - raise ImportError('scandir version is too old >= 1.3 required') - except ImportError as exc: - print('ERROR: this daemon requires the scandir module on python 2') - sys.exit(1) - try: from mig.shared.base import force_utf8 from mig.shared.cmdapi import parse_command_args @@ -83,7 +66,7 @@ from mig.shared.defaults import valid_trigger_changes, workflows_log_name, \ workflows_log_size, workflows_log_cnt, csrf_field, default_vgrid from mig.shared.events import get_path_expand_map - from mig.shared.fileio import makedirs_rec, pickle, unpickle, walk + from mig.shared.fileio import makedirs_rec, pickle, unpickle, scandir, walk from mig.shared.handlers import get_csrf_limit, make_csrf_token from mig.shared.job import fill_mrsl_template, new_job from mig.shared.listhandling import frange diff --git a/mig/shared/fileio.py b/mig/shared/fileio.py index 809bf4573..175eb5580 100644 --- a/mig/shared/fileio.py +++ b/mig/shared/fileio.py @@ -45,7 +45,7 @@ # The functions are built-in and optimized since python 3 but several # modules need to adjust before we can eliminate this old workaround. 
slow_walk, slow_listdir = False, False -walk, listdir = os.walk, os.listdir +listdir, scandir, walk = os.listdir, os.scandir, os.walk try: from mig.shared.base import force_utf8, force_utf8_rec, force_native_str diff --git a/mig/shared/functionality/docs.py b/mig/shared/functionality/docs.py index e1c1efe6f..e387a9875 100755 --- a/mig/shared/functionality/docs.py +++ b/mig/shared/functionality/docs.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # docs - online documentation generator -# Copyright (C) 2003-2022 The MiG Project lead by Brian Vinter +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. # @@ -233,11 +233,6 @@ def license_information(configuration, output_objects): 'title': 'Python Watchdog Module at Python Package Index', 'text': 'Python Watchdog Module (Apache 2.0 license)'}) output_objects.append({'object_type': 'text', 'text': ''}) - output_objects.append({'object_type': 'link', - 'destination': 'https://pypi.python.org/pypi/scandir', - 'class': 'urllink iconspace', - 'title': 'Python scandir Module at Python Package Index', - 'text': 'Python scandir Module (New BSD license)'}) output_objects.append({'object_type': 'text', 'text': ''}) output_objects.append({'object_type': 'link', 'destination': 'https://pypi.python.org/pypi/pyenchant', diff --git a/mig/shared/resource.py b/mig/shared/resource.py index fbd5b7720..dc8b985dc 100644 --- a/mig/shared/resource.py +++ b/mig/shared/resource.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # resource - resource configuration functions -# Copyright (C) 2003-2024 The MiG Project lead by Brian Vinter +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. 
# @@ -34,24 +34,11 @@ import re import socket -# TODO: move to os.scandir with py3 -# NOTE: Use faster scandir if available -try: - from distutils.version import StrictVersion - from scandir import scandir, __version__ as scandir_version - if StrictVersion(scandir_version) < StrictVersion("1.3"): - # Important os.scandir compatibility utf8 fixes were not added until - # 1.3 - raise ImportError( - "scandir version is too old: fall back to os.listdir") -except ImportError: - scandir = None - from mig.shared.base import client_id_dir from mig.shared.confparser import get_resource_config_dict, run from mig.shared.defaults import exe_leader_name, keyword_auto -from mig.shared.fileio import pickle, move, walk, write_file, read_file_lines, \ - write_file_lines +from mig.shared.fileio import pickle, move, scandir, walk, write_file, \ + read_file_lines, write_file_lines from mig.shared.modified import mark_resource_modified, mark_vgrid_modified from mig.shared.pwcrypto import make_simple_hash from mig.shared.resconfkeywords import get_resource_specs, get_exenode_specs, \ @@ -876,26 +863,17 @@ def write_resource_config(configuration, resource_conf, conf_path): def list_resources(resource_home, only_valid=False): """Return a list of all resources by listing the resource configuration - directories in resource_home. Uses scandir for efficiency when available. - Use only_valid parameter to filter out deleted and broken resources. + directories in resource_home. Uses scandir for efficiency. Use only_valid + parameter to filter out deleted and broken resources. 
""" resources = [] - if scandir: - children = scandir(resource_home) - else: - children = os.listdir(resource_home) + children = scandir(resource_home) for entry in children: # skip all files and dot dirs - they are NOT resources - if scandir: - name = entry.name - path = entry.path - if not entry.is_dir(): - continue - else: - name = entry - path = os.path.join(resource_home, name) - if not os.path.isdir(path): - continue + name = entry.name + path = entry.path + if not entry.is_dir(): + continue if path.find(os.sep + '.') != -1: continue diff --git a/mig/shared/user.py b/mig/shared/user.py index 940cfbc93..fb33d99b2 100644 --- a/mig/shared/user.py +++ b/mig/shared/user.py @@ -33,23 +33,10 @@ import base64 import os -# TODO: move to os.scandir with py3 -# NOTE: Use faster scandir if available -try: - from distutils.version import StrictVersion - from scandir import scandir, __version__ as scandir_version - if StrictVersion(scandir_version) < StrictVersion("1.3"): - # Important os.scandir compatibility utf8 fixes were not added until - # 1.3 - raise ImportError( - "scandir version is too old: fall back to os.listdir") -except ImportError: - scandir = None - from mig.shared.base import client_dir_id, client_id_dir, get_site_base_url, \ force_native_str, force_utf8 from mig.shared.defaults import litmus_id -from mig.shared.fileio import read_file +from mig.shared.fileio import read_file, scandir from mig.shared.pwcrypto import make_simple_hash from mig.shared.settings import load_settings, load_profile from mig.shared.url import urlencode @@ -66,25 +53,16 @@ def anon_user_id(user_id): def list_users(configuration): """Return a list of all users by listing the user homes in user_home. - Uses scandir for efficiency when available. + Uses scandir for efficiency. 
""" users = [] - if scandir: - children = scandir(configuration.user_home) - else: - children = os.listdir(configuration.user_home) + children = scandir(configuration.user_home) for entry in children: # skip all files and dot dirs - they are NOT users - if scandir: - name = entry.name - path = entry.path - if entry.is_symlink() or not entry.is_dir(): - continue - else: - name = entry - path = os.path.join(configuration.user_home, name) - if os.path.islink(path) or not os.path.isdir(path): - continue + name = entry.name + path = entry.path + if entry.is_symlink() or not entry.is_dir(): + continue if name.startswith('.'): continue From 18659ff88e9aa3e9666ca334570af1e36b5a9d43 Mon Sep 17 00:00:00 2001 From: Jonas Bardino Date: Tue, 27 Jan 2026 14:43:30 +0100 Subject: [PATCH 3/3] Eliminate the definition and use of `slow_walk` and `slow_listdir` helpers now that we just use efficient os.X versions everywhere. --- mig/shared/fileio.py | 8 ++++---- mig/shared/gridscript.py | 11 ++++------- mig/shared/gridstat.py | 10 +++------- mig/shared/userio.py | 6 ++---- mig/shared/vgrid.py | 4 +--- 5 files changed, 14 insertions(+), 25 deletions(-) diff --git a/mig/shared/fileio.py b/mig/shared/fileio.py index 175eb5580..c9e3a8e4e 100644 --- a/mig/shared/fileio.py +++ b/mig/shared/fileio.py @@ -41,10 +41,10 @@ import time import zipfile -# NOTE: We expose optimized walk function directly for ease and efficiency. -# The functions are built-in and optimized since python 3 but several -# modules need to adjust before we can eliminate this old workaround. -slow_walk, slow_listdir = False, False +# NOTE: We expose efficient walk functions directly as a leftover from back +# when we hooked up scandir versions on python 2 when the native ones +# were highly inefficient. The native functions in the os module are +# optimized since python 3 but several modules still import from here. 
listdir, scandir, walk = os.listdir, os.scandir, os.walk try: diff --git a/mig/shared/gridscript.py b/mig/shared/gridscript.py index 7a167e1ac..171cc192c 100644 --- a/mig/shared/gridscript.py +++ b/mig/shared/gridscript.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # gridscript - main script helper functions -# Copyright (C) 2003-2023 The MiG Project lead by Brian Vinter +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. # @@ -36,7 +36,7 @@ from mig.shared.base import client_id_dir from mig.shared.defaults import job_output_dir, ignore_file_names from mig.shared.fileio import send_message_to_grid_script, pickle, unpickle, \ - delete_file, touch, walk, slow_walk + delete_file, touch, walk from mig.shared.notification import notify_user_thread @@ -107,9 +107,6 @@ def check_mrsl_files( check_mrsl_files_start_time = time.time() - if slow_walk: - logger.warning("no optimized walk available - using old os.walk") - # TODO: switch to listdir or glob? 
all files are in mrsl_files_dir/*/*.mRSL for (root, _, files) in walk(configuration.mrsl_files_dir): @@ -202,7 +199,7 @@ def check_mrsl_files( timestamp=check_mrsl_files_start_time) check_mrsl_files_end_time = time.time() logger.info('finished checking for mRSL files in %fs' % - (check_mrsl_files_end_time-check_mrsl_files_start_time)) + (check_mrsl_files_end_time - check_mrsl_files_start_time)) def remove_jobrequest_pending_files(configuration, only_new=True): @@ -247,7 +244,7 @@ def remove_jobrequest_pending_files(configuration, only_new=True): check_pending_files_end_time = time.time() logger.info('finished cleaning pending jobrequests in %fs' % - (check_pending_files_end_time-check_pending_files_start_time)) + (check_pending_files_end_time - check_pending_files_start_time)) def server_cleanup( diff --git a/mig/shared/gridstat.py b/mig/shared/gridstat.py index 6854facce..7ce88ce5c 100644 --- a/mig/shared/gridstat.py +++ b/mig/shared/gridstat.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # gridstat - grid monitor helpers -# Copyright (C) 2003-2021 The MiG Project lead by Brian Vinter +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. 
# @@ -36,7 +36,7 @@ import os from mig.shared.defaults import default_vgrid, pending_states -from mig.shared.fileio import pickle, unpickle, touch, walk, slow_walk +from mig.shared.fileio import pickle, unpickle, touch, walk from mig.shared.serial import pickle as py_pickle from mig.shared.vgrid import validated_vgrid_list, job_fits_res_vgrid @@ -470,10 +470,6 @@ def update(self): # Traverse mRSL dir and update cache - if slow_walk: - self.__logger.warning( - "no optimized walk available - using old os.walk") - for (root, _, files) in walk(root_dir, topdown=True): # skip all dot dirs - they are from repos etc and _not_ jobs @@ -543,7 +539,7 @@ def update(self): if root.find(os.sep + '.') != -1: continue - for name in fnmatch.filter(files, job_id+'.mRSL'): + for name in fnmatch.filter(files, job_id + '.mRSL'): filename = os.path.join(root, name) job_dict = unpickle(filename, configuration.logger) if not job_dict: diff --git a/mig/shared/userio.py b/mig/shared/userio.py index e2330a0b1..4768c06d3 100644 --- a/mig/shared/userio.py +++ b/mig/shared/userio.py @@ -4,7 +4,7 @@ # --- BEGIN_HEADER --- # # userio - wrappers to keep user file I/O in a single replaceable module -# Copyright (C) 2003-2025 The MiG Project by the Science HPC Center at UCPH +# Copyright (C) 2003-2026 The MiG Project by the Science HPC Center at UCPH # # This file is part of MiG. 
# @@ -40,7 +40,7 @@ from mig.shared.base import invisible_path from mig.shared.defaults import trash_destdir, trash_linkname -from mig.shared.fileio import walk, slow_walk +from mig.shared.fileio import walk from mig.shared.gdp.all import get_project_from_client_id, project_log from mig.shared.vgrid import in_vgrid_legacy_share, in_vgrid_writable, \ in_vgrid_priv_web, in_vgrid_pub_web @@ -208,8 +208,6 @@ def prepare_changes(configuration, operation, changeset, action, path, # Use walk for recursive dir path - silently ignored for file path if not recursive or not os.path.isdir(path): return pending_path - if slow_walk: - _logger.warning("no optimized walk available - using old os.walk") _logger.info('%s walking: %s' % (operation, [path])) for (root, dirs, files) in walk(path, topdown=False, followlinks=True): for (kind, target) in [('files', files), ('dirs', dirs)]: diff --git a/mig/shared/vgrid.py b/mig/shared/vgrid.py index c25503536..c09822812 100644 --- a/mig/shared/vgrid.py +++ b/mig/shared/vgrid.py @@ -47,7 +47,7 @@ vgrid_nest_sep, _dot_vgrid from mig.shared.fileio import make_symlink, move, check_readonly, check_writable, \ check_write_access, unpickle, acquire_file_lock, release_file_lock, walk, \ - slow_walk, remove_rec, move_rec, delete_symlink + remove_rec, move_rec, delete_symlink from mig.shared.findtype import is_user, is_resource from mig.shared.handlers import get_csrf_limit, make_csrf_token from mig.shared.htmlgen import html_post_helper @@ -683,8 +683,6 @@ def vgrid_list_vgrids(configuration, include_default=True, root_vgrid=''): vgrids_list = [] search_root = os.path.join(configuration.vgrid_home, root_vgrid.strip(os.sep)) - if slow_walk: - _logger.warning("no optimized walk available - using old os.walk") for (root, dirs, _) in walk(search_root): # skip all dot dirs - they are from repos etc and _not_ vgrids