Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion docs/config_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,22 @@ System Partition Configuration
List of hosts in a partition that uses the ``ssh`` scheduler.


.. py:attribute:: systems.partitions.sched_options.slurm_multi_cluster_mode

:required: No
:default: ``[]``

List of Slurm clusters to poll for submitted jobs.

If empty, only the local cluster is considered.
If the single value ``all`` is passed, then all clusters will be considered.
The cluster names are joined with commas and passed directly as the argument of Slurm's ``-M`` option to the ``sacct`` or ``squeue`` commands.

This option is relevant only for the Slurm backends.

.. versionadded:: 4.10


.. py:attribute:: systems.partitions.sched_options.ignore_reqnodenotavail

:required: No
Expand Down Expand Up @@ -1647,7 +1663,7 @@ The additional properties for the ``httpjson`` handler are the following:
These may depend on the server configuration.

.. note::
If you specify an authorization header here, it will be evaluated at the start of the test session and potentially expire.
If you specify an authorization header here, it will be evaluated at the start of the test session and potentially expire.
Consider using the :attr:`~config.logging.handlers_perflog..httpjson..authorization_header` parameter instead for dynamic authorization headers.

.. versionadded:: 4.2
Expand Down
18 changes: 15 additions & 3 deletions reframe/core/schedulers/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def __init__(self):
self._sched_access_in_submit = self.get_option(
'sched_access_in_submit'
)
self._multi_clusters = self.get_option('slurm_multi_cluster_mode')
self._available_states = {
'ALLOCATED',
'COMPLETING',
Expand All @@ -155,6 +156,15 @@ def __init__(self):
'RESERVED'
}

# Define the base sacct and squeue commands to account for Slurm's
# multiple cluster mode if enabled
self._sacct = 'sacct'
self._squeue = 'squeue'
if self._multi_clusters:
clusters = ",".join(self._multi_clusters)
self._sacct += f' -M {clusters}'
self._squeue += f' -M {clusters}'

def make_job(self, *args, **kwargs):
    """Create the job object used by this scheduler backend.

    All positional and keyword arguments are forwarded unchanged to
    ``_SlurmJob``, and the resulting instance is returned.
    """
    job = _SlurmJob(*args, **kwargs)
    return job

Expand Down Expand Up @@ -491,7 +501,7 @@ def poll(self, *jobs):
)
try:
completed = _run_strict(
f'sacct -S {t_start} -P '
f'{self._sacct} -S {t_start} -P '
f'-j {",".join(job.jobid for job in jobs)} '
f'-o jobid,state,exitcode,end,nodelist'
)
Expand Down Expand Up @@ -570,7 +580,9 @@ def _cancel_if_blocked(self, job, reasons=None):
return

if not reasons:
completed = osext.run_command('squeue -h -j %s -o %%r' % job.jobid)
completed = osext.run_command(
f'{self._squeue} -h -j {job.jobid} -o %r'
)
reasons = completed.stdout.splitlines()
if not reasons:
# Can't retrieve job's state. Perhaps it has finished already
Expand Down Expand Up @@ -677,7 +689,7 @@ def poll(self, *jobs):
# finished already, squeue might return an error about an invalid
# job id.
completed = osext.run_command(
f'squeue -h -j {",".join(job.jobid for job in jobs)} '
f'{self._squeue} -h -j {",".join(job.jobid for job in jobs)} '
f'-o "%%i|%%T|%%N|%%r"'
)

Expand Down
5 changes: 5 additions & 0 deletions reframe/schemas/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@
"type": "array",
"items": {"type": "string"}
},
"slurm_multi_cluster_mode": {
"type": "array",
"items": {"type": "string"}
},
"sched_access_in_submit": {"type": "boolean"},
"unqualified_hostnames": {"type": "boolean"},
"use_nodes_option": {"type": "boolean"}
Expand Down Expand Up @@ -708,6 +712,7 @@
"systems*/sched_options/job_submit_timeout": 60,
"systems*/sched_options/max_sacct_failures": 3,
"systems*/sched_options/sched_access_in_submit": false,
"systems*/sched_options/slurm_multi_cluster_mode": [],
"systems*/sched_options/ssh_hosts": [],
"systems*/sched_options/resubmit_on_errors": [],
"systems*/sched_options/unqualified_hostnames": false,
Expand Down
Loading