Skip to content

Config

Configuration management for slurm-viewer

Classes:

Name Description
Cluster

Cluster definition.

Config

Application config

Tabs

Available tabs

UiSettings

UI settings for the various widgets

Functions:

Name Description
create_default_config

Create a default config.

get_config_filename

Get the path for the config file using these steps:

Cluster

Bases: BaseModel

Cluster definition.

Attributes:

Name Type Description
ignore_partitions list[str]

Partitions to ignore from partitions list.

name str

Name of the cluster for display.

node_name_ignore_prefix list[str]

Prefixes to strip from node names (shortens names if a prefix is used, e.g. 'res-hpc-').

partitions list[str]

Partitions to use when retrieving data.

servers list[str]

List of server names to use, all pointing to the same server (multiple logon nodes for the same cluster).

tabs list[Tabs]

Which tabs to display.

ignore_partitions

ignore_partitions: list[str] = Field(default_factory=list)

Partitions to ignore from partitions list.

name

name: str = ''

Name of the cluster for display.

node_name_ignore_prefix

node_name_ignore_prefix: list[str] = Field(
    default_factory=list
)

Prefixes to strip from node names (shortens names if a prefix is used, e.g. 'res-hpc-').

partitions

partitions: list[str] = Field(default_factory=list)

Partitions to use when retrieving data.

servers

servers: list[str] = Field(
    default=[],
    validation_alias=AliasChoices("server", "servers"),
    description="List of servers to use.",
)

List of server names to use, all pointing to the same server (multiple logon nodes for the same cluster).

tabs

tabs: list[Tabs] = Field(default=[NODES, JOBS, GPU])

Which tabs to display.

Config

Bases: BaseModel

Application config

Methods:

Name Description
get_cluster

Get cluster definition by name.

init

Load the config from the settings file, creating a default config if no settings file is found.

load

Load config from file.

Attributes:

Name Type Description
clusters list[Cluster]

List of clusters to display.

config_file Path

Get the path to the config file.

ui UiSettings

The UI settings.

clusters

clusters: list[Cluster] = Field(default_factory=list)

List of clusters to display.

config_file

config_file: Path

Get the path to the config file.

ui

The UI settings.

get_cluster

get_cluster(name: str) -> Cluster | None

Get cluster definition by name.

Source code in src/slurm_viewer/data/config.py
def get_cluster(self, name: str) -> Cluster | None:
    """ Look up a cluster definition by its name.

    Returns the first entry of ``self.clusters`` whose ``name`` equals *name*,
    or ``None`` when no cluster matches.
    """
    matching = (candidate for candidate in self.clusters if candidate.name == name)
    return next(matching, None)

init

init() -> Config

Load the config from the settings file, creating a default config if no settings file is found.

Source code in src/slurm_viewer/data/config.py
@classmethod
def init(cls) -> Config:
    """ Load the application config, falling back to a freshly created default.

    The settings file is located with ``get_config_filename`` (starting from
    'settings.toml') and loaded. If that raises ``FileNotFoundError``, a default
    config is created with ``create_default_config`` instead.

    Raises:
        RuntimeError: no config was obtained (``create_default_config`` returned ``None``).
    """
    try:
        loaded: Config | None = Config.load(get_config_filename(Path('settings.toml')))
    except FileNotFoundError:
        loaded = create_default_config()

    # Guard clause: only the fallback path can yield None.
    if loaded is None:
        raise RuntimeError('Settings file could not be loaded.')

    return loaded

load

load(_filename: Path | str) -> Config

Load config from file.

Source code in src/slurm_viewer/data/config.py
@classmethod
def load(cls, _filename: Path | str) -> Config:
    """ Read the TOML settings file at *_filename* and build a Config from it.

    When the loaded config has no node filter rules, a built-in set of five rules
    is installed. The resolved absolute path of the file is stored on the returned
    instance as ``_config_file``.

    Raises:
        FileNotFoundError: *_filename* does not exist.
        RuntimeError: the file content could not be parsed as TOML.
    """
    settings_path = Path(_filename)
    if not settings_path.exists():
        message = f'Settings file "{settings_path.absolute().resolve()}" does not exist.'
        raise FileNotFoundError(message)

    try:
        parsed = tomlkit.loads(settings_path.read_text(encoding='utf-8'))
        setting = Config(**cast(dict, parsed))

        if len(setting.ui.nodes_filter_rules) == 0:
            # Nothing configured: fall back to the built-in filter rules.
            fallback_rules = [
                FilterRule(False, 'Is Healthy', 'healthy', UnaryOperator.PRESENT),
                FilterRule(False, 'Is Available', 'available', UnaryOperator.PRESENT),
                FilterRule(False, 'Is GPU Node', 'gpu_tot', UnaryOperator.PRESENT),
                FilterRule(False, '# GPU Available', 'gpu_avail', BinaryOperator.GE, 1),
                FilterRule(False, '# CPU Available', 'cpu_avail', BinaryOperator.GE, 4),
            ]
            setting.ui.nodes_filter_rules = fallback_rules
    except (tomlkit.exceptions.ParseError, tomlkit.exceptions.UnexpectedCharError) as e:
        raise RuntimeError(f'Error parsing settings file: {_filename}: {e}.') from e  # noqa: EM102

    # Remember where this config came from.
    setting._config_file = settings_path.absolute().resolve()
    return setting

Tabs

Bases: str, Enum

Available tabs

Attributes:

Name Type Description
GPU

GPU usage widget

JOBS

Jobs/Queue widget

NODES

Nodes widget

STATUS

Status widget

GPU

GPU = 'gpu'

GPU usage widget

JOBS

JOBS = 'jobs'

Jobs/Queue widget

NODES

NODES = 'nodes'

Nodes widget

STATUS

STATUS = 'status'

Status widget

UiSettings

Bases: BaseModel

UI settings for the various widgets

Attributes:

Name Type Description
auto_refresh bool

Auto-refresh data

node_columns list[str]

Columns for the nodes widget.

priority_columns list[str]

Columns for the priority widget.

queue_columns list[str]

Columns for the queue widget.

refresh_interval int

Refresh interval in seconds.

user_only bool

Display only current user jobs or all jobs.

auto_refresh

auto_refresh: bool = False

Auto-refresh data

node_columns

node_columns: list[str] = Field(default_factory=list)

Columns for the nodes widget.

priority_columns

priority_columns: list[str] = Field(default_factory=list)

Columns for the priority widget.

queue_columns

queue_columns: list[str] = Field(default_factory=list)

Columns for the queue widget.

refresh_interval

refresh_interval: int = 30

Refresh interval in seconds.

user_only

user_only: bool = False

Display only current user jobs or all jobs.

create_default_config

create_default_config() -> Config | None

Create a default config.

Source code in src/slurm_viewer/data/config.py
def create_default_config() -> Config | None:
    """ Create a default config and write it to '~/.config/slurm-viewer/settings.toml'.

    The config is populated with default node, queue and priority columns, the
    built-in node filter rules and a single empty cluster definition. The parent
    directory of the settings file is created when missing.

    If the settings file already exists the user is asked on stdin whether to
    overwrite it. Answering 'n' or 'no' (case-insensitive, surrounding whitespace
    ignored) keeps the existing file; any other answer overwrites it.

    Returns:
        The generated config, or ``None`` when the user declined to overwrite an
        existing settings file.
    """
    config = Config()
    config.ui.node_columns = [
        "node_name",
        "state",
        "gpu_tot",
        "gpu_alloc",
        "gpu_avail",
        "gpu_type",
        "gpu_mem",
        "cpu_tot",
        "cpu_alloc",
        "cpu_avail",
        "mem_tot",
        "mem_avail",
        "cpu_gpu",
        "mem_gpu",
        "partitions",
        "active_features"
    ]
    config.ui.queue_columns = [
        "user",
        "job_id",
        "reason",
        "exec_host",
        "start_time",
        "submit_time",
        "start_delay",
        "run_time",
        "time_limit",
        "command",
        "work_dir"
    ]
    config.ui.priority_columns = [
        "user_name",
        "job_id",
        "job_priority_n",
        "age_n",
        "fair_share_n",
        "partition_name"
    ]

    config.ui.nodes_filter_rules = [
        FilterRule(False, 'Is Healthy', 'healthy', UnaryOperator.PRESENT),
        FilterRule(False, 'Is Available', 'available', UnaryOperator.PRESENT),
        FilterRule(False, 'Is GPU Node', 'gpu_tot', UnaryOperator.PRESENT),
        FilterRule(False, '# GPU Available', 'gpu_avail', BinaryOperator.GE, 1),
        FilterRule(False, '# CPU Available', 'cpu_avail', BinaryOperator.GE, 4),
    ]

    # pylint: disable=no-member
    config.clusters.append(Cluster())
    # pylint: enable=no-member

    config_path = Path('~/.config/slurm-viewer/settings.toml').expanduser().resolve()
    config_path.parent.mkdir(exist_ok=True, parents=True)

    if config_path.exists():
        overwrite = input('Config file already exists, overwrite? [Y/n] ')
        # Accept 'n', 'N', 'no', ' n ' ... as a refusal so that an existing
        # config is not destroyed by a slightly different spelling of "no".
        if overwrite.strip().lower() in ('n', 'no'):
            print(f'Skipping config file generation, file already exists: {config_path}')  # noqa: T201
            return None

    # Build and serialise the document BEFORE opening the file: opening with 'w'
    # truncates it, so a serialisation error would otherwise leave an empty file.
    doc = tomlkit.document()
    ui = tomlkit.table()
    ui.update(**config.ui.model_dump())
    doc['ui'] = ui

    clusters = tomlkit.aot()
    cluster = Cluster().model_dump()
    # NOTE(review): 'servers' is declared as list[str] but the template writes the
    # string 'None' as a placeholder -- confirm the model accepts this on reload.
    cluster['servers'] = 'None'
    cluster['tabs'] = ['nodes', 'jobs']
    clusters.append(tomlkit.item(cluster))
    doc['clusters'] = clusters

    content = tomlkit.dumps(doc, sort_keys=True)

    with open(config_path, 'w', encoding='utf-8') as settings_file:
        settings_file.write(content)
    print(f'Config file generated: {config_path}')  # noqa: T201

    return config

get_config_filename

get_config_filename(filename: Path) -> Path

Get the path for the config file using these steps:

- Use the 'SLURM_VIEW_CONFIG' environment variable
- Use the path provided (if it exists)
- Use the '~/.config/slurm-viewer/settings.toml' file (if it exists)

Source code in src/slurm_viewer/data/config.py
def get_config_filename(filename: Path) -> Path:
    """ Get the path for the config file using these steps:
    - Use the 'SLURM_VIEW_CONFIG' environment variable (if set, non-empty and the file exists)
    - Use the path provided (if it exists)
    - Use the '~/.config/slurm-viewer/settings.toml' file (if it exists)

    The first candidate that exists is returned.

    Raises:
        FileNotFoundError: none of the candidates exists.
    """
    # Keep the environment candidate in its own variable: overwriting `filename`
    # would make the "path provided" step below check the environment path again.
    env_value = os.environ.get('SLURM_VIEW_CONFIG')
    if env_value:  # an empty value would resolve to the current directory
        env_path = Path(env_value)
        if env_path.exists():
            return env_path

    if filename.exists():
        return filename

    home_default = Path.home() / '.config/slurm-viewer/settings.toml'
    if home_default.exists():
        return home_default

    raise FileNotFoundError('Settings file could not be found. ')

Example

# General settings
[ui]
# Columns for the nodes widget.
node_columns = [
    "node_name",
    "state",
    "gpu_tot",
    "gpu_avail",
    "gpu_type",
    "gpu_mem",
    "cpu_tot",
    "cpu_avail",
    "mem_tot",
    "mem_avail",
    "load_norm",
    "reason",
    "available_features"
]
# Columns for the queue widget.
queue_columns = [
    "user",
    "account",
    "exec_host",
    "reason",
    "start_delay",
    "run_time",
    "time_limit",
    "req_nodes",
    "excluded_nodes",
    "cpus",
    "name",
    "partition",
]
# Refresh interval in seconds.
refresh_interval = 10
# NOTE(review): the two values below are quoted TOML strings, not booleans;
# presumably the config model coerces them to bool on load -- confirm, or use a bare false.
# Auto-refresh data.
auto_refresh = "False"
# Display only current user jobs or all jobs.
user_only = "False"

# One [[clusters]] table per cluster to display.
[[clusters]]
# Name of the cluster for display.
name = "MySlurm"
# Partitions to use when retrieving data.
partitions = ["gpu-short", "gpu-medium", "gpu-long"]
# Partitions to ignore from the partitions list.
ignore_partitions = []
# Prefixes to strip from node names.
node_name_ignore_prefix = ["res-hpc-"]
# Server names to use (multiple logon nodes for the same cluster).
servers = ["logon_node_1", "logon_node_2"]
# Which tabs to display.
tabs = ["nodes", "jobs", "gpu", "status"]