diff --git a/data/machines.json b/data/machines.json index 1ebfd2b..503c69b 100644 --- a/data/machines.json +++ b/data/machines.json @@ -1,54 +1,959 @@ -{ - "clusters": - - { "sherlock":{ - - "name":"sherlock", - "nodes": { - "maxnnodes":8000, - "maxnsockets":2, - "maxncores":24, - "maxnht":2, - "nranks_bc":50000 - }, - "queues":[ - { - "name":"normal", - "maxtime":"24:00:00" - }, - { - "name":"long", - "maxtime":"72:00:00" - }, - { - "name":"dev", - "maxtime":"1:00:00" - } - ] +{ + "clusters": { + "sherlock": { + "partitions": { + "jianq": { + "max": { + "mem-per-cpu": "4096" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0" + } + }, + "yamins": { + "max": { + "mem-per-cpu": "13107" + }, + "default": { + "mem-per-cpu": "12800", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "hns": { + "qos": [ + "normal", + "hns", + "system" + ], + "default": { + "qos": "hns", + "time": "2:0:0", + "mem-per-cpu": "4000" + } + }, + "whwong": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + } + }, + "owners": { + "qos": [ + "normal", + "system" + ], + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0" + } + }, + "mcovert": { + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "4096" + } + }, + "hns_gpu": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0", + "qos": "hns_gpu" + } + }, + "pelc": { + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "8192" + } + }, + "normal": { + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "4096" + } + }, + "fkessler": { + "max": { + 
"mem-per-cpu": "16384" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "16000" + } + }, + "rbaltman": { + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0", + "qos": "rbaltman" + }, + "qos": [ + "normal", + "rbaltman", + "system" + ] + }, + "stat": { + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "diffenbaugh": { + "default": { + "mem-per-cpu": "4000", + "qos": "diffenbaugh", + "time": "2:0:0" + }, + "qos": [ + "normal", + "diffenbaugh", + "system" + ], + "max": { + "mem-per-cpu": "4096" + } + }, + "spalumbi": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "16000", + "time": "2:0:0" + }, + "max": { + "mem-per-cpu": "16384" + } + }, + "jhyoon1": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + }, + "max": { + "mem-per-cpu": "8192" + } + }, + "delp": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + } + }, + "msalit": { + "max": { + "mem-per-cpu": "8192" + }, + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "pande": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + }, + "max": { + "mem-per-cpu": "8192" + } + }, + "chetty": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "16000" + }, + "max": { + "mem-per-cpu": "16384" + } + }, + "rondror": { + "default": { + "time": "2:0:0", + "qos": "rondror", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "rondror", + "rondror_high", + "system" + ] + }, + "pritch": { + "default": { + "time": "2:0:0", + "mem-per-cpu": "16000" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "16384" + } 
+ }, + "hbfraser": { + "qos": [ + "normal", + "hbfraser", + "system" + ], + "default": { + "mem-per-cpu": "4000", + "qos": "hbfraser", + "time": "2:0:0" + } + }, + "ibiis": { + "qos": [ + "normal", + "system" + ], + "default": { + "mem-per-cpu": "16000", + "time": "2:0:0", + "qos": "ibiis" + }, + "max": { + "mem-per-cpu": "16384" + } + }, + "biochem": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "max": { + "mem-per-cpu": "4096" + } + }, + "brunger": { + "max": { + "mem-per-cpu": "16384" + }, + "default": { + "qos": "brunger", + "time": "2:0:0", + "mem-per-cpu": "16000" + }, + "qos": [ + "normal", + "brunger", + "system" + ] + }, + "djames": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + } + }, + "eriking": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "eriking", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0", + "qos": "eriking" + } + }, + "agitler": { + "max": { + "mem-per-cpu": "8192" + }, + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "shenoy": { + "max": { + "mem-per-cpu": "16384" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "16000" + } + }, + "iric": { + "qos": [ + "normal", + "iric", + "system" + ], + "default": { + "time": "2:0:0", + "qos": "iric", + "mem-per-cpu": "4000" + } + }, + "dev": { + "qos": [ + "normal", + "dev", + "system" + ], + "default": { + "time": "1:0:0", + "qos": "dev", + "mem-per-cpu": "4000" + }, + "max": { + "time": "0-2:0:0", + "mem-per-cpu": "4096" + } + }, + "jonfan": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + }, + "max": { + "mem-per-cpu": "8192" + } + }, + "dpwall": { + "default": { + "time": "2:0:0", + "mem-per-cpu": 
"8000" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "8192" + } + }, + "schneidr": { + "max": { + "mem-per-cpu": "16384" + }, + "default": { + "mem-per-cpu": "16000", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "jduchi": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + } + }, + "haiwang": { + "max": { + "mem-per-cpu": "4096" + }, + "qos": [ + "normal", + "haiwang", + "system" + ], + "default": { + "time": "2:0:0", + "qos": "haiwang", + "mem-per-cpu": "4000" + } + }, + "abutte": { + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0" + }, + "qos": [ + "normal", + "long", + "system" + ], + "max": { + "mem-per-cpu": "8192" + } + }, + "test": { + "default": { + "mem-per-cpu": "4000" + }, + "max": { + "mem-per-cpu": "4096" + } + }, + "euan": { + "max": { + "mem-per-cpu": "8192" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0", + "qos": "owner" + } + }, + "cees": { + "max": { + "mem-per-cpu": "4096" + }, + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "lmackey": { + "max": { + "mem-per-cpu": "16384" + }, + "default": { + "time": "2:0:0", + "mem-per-cpu": "16000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "russpold": { + "default": { + "mem-per-cpu": "8000", + "time": "2:0:0", + "qos": "russpold" + }, + "qos": [ + "normal", + "russpold", + "russpold_interactive", + "system" + ], + "max": { + "mem-per-cpu": "8192" + } + }, + "kornberg": { + "default": { + "time": "2:0:0", + "qos": "kornberg", + "mem-per-cpu": "8000" + }, + "qos": [ + "normal", + "kornberg", + "system" + ] + }, + "amarsden": { + "max": { + "mem-per-cpu": "4096" + }, + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + 
"bigmem": { + "qos": [ + "normal", + "bigmem", + "system" + ], + "default": { + "qos": "bigmem", + "time": "2:0:0", + "mem-per-cpu": "48000" + }, + "max": { + "mem-per-cpu": "49152" + } + }, + "manishad": { + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "long", + "system" + ] + }, + "cee": { + "default": { + "time": "2:0:0", + "qos": "cee", + "mem-per-cpu": "4000" + }, + "qos": [ + "normal", + "cee", + "system" + ], + "max": { + "mem-per-cpu": "4096" + } + }, + "dpetrov": { + "max": { + "mem-per-cpu": "4096" + }, + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + } + }, + "cbohon": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "4000" + }, + "max": { + "mem-per-cpu": "4096" + } + }, + "gpu": { + "max": { + "mem-per-cpu": "16384" + }, + "qos": [ + "normal", + "gpu", + "system" + ], + "default": { + "mem-per-cpu": "16000", + "time": "2:0:0", + "qos": "gpu" + } + }, + "horence": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0" + } + }, + "mc": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "time": "2:0:0", + "mem-per-cpu": "8000" + }, + "max": { + "mem-per-cpu": "8192" + } + }, + "wolak": { + "qos": [ + "normal", + "long", + "system" + ], + "default": { + "mem-per-cpu": "4000", + "time": "2:0:0" + }, + "max": { + "mem-per-cpu": "4096" + } + } }, - "sherlock2": { - "name":"sherlock2", - "nodes": { - "maxnnodes":8000, - "maxnsockets":2, - "maxncores":24, - "maxnht":2, - "nranks_bc":50000 - }, - "queues":[ - { - "name":"normal", - "maxtime":"24:00:00" + "qos": { + "hns_gpu": { + "min": { + "gres": { + "gpu": "1" }, - { - "name":"long", - "maxtime":"72:00:00" - }, - { - "name":"dev", - "maxtime":"1:00:00" + "cpu": "1" + }, + "max": { + "wall": "2-00:00:00" + } + }, + "normal": { + "max": { + "cpu": "512" + }, + "min": { + "cpu": "1" + } + }, 
+ "rbaltman": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "haiwang": { + "max": { + "wall": "14-00:00:00" + }, + "min": { + "cpu": "1" + } + }, + "hns": { + "max": { + "wall": "14-00:00:00" + } + }, + "kornberg": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "diffenbaugh": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "russpold": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "long": { + "max": { + "cpu": "128" + }, + "min": { + "cpu": "1" + } + }, + "cee": { + "max": { + "wall": "7-00:00:00" + }, + "min": { + "cpu": "1" + } + }, + "brunger": { + "max": { + "wall": "7-00:00:00" + }, + "min": { + "cpu": "1" + } + }, + "ibiis": { + "max": { + "wall": "2-00:00:00" + } + }, + "system": { + "min": { + "cpu": "1" + } + }, + "hbfraser": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "rondror_high": { + "max": { + "wall": "2-00:00:00" + }, + "min": { + "cpu": "1" + } + }, + "rondror": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "14-00:00:00" + } + }, + "bigmem": { + "max": { + "cpu": "32", + "node": "1" + }, + "min": { + "mem": "64G", + "cpu": "1" + } + }, + "owner": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + }, + "russpold_interactive": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "2-00:00:00" + } + }, + "iric": { + "max": { + "wall": "7-00:00:00" + }, + "min": { + "cpu": "1" + } + }, + "dev": { + "max": { + "node": "2", + "cpu": "2" + }, + "min": { + "cpu": "1" + } + }, + "gpu": { + "min": { + "cpu": "1", + "gres": { + "gpu": "1" } - ] + }, + "max": { + "node": "16", + "cpu": "32" + } + }, + "eriking": { + "min": { + "cpu": "1" + }, + "max": { + "wall": "7-00:00:00" + } + } + }, + "features": [ + "CPU_SNB", + "E5-4640", + "2.40GHz", + "NOACCL", + "CPU_IVY", + "E5-2650v2", + "2.60GHz", + "CPU_HSW", + "E5-2640v3", + "E7-4830v3", + "2.10GHz", + "E5-2698v3", + "2.30GHz", + "E5-2680v3", 
+ "2.50GHz", + "E5-2695v3", + "E5-2640v2", + "2.00GHz", + "GPU_KPL", + "K20X", + "k20x", + "TITAN_BLACK", + "titanblack", + "GPU_MXW", + "TITAN_X", + "titanx", + "K80" + ], + "gres": { + "gpu": [ + "tesla", + "gtx" + ] + }, + "users": { + "*": [ + "normal", + "dev", + "bigmem", + "gpu" + ] } - } + } + } } diff --git a/slurm2json/README.md b/slurm2json/README.md new file mode 100644 index 0000000..cf06be7 --- /dev/null +++ b/slurm2json/README.md @@ -0,0 +1,148 @@ +# slurm2json: Slurm config, QOS, and account converter. + +`slurm2json` is used to convert SLURM configuration into a JSON file that +job-maker can read. + +SLURM configuration is a living thing. During the life of a cluster, several +things regularly take place: + +* New nodes are added, sometime with new features. + +* Partitions are created, typically with the addition of new nodes. + +* New users are created, and added to accounts & groups. + +* Users are removed. + +All of the above actions will cause changes to SLURM's "static" configuration, +as well as to QOS settings, account membership, etc.. + +As SLURM configuration is not static, it's not possible to create a job-maker +JSON file once, and then leave it alone forever. It needs to be a living +thing. For that reason, there needs to be an automated way to read the +information from a cluster, and output the necessary JSON. + +`slurm2json` exists to read the information from a cluster, and output an +intermediary JSON file, which can then be merged with other JSON files to +create the `machines.json` file which job-maker requires. + +# Requirements + +This code uses three modules from CPAN: + +* `JSON` + +* `List::Util`, version 1.45 or later. + +* `Text::CSV` + +Although `List::Util` is a core module, your stock Perl might not have a +new-enough version, so you'll want to pull the latest from CPAN. + +You also need to be running this on a system that has the SLURM client +installed, and which has been configured to communicate with your cluster. 
+`sacctmgr` is used to gather a number of things from SLURM. + +Also, you should (if possible) run this as root. The reason is, `sacctmgr` +will not provide account information to anyone other than root. If you don't +run this as root, then some of the user access information will be missing: +Specifically, partition access that is granted via `AllowAccounts` won't be +processed. + +# How To Run + +Running slurm2json can be as simple as running… + + perl -wT slurm2json.pl < /etc/slurm/slurm.conf > cluster.json + +… or even … + + ./slurm2json.pl < /etc/slurm/slurm.conf > cluster.json + +*However*, if you are using something like `local::lib`—where Perl modules are +outside of system module paths—then you'll need to explicitly specify those +paths on the command line, using the `-I` option. For example… + + perl -wT -I/home/akkornel/perl5/lib/perl5 -I/home/akkornel/perl5/lib/perl5/5.24.0/x86_64-linux-thread-multi -I/home/akkornel/perl5/lib/perl5/x86_64-linux-thread-multi slurm2json.pl < /etc/slurm/slurm.conf > cluster.json + +This is needed because `slurm2json` runs in Taint mode for safety, and taint +mode ignores non-system paths unless they are explicitly specified on the +command-line. + +## Non-Standard SLURM paths + +`slurm2json` calls `sacctmgr` to get information from SLURM. It assumes that +`sacctmgr` can be found at path `/usr/bin/sacctmgr`. If that is not true, you +will need to change the definition of `$SACCTMGR_PATH`, which is defined near +the top of the file. + +Note that because this code uses Taint mode, the `PATH` environment variable is +not trusted. So, the full path must be specified. + +# Making machines.json + +`slurm2json` outputs an intermediary JSON file, where the root object is a dict +containing a single key: The name of the cluster. The value is another dict, +which contains the cluster-specific entries that we're used to. 
+ + job-maker's `machines.json` also requires that the root object be a dict, but + it expects the root dict to have a key named `clusters`. That key's value is a + dict, which contains one key for each cluster. So, the intermediate output of + `slurm2json` needs to be "pushed down a level", and combined with all of the + other intermediate JSONs, to make a single `machines.json` file. + + The easiest way to manipulate the JSON is to use `jq`. + + If you only have one cluster, you can use this command: + + jq '{ clusters : . }' slurm2json_output.json > machines.json + + That command essentially says, "Output a dict, with one entry, whose key is + 'clusters', and whose value is whatever input is coming in." + + If you have multiple clusters, you'll need to run `slurm2json` on each cluster + (to generate the intermediate JSON files), and then combine the outputs, like so: + + jq -s 'add | { clusters : . }' slurm2json_output1.json slurm2json_output2.json ... > machines.json + + This command is more complicated. The `-s` option "slurps" in all of the + input, making `.` an array of dicts. The `add` filter takes that array of + dicts, and merges them into a single dict. This is OK, because the top-level + keys are cluster names, and clusters *should* be using different names. The + new dict is then processed normally, as if we were working with a single + intermediate JSON file. + + # To-Do + + There are a number of things which aren't done yet. + + * **DenyAccounts**: The `DenyAccounts` partition option isn't being processed + right now. + + * **Groups Resolution**: SLURM is able to restrict partition access to members + of a particular UNIX group. + + The code already builds a list of UNIX groups that have special access. The + global hash `%access_by_group` uses UNIX group names as keys, and the value + is an arrayref of partitions (partitions which the UNIX group can access).
+ + The work to be done is: Get the list of SLURM users, get each user's group + membership, and grant them access to the partition. This is done by updating + `$cluster{'users'}`: This resolves to a hashref, where the keys are + usernames, and the value is an arrayref of partitions. + + This either requires root access to run `sacctmgr list users`, or you have to + do a lot of manual trawling through the results of `getent passwd` and `getent + group`. + + * **Accounts Resolution**: SLURM is able to restrict partition access to + members of a particular "account". In this usage, "account" is basically + another grouping of people, except this grouping is internal to SLURM. + + The code already builds a list of SLURM accounts that have special access. + The global hash `%access_by_account` has SLURM account names as keys, and the + value is an arrayref of partition names. + + The work to be done is, use `sacctmgr` to work out who is in which account, + and then update `$cluster{'users'}` appropriately. This requires root + access, or else `sacctmgr` won't provide the necessary data. diff --git a/slurm2json/slurm2json.pl b/slurm2json/slurm2json.pl new file mode 100755 index 0000000..c420a5a --- /dev/null +++ b/slurm2json/slurm2json.pl @@ -0,0 +1,461 @@ +#!/usr/bin/perl -wT +# vim: ts=4 sw=4 et +# +# slurm2json: Slurm config, QOS, and account converter. +# +# Written By: +# - A. Karl Kornel +# +# Last Updated: 2017-05-15 +# +# Copyright © 2017 the Board of Trustees of the Leland Stanford Junior University.
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +use strict; +use warnings; + +use IPC::Open2; +use JSON; +use List::Util 1.45; # 1.45+ needed for uniq() +use Text::CSV; + +my $DEBUG = 0; +my $SACCTMGR_PATH = '/usr/bin/sacctmgr'; + +# Wipe out our environment, for taint-safety. +%ENV = (); + +# This is our cluster information. 
+my $cluster_name; +my @features; +my %gres; +my %partitions; +my %qos; +my (@access_all, %access_by_account, %access_by_group); + +# Begin processing slurm.conf on standard input +while (1) { + my $line = ; + if (!defined($line)) { + print STDERR "No more lines to parse\n" if $DEBUG; + last; + } + chomp $line; + + # Filter out stuff to ignore + if ((length($line) == 0) + or ($line =~ m|^#|) + or ($line =~ m|^\s+$|)) { + print STDERR "Skipping line $line\n" if $DEBUG; + next; + } + + print STDERR "Looking at $line\n" if $DEBUG; + + # Catch multi-line lines (that is, those ending in \) + # A few cases have whitespace and/or comments after the \, so we must catch that. + while ($line =~ m|\\\s*$|) { + my $extra_line = ; + # Remove whitespace from start/end of line, and comments from end of line. + # But, keep the ending backslash. + $extra_line =~ s|^\s+||; + $extra_line =~ s|\s+(#.*)?$||; + $line =~ s|\\\s*$|$extra_line|; + print STDERR "Appended line $extra_line\n" if $DEBUG; + } + + # Figure out what type of line we have. + my $line_type; + if ($line =~ m|^(\w+)=(.+)$|) { + $line_type = lc($1); + } else { + print "Could not parse the line $line\n"; + exit 1; + } + print STDERR "This line is a $line_type\n" if $DEBUG; + + # We only care about some things + + # We want to capture the ClusterName + if ($line_type eq 'clustername') { + $line =~ m|^ClusterName=(.+)$|i; + $cluster_name = $1; + } + + # For NodeName, we want the feature list, which we add to the global list. 
+ # (We'll handle deduplication later) + # We also want to get Gres stuff + elsif ($line_type eq 'nodename') { + # Pull out the Feature list + if ($line =~ m|Feature="(.+)"|i) { + push @features, (split /,/, $1); + print STDERR "Found features!\n" if $DEBUG; + } else { + print STDERR "No features found\n" if $DEBUG; + } + + # Pull out the Gres list + if ($line =~ m|Gres=([a-z0-9:,]+)|i) { + my $gres_string = $1; + print STDERR "Found Gres line $gres_string\n" if $DEBUG; + foreach my $gres_entry (split(/,/, $gres_string)) { + # Parse out the name, and optional type. + my @gres_params = split(/:/, $gres_entry); + my $gres_name = shift @gres_params; + print STDERR "Gres name is $gres_name\n" if $DEBUG; + + # Make sure the top-level name exists, then add the option + if (!exists($gres{$gres_name})) { + $gres{$gres_name} = []; + } + + # If a Gres type is defined, add it to the list. + # Again, we don't de-duplicate right now. + if (scalar(@gres_params) > 0) { + push @{$gres{$gres_name}}, shift @gres_params; + print STDERR "Gres type is $gres_params[0]\n" if $DEBUG; + } + } + } else { + print STDERR "No Gres found\n" if $DEBUG; + } + } + + # For PartitionName, we want alot of stuff! + elsif ($line_type eq 'partitionname') { + my ($name, $options); + my $hidden; + my (%default, %max, %min, @qos); + my (@allowed_accounts, @allowed_groups); + + # Get the partition name and the rest of the line + if ($line =~ m|^PartitionName=(\w+)\s+(.+)$|i) { + ($name, $options) = ($1, $2); + print STDERR "Name is $name\nOptions are $options\n" if $DEBUG; + } else { + print "Could not extract name from $line\n"; + exit 1; + } + + # Now, let's go through the rest of the options + foreach my $option (split(/\s+/, $options)) { + my ($option_name, $option_value) = split(/=/, $option); + $option_name = lc($option_name); + + # We're going to look for options in this order: + # Hidden, AllowQOS, Defaults (plus QOS), Max, Min, and Allow/Deny. 
+ + # Catch Hidden + if ($option_name eq 'hidden') { + print STDERR "Found Hidden\n"; + if ($option_value eq 'YES') { + $hidden = JSON::true; + } + elsif ($option_value eq 'NO') { + $hidden = JSON::false; + } + else { + print "Invalid Hidden value \"$hidden\" for partition $name\n"; + exit 1; + } + } + + # Build AllowQOS list + elsif ($option_name eq 'allowqos') { + @qos = split(/,/, $option_value); + } + + # Defaults + elsif ($option_name eq 'defmempercpu') { + $default{'mem-per-cpu'} = $option_value; + } + elsif ($option_name eq 'defmempernode') { + $default{'mem-per-node'} = $option_value; + } + elsif ($option_name eq 'defaulttime') { + $default{'time'} = $option_value; + } + elsif ($option_name eq 'qos') { + $default{'qos'} = $option_value; + } + + # Maxima + elsif ($option_name eq 'maxcpuspernode') { + $max{'cpu-per-node'} = $option_value; + } + elsif ($option_name eq 'maxmempercpu') { + $max{'mem-per-cpu'} = $option_value; + } + elsif ($option_name eq 'maxmempernode') { + $max{'mem-per-node'} = $option_value; + } + elsif ($option_name eq 'maxtime') { + $max{'time'} = $option_value; + } + elsif ($option_name eq 'maxnodes') { + $max{'nodes'} = $option_value; + } + + # Minima + elsif ($option_name eq 'minnodes') { + $min{'nodes'} = $option_value; + } + + # Allowed entities + elsif ($option_name eq 'allowaccounts') { + @allowed_accounts = split(/,/, $option_value); + } + elsif ($option_name eq 'allowgroups') { + @allowed_groups = split(/,/, $option_value); + } + + # Denied entities + # TODO: Add support for DenyAccounts + } + + # Skip the DEFAULT partition + if ($name eq 'DEFAULT') { + print STDERR "Skipping DEFAULT partition\n" if $DEBUG; + next; + } + + # Build our partition entry + my %partition_entry; + if (defined($hidden)) { + $partition_entry{'hidden'} = $hidden; + } + if (scalar(@qos) != 0) { + $partition_entry{'qos'} = \@qos; + } + if (scalar(keys(%default)) != 0) { + $partition_entry{'default'} = \%default; + } + if (scalar(keys(%max)) != 0) { + 
$partition_entry{'max'} = \%max; + } + if (scalar(keys(%min)) != 0) { + $partition_entry{'min'} = \%min; + } + + # Add our partition to the list! + $partitions{$name} = \%partition_entry; + + # Now we need to work out who can access this partition! + + # If there's nobody in the allowed list, then everyone can access! + if (!scalar(@allowed_accounts) and !scalar(@allowed_groups)) { + push @access_all, $name; + } + + # Otherwise, add this partition to the access lists for accounts & groups. + if (scalar(@allowed_accounts) > 0) { + foreach my $account (@allowed_accounts) { + if (!exists($access_by_account{$account})) { + $access_by_account{$account} = [$name]; + } else { + push @{$access_by_account{$account}}, $name; + } + } + } + if (scalar(@allowed_groups) > 0) { + foreach my $group (@allowed_groups) { + if (!exists($access_by_group{$group})) { + $access_by_group{$group} = [$name]; + } else { + push @{$access_by_group{$group}}, $name; + } + } + } + + } # Done with PartitionName +} # Done processing slurm.conf + +# Start running external commands! +my ($sacctmgr_pid, $sacctmgr_stdin, $sacctmgr_stdout); + +# Pull QoS information from SLURM. + +# To help, create an anonymous function that parses a TRES string. +my $tres_parse = sub { + my ($tres_text) = (@_); + my %output; + + foreach my $tres_item (split(/,/, $tres_text)) { + # Each item is name=value + my @tres_components = split(/=/, $tres_item); + + # For most items, processing is simple! + if ($tres_components[0] !~ m|^gres/|) { + $output{$tres_components[0]} = $tres_components[1]; + } + + # For gres components, populate a separate structure. + else { + if (!exists($output{'gres'})) { + $output{'gres'} = {}; + } + + # First, split out the gres string + $tres_components[0] =~ m|^gres/(.+)$|; + my $gres_item = $1; + + # For now, put that entire item in. + $output{'gres'}->{$gres_item} = $tres_components[1]; + # TODO: Properly split out the gres string. + } + } + + # Return our hashref! 
+ return \%output; +}; + +# Run sacctmgr to get the list of QOSes, in machine-readable form, with only what we want. +eval { + my @cmdline = ($SACCTMGR_PATH, qw(-p list qos), + 'Format=Name,MaxTRESPerJob,MaxTRESPerUser,MaxWall,MinTRES', + ); + print STDERR 'Running command: ', join(' ', @cmdline), "\n" if $DEBUG; + $sacctmgr_pid = open2($sacctmgr_stdout, $sacctmgr_stdin, @cmdline); + close($sacctmgr_stdin); + print STDERR "PID is $sacctmgr_pid\n" if $DEBUG; +}; +if ($@) { + print STDERR "sacctmgr error: $@\n" if $DEBUG; + print "WARNING! Unable to run sacctmgr to get QOS info.\n"; + print "JSON will be incomplete.\n"; +} else { + # We'll use Text::CSV to handle parsing of sacctmgr's output. + my $csv = Text::CSV->new({ + 'sep_char' => '|', + 'blank_is_undef' => 1, + 'empty_is_undef' => 1, + }); + + # Pull the first row for column names. + $csv->column_names($csv->getline($sacctmgr_stdout)); + + # Loop through each QoS line. + while (my $qos_entry = $csv->getline_hr($sacctmgr_stdout)) { + # Now we need to look at individual columns + + # First, get the name and create our entry. + my $qos_name = $qos_entry->{'Name'}; + my (%qos_min, %qos_max); + print STDERR "Examining QOS entry $qos_name\n" if $DEBUG; + + # MaxWall is easy to check for. + if (defined($qos_entry->{'MaxWall'})) { + $qos_max{'wall'} = $qos_entry->{'MaxWall'}; + } + + # Parse our minimums. + if (defined($qos_entry->{'MinTRES'})) { + %qos_min = %{$tres_parse->($qos_entry->{'MinTRES'})}; + } + + # Parse our per-user node maxima. + if (defined($qos_entry->{'MaxTRESPU'})) { + %qos_max = %{$tres_parse->($qos_entry->{'MaxTRESPU'})}; + } + + # For per-host maxima, if there's a conflict. + # We assume that user-level overrides host-level TRES. + # TODO: Check if this logic is actually correct. 
+ if (defined($qos_entry->{'MaxTRES'})) { + my %qos_max_host = %{$tres_parse->($qos_entry->{'MaxTRESPU'})}; + + foreach my $conflicting_qos (keys(%qos_max_host)) { + if (!exists($qos_max{$conflicting_qos})) { + $qos_max{$conflicting_qos} = $qos_max_host{$conflicting_qos}; + } + } + } + + # Assemble our QOS components into the global hash. + $qos{$qos_name} = {}; + if (scalar(keys(%qos_min)) > 0) { + $qos{$qos_name}->{'min'} = \%qos_min; + } + if (scalar(keys(%qos_max)) > 0) { + $qos{$qos_name}->{'max'} = \%qos_max; + } + } # Done looping through sacctmgr's QOS output. +} # Done with sacctmgr post-eval code. + +# Clean up sacctmgr's child process. +waitpid($sacctmgr_pid, 0); + + +# Start building our JSON. + + +# Catch if we didn't get a cluster name +if (!defined $cluster_name) { + print "No ClusterName line found. Did you provide a config?\n"; + exit 1; +} + +# Make our cluster hash, starting with partitions. +my %cluster = ( + partitions => \%partitions, +); + +# If we have a populated QOS hash, add them. +if (scalar(keys(%qos)) > 0) { + $cluster{'qos'} = \%qos; +} + +# If we have features, deduplicate and add them. +# NOTE: This doesn't sort the list! +if (scalar(@features) > 0) { + $cluster{'features'} = [List::Util::uniq(@features)]; +} + +# If we have Gres, add them as well. +# If a key has any types, add them as well. Else add an empty list. +# And again, NOTE, this doesn't sort! +if (scalar(keys(%gres)) > 0) { + foreach my $gres_key (keys(%gres)) { + my @gres_types = List::Util::uniq(@{$gres{$gres_key}}); + $gres{$gres_key} = \@gres_types; + } + $cluster{'gres'} = \%gres; +} + +# If we have any users, then set up that hash key. 
+if ((scalar(@access_all) > 0) or + (scalar(keys(%access_by_account)) > 0) or + (scalar(keys(%access_by_group))) +) { + $cluster{'users'} = {}; +} + +# Add the wildcard user +if (scalar(@access_all) > 0) { + $cluster{'users'}->{'*'} = \@access_all; +} + +# TODO: Handle %access_by_account and %access_by_group + +# Output the JSON: We encode as UTF-8, and we pretty-print +print JSON->new->utf8(1)->pretty(1)->encode({ $cluster_name => \%cluster }); + +print STDERR "All done!\n" if $DEBUG; +exit 0;