|
|
(8 intermediate revisions by the same user not shown) |
Line 1: |
Line 1: |
| + | '''This page is now obsolete. Please go to https://ccm-docs.readthedocs.io ''' |
| + | |
| [[Category:Math compute cluster]] | | [[Category:Math compute cluster]] |
| + | :''Up to [[CCM]]'' |
| ==General Use Information== | | ==General Use Information== |
| | | |
Line 73: |
Line 76: |
| </code> | | </code> |
| | | |
− | ===Hosts=== | + | ===Nodes=== |
− | | + | To see a list of all nodes, use: |
− | There are lots of hosts (servers) in our system and you can view all of them by typing <code>qhost</code> at the prompt.
| |
− | | |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">malingoj@math-compute:~$
| |
− | qhost</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">HOSTNAME
| |
− | ARCH
| |
− | NCPU NSOC NCOR NTHR
| |
− | LOAD MEMTOT MEMUSE SWAPTO SWAPUS</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">----------------------------------------------------------------------------------------------</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">global
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c01
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.01
| |
− | 251.4G
| |
− | 2.2G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c02
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.03
| |
− | 251.4G
| |
− | 2.1G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c03
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 7.90
| |
− | 251.4G
| |
− | 6.0G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c04
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 14.95
| |
− | 251.4G 8.2G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c05
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.03
| |
− | 251.4G
| |
− | 2.1G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">chem-xenon-c06
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.01
| |
− | 251.4G
| |
− | 2.3G
| |
− | 4.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">clas-pi-c01
| |
− | lx-armhf
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00 976.7M
| |
− | 61.4M 100.0M 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">clas-pi-c02
| |
− | lx-armhf
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00 976.7M
| |
− | 45.9M 100.0M 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c01
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.02
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c02
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.11
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c03
| |
− | lx-amd64
| |
− | 32
| |
− | 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c04
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c05
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c06
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.01
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c07
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c08
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c09
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c10
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c11
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c12
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.15
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c13
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c14
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c15
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c16
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c17
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c18
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c19
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c20
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c21
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c22
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c23
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 62.9G 1.4G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-c24
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | - -</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-colibri-i01
| |
− | lx-amd64
| |
− | 64 4
| |
− | 32 64 3.00
| |
− | 1007.9G 8.0G
| |
− | 932.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-compgen-c01
| |
− | lx-amd64
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00
| |
− | 3.7G 495.6M
| |
− | 34.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-compgen-c02
| |
− | lx-amd64
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00
| |
− | 3.7G 497.2M
| |
− | 34.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-compgen-c03
| |
− | lx-amd64
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00
| |
− | 3.7G 489.7M
| |
− | 34.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-compgen-c04
| |
− | lx-amd64
| |
− | 4
| |
− | 1 4
| |
− | 4 0.00
| |
− | 3.7G 496.4M
| |
− | 34.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c01
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 948.4M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c02
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 953.5M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c03
| |
− | lx-amd64
| |
− | 24
| |
− | 2
| |
− | 12 24 0.00
| |
− | 23.5G 954.2M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c04
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 941.8M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c05
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 946.8M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c06
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 942.1M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c07
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G
| |
− | 1.0G 50.0G
| |
− | 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c08
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 958.7M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c09
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 844.3M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c10
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 862.1M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c11
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.01
| |
− | 23.5G 939.4M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-c12
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 23.5G 844.9M
| |
− | 50.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-i01
| |
− | lx-amd64
| |
− | 24 2
| |
− | 12 24 0.00
| |
− | 94.4G 1.0G
| |
− | 178.3G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-gross-i02
| |
− | lx-amd64
| |
− | 32 2
| |
− | 16 32 0.00
| |
− | 377.9G 1.1G
| |
− | 130.0G 0.0</span></p>
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 10pt; font-family: Consolas; color: black;">math-turing
| |
− | -
| |
− | - -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | -
| |
− | </span></p>
| |
− | | |
− | | |
− | As far as columns go:
| |
− | | |
− | | |
− | NCPU is the number of logical CPUs
| |
− | | |
− | NSOC is the number of physical CPU sockets
| |
− | | |
− | NCOR is the number of physical cores
| |
− | | |
− | NTHR is the number of threads
| |
− | | |
− | LOAD is the /100 load of the server
| |
− | | |
− | MEMTOT is the total physical memory
| |
− | | |
− | MEMUSE is the total physical memory used
| |
− | | |
− | SWAPTO is the total swap memory
| |
− | | |
− | SWAPUS is the total swap memory used
| |
− | | |
| | | |
− | There are pros and cons to using more threads than physical cores (aka hyperthreading): in some cases it helps and in some it doesn’t. In general, it helps if you’re running a large multithreaded job on one machine, but slows things down if you’re using MPI or running a bunch of small single-core jobs. For our purposes and limited budget, hyperthreading is usually enabled. Colibri is the main exception, as it’s the main cluster for MPI jobs. The math-colibri queue will only assign 16 slots per node even though each node/host/server has 32 threads. When colibri is idle, we can enable the colibri hosts in math-all and it will use all 32 threads. Easy answer? Just use math-all.
| + | <pre> |
| + | [jmandel@clas-compute ~]$ sinfo -N |
| + | NODELIST NODES PARTITION STATE |
| + | chem-xenon-c01 1 chem-xenon unk* |
| + | chem-xenon-c02 1 chem-xenon unk* |
| + | chem-xenon-c03 1 chem-xenon unk* |
| + | chem-xenon-c04 1 chem-xenon unk* |
| + | chem-xenon-c05 1 chem-xenon unk* |
| + | chem-xenon-c06 1 chem-xenon unk* |
| + | clas-rcdesktop-01 1 clas-rcdesktop down* |
| + | math-alderaan-c01 1 math-alderaan alloc |
| + | math-alderaan-c02 1 math-alderaan alloc |
| + | math-alderaan-c03 1 math-alderaan alloc |
| + | math-alderaan-c04 1 math-alderaan alloc |
| + | math-alderaan-c05 1 math-alderaan alloc |
| + | math-alderaan-c06 1 math-alderaan alloc |
| + | math-alderaan-c07 1 math-alderaan alloc |
| + | math-alderaan-c08 1 math-alderaan alloc |
| + | math-alderaan-c09 1 math-alderaan alloc |
| + | math-alderaan-c10 1 math-alderaan alloc |
| + | math-alderaan-c11 1 math-alderaan alloc |
| + | math-alderaan-c12 1 math-alderaan alloc |
| + | math-alderaan-c13 1 math-alderaan alloc |
| + | math-alderaan-c14 1 math-alderaan alloc |
| + | math-alderaan-c15 1 math-alderaan alloc |
| + | math-alderaan-c16 1 math-alderaan mix |
| + | math-alderaan-c17 1 math-alderaan idle |
| + | math-alderaan-c18 1 math-alderaan idle |
| + | math-alderaan-c19 1 math-alderaan idle |
| + | math-alderaan-c20 1 math-alderaan idle |
| + | math-alderaan-c21 1 math-alderaan idle |
| + | math-alderaan-c22 1 math-alderaan idle |
| + | math-alderaan-c23 1 math-alderaan idle |
| + | math-alderaan-c24 1 math-alderaan idle |
| + | math-alderaan-c25 1 math-alderaan idle |
| + | math-alderaan-c26 1 math-alderaan idle |
| + | math-alderaan-c27 1 math-alderaan idle |
| + | math-alderaan-c28 1 math-alderaan idle |
| + | math-alderaan-c29 1 math-alderaan idle |
| + | math-alderaan-c30 1 math-alderaan idle |
| + | math-alderaan-c31 1 math-alderaan idle |
| + | math-alderaan-c32 1 math-alderaan idle |
| + | math-alderaan-h01 1 math-alderaan-gpu idle |
| + | math-alderaan-h02 1 math-alderaan-gpu idle |
| + | math-colibri-c01 1 math-colibri-gpu idle |
| + | math-colibri-c02 1 math-colibri-gpu idle |
| + | math-colibri-c03 1 math-colibri-gpu idle |
| + | math-colibri-c04 1 math-colibri-gpu unk* |
| + | math-colibri-c05 1 math-colibri-gpu unk* |
| + | math-colibri-c06 1 math-colibri-gpu unk* |
| + | math-colibri-c07 1 math-colibri-gpu unk* |
| + | math-colibri-c08 1 math-colibri-gpu unk* |
| + | math-colibri-c09 1 math-colibri-gpu unk* |
| + | math-colibri-c10 1 math-colibri-gpu unk* |
| + | math-colibri-c11 1 math-colibri-gpu unk* |
| + | math-colibri-c12 1 math-colibri-gpu unk* |
| + | math-colibri-c13 1 math-colibri-gpu idle |
| + | math-colibri-c14 1 math-colibri-gpu idle |
| + | math-colibri-c15 1 math-colibri-gpu idle |
| + | math-colibri-c16 1 math-colibri-gpu idle |
| + | math-colibri-c17 1 math-colibri-gpu idle |
| + | math-colibri-c18 1 math-colibri-gpu idle |
| + | math-colibri-c19 1 math-colibri-gpu idle |
| + | math-colibri-c20 1 math-colibri-gpu idle |
| + | math-colibri-c21 1 math-colibri-gpu idle |
| + | math-colibri-c22 1 math-colibri-gpu idle |
| + | math-colibri-c23 1 math-colibri-gpu idle |
| + | math-colibri-c24 1 math-colibri-gpu idle |
| + | math-score-c01 1 math-score unk* |
| + | math-score-c02 1 math-score unk* |
| + | math-score-c03 1 math-score idle |
| + | math-score-c04 1 math-score idle |
| + | math-score-c05 1 math-score idle |
| + | </pre> |
| | | |
− | Swap memory is both a blessing and a curse. It’s basically using a slow hard drive (or array of hard drives) for extra memory. While this allows your job to continue to run without blowing up and failing, the CPU has to wait to fetch data from that swap, so things usually slow to a crawl if you start ‘swapping’. If something is taking forever and you run ‘qhost’ and see the host at full memory and into swap, let Joe know so he can try to fix it without killing your jobs.
| |
| | | |
− | It looks confusing but there is a method to the madness in the naming convention. Obviously, math-colibri and math-gross is the identifier for what cluster/building the servers are in, but the –c## and –i## stand for compute and interactive. the c## servers are usually part of the queuing system and the i## ones are for interactive use. | + | It looks confusing, but there is a method to the madness in the naming convention. Obviously, math-colibri and math-score are the identifiers for what cluster/building the servers are in, but the -c## and -i## stand for compute and interactive. The c## servers are usually part of the queuing system and the i## ones are for interactive use. Again, never ssh to compute nodes directly. |
| | | |
| ==Scheduler Instructions== | | ==Scheduler Instructions== |
Line 542: |
Line 161: |
| ===Submitting a job=== | | ===Submitting a job=== |
| | | |
− | The <code>qsub</code> command is used to submit a job into a queue. Your job should be a script that is accessible to the compute nodes. There are several switches you can add to the qsub command to set the submission options - the most common ones are: | + | The <code>sbatch</code> command is used to submit a job into a queue. Your job should be a script that is accessible to the compute nodes. You can add switches to the <code>sbatch</code> command, but it is recommended to make them a part of your batch script. Here is a sample SLURM batch script: COMING SOON |
− | | |
− | <table class="MsoNormalTable" style="width: 547.5pt;" | |
− | border="1" cellpadding="0" width="0">
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-q
| |
− | [queuename]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Submit
| |
− | the job into a certain queue (you should almost always do this)</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-pe
| |
− | [parallel environment] </span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Submit
| |
− | the job with a specified parallel environment</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | </table>
| |
− | | |
− | By default, a job will run with one "slot" (aka core reservation). If your job is going to use more than one CPU core, use the <code>-pe smp XX</code> switch, where XX is the number of cores your job will consume (16 in the example below).
| |
− | | |
− | Example:
| |
− | | |
− | <code>qsub -q math-all -pe smp 16 ./awesomescript.sh</code>
| |
| | | |
| ===Viewing queues and job status=== | | ===Viewing queues and job status=== |
| | | |
− | The <code>qstat</code> and <code>qhost</code> commands are used to gather information from the scheduler. Some of the most common switches are: | + | The <code>squeue</code> command is used to gather information from the scheduler. Some of the most common switches are: COMING SOON |
− | | |
− | <table class="MsoNormalTable" style="width: 552pt;"
| |
− | border="1" cellpadding="0" width="0">
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-q
| |
− | [queuename]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Request
| |
− | information on a certain queue. If you don't specify this option, all
| |
− | queues will be given</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-f</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Request
| |
− | full output - this is similar to giving a verbose output</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-u
| |
− | [username]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Request
| |
− | list of jobs for a certain user. Use -u '*' to show all users.</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-j
| |
− | [jobid]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Request
| |
− | information on a job. This is useful if your job throws an E (error)
| |
− | code.</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-g
| |
− | c</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Show
| |
− | status for all available queues - shows the used and available
| |
− | resources.</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | </table>
| |
− | | |
− | | |
− | The command <code>qhost</code> will show information on each server managed by the scheduler - mainly number of CPUs/Threads, total memory and total memory used, and total swap memory space and swap memory space used.
| |
− | | |
− | ===Modifying Jobs===
| |
− | | |
− | The <code>qmod</code> and <code>qalter</code> commands are used to modify job settings or statuses.
| |
− | | |
− | The common <code>qmod</code> commands are:
| |
− | | |
− | <table class="MsoNormalTable" style="width: 553.5pt;"
| |
− | border="1" cellpadding="0" width="0">
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-sj
| |
− | [jobid]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">suspend
| |
− | (pause) a job</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-usj
| |
− | [jobid]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">unsuspend
| |
− | (unpause) a job</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-rj
| |
− | [jobid]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">reschedule
| |
− | a job (restart and submit back in queue)</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-cj
| |
− | [jobid]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">clear
| |
− | job error state of a job</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | </table>
| |
− | | |
− | | |
− | The common <code>qalter</code> commands are:
| |
− | | |
− | <table class="MsoNormalTable" style="width: 552.75pt;"
| |
− | border="1" cellpadding="0" width="0">
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-q
| |
− | [queuename]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Change
| |
− | the job queue to the specified queue</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | <tr style="">
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">-pe
| |
− | [parallelenvironment] [slots]</span></p>
| |
− | </td>
| |
− | <td style="padding: 1.5pt;">
| |
− | <p class="MsoNormal"><span
| |
− | style="font-size: 11pt; font-family: "Times New Roman",serif; color: black;">Change
| |
− | the parallel environment and/or number of slots used for the job</span></p>
| |
− | </td>
| |
− | </tr>
| |
− | </table>
| |