aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Volker Hoffmann <volker@cheleb.net> 2015-08-26 08:56:36 +0200
committer Volker Hoffmann <volker@cheleb.net> 2015-08-26 08:56:36 +0200
commit 1684ac7bbc23aaffd489112766b76ce28659ef00 (patch)
tree 09f91edb1083043c2c73a1d80cf379eefb7af2d0
parent b29d230eead7008d0d655bd02b1b084333179813 (diff)
feat: compute cluster utilisation
-rw-r--r-- Common/derived.py 47
-rw-r--r-- ticker.py 14
2 files changed, 61 insertions, 0 deletions
diff --git a/Common/derived.py b/Common/derived.py
new file mode 100644
index 0000000..fae3da5
--- /dev/null
+++ b/Common/derived.py
@@ -0,0 +1,47 @@
+"""
+Derived Quantities.
+"""
+
+
# Hardware inventory: partition -> (number of servers, schedulable units per
# server). Units are CPU cores for 'cpu' and GPU slots for the GPU partitions.
#   cpu   - 192 servers, 2 sockets x 8 cores (hyperthreading not counted)
#   tasna -   5 servers, 4 GTX 590 boards x 2 GPUs
#   vesta -   2 servers, 8 K80 boards x 2 GPUs
_PARTITION_LAYOUT = {
    'cpu': (192, 16),
    'tasna': (5, 8),
    'vesta': (2, 16),
}

# Partitions that are summed into the aggregate 'gpu' figure.
_GPU_PARTITIONS = ('tasna', 'vesta')


def _ratio(allocated, available):
    """
    Return allocated / available as a float; 0.0 if nothing is available.
    """
    if available <= 0:
        return 0.0
    return float(allocated) / float(available)


def compute_utilization(number_of_nodes_down, number_of_jobs_by_partition):
    """
    Compute Cluster Utilization.

    For every known partition the available capacity is
    (servers - servers down) * units per server, and the utilization is the
    'running' count of that partition divided by this capacity. The 'gpu'
    entry aggregates the tasna and vesta partitions.

    Partitions missing from either input are treated as having zero nodes
    down / zero running jobs. A partition with no available capacity (all
    nodes down) reports 0.0 instead of raising ZeroDivisionError.

    NOTE(review): the 'running' value is used directly as the number of
    allocated cores/slots - confirm the caller supplies it in those units.

    @todo: Less Hardcoding.

    @param: number_of_nodes_down - [Dict {'partition': 2, ...}]
    @param: number_of_jobs_by_partition - [Dict {'zbox': {'running': 1}, ...}]
    @return: utilization - [Dict {'cpu': 0.3, 'tasna': 0.5, ..., 'gpu': 0.9}]
    """

    utilization = {}
    available = {}
    allocated = {}

    for partition, (nodes, units_per_node) in _PARTITION_LAYOUT.items():
        nodes_down = number_of_nodes_down.get(partition, 0)
        # Convert nodes to cores/slots *after* removing the down nodes, so
        # that node counts are never subtracted from unit counts. Clamp at
        # zero in case more nodes are reported down than exist.
        available[partition] = max(nodes - nodes_down, 0) * units_per_node
        allocated[partition] = \
            number_of_jobs_by_partition.get(partition, {}).get('running', 0)
        utilization[partition] = \
            _ratio(allocated[partition], available[partition])

    # GPU Utilization (all GPU partitions combined)
    utilization['gpu'] = _ratio(
        sum(allocated[partition] for partition in _GPU_PARTITIONS),
        sum(available[partition] for partition in _GPU_PARTITIONS))

    return utilization
diff --git a/ticker.py b/ticker.py
index 3b52b08..3cc34ec 100644
--- a/ticker.py
+++ b/ticker.py
@@ -5,6 +5,7 @@ Read, Parse, Post Stats to InfluxDB.
import Common.slurm as slurm
import Common.sensors as sensors
import Common.influx as influx
+import Common.derived as derived
import time
@@ -23,6 +24,13 @@ njobs_by_partition_and_state = \
slurm.get_number_of_jobs_by_partition_and_state()
# #############################################################################
+# Compute Derived Quantities
+# #############################################################################
+utilization_by_partition = \
+ derived.compute_utilization(number_of_nodes_down, \
+ njobs_by_partition_and_state)
+
+# #############################################################################
# Load Data from Netatmo
# #############################################################################
temperature, netatmo_epoch = sensors.get_netatmo_temperature()
@@ -52,6 +60,12 @@ for partition, njobs_by_state in njobs_by_partition_and_state.iteritems():
(state, partition, njobs, epoch)
lines.append(line)
+# Cluster Utilization
+for partition, utilization in utilization_by_partition.iteritems():
+ line = "utilization,partition=%s value=%.6f %i" % \
+ (partition, utilization, epoch)
+ lines.append(line)
+
# Netatmo
line = "room_temperature,room=zbox_room value=%.2f %i" % \
(temperature, netatmo_epoch)