aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Common/nvidia.py75
-rw-r--r--ticker.py4
2 files changed, 51 insertions, 28 deletions
diff --git a/Common/nvidia.py b/Common/nvidia.py
index 88463b3..3579c7b 100644
--- a/Common/nvidia.py
+++ b/Common/nvidia.py
@@ -7,17 +7,23 @@ import numpy as np
import glob as glob
-def read_gpu_stats(node='vesta1'):
+def read_gpu_stats(node='vesta1', type='tesla'):
"""
Reads stats from CSV file, which was dumped on the GPU nodes.
- This query generates the CSV file. Run on GPU node.
- nvidia-smi --query-gpu=index,uuid,name,temperature.gpu,utilization.gpu,utilization.memory,power.draw --format=csv,noheader > vesta1_`date '+%s'`.csv
+ The following queries generate the CSV files.
+
+ Run on GPU node w/ Tesla Cards:
+ $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu,utilization.gpu,utilization.memory,power.draw --format=csv,noheader > vesta1_`date '+%s'`.csv
+
+ Run on GPU nodes w/ GTX Cards:
+ $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu --format=csv,noheader > tasna1_`date '+%s'`.csv
CSV files are named "$node_$unix_epoch_in_seconds.csv".
For example, "vesta1_1454748497.csv".
@param: node - GPU node, used to construct filename [String]
+ @param: type - GPU Type (Tesla|GTX) [String]
@return: df - GPU Stats [Pandas Dataframe]
@return: epoch - Time (Seconds since 01-01-1970) [Integer]
@return: success - Did we get data? [Boolean]
@@ -42,29 +48,46 @@ def read_gpu_stats(node='vesta1'):
# Extract Epoch
epoch = int(fname[:-4].split('_')[-1])
- names_cols = [ 'gpu_id', 'uuid', 'gpu_name', 'gpu_temperature', \
- 'gpu_utilization', 'memory_utilization', 'power_draw' ]
- df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \
- engine='python')
-
- # Drop Units
- df.power_draw = \
- df.power_draw.apply(lambda a: a.split(' ')[0])
- df.gpu_utilization = \
- df.gpu_utilization.apply(lambda a: a.split(' ')[0])
- df.memory_utilization = \
- df.memory_utilization.apply(lambda a: a.split(' ')[0])
-
- # Fix Types
- df.power_draw = \
- np.asarray(df.power_draw, dtype=np.float64)
- df.gpu_utilization = \
- np.asarray(df.gpu_utilization, dtype=np.float64)
- df.memory_utilization = \
- np.asarray(df.memory_utilization, dtype=np.float64)
-
- # Add Node Column
- df['node'] = pd.Series(["%s" % node]*len(df))
+ # Branch on GPU Type. See header documentation.
+ # Tesla cards report all stats. GTX cards only report index, UUID, name and temperature.
+ if type == 'tesla':
+ names_cols = [ 'gpu_id', 'uuid', \
+ 'gpu_name', \
+ 'gpu_temperature', \
+ 'gpu_utilization', \
+ 'memory_utilization', \
+ 'power_draw' ]
+ df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \
+ engine='python')
+
+ # Drop Units
+ df.power_draw = \
+ df.power_draw.apply(lambda a: a.split(' ')[0])
+ df.gpu_utilization = \
+ df.gpu_utilization.apply(lambda a: a.split(' ')[0])
+ df.memory_utilization = \
+ df.memory_utilization.apply(lambda a: a.split(' ')[0])
+
+ # Fix Types
+ df.power_draw = \
+ np.asarray(df.power_draw, dtype=np.float64)
+ df.gpu_utilization = \
+ np.asarray(df.gpu_utilization, dtype=np.float64)
+ df.memory_utilization = \
+ np.asarray(df.memory_utilization, dtype=np.float64)
+
+ # Add Node Column
+ df['node'] = pd.Series(["%s" % node]*len(df))
+
+ elif type == 'gtx':
+ names_cols = [ 'gpu_id', 'uuid', \
+ 'gpu_name', \
+ 'gpu_temperature' ]
+ df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \
+ engine='python')
+
+ # Add Node Column
+ df['node'] = pd.Series(["%s" % node]*len(df))
# Return
return df, epoch, True
diff --git a/ticker.py b/ticker.py
index 6b7d799..a7dd3ea 100644
--- a/ticker.py
+++ b/ticker.py
@@ -75,9 +75,9 @@ line = "room_temperature,room=zbox_room value=%.2f %i" % \
(temperature, netatmo_epoch)
lines.append(line)
-# GPU Stats
+# GPU Stats, Tesla Cards
for gpu_node in [ 'vesta1', 'vesta2' ]:
- df, gpu_epoch, sucess = nvidia.read_gpu_stats(node=gpu_node)
+ df, gpu_epoch, sucess = nvidia.read_gpu_stats(node=gpu_node, type='tesla')
for irow, [ index, row ] in enumerate(df.iterrows()):
line_01 = "gpu_temperature,node=%s,uuid=%s value=%.2f %i" % \
(row.node, row.uuid, row.gpu_temperature, gpu_epoch)