From 93c90edb6f7939efb103d0ca092de83fb25aae55 Mon Sep 17 00:00:00 2001 From: Volker Hoffmann Date: Tue, 12 Apr 2016 17:17:15 +0200 Subject: feat: add support for gpu stats from gtx cards --- Common/nvidia.py | 75 ++++++++++++++++++++++++++++++++++++-------------------- ticker.py | 4 +-- 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/Common/nvidia.py b/Common/nvidia.py index 88463b3..3579c7b 100644 --- a/Common/nvidia.py +++ b/Common/nvidia.py @@ -7,17 +7,23 @@ import numpy as np import glob as glob -def read_gpu_stats(node='vesta1'): +def read_gpu_stats(node='vesta1', type='tesla'): """ Reads stats from CSV file, which was dumped on the GPU nodes. - This query generates the CSV file. Run on GPU node. - nvidia-smi --query-gpu=index,uuid,name,temperature.gpu,utilization.gpu,utilization.memory,power.draw --format=csv,noheader > vesta1_`date '+%s'`.csv + The following queries generate the CSV files. + + Run on GPU node w/ Tesla Cards: + $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu,utilization.gpu,utilization.memory,power.draw --format=csv,noheader > vesta1_`date '+%s'`.csv + + Run on GPU nodes w/ GTX Cards: + $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu --format=csv,noheader > tasna1_`date '+%s'`.csv CSV files is named "$node_$unix_epoch_in_seconds.csv". For example, "vesta1_1454748497.csv". @param: node - GPU node, used to nconstruct filename [String] + @param: type - GPU Type (Tesla|GTX) [String] @return: df - GPU Stats [Pandas Dataframe] @return: epoch - Time (Seconds since 01-01-1970) [Integer] @return: success - Did we get data? [Boolean] @@ -42,29 +48,46 @@ def read_gpu_stats(node='vesta1'): # Extract Epoch epoch = int(fname[:-4].split('_')[-1]) - names_cols = [ 'gpu_id', 'uuid', 'gpu_name', 'gpu_temperature', \ - 'gpu_utilization', 'memory_utilization', 'power_draw' ] - df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \ - engine='python') - - # Drop Units - df.power_draw = \ - df.power_draw.apply(lambda a: a.split(' ')[0]) - df.gpu_utilization = \ - df.gpu_utilization.apply(lambda a: a.split(' ')[0]) - df.memory_utilization = \ - df.memory_utilization.apply(lambda a: a.split(' ')[0]) - - # Fix Types - df.power_draw = \ - np.asarray(df.power_draw, dtype=np.float64) - df.gpu_utilization = \ - np.asarray(df.gpu_utilization, dtype=np.float64) - df.memory_utilization = \ - np.asarray(df.memory_utilization, dtype=np.float64) - - # Add Node Column - df['node'] = pd.Series(["%s" % node]*len(df)) + # Branch on GPU Type. See header documentation. + # Tesla cards report all stats. GTX cards are gimped. + if type == 'tesla': + names_cols = [ 'gpu_id', 'uuid', \ + 'gpu_name', \ + 'gpu_temperature', \ + 'gpu_utilization', \ + 'memory_utilization', \ + 'power_draw' ] + df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \ + engine='python') + + # Drop Units + df.power_draw = \ + df.power_draw.apply(lambda a: a.split(' ')[0]) + df.gpu_utilization = \ + df.gpu_utilization.apply(lambda a: a.split(' ')[0]) + df.memory_utilization = \ + df.memory_utilization.apply(lambda a: a.split(' ')[0]) + + # Fix Types + df.power_draw = \ + np.asarray(df.power_draw, dtype=np.float64) + df.gpu_utilization = \ + np.asarray(df.gpu_utilization, dtype=np.float64) + df.memory_utilization = \ + np.asarray(df.memory_utilization, dtype=np.float64) + + # Add Node Column + df['node'] = pd.Series(["%s" % node]*len(df)) + + elif type == 'gtx': + names_cols = [ 'gpu_id', 'uuid', \ + 'gpu_name', \ + 'gpu_temperature' ] + df = pd.read_csv(fname, sep=', ', header=None, names=names_cols, \ + engine='python') + + # Add Node Column + df['node'] = pd.Series(["%s" % node]*len(df)) # Return return df, epoch, True diff --git a/ticker.py b/ticker.py index 6b7d799..a7dd3ea 100644 --- a/ticker.py +++ b/ticker.py @@ -75,9 +75,9 @@ line = "room_temperature,room=zbox_room value=%.2f %i" % \ (temperature, netatmo_epoch) lines.append(line) -# GPU Stats +# GPU Stats, Tesla Cards for gpu_node in [ 'vesta1', 'vesta2' ]: - df, gpu_epoch, sucess = nvidia.read_gpu_stats(node=gpu_node) + df, gpu_epoch, sucess = nvidia.read_gpu_stats(node=gpu_node, type='tesla') for irow, [ index, row ] in enumerate(df.iterrows()): line_01 = "gpu_temperature,node=%s,uuid=%s value=%.2f %i" % \ (row.node, row.uuid, row.gpu_temperature, gpu_epoch) -- cgit v1.1