1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
"""
NVIDIA Interaction.
"""
import pandas as pd
import numpy as np
import glob as glob
# Column layout of the CSV dump for each supported GPU type. The order matches
# the --query-gpu field order documented in read_gpu_stats().
_GPU_COLUMNS = {
    'tesla': ['gpu_id', 'uuid', 'gpu_name', 'gpu_temperature',
              'gpu_utilization', 'memory_utilization', 'power_draw'],
    'gtx': ['gpu_id', 'uuid', 'gpu_name', 'gpu_temperature'],
}

# Columns whose raw values carry a unit suffix (e.g. "45.20 W", "12 %").
_UNIT_COLUMNS = ('power_draw', 'gpu_utilization', 'memory_utilization')

# Production location of the CSV dumps.
_DEFAULT_BASEDIR = '/home/ics/volker/TmpDash/'


def _epoch_from_filename(fname):
    """
    Extracts the epoch from a "<node>_<epoch>.csv" path.

    @param: fname - Path ending in ".csv" [String]
    @return: epoch - Seconds since 01-01-1970, or None if the part after the
                     last underscore is not an integer [Integer|None]
    """
    try:
        # Drop the ".csv" suffix, then take whatever follows the last "_".
        return int(fname[:-4].rsplit('_', 1)[-1])
    except ValueError:
        return None


def read_gpu_stats(node='vesta1', type='tesla', basedir=_DEFAULT_BASEDIR):
    """
    Reads the most recent GPU stats from a CSV file dumped on a GPU node.

    The following queries generate the CSV files.

    Run on GPU node w/ Tesla Cards:
    $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu,utilization.gpu,utilization.memory,power.draw --format=csv,noheader > vesta1_`date '+%s'`.csv

    Run on GPU nodes w/ GTX Cards:
    $ nvidia-smi --query-gpu=index,uuid,name,temperature.gpu --format=csv,noheader > tasna1_`date '+%s'`.csv

    CSV files are named "$node_$unix_epoch_in_seconds.csv".
    For example, "vesta1_1454748497.csv".

    Among all files matching "<basedir>/<node>_*.csv", the one with the
    numerically largest epoch is read. Files whose suffix is not an integer
    are ignored.

    For Tesla cards the unit suffixes of power_draw, gpu_utilization and
    memory_utilization are dropped and the columns are converted to float64.
    Values that are not numeric (e.g. "[Not Supported]") become NaN.
    A 'node' column holding the node name is appended in all cases.

    @param: node - GPU node, used to construct filename [String]
    @param: type - GPU Type (Tesla|GTX), case-insensitive [String]
    @param: basedir - Directory holding the CSV dumps [String]
    @return: df - GPU Stats [Pandas Dataframe]
    @return: epoch - Time (Seconds since 01-01-1970) [Integer]
    @return: success - Did we get data? [Boolean]
    @raise: ValueError - A file was found but type is not Tesla or GTX
    """
    # Collect (epoch, filename) pairs so the newest file is picked by its
    # numeric timestamp; a plain string sort mis-orders epochs that differ
    # in digit count and trips over non-timestamped files.
    candidates = []
    for fname in glob.glob("%s/%s_*.csv" % (basedir, node)):
        epoch = _epoch_from_filename(fname)
        if epoch is not None:
            candidates.append((epoch, fname))

    # Is there anybody out there?
    if not candidates:
        return None, None, False
    epoch, fname = max(candidates)

    # Tesla cards report all stats. GTX cards only report the temperature.
    gpu_type = str(type).lower()
    if gpu_type not in _GPU_COLUMNS:
        raise ValueError("Unknown GPU type %r (expected 'tesla' or 'gtx')"
                         % (type,))

    # nvidia-smi separates fields with ", "; a multi-character separator
    # requires the python parsing engine.
    df = pd.read_csv(fname, sep=', ', header=None,
                     names=list(_GPU_COLUMNS[gpu_type]), engine='python')

    # Drop units and fix types (only the Tesla layout has these columns).
    for col in _UNIT_COLUMNS:
        if col in df.columns:
            bare = df[col].astype(str).str.split(' ').str[0]
            df[col] = pd.to_numeric(bare, errors='coerce').astype(np.float64)

    # Add Node Column
    df['node'] = str(node)

    return df, epoch, True
|