diff options
-rw-r--r-- | ticker.py | 25 |
1 file changed, 13 insertions, 12 deletions
@@ -81,18 +81,19 @@ lines.append(line)
 # This is not ideal. We should think of a way to record such failures.
 # https://github.com/influxdata/influxdb/issues/4089
 for gpu_node in [ 'vesta1', 'vesta2' ]:
-    df, gpu_epoch, sucess = nvidia.read_gpu_stats(node=gpu_node, \
-                                                  gpu_type='tesla')
-    for irow, [ index, row ] in enumerate(df.iterrows()):
-        lines.append("gpu_temperature,node=%s,uuid=%s value=%.2f %i" % \
-                     (row.node, row.uuid, row.gpu_temperature, gpu_epoch))
-        if ~np.isnan(row.power_draw):
-            lines.append("gpu_power_draw,node=%s,uuid=%s value=%.2f %i" % \
-                         (row.node, row.uuid, row.power_draw, gpu_epoch))
-        lines.append("gpu_utilization,node=%s,uuid=%s value=%.2f %i" % \
-                     (row.node, row.uuid, row.gpu_utilization, gpu_epoch))
-        lines.append("gpu_memory_utilization,node=%s,uuid=%s value=%.2f %i" % \
-                     (row.node, row.uuid, row.memory_utilization, gpu_epoch))
+    df, gpu_epoch, success = nvidia.read_gpu_stats(node=gpu_node, \
+                                                   gpu_type='tesla')
+    if success:
+        for irow, [ index, row ] in enumerate(df.iterrows()):
+            lines.append("gpu_temperature,node=%s,uuid=%s value=%.2f %i" % \
+                         (row.node, row.uuid, row.gpu_temperature, gpu_epoch))
+            if ~np.isnan(row.power_draw):
+                lines.append("gpu_power_draw,node=%s,uuid=%s value=%.2f %i" % \
+                             (row.node, row.uuid, row.power_draw, gpu_epoch))
+            lines.append("gpu_utilization,node=%s,uuid=%s value=%.2f %i" % \
+                         (row.node, row.uuid, row.gpu_utilization, gpu_epoch))
+            lines.append("gpu_memory_utilization,node=%s,uuid=%s value=%.2f %i" % \
+                         (row.node, row.uuid, row.memory_utilization, gpu_epoch))
 
 # Join
 data = "\n".join(lines)