267 lines
12 KiB
Python
267 lines
12 KiB
Python
import xmltodict
|
|
import json
|
|
import subprocess
|
|
import os
|
|
import re
|
|
|
|
def _run_nvidia_smi():
    """Run ``nvidia-smi -q -x`` and return what it wrote to stdout.

    Returns:
        The captured standard output decoded as UTF-8 text.
    """
    completed = subprocess.run(['nvidia-smi', '-q', '-x'], stdout=subprocess.PIPE)
    return completed.stdout.decode('utf-8')
|
|
|
|
def _run_rocm_smi():
|
|
try:
|
|
result = subprocess.run(['rocm-smi', '-a', '--json'], stdout=subprocess.PIPE)
|
|
result = result.stdout.decode('utf-8')
|
|
except:
|
|
result = subprocess.run(['/opt/rocm/bin/rocm-smi', '-a', '--json'], stdout=subprocess.PIPE)
|
|
result = result.stdout.decode('utf-8')
|
|
|
|
result = json.loads(result)
|
|
|
|
return result
|
|
|
|
def _read_file_number(path):
|
|
with open(path, "r") as f:
|
|
number = f.read().split("\n")[0]
|
|
return float(number)
|
|
|
|
def _intel():
    """Gather statistics for the GPU exposed as ``/sys/class/drm/card0``.

    Combines three sources: sysfs files under ``/sys/class/drm/card0``, the
    JSON output of ``lshw -c display -json`` and a short sample taken from
    ``intel_gpu_top -J``.

    Returns:
        A dict with the keys ``uuid``, ``max_freq``, ``min_freq``,
        ``cur_freq``, ``power``, ``engine_3d``, ``engine_video``, ``usage``,
        ``model`` and ``driver``. The three frequencies are the sysfs MHz
        values multiplied by 10**6.
    """
    from time import sleep

    stats = {}

    with open("/sys/class/drm/card0/device/device", "r") as id_file:
        stats["uuid"] = id_file.read().split("\n")[0]

    lshw = subprocess.run(['lshw', '-c', 'display', '-json'], stdout=subprocess.PIPE)
    displays = json.loads(lshw.stdout.decode('utf-8'))

    # Let intel_gpu_top run for 0.2 s, then kill it and keep what it printed.
    sampler = subprocess.Popen(['intel_gpu_top', '-J'], stdout=subprocess.PIPE)
    sleep(0.2)
    sampler.kill()
    raw_sample, _ = sampler.communicate()

    # The first three characters are dropped before parsing; presumably they
    # open the JSON array the tool streams -- TODO confirm against its output.
    sample = json.loads(raw_sample.decode('utf-8')[3:])

    sysfs_card = "/sys/class/drm/card0/"
    stats["max_freq"] = _read_file_number(sysfs_card + "gt_max_freq_mhz") * 10**6
    stats["min_freq"] = _read_file_number(sysfs_card + "gt_min_freq_mhz") * 10**6
    stats["cur_freq"] = _read_file_number(sysfs_card + "gt_cur_freq_mhz") * 10**6

    engines = sample["engines"]
    stats["power"] = sample["power"]["GPU"]
    stats["engine_3d"] = engines["Render/3D"]["busy"]
    stats["engine_video"] = engines["Video"]["busy"]
    # Overall usage is the plain average of the two engine busy values.
    stats["usage"] = (stats["engine_3d"] + stats["engine_video"]) / 2

    first_display = displays[0]
    stats["model"] = first_display["product"]
    stats["driver"] = first_display["configuration"]["driver"]

    return stats
|
|
|
|
def _getAmdGpuMemSize():
|
|
devs = os.listdir("/sys/class/drm/")
|
|
cards = {}
|
|
|
|
for i in devs:
|
|
card = re.findall("card[0-9]",i)
|
|
if card != []:
|
|
try:
|
|
with open("/sys/class/drm/"+card[0]+"/device/mem_info_vram_total", "r") as f:
|
|
mem = f.read()[:-1]
|
|
with open("/sys/class/drm/"+card[0]+"/device/device", "r") as f:
|
|
device = f.read()[:-1]
|
|
cards[device] = mem
|
|
except:
|
|
pass
|
|
|
|
return cards
|
|
|
|
def _getAmdGpuMaxFanspeed():
    """Return the first "max" fan reading that ``sensors -j -A`` lists for an amdgpu chip.

    The JSON report is scanned in order: chips whose name contains
    ``amdgpu``, then their entries whose label contains ``fan``, then the
    fields of that entry whose name contains ``max``.

    Returns:
        The value of the first matching field, or ``1`` when nothing matches.
    """
    completed = subprocess.run(['sensors', '-j', '-A'], stdout=subprocess.PIPE)
    report = json.loads(completed.stdout.decode('utf-8'))

    for chip_name, chip in report.items():
        if "amdgpu" not in chip_name:
            continue
        for label, readings in chip.items():
            if "fan" not in label:
                continue
            for field in readings:
                if "max" in field:
                    return readings[field]

    return 1
|
|
|
|
def readGpu(vendor="nVidia"):
    """Collect a snapshot of GPU state in one vendor-independent layout.

    Args:
        vendor: ``"nVidia"`` (uses nvidia-smi), ``"AMD"`` (uses rocm-smi,
            sysfs and ``sensors``) or ``"Intel"`` (uses ``_intel``). Any other
            value yields a single placeholder GPU keyed ``"unsupported"``.

    Returns:
        ``{"about": {...}, "gpu": {key: stats}}``. ``about`` holds driver
        level information; ``gpu`` holds one stats dict per device, keyed by
        the nVidia/Intel uuid or the AMD device id. Each stats dict contains
        ``throttle``, ``util``, ``fan_speed``, ``vbios_version``,
        ``product_name``, ``uuid``, ``memory_total``, ``memory_used``,
        ``temp``, ``power``, ``power_limit``, ``gpu_clock``, ``mem_clock``,
        ``sm_clock``, ``video_clock``, ``gpu_max_clock``, ``mem_max_clock``
        and ``performance_state``.

    NOTE(review): nVidia clocks are passed through as reported by nvidia-smi,
    while the AMD and Intel values are multiplied by 10**6 -- confirm the
    consumers expect that difference.
    """
    if vendor == "nVidia":
        about, gpus = _read_nvidia()
    elif vendor == "AMD":
        about, gpus = _read_amd()
    elif vendor == "Intel":
        about, gpus = _read_intel()
    else:
        about = {"driver_version": "Unknown"}
        gpus = {"unsupported": _placeholder_gpu("unsupported")}

    return {"about": about, "gpu": gpus}


# Report fields copied verbatim from the nvidia-smi XML (top level -> "about",
# per GPU -> that GPU's stats).
_NVIDIA_WHITELIST = (
    "driver_version",
    "cuda_version",
    "product_name",
    "uuid",
    "vbios_version",
    "fan_speed",
    "performance_state",
)

# Errors that mean "this optional reading is missing or malformed"; they select
# a fallback value instead of aborting the whole snapshot.
_BEST_EFFORT_ERRORS = (
    KeyError,
    IndexError,
    AttributeError,
    TypeError,
    ValueError,
    ZeroDivisionError,
)


def _placeholder_gpu(uuid):
    """Build a fresh stats dict with neutral values (1 for totals/limits/max clocks)."""
    return {
        "throttle": {"status": 0},
        "util": {"gpu_util": 0, "memory_util": 0},
        "fan_speed": 0,
        "vbios_version": "0.0.0",
        "product_name": "Unknown",
        "uuid": uuid,
        "memory_total": 1,
        "memory_used": 0,
        "temp": 0,
        "power": 0,
        "power_limit": 1,
        "gpu_clock": 0,
        "mem_clock": 0,
        "sm_clock": 0,
        "video_clock": 0,
        "mem_max_clock": 1,
        "gpu_max_clock": 1,
        "performance_state": "N/A",
    }


def _leading_number(text, default=0.0):
    """Parse the number before the first space ("45 %" -> 45.0); *default* if not numeric (e.g. "N/A")."""
    try:
        return float(text.split(" ")[0])
    except (AttributeError, ValueError):
        return default


def _first_float(info, keys, default):
    """Return ``float(info[key])`` for the first key in *keys* that parses, else *default*."""
    for key in keys:
        try:
            return float(info[key])
        except _BEST_EFFORT_ERRORS:
            continue
    return default


def _read_nvidia():
    """Parse the nvidia-smi XML report into ``(about, gpus)``."""
    log = xmltodict.parse(_run_nvidia_smi())["nvidia_smi_log"]
    about = {key: log[key] for key in log if key in _NVIDIA_WHITELIST}

    # xmltodict yields a dict for one <gpu> element and a list for several.
    gpu_nodes = log["gpu"]
    if not isinstance(gpu_nodes, list):
        gpu_nodes = [gpu_nodes]

    gpus = {node["uuid"]: _nvidia_gpu_stats(node) for node in gpu_nodes}
    return about, gpus


def _nvidia_gpu_stats(node):
    """Convert one parsed ``<gpu>`` element of the nvidia-smi report into a stats dict."""
    stats = {"throttle": {}, "util": {}}

    for key in node:
        if key in _NVIDIA_WHITELIST:
            stats[key] = node[key]

    for reason, state in node["clocks_event_reasons"].items():
        short_name = reason.replace("clocks_event_reason_", "")
        stats["throttle"][short_name] = 0 if "Not Active" in state else 1

    memory = node["fb_memory_usage"]
    power = node["gpu_power_readings"]
    clocks = node["clocks"]
    max_clocks = node["max_clocks"]

    # Readings come as "<number> <unit>". Non-numeric ones such as "N/A" fall
    # back to the placeholder values: 1 for totals/limits/max clocks, else 0.
    stats["memory_total"] = _leading_number(memory["total"], 1.0)
    stats["memory_used"] = _leading_number(memory["used"])
    stats["util"] = {
        name: _leading_number(value) for name, value in node["utilization"].items()
    }
    stats["temp"] = _leading_number(node["temperature"]["gpu_temp"])
    stats["power"] = _leading_number(power["instant_power_draw"])
    stats["power_limit"] = _leading_number(power["current_power_limit"], 1.0)
    stats["gpu_clock"] = _leading_number(clocks["graphics_clock"])
    stats["mem_clock"] = _leading_number(clocks["mem_clock"])
    stats["sm_clock"] = _leading_number(clocks["sm_clock"])
    stats["video_clock"] = _leading_number(clocks["video_clock"])
    stats["gpu_max_clock"] = _leading_number(max_clocks["graphics_clock"], 1.0)
    stats["mem_max_clock"] = _leading_number(max_clocks["mem_clock"], 1.0)
    stats["fan_speed"] = _leading_number(stats.get("fan_speed"))

    return stats


def _amd_clock_hz(text):
    """Convert a rocm-smi clock such as "(1234Mhz)" to Hz."""
    return float(text.replace("(", "").replace("Mhz)", "")) * 10**6


def _amd_range_max_hz(text):
    """Return the upper bound of a rocm-smi range such as "500Mhz - 2500Mhz" in Hz."""
    return float(text.replace("Mhz", "").split(" - ")[1]) * 10**6


def _read_amd():
    """Parse the rocm-smi JSON report into ``(about, gpus)``."""
    smi = _run_rocm_smi()
    memsize = _getAmdGpuMemSize()

    # The maximum fan speed does not depend on the card being processed, so
    # query `sensors` once instead of once per card.
    try:
        max_fan_rpm = _getAmdGpuMaxFanspeed()
    except Exception:
        max_fan_rpm = None  # makes every fan_speed fall back to 0.0

    about = {}
    gpus = {}
    for card, info in smi.items():
        if card == "system":
            about["driver_version"] = info["Driver version"]
        else:
            device_id = info["Device ID"]
            gpus[device_id] = _amd_gpu_stats(info, int(memsize[device_id]), max_fan_rpm)

    return about, gpus


def _amd_gpu_stats(info, vram_total, max_fan_rpm):
    """Convert one card entry of the rocm-smi report into a stats dict."""
    vram_percent = float(info["GPU Memory Allocated (VRAM%)"])
    mem_clock = _amd_clock_hz(info["mclk clock speed:"])

    stats = {}
    try:
        # Fan speed as a percentage of the maximum rpm.
        stats["fan_speed"] = 100 * float(info["current_fan_speed (rpm)"]) / max_fan_rpm
    except _BEST_EFFORT_ERRORS:
        stats["fan_speed"] = 0.0

    stats["vbios_version"] = info["VBIOS version"]
    stats["product_name"] = info["Device Name"]
    stats["uuid"] = info["Unique ID"]
    stats["memory_total"] = vram_total
    stats["memory_used"] = (vram_total / 100) * vram_percent
    stats["temp"] = float(info["Temperature (Sensor edge) (C)"])
    stats["power"] = _first_float(
        info,
        (
            "Average Graphics Package Power (W)",
            "Current Socket Graphics Package Power (W)",
        ),
        0.0,
    )
    stats["power_limit"] = _first_float(info, ("Max Graphics Package Power (W)",), 65.0)
    stats["gpu_clock"] = _amd_clock_hz(info["sclk clock speed:"])
    stats["mem_clock"] = mem_clock

    # Both clocks fall back together, even if only one of them is unreadable.
    try:
        sm_clock = float(info["current_dclk0 (MHz)"]) * 10**6
        video_clock = float(info["current_vclk0 (MHz)"]) * 10**6
    except _BEST_EFFORT_ERRORS:
        sm_clock = video_clock = 0.0
    stats["sm_clock"] = sm_clock
    stats["video_clock"] = video_clock

    stats["gpu_max_clock"] = _amd_range_max_hz(info["Valid sclk range"])
    try:
        stats["mem_max_clock"] = _amd_range_max_hz(info["Valid mclk range"])
    except _BEST_EFFORT_ERRORS:
        # No usable range reported: use the current memory clock as the maximum.
        stats["mem_max_clock"] = mem_clock

    stats["throttle"] = {"status": _first_float(info, ("throttle_status",), 1)}
    stats["util"] = {
        "gpu_util": float(info["GPU use (%)"]),
        "memory_util": vram_percent,
    }
    stats["performance_state"] = "N/A"

    return stats


def _read_intel():
    """Turn the ``_intel`` probe result into ``(about, gpus)``; any probe failure yields an 'Error' record."""
    try:
        intel = _intel()
    except Exception:
        intel = {'uuid': 'Error', 'max_freq': 0.0, 'min_freq': 0.0, 'cur_freq': 0.0, 'power': 0.0, 'engine_3d': 0.0, 'engine_video': 0.0, 'usage': 0.0, 'model': 'Error', 'driver': 'Error'}

    uuid = intel["uuid"]
    gpu = _placeholder_gpu(uuid)
    gpu["util"]["gpu_util"] = intel["usage"]
    gpu["product_name"] = intel["model"]
    gpu["power"] = intel["power"]
    gpu["power_limit"] = 15
    gpu["gpu_clock"] = intel["cur_freq"]
    gpu["gpu_max_clock"] = intel["max_freq"]

    return {"driver_version": intel["driver"]}, {uuid: gpu}
|
|
|
|
# Manual debugging entry point: running the module directly calls the
# `sensors`-based fan-speed probe once and discards its return value. The
# commented-out lines are alternative probes that print their result as JSON.
if __name__ == "__main__":
    #print(json.dumps(_run_rocm_smi(), indent=1))
    #print(json.dumps(_getAmdGpuMemSize(), indent=1))
    #print(json.dumps(readGpu(vendor="AMD"), indent=1))
    _getAmdGpuMaxFanspeed()
|