"""Collect GPU statistics for nVidia, AMD and Intel adapters.

The public entry point is :func:`readGpu`.  It shells out to the vendor
tools (``nvidia-smi``, ``rocm-smi``, ``intel_gpu_top``, ``lshw``,
``sensors``) and reads a few sysfs files, then returns one nested dict with
an ``"about"`` section and a ``"gpu"`` section keyed by a per-GPU id.
"""
import json
import os
import re
import subprocess

try:
    import xmltodict
except ImportError:
    # Only the nVidia code path parses XML; AMD/Intel hosts can work without
    # the package.  The nVidia path raises a clear ImportError when it is used.
    xmltodict = None


# Errors raised when a tool's report lacks a field or holds unparsable text.
_LOOKUP_ERRORS = (KeyError, IndexError, ValueError, TypeError, AttributeError)

_NVIDIA_WHITELIST = (
    "driver_version",
    "cuda_version",
    "product_name",
    "uuid",
    "vbios_version",
    "fan_speed",
    "performance_state",
)  # amd: valid?,not_valid,valid,valid,valid,valid,not_valid


def _run_nvidia_smi():
    """Return the text printed by ``nvidia-smi -q -x`` (an XML report)."""
    result = subprocess.run(['nvidia-smi', '-q', '-x'], stdout=subprocess.PIPE)
    return result.stdout.decode('utf-8')


def _run_rocm_smi():
    """Return the parsed JSON printed by ``rocm-smi -a --json``.

    ``rocm-smi`` is looked up on ``PATH`` first; when it cannot be started
    the fixed location ``/opt/rocm/bin/rocm-smi`` is tried instead.

    Raises:
        OSError: neither executable could be started.
        json.JSONDecodeError: the tool did not print valid JSON.
    """
    arguments = ['-a', '--json']
    try:
        result = subprocess.run(['rocm-smi'] + arguments, stdout=subprocess.PIPE)
    except OSError:
        result = subprocess.run(['/opt/rocm/bin/rocm-smi'] + arguments,
                                stdout=subprocess.PIPE)
    return json.loads(result.stdout.decode('utf-8'))


def _read_file_number(path):
    """Return the first line of the file at *path* converted to ``float``."""
    with open(path, "r") as f:
        number = f.read().split("\n")[0]
    return float(number)


def _intel():
    """Collect statistics for the Intel adapter exposed as ``card0``.

    Returns:
        dict with the keys ``uuid``, ``max_freq``, ``min_freq``, ``cur_freq``,
        ``power``, ``engine_3d``, ``engine_video``, ``usage``, ``model`` and
        ``driver``.  Frequencies are read from the ``gt_*_freq_mhz`` files
        and multiplied by 10**6.

    Raises:
        Whatever the underlying file reads, subprocess calls or JSON/key
        lookups raise; :func:`readGpu` turns any failure into placeholders.
    """
    from time import sleep

    stats = {}
    with open("/sys/class/drm/card0/device/device", "r") as f:
        stats["uuid"] = f.read().split("\n")[0]

    result = subprocess.run(['lshw', '-c', 'display', '-json'], stdout=subprocess.PIPE)
    displays = json.loads(result.stdout.decode('utf-8'))

    # intel_gpu_top keeps running until stopped, so sample briefly and kill it.
    sampler = subprocess.Popen(['intel_gpu_top', '-J'], stdout=subprocess.PIPE)
    sleep(0.2)
    sampler.kill()
    output, _ = sampler.communicate()
    # NOTE(review): the first three characters are dropped before parsing,
    # presumably to strip a leading wrapper from the JSON stream - confirm
    # against the installed intel_gpu_top version.
    sample = json.loads(output.decode('utf-8')[3:])

    stats["max_freq"] = _read_file_number("/sys/class/drm/card0/gt_max_freq_mhz") * 10**6
    stats["min_freq"] = _read_file_number("/sys/class/drm/card0/gt_min_freq_mhz") * 10**6
    stats["cur_freq"] = _read_file_number("/sys/class/drm/card0/gt_cur_freq_mhz") * 10**6
    stats["power"] = sample["power"]["GPU"]
    stats["engine_3d"] = sample["engines"]["Render/3D"]["busy"]
    stats["engine_video"] = sample["engines"]["Video"]["busy"]
    stats["usage"] = (stats["engine_3d"] + stats["engine_video"]) / 2
    stats["model"] = displays[0]["product"]
    stats["driver"] = displays[0]["configuration"]["driver"]
    return stats


def _getAmdGpuMemSize():
    """Map each DRM card's device id to its ``mem_info_vram_total`` text.

    Only ``/sys/class/drm/cardN`` entries are inspected.  The full name is
    matched so connector entries such as ``card0-DP-1`` are skipped and
    ``card10`` is not mistaken for ``card1``.  Cards whose files cannot be
    read are left out.

    Returns:
        dict mapping the stripped content of ``device/device`` to the
        stripped content of ``device/mem_info_vram_total`` (both ``str``).
        Cards reporting the same device id overwrite each other.
    """
    drm_root = "/sys/class/drm/"
    cards = {}
    for entry in os.listdir(drm_root):
        if re.fullmatch(r"card[0-9]+", entry) is None:
            continue
        device_dir = drm_root + entry + "/device/"
        try:
            with open(device_dir + "mem_info_vram_total", "r") as f:
                mem = f.read().strip()
            with open(device_dir + "device", "r") as f:
                device = f.read().strip()
        except OSError:
            # Card without these sysfs files (or unreadable ones): skip it.
            continue
        cards[device] = mem
    return cards


def _getAmdGpuMaxFanspeed():
    """Return the first fan ``max`` reading ``sensors -j -A`` lists for amdgpu.

    Returns ``1`` when no such reading exists, so callers can divide by the
    result without a special case.
    """
    result = subprocess.run(['sensors', '-j', '-A'], stdout=subprocess.PIPE)
    devices = json.loads(result.stdout.decode('utf-8'))
    for device in devices:
        if "amdgpu" not in device:
            continue
        for entry in devices[device]:
            if "fan" not in entry:
                continue
            for rpm in devices[device][entry]:
                if "max" in rpm:
                    return devices[device][entry][rpm]
    return 1


def _leading_float(text):
    """Convert the part of *text* before the first space to ``float``."""
    return float(text.split(" ")[0])


def _first_float(mapping, keys, default):
    """Return ``float(mapping[key])`` for the first usable key, else *default*."""
    for key in keys:
        try:
            return float(mapping[key])
        except (KeyError, ValueError, TypeError):
            continue
    return default


def _amd_current_clock(text):
    """Parse a rocm-smi clock string such as ``"(500Mhz)"`` and scale by 10**6."""
    return float(text.replace("(", "").replace("Mhz)", "")) * 10**6


def _amd_range_maximum(text):
    """Parse the upper bound of a ``"<low>Mhz - <high>Mhz"`` range, scaled by 10**6."""
    return float(text.replace("Mhz", "").split(" - ")[1]) * 10**6


def _nvidia_gpu_stats(gpu):
    """Build the statistics record for one ``<gpu>`` element of the report."""
    stats = {"throttle": {}, "util": {}}
    for key in gpu:
        if key in _NVIDIA_WHITELIST:
            stats[key] = gpu[key]

    for reason, state in gpu["clocks_event_reasons"].items():
        name = reason.replace("clocks_event_reason_", "")
        stats["throttle"][name] = 0 if "Not Active" in state else 1

    memory = gpu["fb_memory_usage"]
    power = gpu["gpu_power_readings"]
    clocks = gpu["clocks"]
    max_clocks = gpu["max_clocks"]

    stats["memory_total"] = _leading_float(memory["total"])
    stats["memory_used"] = _leading_float(memory["used"])
    stats["util"] = {}
    stats["temp"] = _leading_float(gpu["temperature"]["gpu_temp"])
    stats["power"] = _leading_float(power["instant_power_draw"])
    stats["power_limit"] = _leading_float(power["current_power_limit"])
    stats["gpu_clock"] = _leading_float(clocks["graphics_clock"])
    stats["mem_clock"] = _leading_float(clocks["mem_clock"])
    stats["sm_clock"] = _leading_float(clocks["sm_clock"])
    stats["video_clock"] = _leading_float(clocks["video_clock"])
    stats["gpu_max_clock"] = _leading_float(max_clocks["graphics_clock"])
    stats["mem_max_clock"] = _leading_float(max_clocks["mem_clock"])

    try:
        stats["fan_speed"] = _leading_float(stats["fan_speed"])
    except (KeyError, ValueError, AttributeError):
        # Missing or non-numeric fan reading: report 0.0 like the AMD branch.
        stats["fan_speed"] = 0.0

    for name, value in gpu["utilization"].items():
        stats["util"][name] = _leading_float(value)
    return stats


def _read_nvidia(data):
    """Fill *data* from the ``nvidia-smi`` XML report (one entry per GPU)."""
    if xmltodict is None:
        raise ImportError("xmltodict is required to read nVidia GPU statistics")

    log = xmltodict.parse(_run_nvidia_smi())["nvidia_smi_log"]
    for key in log:
        if key in _NVIDIA_WHITELIST:
            data["about"][key] = log[key]

    # xmltodict yields a dict for a single <gpu> element and a list for several.
    gpus = log["gpu"]
    if not isinstance(gpus, list):
        gpus = [gpus]
    for gpu in gpus:
        data["gpu"][gpu["uuid"]] = _nvidia_gpu_stats(gpu)


def _amd_gpu_stats(info, vram_total, max_fan_rpm):
    """Build the statistics record for one card of the ``rocm-smi`` report.

    Args:
        info: the per-card dict from the report.
        vram_total: total VRAM of the card, as an ``int``.
        max_fan_rpm: maximum fan speed used to scale the fan reading, or
            ``None`` when it could not be determined.
    """
    stats = {}
    try:
        stats["fan_speed"] = 100 * float(info["current_fan_speed (rpm)"]) / max_fan_rpm
    except _LOOKUP_ERRORS + (ZeroDivisionError,):
        stats["fan_speed"] = 0.0

    vram_percent = float(info["GPU Memory Allocated (VRAM%)"])
    stats["vbios_version"] = info["VBIOS version"]
    stats["product_name"] = info["Device Name"]
    stats["uuid"] = info["Unique ID"]
    stats["memory_total"] = vram_total
    stats["memory_used"] = (vram_total / 100) * vram_percent
    stats["temp"] = float(info["Temperature (Sensor edge) (C)"])
    # The power field name differs between reports; try both before giving up.
    stats["power"] = _first_float(
        info,
        ("Average Graphics Package Power (W)",
         "Current Socket Graphics Package Power (W)"),
        0.0,
    )
    stats["power_limit"] = _first_float(info, ("Max Graphics Package Power (W)",), 65.0)
    stats["gpu_clock"] = _amd_current_clock(info["sclk clock speed:"])
    stats["mem_clock"] = _amd_current_clock(info["mclk clock speed:"])

    # Both values are reported together or both fall back to 0.0.
    try:
        sm_clock = float(info["current_dclk0 (MHz)"]) * 10**6
        video_clock = float(info["current_vclk0 (MHz)"]) * 10**6
    except (KeyError, ValueError, TypeError):
        sm_clock = 0.0
        video_clock = 0.0
    stats["sm_clock"] = sm_clock
    stats["video_clock"] = video_clock

    stats["gpu_max_clock"] = _amd_range_maximum(info["Valid sclk range"])
    try:
        stats["mem_max_clock"] = _amd_range_maximum(info["Valid mclk range"])
    except _LOOKUP_ERRORS:
        # No usable range: fall back to the current memory clock.
        stats["mem_max_clock"] = _amd_current_clock(info["mclk clock speed:"])

    util = {
        "gpu_util": float(info["GPU use (%)"]),
        "memory_util": vram_percent,
    }
    stats["throttle"] = {"status": _first_float(info, ("throttle_status",), 1)}
    stats["util"] = util
    stats["performance_state"] = "N/A"
    return stats


def _read_amd(data):
    """Fill *data* from the ``rocm-smi`` report, one entry per ``Device ID``."""
    smi = _run_rocm_smi()
    memsize = _getAmdGpuMemSize()

    # Query lm-sensors once instead of once per card.  Any failure is a
    # deliberate best-effort miss that makes every fan speed read 0.0.
    try:
        max_fan_rpm = _getAmdGpuMaxFanspeed()
    except Exception:
        max_fan_rpm = None

    for card, info in smi.items():
        if card == "system":
            data["about"]["driver_version"] = info["Driver version"]
            continue
        device_id = info["Device ID"]
        # Register the card before parsing, as the record is keyed by device id.
        data["gpu"][device_id] = {}
        data["gpu"][device_id] = _amd_gpu_stats(
            info, int(memsize[device_id]), max_fan_rpm)


def _placeholder_gpu(uuid, product_name="Unknown", usage=0, power=0,
                     power_limit=1, gpu_clock=0, gpu_max_clock=1):
    """Return a GPU record with neutral values for fields that are not measured."""
    return {
        "throttle": {"status": 0},
        "util": {"gpu_util": usage, "memory_util": 0},
        "fan_speed": 0,
        "vbios_version": "0.0.0",
        "product_name": product_name,
        "uuid": uuid,
        "memory_total": 1,
        "memory_used": 0,
        "temp": 0,
        "power": power,
        "power_limit": power_limit,
        "gpu_clock": gpu_clock,
        "mem_clock": 0,
        "sm_clock": 0,
        "video_clock": 0,
        "mem_max_clock": 1,
        "gpu_max_clock": gpu_max_clock,
        "performance_state": "N/A",
    }


def _read_intel(data):
    """Fill *data* with the Intel statistics, or placeholders on any failure."""
    try:
        intel = _intel()
    except Exception:
        # Deliberate best-effort: missing tools, missing sysfs files or
        # unparsable output all yield the same placeholder record.
        intel = {'uuid': 'Error', 'max_freq': 0.0, 'min_freq': 0.0,
                 'cur_freq': 0.0, 'power': 0.0, 'engine_3d': 0.0,
                 'engine_video': 0.0, 'usage': 0.0, 'model': 'Error',
                 'driver': 'Error'}
    data["about"]["driver_version"] = intel["driver"]
    data["gpu"][intel["uuid"]] = _placeholder_gpu(
        intel["uuid"],
        product_name=intel["model"],
        usage=intel["usage"],
        power=intel["power"],
        power_limit=15,
        gpu_clock=intel["cur_freq"],
        gpu_max_clock=intel["max_freq"],
    )


def readGpu(vendor="nVidia"):
    """Return GPU statistics for the given *vendor*.

    Args:
        vendor: ``"nVidia"``, ``"AMD"`` or ``"Intel"``.  Any other value
            yields a single placeholder GPU with the id ``"unsupported"``.

    Returns:
        ``{"about": {...}, "gpu": {gpu_id: {...}}}``.  Each GPU record holds
        ``throttle``, ``util``, ``fan_speed``, ``vbios_version``,
        ``product_name``, ``uuid``, ``memory_total``, ``memory_used``,
        ``temp``, ``power``, ``power_limit``, ``gpu_clock``, ``mem_clock``,
        ``sm_clock``, ``video_clock``, ``gpu_max_clock``, ``mem_max_clock``
        and ``performance_state``.

        NOTE(review): the nVidia branch stores the numbers as printed by
        ``nvidia-smi`` while the AMD and Intel branches multiply clocks by
        10**6, so units appear to differ between vendors - confirm with the
        consumers before normalising.

    Raises:
        ImportError: ``vendor`` is ``"nVidia"`` and ``xmltodict`` is missing.
        Exception: the nVidia and AMD branches propagate errors from the
            vendor tool (not installed, unexpected report layout).
    """
    data = {"about": {}, "gpu": {}}
    if vendor == "nVidia":
        _read_nvidia(data)
    elif vendor == "AMD":
        _read_amd(data)
    elif vendor == "Intel":
        _read_intel(data)
    else:
        data["about"]["driver_version"] = "Unknown"
        data["gpu"]["unsupported"] = _placeholder_gpu("unsupported")
    return data


if __name__ == "__main__":
    #print(json.dumps(_run_rocm_smi(), indent=1))
    #print(json.dumps(_getAmdGpuMemSize(), indent=1))
    #print(json.dumps(readGpu(vendor="AMD"), indent=1))
    _getAmdGpuMaxFanspeed()