initial commit
This commit is contained in:
61
RAS.py
Normal file
61
RAS.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import subprocess
|
||||
|
||||
def _ras_driver_name(line):
    """Extract the driver/section name from a header line of the RAS summary.

    Words are collected until one containing "errors" is met (or, for lines
    that do not contain "No", the word "events"). In lines containing "No",
    the word "No" itself is skipped. If no terminating word is found, the
    collected words are returned with a trailing space, as before.
    """
    skip_no = "No" in line
    collected = []
    for word in line.split(' '):
        if skip_no and word == "No":
            continue
        if "errors" in word or (not skip_no and word == "events"):
            return " ".join(collected)
        collected.append(word)
    return "".join(word + " " for word in collected)


def convertToDict(RAS_dump):
    """Parse the text output of ``ras-mc-ctl --summary`` into a nested dict.

    Lines without a tab start a new section (driver); lines containing a tab
    are error records belonging to the most recent section.

    Args:
        RAS_dump: the complete summary text.

    Returns:
        ``{driver: {"total_errors": int, label: {"corrected"|"uncorrected": int}}}``

    Raises:
        KeyError: if an error record appears before any section header.
        IndexError, ValueError: if a record matches neither supported layout.
    """
    ras_status = {}
    current_driver = ""

    for line in RAS_dump.split('\n'):
        if line == '':
            continue

        if '\t' not in line:
            current_driver = _ras_driver_name(line)
            ras_status[current_driver] = {"total_errors": 0}
            continue

        tokens = line.replace('\t', '').split(' ')
        try:
            # Primary layout: "<count> ... : <label>"
            label = line.split(': ')[1]
            count = int(tokens[0])
            kind = "corrected" if "Corrected" in line else "uncorrected"
        except (IndexError, ValueError):
            # Fallback layout: "<label> <word> <count> ..." - always counted
            # as uncorrected. Parsing happens before anything is stored, so a
            # failed primary parse no longer leaves an empty entry behind.
            label = tokens[0]
            count = int(tokens[2])
            kind = "uncorrected"

        driver_entry = ras_status[current_driver]
        driver_entry[label] = {kind: count}
        driver_entry["total_errors"] += count

    return ras_status
|
||||
|
||||
def readRAS():
    """Run ``ras-mc-ctl --summary`` and return its output parsed by convertToDict."""
    completed = subprocess.run(['ras-mc-ctl', '--summary'], stdout=subprocess.PIPE)
    summary_text = completed.stdout.decode('utf-8')
    return convertToDict(summary_text)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the parsed RAS summary when run as a script.
    print(readRAS())
|
||||
10
README.md
10
README.md
@@ -1,3 +1,9 @@
|
||||
# monitoring
|
||||
# Hardware monitoring suite
|
||||
|
||||
Monitoring suite for system and other stuff
|
||||
Set of python scripts for asserting health and stats of a linux system
|
||||
|
||||
# Configuration
|
||||
|
||||
See config-example.json
|
||||
|
||||
test
|
||||
|
||||
65
SMART.py
Normal file
65
SMART.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
def getDevices():
    """Return the decoded JSON document printed by ``smartctl --scan-open -j``."""
    scan = subprocess.run(['smartctl', '--scan-open', '-j'], stdout=subprocess.PIPE)
    return json.loads(scan.stdout.decode('utf-8'))
|
||||
|
||||
def getAttributes(device):
    """Collect SMART attributes for one device via ``smartctl <name> -a -j``.

    Args:
        device: a device entry with at least the keys "name" and "protocol"
            (as found in the "devices" list returned by getDevices()).

    Returns:
        A dict that always contains "data". For "NVMe" and "ATA" protocols it
        also contains "sector_size", "serial_number", "type" and
        "bytes_written" (-1 when it cannot be determined). For any other
        protocol only the empty "data" dict is returned.
    """
    attributes = {"data": {}}

    result = subprocess.run(['smartctl', device["name"], '-a', '-j'], stdout=subprocess.PIPE)
    data = json.loads(result.stdout.decode('utf-8'))

    if device["protocol"] == "NVMe":
        attributes["sector_size"] = data["nvme_namespaces"][0]["formatted_lba_size"]
        attributes["serial_number"] = data["serial_number"]
        attributes["type"] = "NVME"
        attributes["data"].update(data["nvme_smart_health_information_log"])
        # NVMe reports "data units" in thousands of 512-byte units regardless
        # of the formatted LBA size, so the LBA size must not be used here.
        attributes["bytes_written"] = attributes["data"]["data_units_written"] * 512 * 1000
    elif device["protocol"] == "ATA":
        attributes["sector_size"] = data["physical_block_size"]
        attributes["serial_number"] = data["serial_number"]
        attributes["type"] = "ATA"
        for attribute in data["ata_smart_attributes"]["table"]:
            entry = {}
            try:
                # The raw string may carry extra text after the number.
                entry["raw"] = int(attribute["raw"]["string"].split(' ')[0])
            except (KeyError, ValueError, AttributeError, TypeError):
                entry["raw"] = -1
            entry["id"] = int(attribute["id"])
            entry["value"] = int(attribute["value"])
            entry["worst"] = int(attribute["worst"])
            entry["thr"] = int(attribute["thresh"])
            attributes["data"][attribute["name"]] = entry

        lbas_written = attributes["data"].get("Total_LBAs_Written", {}).get("raw", -1)
        # A negative raw value is the "unparseable" sentinel set above; do not
        # scale it into a bogus negative byte count.
        if lbas_written >= 0:
            attributes["bytes_written"] = lbas_written * attributes["sector_size"]
        else:
            attributes["bytes_written"] = -1

    return attributes
|
||||
|
||||
def getAllDeviceAttributes():
    """Map every scanned device name to the result of getAttributes() for it."""
    scanned = getDevices()
    return {entry["name"]: getAttributes(entry) for entry in scanned["devices"]}
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: dump everything, then list each device's serial number.
    smart = getAllDeviceAttributes()

    print(smart)

    for device in smart:
        print(smart[device]["serial_number"])
|
||||
11
config-example.json
Normal file
11
config-example.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"modules": ["sysinfo","docker","procMon"],
|
||||
"volumes": {
|
||||
"zfs": [],
|
||||
"non_zfs": ["sda"]
|
||||
},
|
||||
"network": {
|
||||
"nics": ["ens18"]
|
||||
},
|
||||
"gpu": "nVidia"
|
||||
}
|
||||
37
cpuinfo.py
Normal file
37
cpuinfo.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
def getFrequency():
    """Return the current frequency of every CPU listed in ``/proc/cpuinfo``.

    Returns:
        A dict mapping a running index ("0", "1", ...) to the float value
        found on each "cpu MHz" line, in file order. Empty when the file
        cannot be read or has no such lines.
    """
    freq = {}

    # Read the file directly instead of spawning ``cat``.
    try:
        with open('/proc/cpuinfo', 'r', encoding='utf-8') as f:
            lines = f.read().split("\n")
    except OSError:
        return freq

    index = 0
    for line in lines:
        if "cpu MHz" in line:
            freq[str(index)] = float(line.split(":")[-1])
            index += 1
    return freq
|
||||
|
||||
def getCpuInfo():
    """Summarise ``lscpu -J`` output plus the per-CPU frequencies.

    Returns a dict with "vendor", "model" and "cpus" (each only when a
    matching field is present in the lscpu output) and "frequency" from
    getFrequency().
    """
    raw_output = subprocess.run(['lscpu', '-J'], stdout=subprocess.PIPE).stdout
    report = json.loads(raw_output.decode('utf-8'))

    cpuinfo = {}
    for entry in report["lscpu"]:
        field = entry['field']
        if 'Vendor ID' in field:
            cpuinfo["vendor"] = entry["data"]
        if 'Model name' in field:
            cpuinfo["model"] = entry["data"]
        if field == 'CPU(s):':
            cpuinfo["cpus"] = entry["data"]

    cpuinfo["frequency"] = getFrequency()
    return cpuinfo
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the CPU summary when run as a script.
    # print(getFrequency())
    print(getCpuInfo())
|
||||
138
docker.py
Normal file
138
docker.py
Normal file
@@ -0,0 +1,138 @@
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
def health():
    """Report run state and health of every container known to Docker.

    Runs ``docker ps -a --format json --no-trunc`` and parses each output
    line as one JSON object.

    Returns:
        ``{container_name: {"status": 0|1, "health": code}}`` where status is
        1 when State is "running", and health is derived from the Status
        text: 3 = unhealthy, 2 = Restarting, 0 = healthy, 1 = starting,
        -1 = none of these.
    """
    result = subprocess.run(['docker', 'ps', '-a', '--format', 'json', '--no-trunc'], stdout=subprocess.PIPE)

    ret_dict = {}

    # Decode the bytes properly instead of massaging their repr().
    for line in result.stdout.decode('utf-8', errors='replace').splitlines():
        line = line.strip()
        if not line:
            continue
        data = json.loads(line)

        status_text = data["Status"]
        # "unhealthy" must be tested before "healthy", which it contains.
        if "unhealthy" in status_text:
            health_code = 3
        elif "Restarting" in status_text:
            health_code = 2
        elif "healthy" in status_text:
            health_code = 0
        elif "starting" in status_text:
            health_code = 1
        else:
            health_code = -1

        ret_dict[data["Names"]] = {
            "status": 1 if data["State"] == "running" else 0,
            "health": health_code,
        }

    return ret_dict
|
||||
|
||||
def _sizeConv(value):
    """Convert a human-readable size string such as "1.5GB" to a byte count.

    All digits and dots in *value* are concatenated to form the number; the
    unit is detected case-insensitively so that Docker's "kB" spelling is
    recognised as well as "KB".

    Args:
        value: size string, e.g. "12kB", "3.2MB", "0B".

    Returns:
        The size in bytes as a float (decimal multipliers). Strings without
        any digit (e.g. "N/A") yield 0.0 instead of raising.
    """
    digits = "".join(ch for ch in value if ch.isdigit() or ch == '.')
    if not any(ch.isdigit() for ch in digits):
        return 0.0
    number = float(digits)

    unit_text = value.upper()
    for suffix, factor in (("TB", 10**12), ("GB", 10**9), ("MB", 10**6), ("KB", 10**3)):
        if suffix in unit_text:
            return number * factor
    return number
|
||||
|
||||
def getSize():
    """Summarise ``docker system df --format json`` per resource type.

    Each output line is parsed as one JSON object; the result maps its
    "Type" to used/reclaimable bytes and the total/active counts.
    """
    proc = subprocess.run(['docker', 'system', 'df', '--format', 'json'], stdout=subprocess.PIPE)
    # Drop the final character (the trailing newline) before splitting.
    rows = proc.stdout.decode('utf-8')[:-1].split('\n')

    size = {}
    for row in rows:
        record = json.loads(row)
        kind = record["Type"]
        summary = {}
        size[kind] = summary
        summary["used"] = _sizeConv(record["Size"])
        # Only the part before the first space is converted.
        summary["reclaimable"] = _sizeConv(record["Reclaimable"].split(" ")[0])
        summary["count_total"] = record["TotalCount"]
        summary["count_active"] = record["Active"]

    return size
|
||||
|
||||
def getInfo():
    """Collect the Docker server version and detailed disk-usage information.

    Runs ``docker system info --format json`` for the version and
    ``docker system df -v --format json`` for images, containers, volumes
    and build cache.

    Returns:
        A dict with the keys "version", "images", "containers", "volumes"
        and "buildcache"; the last four map an ID (or volume name) to a dict
        of selected fields. Sizes passed through _sizeConv are in bytes.
    """
    docker = {}

    result = subprocess.run(['docker', 'system', 'info', '--format', 'json'], stdout=subprocess.PIPE)
    info = json.loads(result.stdout.decode('utf-8')[:-1])

    docker["version"] = info["ServerVersion"]

    result = subprocess.run(['docker', 'system', 'df', '-v', '--format', 'json'], stdout=subprocess.PIPE)
    info = json.loads(result.stdout.decode('utf-8'))

    docker["images"] = {}
    docker["containers"] = {}
    docker["volumes"] = {}
    docker["buildcache"] = {}

    for image in info["Images"]:
        docker["images"][image["ID"]] = {
            "containers": image["Containers"],
            "created": image["CreatedSince"],
            "repository": image["Repository"],
            "size": _sizeConv(image["Size"]),
            # Left as reported (not converted to bytes).
            "unique_size": image["UniqueSize"],
        }

    for container in info["Containers"]:
        docker["containers"][container["ID"]] = {
            "created": container["CreatedAt"],
            "image": container["Image"],
            "volumes": container["LocalVolumes"],
            "mounts": container["Mounts"],
            # "Names" and "Networks" are optional; default to "" when absent.
            "name": container.get("Names", ""),
            "networks": container.get("Networks", ""),
            "runtime": container["RunningFor"],
            "state": container["State"],
            "size": _sizeConv(container["Size"]),
            "status": container["Status"],
        }

    for volume in info["Volumes"]:
        docker["volumes"][volume["Name"]] = {
            "driver": volume["Driver"],
            "links": volume["Links"],
            "mountpoint": volume["Mountpoint"],
            "size": _sizeConv(volume["Size"]),
        }

    for build in info["BuildCache"]:
        docker["buildcache"][build["ID"]] = {
            "type": build["CacheType"],
            "created": build["CreatedSince"],
            "in_use": build["InUse"],
            "last_use": build["LastUsedSince"],
            "shared": build["Shared"],
            "size": _sizeConv(build["Size"]),
            "use_count": build["UsageCount"],
        }

    return docker
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the disk-usage summary and the detailed info as JSON.
    print(json.dumps(getSize()))
    print(json.dumps(getInfo()))

    #print(health())
|
||||
266
gpu.py
Normal file
266
gpu.py
Normal file
@@ -0,0 +1,266 @@
|
||||
import xmltodict
|
||||
import json
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
|
||||
def _run_nvidia_smi():
    """Return the XML text printed by ``nvidia-smi -q -x``."""
    proc = subprocess.run(['nvidia-smi', '-q', '-x'], stdout=subprocess.PIPE)
    return proc.stdout.decode('utf-8')
|
||||
|
||||
def _run_rocm_smi():
    """Return the decoded JSON output of ``rocm-smi -a --json``.

    ``rocm-smi`` is first looked up on PATH; if it cannot be started, the
    absolute path ``/opt/rocm/bin/rocm-smi`` is tried instead.

    Raises:
        OSError: if the fallback binary cannot be started either.
        ValueError: if the output is not valid JSON.
    """
    try:
        result = subprocess.run(['rocm-smi', '-a', '--json'], stdout=subprocess.PIPE)
    except OSError:
        # Not on PATH (or not executable): fall back to the fixed location.
        result = subprocess.run(['/opt/rocm/bin/rocm-smi', '-a', '--json'], stdout=subprocess.PIPE)

    return json.loads(result.stdout.decode('utf-8'))
|
||||
|
||||
def _read_file_number(path):
    """Return the first line of the file at *path* converted to float."""
    with open(path, "r") as handle:
        first_line = handle.read().split("\n")[0]
    return float(first_line)
|
||||
|
||||
def _intel():
    """Gather statistics for the GPU exposed as ``/sys/class/drm/card0``.

    Combines the sysfs device id and frequency files, ``lshw -c display
    -json`` and a short sample of ``intel_gpu_top -J`` into one dict with
    the keys uuid, max_freq, min_freq, cur_freq, power, engine_3d,
    engine_video, usage, model and driver.
    """
    from time import sleep

    drm_card = "/sys/class/drm/card0"
    stats = {}

    with open(drm_card + "/device/device", "r") as id_file:
        stats["uuid"] = id_file.read().split("\n")[0]

    lshw_proc = subprocess.run(['lshw', '-c', 'display', '-json'], stdout=subprocess.PIPE)
    displays = json.loads(lshw_proc.stdout.decode('utf-8'))

    # intel_gpu_top is started, left running briefly, then killed; whatever
    # it wrote to stdout by then is collected.
    top_proc = subprocess.Popen(['intel_gpu_top', '-J'], stdout=subprocess.PIPE)
    sleep(0.2)
    top_proc.kill()
    top_out, _unused_err = top_proc.communicate()

    # The first three characters of the output are skipped before parsing.
    sample = json.loads(top_out.decode('utf-8')[3:])

    # Frequency files are scaled by 10**6.
    stats["max_freq"] = _read_file_number(drm_card + "/gt_max_freq_mhz") * 10**6
    stats["min_freq"] = _read_file_number(drm_card + "/gt_min_freq_mhz") * 10**6
    stats["cur_freq"] = _read_file_number(drm_card + "/gt_cur_freq_mhz") * 10**6
    stats["power"] = sample["power"]["GPU"]
    engines = sample["engines"]
    stats["engine_3d"] = engines["Render/3D"]["busy"]
    stats["engine_video"] = engines["Video"]["busy"]
    # Overall usage is the mean of the two engine figures.
    stats["usage"] = (stats["engine_3d"] + stats["engine_video"]) / 2
    first_display = displays[0]
    stats["model"] = first_display["product"]
    stats["driver"] = displays[0]["configuration"]["driver"]

    return stats
|
||||
|
||||
def _getAmdGpuMemSize():
    """Read the total VRAM of every DRM card that exposes it.

    Scans ``/sys/class/drm`` for ``cardN`` entries and reads
    ``device/mem_info_vram_total`` and ``device/device`` for each.

    Returns:
        A dict mapping the content of ``device/device`` to the content of
        ``mem_info_vram_total`` (both as stripped strings). Cards lacking
        either file are skipped; an unreadable DRM directory yields ``{}``.
    """
    drm_root = "/sys/class/drm/"
    try:
        entries = os.listdir(drm_root)
    except OSError:
        return {}

    cards = {}
    for entry in entries:
        # Match whole "cardN" names only: connector entries such as
        # "card0-DP-1" are skipped and "card10" is not truncated to "card1".
        if re.fullmatch(r"card[0-9]+", entry) is None:
            continue
        try:
            with open(drm_root + entry + "/device/mem_info_vram_total", "r") as f:
                mem = f.read().strip()
            with open(drm_root + entry + "/device/device", "r") as f:
                device = f.read().strip()
        except OSError:
            # Card does not expose these files.
            continue
        cards[device] = mem

    return cards
|
||||
|
||||
def _getAmdGpuMaxFanspeed():
    """Return the first "max" fan reading of an amdgpu chip from ``sensors -j -A``.

    Falls back to 1 when no such reading exists.
    """
    proc = subprocess.run(['sensors', '-j', '-A'], stdout=subprocess.PIPE)
    report = json.loads(proc.stdout.decode('utf-8'))

    for chip in report:
        if "amdgpu" not in chip:
            continue
        readings = report[chip]
        for label in readings:
            if "fan" not in label:
                continue
            values = readings[label]
            for key in values:
                if "max" in key:
                    return values[key]
    return 1
|
||||
|
||||
def readGpu(vendor="nVidia"):
    """Collect GPU information for the given vendor into a common layout.

    Args:
        vendor: "nVidia", "AMD" or "Intel"; any other value produces a
            placeholder entry keyed "unsupported".

    Returns:
        ``{"about": {...}, "gpu": {key: {...}}}``. Each GPU entry carries
        "throttle", "util", fan/memory/temperature/power/clock fields and
        "performance_state". The key is the uuid for nVidia and Intel and
        the "Device ID" reported by rocm-smi for AMD.
    """
    data = {}
    data["about"] = {}
    data["gpu"] = {}

    if vendor == "nVidia":
        # Keys copied verbatim from the nvidia-smi log when present.
        whitelist = ["driver_version","cuda_version","product_name","uuid","vbios_version","fan_speed","performance_state"] #amd: valid?,not_valid,valid,valid,valid,valid,not_valid

        smi = xmltodict.parse(_run_nvidia_smi())
        # NOTE(review): indexes "gpu" as a single mapping - presumably only
        # one GPU is expected in the log; confirm for multi-GPU systems.
        uuid = smi["nvidia_smi_log"]["gpu"]["uuid"]

        data["gpu"][uuid] = {}
        data["gpu"][uuid]["throttle"] = {}
        data["gpu"][uuid]["util"] = {}

        # Top-level whitelisted keys go to "about".
        for i in smi["nvidia_smi_log"]:
            if i in whitelist:
                data["about"][i] = smi["nvidia_smi_log"][i]

        # Per-GPU whitelisted keys go to the GPU entry.
        for i in smi["nvidia_smi_log"]["gpu"]:
            if i in whitelist:
                data["gpu"][uuid][i] = smi["nvidia_smi_log"]["gpu"][i]

        # Each throttle reason becomes 0 when its text contains "Not Active",
        # otherwise 1; the "clocks_event_reason_" prefix is dropped.
        for i in smi["nvidia_smi_log"]["gpu"]["clocks_event_reasons"]:
            active = 1
            if "Not Active" in smi["nvidia_smi_log"]["gpu"]["clocks_event_reasons"][i]:
                active = 0
            data["gpu"][uuid]["throttle"][i.replace("clocks_event_reason_", "")] = active

        # Numeric fields: the text before the first space is converted to float.
        data["gpu"][uuid]["memory_total"] = float(smi["nvidia_smi_log"]["gpu"]["fb_memory_usage"]["total"].split(" ")[0])
        data["gpu"][uuid]["memory_used"] = float(smi["nvidia_smi_log"]["gpu"]["fb_memory_usage"]["used"].split(" ")[0])
        data["gpu"][uuid]["util"] = smi["nvidia_smi_log"]["gpu"]["utilization"]
        data["gpu"][uuid]["temp"] = float(smi["nvidia_smi_log"]["gpu"]["temperature"]["gpu_temp"].split(" ")[0])
        data["gpu"][uuid]["power"] = float(smi["nvidia_smi_log"]["gpu"]["gpu_power_readings"]["instant_power_draw"].split(" ")[0])
        data["gpu"][uuid]["power_limit"] = float(smi["nvidia_smi_log"]["gpu"]["gpu_power_readings"]["current_power_limit"].split(" ")[0])
        data["gpu"][uuid]["gpu_clock"] = float(smi["nvidia_smi_log"]["gpu"]["clocks"]["graphics_clock"].split(" ")[0])
        data["gpu"][uuid]["mem_clock"] = float(smi["nvidia_smi_log"]["gpu"]["clocks"]["mem_clock"].split(" ")[0])
        data["gpu"][uuid]["sm_clock"] = float(smi["nvidia_smi_log"]["gpu"]["clocks"]["sm_clock"].split(" ")[0])
        data["gpu"][uuid]["video_clock"] = float(smi["nvidia_smi_log"]["gpu"]["clocks"]["video_clock"].split(" ")[0])
        data["gpu"][uuid]["gpu_max_clock"] = float(smi["nvidia_smi_log"]["gpu"]["max_clocks"]["graphics_clock"].split(" ")[0])
        data["gpu"][uuid]["mem_max_clock"] = float(smi["nvidia_smi_log"]["gpu"]["max_clocks"]["mem_clock"].split(" ")[0])

        # fan_speed was copied as text by the whitelist loop above; it raises
        # KeyError here if the log had no "fan_speed" key.
        data["gpu"][uuid]["fan_speed"] = float(data["gpu"][uuid]["fan_speed"].split(" ")[0])
        # Utilisation values are converted to float in place.
        for i in data["gpu"][uuid]["util"]:
            data["gpu"][uuid]["util"][i] = float(data["gpu"][uuid]["util"][i].split(" ")[0])

    elif vendor == "AMD":
        smi = _run_rocm_smi()
        memsize = _getAmdGpuMemSize()

        for card in smi:
            if card == "system":
                data["about"]["driver_version"] = smi["system"]["Driver version"]
            else:
                data["gpu"][smi[card]["Device ID"]] = {}
                # Fan speed as a percentage of the maximum fan reading;
                # 0.0 when it cannot be computed.
                try:
                    data["gpu"][smi[card]["Device ID"]]["fan_speed"] = 100 * float(smi[card]["current_fan_speed (rpm)"]) / _getAmdGpuMaxFanspeed()
                except:
                    data["gpu"][smi[card]["Device ID"]]["fan_speed"] = 0.0
                data["gpu"][smi[card]["Device ID"]]["vbios_version"] = smi[card]["VBIOS version"]
                data["gpu"][smi[card]["Device ID"]]["product_name"] = smi[card]["Device Name"]
                data["gpu"][smi[card]["Device ID"]]["uuid"] = smi[card]["Unique ID"]
                # Raises KeyError when the sysfs scan has no entry for this Device ID.
                data["gpu"][smi[card]["Device ID"]]["memory_total"] = int(memsize[smi[card]["Device ID"]])
                # Used memory is derived from the total and the allocated percentage.
                data["gpu"][smi[card]["Device ID"]]["memory_used"] = (int(memsize[smi[card]["Device ID"]]) / 100) * float(smi[card]["GPU Memory Allocated (VRAM%)"])
                data["gpu"][smi[card]["Device ID"]]["temp"] = float(smi[card]["Temperature (Sensor edge) (C)"])
                # Power: try two alternative key names, then fall back to 0.0.
                try:
                    data["gpu"][smi[card]["Device ID"]]["power"] = float(smi[card]["Average Graphics Package Power (W)"])
                except:
                    try:
                        data["gpu"][smi[card]["Device ID"]]["power"] = float(smi[card]["Current Socket Graphics Package Power (W)"])
                    except:
                        data["gpu"][smi[card]["Device ID"]]["power"] = 0.0
                # Hard-coded 65.0 when the limit is not reported.
                try:
                    data["gpu"][smi[card]["Device ID"]]["power_limit"] = float(smi[card]["Max Graphics Package Power (W)"])
                except:
                    data["gpu"][smi[card]["Device ID"]]["power_limit"] = 65.0
                # Clock strings have "(" and "Mhz)" removed and are scaled by 10**6.
                data["gpu"][smi[card]["Device ID"]]["gpu_clock"] = float(smi[card]["sclk clock speed:"].replace("(","").replace("Mhz)","")) * 10**6
                data["gpu"][smi[card]["Device ID"]]["mem_clock"] = float(smi[card]["mclk clock speed:"].replace("(","").replace("Mhz)","")) * 10**6
                try:
                    data["gpu"][smi[card]["Device ID"]]["sm_clock"] = float(smi[card]["current_dclk0 (MHz)"]) * 10**6
                    data["gpu"][smi[card]["Device ID"]]["video_clock"] = float(smi[card]["current_vclk0 (MHz)"]) * 10**6
                except:
                    data["gpu"][smi[card]["Device ID"]]["sm_clock"] = 0.0
                    data["gpu"][smi[card]["Device ID"]]["video_clock"] = 0.0
                # Maximum clocks are the upper bound of the "Valid ... range" strings.
                data["gpu"][smi[card]["Device ID"]]["gpu_max_clock"] = float(smi[card]["Valid sclk range"].replace("Mhz","").split(" - ")[1]) * 10**6
                try:
                    data["gpu"][smi[card]["Device ID"]]["mem_max_clock"] = float(smi[card]["Valid mclk range"].replace("Mhz","").split(" - ")[1]) * 10**6
                except:
                    # No usable mclk range: use the current memory clock instead.
                    data["gpu"][smi[card]["Device ID"]]["mem_max_clock"] = float(smi[card]["mclk clock speed:"].replace("(","").replace("Mhz)","")) * 10**6

                util = {}
                util["gpu_util"] = float(smi[card]["GPU use (%)"])
                util["memory_util"] = float(smi[card]["GPU Memory Allocated (VRAM%)"])

                data["gpu"][smi[card]["Device ID"]]["throttle"] = {}
                # Throttle status defaults to 1 when it is missing or not numeric.
                try:
                    data["gpu"][smi[card]["Device ID"]]["throttle"]["status"] = float(smi[card]["throttle_status"])
                except:
                    data["gpu"][smi[card]["Device ID"]]["throttle"]["status"] = 1

                data["gpu"][smi[card]["Device ID"]]["util"] = util
                data["gpu"][smi[card]["Device ID"]]["performance_state"] = "N/A"

    elif vendor == "Intel":
        # Any failure while probing yields an "Error" placeholder record.
        try:
            intel = _intel()
        except:
            intel = {'uuid': 'Error', 'max_freq': 0.0, 'min_freq': 0.0, 'cur_freq': 0.0, 'power': 0.0, 'engine_3d': 0.0, 'engine_video': 0.0, 'usage': 0.0, 'model': 'Error', 'driver': 'Error'}

        uuid = intel["uuid"]
        data["gpu"][uuid] = {}
        data["gpu"][uuid]["throttle"] = {}
        data["gpu"][uuid]["util"] = {}

        data["about"]["driver_version"] = intel["driver"]
        data["gpu"][uuid]["throttle"]["status"] = 0
        util = {}
        util["gpu_util"] = intel["usage"]
        util["memory_util"] = 0
        data["gpu"][uuid]["util"] = util
        # Fields not provided by _intel() are filled with fixed values.
        data["gpu"][uuid]["fan_speed"] = 0
        data["gpu"][uuid]["vbios_version"] = "0.0.0"
        data["gpu"][uuid]["product_name"] = intel["model"]
        data["gpu"][uuid]["uuid"] = uuid
        data["gpu"][uuid]["memory_total"] = 1
        data["gpu"][uuid]["memory_used"] = 0
        data["gpu"][uuid]["temp"] = 0
        data["gpu"][uuid]["power"] = intel["power"]
        data["gpu"][uuid]["power_limit"] = 15
        data["gpu"][uuid]["gpu_clock"] = intel["cur_freq"]
        data["gpu"][uuid]["mem_clock"] = 0
        data["gpu"][uuid]["sm_clock"] = 0
        data["gpu"][uuid]["video_clock"] = 0
        data["gpu"][uuid]["mem_max_clock"] = 1
        data["gpu"][uuid]["gpu_max_clock"] = intel["max_freq"]
        data["gpu"][uuid]["performance_state"] = "N/A"

    else:
        # Unknown vendor: emit a fully populated placeholder entry.
        uuid = "unsupported"
        data["gpu"][uuid] = {}
        data["gpu"][uuid]["throttle"] = {}
        data["gpu"][uuid]["util"] = {}

        data["about"]["driver_version"] = "Unknown"
        data["gpu"][uuid]["throttle"]["status"] = 0
        util = {}
        util["gpu_util"] = 0
        util["memory_util"] = 0
        data["gpu"][uuid]["util"] = util
        data["gpu"][uuid]["fan_speed"] = 0
        data["gpu"][uuid]["vbios_version"] = "0.0.0"
        data["gpu"][uuid]["product_name"] = "Unknown"
        data["gpu"][uuid]["uuid"] = uuid
        data["gpu"][uuid]["memory_total"] = 1
        data["gpu"][uuid]["memory_used"] = 0
        data["gpu"][uuid]["temp"] = 0
        data["gpu"][uuid]["power"] = 0
        data["gpu"][uuid]["power_limit"] = 1
        data["gpu"][uuid]["gpu_clock"] = 0
        data["gpu"][uuid]["mem_clock"] = 0
        data["gpu"][uuid]["sm_clock"] = 0
        data["gpu"][uuid]["video_clock"] = 0
        data["gpu"][uuid]["mem_max_clock"] = 1
        data["gpu"][uuid]["gpu_max_clock"] = 1
        data["gpu"][uuid]["performance_state"] = "N/A"

    return data
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: only the fan-speed probe is active; its result is not printed.
    #print(json.dumps(_run_rocm_smi(), indent=1))
    #print(json.dumps(_getAmdGpuMemSize(), indent=1))
    #print(json.dumps(readGpu(vendor="AMD"), indent=1))
    _getAmdGpuMaxFanspeed()
|
||||
8
hardware-monitor.service
Normal file
8
hardware-monitor.service
Normal file
@@ -0,0 +1,8 @@
|
||||
[Unit]
|
||||
Description=Hardware monitor service
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/monitoring/bin/python /opt/monitoring/main.py
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
115
intelPower.py
Normal file
115
intelPower.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import logging
|
||||
import os.path
|
||||
import argparse
|
||||
from itertools import count
|
||||
from struct import unpack
|
||||
from time import sleep
|
||||
from warnings import warn
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("intel-master")
|
||||
|
||||
class IntelPower:
    """Derive CPU power from energy counters read via ``/dev/cpu/<n>/msr``.

    Power is computed from the difference between two consecutive energy
    readings, so the first call to measure_nonblocking() reports zero.
    """

    # Register offsets passed to _read_msr().
    RAPL_MSR_POWER_UNIT = 0x606
    RAPL_MSR_ENERGY = 0x611
    RAPL_MSR_PP0_ENERGY = 0x639

    # The energy status counters are 32 bits wide and wrap around.
    _ENERGY_COUNTER_MASK = 0xFFFFFFFF

    def __init__(self):
        self._energy_unit = self._get_power_units()
        self._package_topology = self._detect_physical_package_topology()
        self._cores = sorted(self._package_topology)
        # State of the previous sample; -1 marks "no sample taken yet".
        self.timestamp = 0
        self.package_energy = -1
        self.core_energy = -1

    def _read_msr(self, cpu_id, offset):
        """Read the 8-byte register at *offset* for CPU *cpu_id*.

        Raises:
            PermissionError: the msr device could not be opened for reading.
            FileNotFoundError: the msr device does not exist.
        """
        msr_file = "/dev/cpu/{}/msr".format(cpu_id)
        try:
            with open(msr_file, "rb", buffering=8192) as f:
                f.seek(offset)
                return self._decode_int64(f.read(8))
        except PermissionError as err:
            raise PermissionError("root privilege is required to read model-specific registers") from err
        except FileNotFoundError as err:
            raise FileNotFoundError("msr driver is not loaded, try \"sudo modprobe msr\" to load msr module") from err

    @staticmethod
    def _decode_int64(buffer):
        """Decode 8 bytes as a native-order signed 64-bit integer."""
        return unpack("q", buffer)[0]

    @staticmethod
    def _detect_physical_package_topology():
        """Map each CPU id to its physical package id.

        CPU ids are probed upwards from 0 until the first one without a
        ``topology/physical_package_id`` file.
        """
        cpu_package_mapping = {}
        for cpu_id in count():
            filename = "/sys/devices/system/cpu/cpu{}/topology/physical_package_id".format(cpu_id)
            if not os.path.isfile(filename):
                return cpu_package_mapping
            with open(filename, "r") as f:
                package_id = int(f.read())
            logger.debug("detected cpu {} in socket {}".format(cpu_id, package_id))
            cpu_package_mapping[cpu_id] = package_id

    def _get_power_units(self):
        """Return the energy unit (0.5 ** exponent) read from CPU 0."""
        power_unit = self._read_msr(0, self.RAPL_MSR_POWER_UNIT)
        # The exponent is taken from bits 12:8 of the register.
        raw_unit = (power_unit >> 8) & 0x1F
        logger.debug("CPU energy unit is 1/2^{}".format(raw_unit))
        return 0.5 ** raw_unit

    def _read_package_energy(self, cpu_id):
        """Return the raw package energy counter for *cpu_id*."""
        energy = self._read_msr(cpu_id, self.RAPL_MSR_ENERGY)
        logger.debug("CPU {} current package energy counter {}".format(cpu_id, energy))
        return energy

    def _read_core_energy(self, cpu_id):
        """Return the raw core (PP0) energy counter for *cpu_id*."""
        energy = self._read_msr(cpu_id, self.RAPL_MSR_PP0_ENERGY)
        logger.debug("CPU {} current core energy counter {}".format(cpu_id, energy))
        return energy

    def _calc_power_wtime(self, before, after, duration):
        """Convert two raw counter readings taken *duration* seconds apart to power.

        The difference is taken modulo 2**32 so that a counter wraparound
        between the two readings does not produce a negative value. A
        non-positive duration yields 0.0.
        """
        if duration <= 0:
            return 0.0
        delta = (after - before) & self._ENERGY_COUNTER_MASK
        return delta * self._energy_unit / duration

    def measure_nonblocking(self):
        """Sample the energy counters and return the power since the last call.

        Returns:
            ``(package_power, core_power)`` averaged over all detected CPUs;
            both are zero on the first call, when there is no previous sample.
        """
        timestamp = datetime.now()
        package_energy = {c: self._read_package_energy(c) for c in self._cores}
        core_energy = {c: self._read_core_energy(c) for c in self._cores}

        if self.package_energy != -1:
            time_delta = (timestamp - self.timestamp).total_seconds()
            package_power = {c: self._calc_power_wtime(self.package_energy[c], package_energy[c], time_delta) for c in self._cores}
            core_power = {c: self._calc_power_wtime(self.core_energy[c], core_energy[c], time_delta) for c in self._cores}
        else:
            package_power = {c: 0 for c in self._cores}
            core_power = {c: 0 for c in self._cores}

        self.timestamp = timestamp
        self.package_energy = package_energy
        self.core_energy = core_energy

        # Avoid dividing by zero when no CPU was detected.
        core_count = len(self._cores) or 1
        avg_pp = sum(package_power.values()) / core_count
        avg_cp = sum(core_power.values()) / core_count

        return avg_pp, avg_cp

    def read(self, cpu_id=1):
        """Return the raw power-unit register of *cpu_id* (default 1, as before)."""
        return self._read_msr(cpu_id, self.RAPL_MSR_POWER_UNIT)
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: raw register read, topology, then two samples one second apart.
    print(IntelPower().read())
    print(IntelPower()._detect_physical_package_topology())

    ip = IntelPower()

    # The first sample only primes the counters (reports zero power).
    pp, cp = ip.measure_nonblocking()
    print(pp, cp)
    sleep(1)
    pp, cp = ip.measure_nonblocking()
    print(pp, cp)
|
||||
65
kvmSensors.py
Normal file
65
kvmSensors.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import subprocess
|
||||
|
||||
def fans(data):
    """Return a fan record for *data* when its first field contains "FAN".

    *data* is a sequence of fields; fields 0, 1 and 3 become "id", "rpm"
    and "status". Returns None for non-fan rows.
    """
    if "FAN" not in data[0]:
        return None
    return {"id": data[0], "rpm": data[1], "status": data[3]}
|
||||
|
||||
def voltage(data):
    """Return a voltage record for *data* when its first field contains "VOLT".

    *data* is a sequence of fields; fields 0, 1 and 3 become "id",
    "voltage" and "status". Returns None for other rows.
    """
    if "VOLT" not in data[0]:
        return None
    return {"id": data[0], "voltage": data[1], "status": data[3]}
|
||||
|
||||
def temp(data):
    """Return a temperature record for *data* when its first field contains "TEMP".

    *data* is a sequence of fields; fields 0, 1 and 3 become "id", "temp"
    and "status". Returns None for other rows.
    """
    if "TEMP" not in data[0]:
        return None
    return {"id": data[0], "temp": data[1], "status": data[3]}
|
||||
|
||||
def readSensors():
    """Run ``ipmitool sensor`` and group its rows into fans, voltages and temperatures.

    Every output line has its spaces removed and is split on "|". Rows whose
    second field is "na" or "0x0", and rows with too few fields, are
    ignored. Each remaining row is offered to fans(), voltage() and temp().

    Returns:
        ``{"fans": {...}, "volt": {...}, "temp": {...}}`` where each inner
        dict maps a running index ("0", "1", ...) to the record returned by
        the matching classifier.
    """
    result = subprocess.run(['ipmitool', 'sensor'], stdout=subprocess.PIPE)
    # Decode the bytes properly instead of massaging their repr().
    rows = result.stdout.decode('utf-8', errors='replace').replace(" ", "").split("\n")

    fandata = {}
    tempdata = {}
    voltdata = {}

    for sensor in rows:
        data = sensor.split("|")
        try:
            if data[1] == "na" or data[1] == "0x0":
                continue

            reading = fans(data)
            if reading is not None:
                fandata[str(len(fandata))] = reading

            reading = voltage(data)
            if reading is not None:
                voltdata[str(len(voltdata))] = reading

            reading = temp(data)
            if reading is not None:
                tempdata[str(len(tempdata))] = reading
        except IndexError:
            # Row has fewer fields than a sensor line (e.g. an empty line).
            continue

    return {"fans": fandata, "volt": voltdata, "temp": tempdata}
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the grouped sensor readings when run as a script.
    print(readSensors())
|
||||
690
main.py
Normal file
690
main.py
Normal file
@@ -0,0 +1,690 @@
|
||||
from time import sleep
|
||||
import datetime
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
|
||||
import promMon
|
||||
import mon_pkg_update
|
||||
|
||||
#===INIT========================================================================
|
||||
filedir = os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||
|
||||
vols_to_scan = {}
|
||||
vols_to_scan["zfs"] = []
|
||||
vols_to_scan["non_zfs"] = []
|
||||
nics_to_scan = []
|
||||
SMART_data_update_period_seconds = 60
|
||||
last_SMART_read_timestamp = 0
|
||||
packages_data_update_period_seconds = 3600
|
||||
last_packages_read_timestamp = 0
|
||||
docker_data_update_period_seconds = 3600
|
||||
last_docker_read_timestamp = 0
|
||||
update_scan_period_seconds = 300
|
||||
last_update_scan_timestamp = 0
|
||||
update_from_git_tags = True
|
||||
gpu_vendor = "nVidia"
|
||||
|
||||
# load config.json file
# Only failures to read or parse the file are reported as a config problem;
# import errors in optional modules are no longer hidden behind the
# "No config file found" message and surface with their own traceback.
try:
    with open(filedir+"/config.json", "r") as f:
        config = json.loads(f.read())
except FileNotFoundError:
    print("No config file found, exitting")
    sys.exit(255)
except (OSError, ValueError) as err:
    print("Could not load config file: {}".format(err))
    sys.exit(255)

try:
    modules = config["modules"]
except (KeyError, TypeError):
    print("No modules list in config file, exitting")
    sys.exit(255)

# Import only the collectors named in the config. Later code checks
# sys.modules to see which of them are enabled.
for module in modules:
    if "ryzenPower" in module:
        import ryzenPower
    if "kvmSensors" in module:
        import kvmSensors as ks
    if "sysinfo" in module:
        import sysinfo as si
    if "docker" in module:
        import docker as do
    if "SMART" in module:
        import SMART as sm
    if "sensors" in module:
        import sensors as lmsn
    if "intelPower" in module:
        import intelPower
    if "procMon" in module:
        import procMon as pr
    if "RAS" in module:
        import RAS
    if "cpuinfo" in module:
        import cpuinfo as cinfo
    if "gpu" in module:
        import gpu
    if "packages" in module:
        import packages

# Optional sections: missing keys keep the defaults set above.
try:
    vols_to_scan["zfs"].extend(config["volumes"]["zfs"])
    vols_to_scan["non_zfs"].extend(config["volumes"]["non_zfs"])
except (KeyError, TypeError):
    print("No volumes object detected")

try:
    nics_to_scan.extend(config["network"]["nics"])
except (KeyError, TypeError):
    print("No network object detected")

try:
    gpu_vendor = config["gpu"]
except (KeyError, TypeError):
    pass
|
||||
|
||||
|
||||
# Previous result of procMon.exportProcesses(); it is passed back in on the
# next loop iteration. 0 until the first pass.
proc_last = 0

# The exporter helper is mandatory; stop when it is not loaded.
if not ("promMon" in sys.modules):
    print("Prometheus exporter helper not inported, exitting")
    exit(1)

pm = promMon.prometheus(port=9339, name="hw-monitor")

# Metrics that exist regardless of the enabled collectors.
pm.add_monitor("self_loop_time", "us")
pm.add_monitor("version", "", tags=("version",))

if "docker" in sys.modules:
    pm.add_monitor("docker_version", "")
    for _metric, _unit in (("docker_overall_info", ""), ("docker_overall_size", "B")):
        pm.add_monitor(_metric, _unit, tags=("module", "type"))
    pm.add_monitor(
        "docker_images", "",
        tags=("id", "containers", "created", "repository", "unique_size"),
    )
    pm.add_monitor(
        "docker_containers", "",
        tags=("id", "created", "image", "volumes", "mounts", "name",
              "networks", "runtime", "state", "status"),
    )
    pm.add_monitor(
        "docker_volumes", "",
        tags=("name", "driver", "links", "mountpoint"),
    )
    pm.add_monitor(
        "docker_build_cache", "",
        tags=("id", "type", "created", "in_use", "last_use", "shared", "use_count"),
    )

if "ryzenPower" in sys.modules:
    for _metric in ("package_power", "core_total", "core"):
        pm.add_monitor(_metric, "W")
    rp = ryzenPower.RyzenPower()

if "intelPower" in sys.modules:
    ip = intelPower.IntelPower()

if "kvmSensors" in sys.modules:
    for _metric, _unit in (
        ("fan_rpm", "RPM"),
        ("fan_ok", ""),
        ("temp_celsius", "C"),
        ("temp_ok", ""),
        ("voltage", "V"),
        ("voltage_ok", ""),
    ):
        pm.add_monitor(_metric, _unit)

if "cpuinfo" in sys.modules:
    pm.add_monitor("cpu_info", "", tags=("vendor", "model", "cpus"))

if "sysinfo" in sys.modules:
    for _metric, _unit in (
        ("disk_read", "B/s"),
        ("disk_write", "B/s"),
        ("disk_io_read", "iops"),
        ("disk_io_write", "iops"),
        ("disk_io_read_time", "ms"),
        ("disk_io_write_time", "ms"),
        ("disk_io_read_merged", ""),
        ("disk_io_write_merged", ""),
        ("disk_busy", "ms"),
        ("cpu_count", ""),
        ("cpu_frequency", "Hz"),
        ("cpu_usage", "%"),
        ("uptime", "s"),
    ):
        pm.add_monitor(_metric, _unit)
    pm.add_monitor("system_info", "", tags=("hostname", "kernel", "board"))
    pm.add_monitor("ip_addrs", "", tags=("interface", "ip"))
    pm.add_monitor("user_sessions", "")
    pm.add_monitor("users", "", tags=("user", "from"))

if "SMART" in sys.modules:
    pm.add_monitor(
        "smart_attributes", "",
        tags=("serial", "device", "attribute", "id", "value", "thres", "worst", "raw"),
    )

if "gpu" in sys.modules:
    pm.add_monitor("gpu_info", "", tags=("name", "vbios", "driver", "pstate", "uuid"))
    pm.add_monitor("gpu_util", "%", tags=("name", "uuid", "stat"))
    pm.add_monitor("gpu_throttle", "", tags=("name", "uuid", "stat"))
    # The remaining GPU gauges all carry the same two labels.
    for _metric, _unit in (
        ("gpu_memory_used", "MB"),
        ("gpu_memory_total", "MB"),
        ("gpu_power", "W"),
        ("gpu_power_limit", "W"),
        ("gpu_temp", "C"),
        ("gpu_fan_speed", "%"),
        ("gpu_gpu_clock", "Hz"),
        ("gpu_mem_clock", "Hz"),
        ("gpu_sm_clock", "Hz"),
        ("gpu_video_clock", "Hz"),
        ("gpu_gpu_clock_max", "Hz"),
        ("gpu_mem_clock_max", "Hz"),
    ):
        pm.add_monitor(_metric, _unit, tags=("name", "uuid"))

if "packages" in sys.modules:
    pm.add_monitor("update_pkg_count", "", tags=("package_mgr",))
    pm.add_monitor("update_pkg_updatable", "", tags=("package", "version", "repository"))
    pm.add_monitor("installed_pkg_count", "", tags=("package_mgr",))
    pm.add_monitor("installed_pkgs", "", tags=("package", "version", "repository"))
    pm.add_monitor("installed_pkg_size", "", tags=("package_mgr",))
|
||||
|
||||
def self_monitoring(name, start):
    """Publish how long a monitoring step took and return a new timestamp.

    The time between `start` and now is reported in microseconds on the
    "self_loop_time" monitor, labelled with `name`. Only the seconds and
    microseconds fields of the difference are used; whole days are not
    included.

    Args:
        name: label value identifying the step that was just measured.
        start: datetime taken when the step began.

    Returns:
        A datetime taken after the measurement was published.
    """
    elapsed = datetime.datetime.now() - start
    elapsed_us = elapsed.seconds * 1000000 + elapsed.microseconds
    pm.monitor("self_loop_time", (name,), elapsed_us)

    return datetime.datetime.now()
|
||||
|
||||
while True:
|
||||
mon_time = 0
|
||||
loop_start = datetime.datetime.now()
|
||||
|
||||
#===VARIABLE INIT===============================================================
|
||||
|
||||
if "ryzenPower" in sys.modules:
|
||||
package, cores = rp.measure_nonblocking()
|
||||
mon_time = self_monitoring("zenpower", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "kvmSensors" in sys.modules:
|
||||
kvm = ks.readSensors()
|
||||
mon_time = self_monitoring("kvm", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "sysinfo" in sys.modules:
|
||||
disks = si.getDisk()
|
||||
mon_time = self_monitoring("disks", loop_start if mon_time == 0 else mon_time)
|
||||
cpu = si.getCPU()
|
||||
mon_time = self_monitoring("cpu", loop_start if mon_time == 0 else mon_time)
|
||||
memory = si.getMemory()
|
||||
mon_time = self_monitoring("memory", loop_start if mon_time == 0 else mon_time)
|
||||
try:
|
||||
partitions = si.getPartitions()
|
||||
mon_time = self_monitoring("partitions", loop_start if mon_time == 0 else mon_time)
|
||||
except:
|
||||
partitions = 0
|
||||
try:
|
||||
zfs = si.getZFS()
|
||||
mon_time = self_monitoring("zfs", loop_start if mon_time == 0 else mon_time)
|
||||
except:
|
||||
zfs = 0
|
||||
uptime = si.getUptime()
|
||||
mon_time = self_monitoring("uptime", loop_start if mon_time == 0 else mon_time)
|
||||
network = si.getNetwork()
|
||||
ipaddrs = si.getIP()
|
||||
mon_time = self_monitoring("network", loop_start if mon_time == 0 else mon_time)
|
||||
systeminfo = si.getSysInfo()
|
||||
mon_time = self_monitoring("systeminfo", loop_start if mon_time == 0 else mon_time)
|
||||
users = si.users()
|
||||
mon_time = self_monitoring("users", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "SMART" in sys.modules:
|
||||
if last_SMART_read_timestamp == 0:
|
||||
smart = sm.getAllDeviceAttributes()
|
||||
last_SMART_read_timestamp = datetime.datetime.now()
|
||||
else:
|
||||
timedelta = (datetime.datetime.now() - last_SMART_read_timestamp).total_seconds()
|
||||
if timedelta >= SMART_data_update_period_seconds:
|
||||
smart = sm.getAllDeviceAttributes()
|
||||
last_SMART_read_timestamp = datetime.datetime.now()
|
||||
mon_time = self_monitoring("smart", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "docker" in sys.modules:
|
||||
containers = do.health()
|
||||
try:
|
||||
if last_docker_read_timestamp == 0:
|
||||
docker_info = do.getInfo()
|
||||
docker_size = do.getSize()
|
||||
last_docker_read_timestamp = datetime.datetime.now()
|
||||
else:
|
||||
timedelta = (datetime.datetime.now() - last_docker_read_timestamp).total_seconds()
|
||||
if timedelta >= docker_data_update_period_seconds:
|
||||
docker_info = do.getInfo()
|
||||
docker_size = do.getSize()
|
||||
last_docker_read_timestamp = datetime.datetime.now()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
mon_time = self_monitoring("containers", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "sensors" in sys.modules:
|
||||
temperatures = lmsn.getTemps()
|
||||
mon_time = self_monitoring("temperatures", loop_start if mon_time == 0 else mon_time)
|
||||
fans = lmsn.getFans()
|
||||
mon_time = self_monitoring("fans", loop_start if mon_time == 0 else mon_time)
|
||||
voltages = lmsn.getVoltages()
|
||||
mon_time = self_monitoring("voltage", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "intelPower" in sys.modules:
|
||||
package, cores = ip.measure_nonblocking()
|
||||
mon_time = self_monitoring("cpu_power", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "RAS" in sys.modules:
|
||||
ras = RAS.readRAS()
|
||||
mon_time = self_monitoring("ras", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "procMon" in sys.modules:
|
||||
proc = pr.exportProcesses(proc_last)
|
||||
proc_last = proc
|
||||
mon_time = self_monitoring("proc", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "cpuinfo" in sys.modules:
|
||||
cpuinfo = cinfo.getCpuInfo()
|
||||
mon_time = self_monitoring("cpuinfo", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "gpu" in sys.modules:
|
||||
gpuinfo = gpu.readGpu(gpu_vendor)
|
||||
mon_time = self_monitoring("gpu", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if "packages" in sys.modules:
|
||||
if last_packages_read_timestamp == 0:
|
||||
up = packages.getPackages()
|
||||
last_packages_read_timestamp = datetime.datetime.now()
|
||||
else:
|
||||
timedelta = (datetime.datetime.now() - last_packages_read_timestamp).total_seconds()
|
||||
if timedelta >= packages_data_update_period_seconds:
|
||||
up = packages.getPackages()
|
||||
last_packages_read_timestamp = datetime.datetime.now()
|
||||
mon_time = self_monitoring("packages", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
if last_update_scan_timestamp == 0:
|
||||
version = mon_pkg_update.getCurrentTag()
|
||||
if update_from_git_tags:
|
||||
mon_pkg_update.update()
|
||||
last_update_scan_timestamp = datetime.datetime.now()
|
||||
else:
|
||||
timedelta = (datetime.datetime.now() - last_update_scan_timestamp).total_seconds()
|
||||
if timedelta >= update_scan_period_seconds:
|
||||
version = mon_pkg_update.getCurrentTag()
|
||||
if update_from_git_tags:
|
||||
mon_pkg_update.update()
|
||||
last_update_scan_timestamp = datetime.datetime.now()
|
||||
mon_time = self_monitoring("version_check", loop_start if mon_time == 0 else mon_time)
|
||||
|
||||
#===MOVE VARS TO PROMETHEUS EXPORTER============================================
|
||||
|
||||
pm.monitor("version", (version,), 1)
|
||||
|
||||
if "sysinfo" in sys.modules:
|
||||
pm.monitor("uptime", ("sensors",), uptime)
|
||||
pm.monitor("cpu_count", ("sensors",), cpu["cpu_count"])
|
||||
pm.monitor("cpu_usage", ("sensors",), cpu["usage"])
|
||||
for disk in disks:
|
||||
pm.monitor("disk_read", (disk,), disks[disk]["read"])
|
||||
pm.monitor("disk_write", (disk,), disks[disk]["write"])
|
||||
pm.monitor("disk_io_read", (disk,), disks[disk]["io_read"])
|
||||
pm.monitor("disk_io_write", (disk,), disks[disk]["io_write"])
|
||||
pm.monitor("disk_io_read_time", (disk,), disks[disk]["io_read_time"])
|
||||
pm.monitor("disk_io_write_time", (disk,), disks[disk]["io_write_time"])
|
||||
pm.monitor("disk_io_read_merged", (disk,), disks[disk]["io_read_merged"])
|
||||
pm.monitor("disk_io_write_merged", (disk,), disks[disk]["io_write_merged"])
|
||||
pm.monitor("disk_busy", (disk,), disks[disk]["busy"])
|
||||
|
||||
for core in cpu["frequency"]:
|
||||
pm.monitor("cpu_frequency", (str(core),), cpu["frequency"][core])
|
||||
|
||||
for t in cpu["time_percent"]:
|
||||
try:
|
||||
pm.monitor("cpu_time_percent", (t,), cpu["time_percent"][t])
|
||||
except:
|
||||
pm.add_monitor("cpu_time_percent", "%")
|
||||
pm.monitor("cpu_time_percent", (t,), cpu["time_percent"][t])
|
||||
|
||||
for i in memory:
|
||||
try:
|
||||
pm.monitor("memory_"+i, ("memory",), memory[i])
|
||||
except:
|
||||
pm.add_monitor("memory_"+i, "%")
|
||||
pm.monitor("memory_"+i, ("memory",), memory[i])
|
||||
|
||||
if zfs != 0:
|
||||
for pool in zfs:
|
||||
if len(vols_to_scan["zfs"]) != 0:
|
||||
if pool not in vols_to_scan["zfs"]:
|
||||
continue
|
||||
try:
|
||||
pm.monitor("zfs_state", (pool,), zfs[pool]["state"])
|
||||
pm.monitor("zfs_size", (pool,), zfs[pool]["size"])
|
||||
pm.monitor("zfs_used", (pool,), zfs[pool]["used"])
|
||||
pm.monitor("zfs_free", (pool,), zfs[pool]["free"])
|
||||
pm.monitor("zfs_fragmentation", (pool,), zfs[pool]["fragmentation"])
|
||||
pm.monitor("zfs_dedup", (pool,), zfs[pool]["dedup"])
|
||||
except:
|
||||
pm.add_monitor("zfs_state", "")
|
||||
pm.add_monitor("zfs_size", "B")
|
||||
pm.add_monitor("zfs_used", "B")
|
||||
pm.add_monitor("zfs_free", "B")
|
||||
pm.add_monitor("zfs_fragmentation", "%")
|
||||
pm.add_monitor("zfs_dedup", "")
|
||||
pm.monitor("zfs_state", (pool,), zfs[pool]["state"])
|
||||
pm.monitor("zfs_size", (pool,), zfs[pool]["size"])
|
||||
pm.monitor("zfs_used", (pool,), zfs[pool]["used"])
|
||||
pm.monitor("zfs_free", (pool,), zfs[pool]["free"])
|
||||
pm.monitor("zfs_fragmentation", (pool,), zfs[pool]["fragmentation"])
|
||||
pm.monitor("zfs_dedup", (pool,), zfs[pool]["dedup"])
|
||||
|
||||
for nic in network:
|
||||
if len(nics_to_scan) != 0:
|
||||
if nic not in nics_to_scan:
|
||||
continue
|
||||
try:
|
||||
pm.monitor("network_rx", (nic,), network[nic]["rx"])
|
||||
pm.monitor("network_tx", (nic,), network[nic]["tx"])
|
||||
pm.monitor("network_err_rx", (nic,), network[nic]["err_rx"])
|
||||
pm.monitor("network_err_tx", (nic,), network[nic]["err_tx"])
|
||||
pm.monitor("network_drop_rx", (nic,), network[nic]["drop_rx"])
|
||||
pm.monitor("network_drop_tx", (nic,), network[nic]["drop_tx"])
|
||||
pm.monitor("network_packet_rx", (nic,), network[nic]["packet_rx"])
|
||||
pm.monitor("network_packet_tx", (nic,), network[nic]["packet_tx"])
|
||||
except:
|
||||
pm.add_monitor("network_rx", "B")
|
||||
pm.add_monitor("network_tx", "B")
|
||||
pm.add_monitor("network_err_rx", "")
|
||||
pm.add_monitor("network_err_tx", "")
|
||||
pm.add_monitor("network_drop_rx", "")
|
||||
pm.add_monitor("network_drop_tx", "")
|
||||
pm.add_monitor("network_packet_rx", "")
|
||||
pm.add_monitor("network_packet_tx", "")
|
||||
pm.monitor("network_rx", (nic,), network[nic]["rx"])
|
||||
pm.monitor("network_tx", (nic,), network[nic]["tx"])
|
||||
pm.monitor("network_err_rx", (nic,), network[nic]["err_rx"])
|
||||
pm.monitor("network_err_tx", (nic,), network[nic]["err_tx"])
|
||||
pm.monitor("network_drop_rx", (nic,), network[nic]["drop_rx"])
|
||||
pm.monitor("network_drop_tx", (nic,), network[nic]["drop_tx"])
|
||||
pm.monitor("network_packet_rx", (nic,), network[nic]["packet_rx"])
|
||||
pm.monitor("network_packet_tx", (nic,), network[nic]["packet_tx"])
|
||||
|
||||
if partitions != 0:
|
||||
for part in partitions:
|
||||
if len(vols_to_scan["non_zfs"]) != 0:
|
||||
if part not in vols_to_scan["non_zfs"]:
|
||||
continue
|
||||
try:
|
||||
pm.monitor("partition_size", (part,), partitions[part]["size"])
|
||||
pm.monitor("partition_used", (part,), partitions[part]["used"])
|
||||
pm.monitor("partition_free", (part,), partitions[part]["free"])
|
||||
except:
|
||||
pm.add_monitor("partition_size", "B")
|
||||
pm.add_monitor("partition_used", "B")
|
||||
pm.add_monitor("partition_free", "B")
|
||||
pm.monitor("partition_size", (part,), partitions[part]["size"])
|
||||
pm.monitor("partition_used", (part,), partitions[part]["used"])
|
||||
pm.monitor("partition_free", (part,), partitions[part]["free"])
|
||||
|
||||
pm.monitor("system_info", (systeminfo["hostname"],systeminfo["kernel"],systeminfo["board"]), 1)
|
||||
|
||||
pm.delete_monitor("user_sessions")
|
||||
pm.delete_monitor("users")
|
||||
|
||||
try:
|
||||
for user in users:
|
||||
pm.monitor("user_sessions", (user,), users[user]["sessions"])
|
||||
for session in users[user]["session"]:
|
||||
pm.monitor("users", (user,users[user]["session"][session]["from"]), 1)
|
||||
except:
|
||||
pass
|
||||
|
||||
pm.delete_monitor("ip_addrs")
|
||||
for interface in ipaddrs:
|
||||
pm.monitor("ip_addrs", (interface,ipaddrs[interface]), 1)
|
||||
|
||||
if "ryzenPower" in sys.modules:
|
||||
pm.monitor("package_power", ("sensors",), package)
|
||||
core_total = 0
|
||||
for core in cores:
|
||||
pm.monitor("core", (str(int(core/2)),), cores[core])
|
||||
core_total += cores[core]
|
||||
pm.monitor("core_total", ("sensors",), core_total)
|
||||
|
||||
if "kvmSensors" in sys.modules:
|
||||
for fan in kvm["fans"]:
|
||||
pm.monitor("fan_rpm", (kvm["fans"][fan]["id"],), kvm["fans"][fan]["rpm"])
|
||||
if kvm["fans"][fan]["status"] == "ok":
|
||||
pm.monitor("fan_ok", (kvm["fans"][fan]["id"],), 1)
|
||||
else:
|
||||
pm.monitor("fan_ok", (kvm["fans"][fan]["id"],), 0)
|
||||
|
||||
for temp in kvm["temp"]:
|
||||
pm.monitor("temp_celsius", (kvm["temp"][temp]["id"],), kvm["temp"][temp]["temp"])
|
||||
if kvm["temp"][temp]["status"] == "ok":
|
||||
pm.monitor("temp_ok", (kvm["temp"][temp]["id"],), 1)
|
||||
else:
|
||||
pm.monitor("temp_ok", (kvm["temp"][temp]["id"],), 0)
|
||||
|
||||
for volt in kvm["volt"]:
|
||||
pm.monitor("voltage", (kvm["volt"][volt]["id"],), kvm["volt"][volt]["voltage"])
|
||||
if kvm["volt"][volt]["status"] == "ok":
|
||||
pm.monitor("voltage_ok", (kvm["volt"][volt]["id"],), 1)
|
||||
else:
|
||||
pm.monitor("voltage_ok", (kvm["volt"][volt]["id"],), 0)
|
||||
|
||||
if "docker" in sys.modules:
|
||||
pm.delete_monitor("docker_version")
|
||||
pm.delete_monitor("docker_images")
|
||||
pm.delete_monitor("docker_containers")
|
||||
pm.delete_monitor("docker_volumes")
|
||||
pm.delete_monitor("docker_build_cache")
|
||||
|
||||
for container in containers:
|
||||
try:
|
||||
pm.monitor("docker_status", (container,), containers[container]["status"])
|
||||
pm.monitor("docker_health", (container,), containers[container]["health"])
|
||||
except:
|
||||
pm.add_monitor("docker_status", "")
|
||||
pm.add_monitor("docker_health", "")
|
||||
pm.monitor("docker_status", (container,), containers[container]["status"])
|
||||
pm.monitor("docker_health", (container,), containers[container]["health"])
|
||||
|
||||
try:
|
||||
pm.monitor("docker_version", (docker_info["version"],), 1)
|
||||
for module in docker_size:
|
||||
pm.monitor("docker_overall_info", (module,"count_total"), int(docker_size[module]["count_total"]))
|
||||
pm.monitor("docker_overall_info", (module,"count_active"), int(docker_size[module]["count_active"]))
|
||||
pm.monitor("docker_overall_size", (module,"used"), docker_size[module]["used"])
|
||||
pm.monitor("docker_overall_size", (module,"reclaimable"), docker_size[module]["reclaimable"])
|
||||
|
||||
for image in docker_info["images"]:
|
||||
pm.monitor("docker_images", (image,docker_info["images"][image]["containers"],docker_info["images"][image]["created"],docker_info["images"][image]["repository"],docker_info["images"][image]["unique_size"]), docker_info["images"][image]["size"])
|
||||
|
||||
for container in docker_info["containers"]:
|
||||
pm.monitor("docker_containers", (container,docker_info["containers"][container]["created"],docker_info["containers"][container]["image"],docker_info["containers"][container]["volumes"],docker_info["containers"][container]["mounts"],docker_info["containers"][container]["name"],docker_info["containers"][container]["networks"],docker_info["containers"][container]["runtime"],docker_info["containers"][container]["state"],docker_info["containers"][container]["status"]), docker_info["containers"][container]["size"])
|
||||
|
||||
for volume in docker_info["volumes"]:
|
||||
pm.monitor("docker_volumes", (volume,docker_info["volumes"][volume]["driver"],docker_info["volumes"][volume]["links"],docker_info["volumes"][volume]["mountpoint"]), docker_info["volumes"][volume]["size"])
|
||||
|
||||
for cache in docker_info["buildcache"]:
|
||||
pm.monitor("docker_build_cache", (cache,docker_info["buildcache"][cache]["type"],docker_info["buildcache"][cache]["created"],docker_info["buildcache"][cache]["in_use"],docker_info["buildcache"][cache]["last_use"],docker_info["buildcache"][cache]["shared"],docker_info["buildcache"][cache]["use_count"]), docker_info["buildcache"][cache]["size"])
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
if "sensors" in sys.modules:
|
||||
for temp in temperatures:
|
||||
try:
|
||||
for sensor in temperatures[temp]:
|
||||
if "coretemp" in temp:
|
||||
pm.monitor("temp_celsius_lm", (str(sensor),), temperatures[temp][sensor])
|
||||
else:
|
||||
pm.monitor("temp_celsius_lm", (str(temp)+'_'+str(sensor),), temperatures[temp][sensor])
|
||||
except:
|
||||
pm.add_monitor("temp_celsius_lm", "C")
|
||||
for sensor in temperatures[temp]:
|
||||
if "coretemp" in temp:
|
||||
pm.monitor("temp_celsius_lm", (str(sensor),), temperatures[temp][sensor])
|
||||
else:
|
||||
pm.monitor("temp_celsius_lm", (str(temp)+'_'+str(sensor),), temperatures[temp][sensor])
|
||||
|
||||
for fan in fans:
|
||||
try:
|
||||
for sensor in fans[fan]:
|
||||
pm.monitor("fans_lm", (str(sensor),), fans[fan][sensor])
|
||||
except:
|
||||
pm.add_monitor("fans_lm", "RPM")
|
||||
for sensor in fans[fan]:
|
||||
pm.monitor("fans_lm", (str(sensor),), fans[fan][sensor])
|
||||
|
||||
for voltage in voltages:
|
||||
try:
|
||||
for sensor in voltages[voltage]:
|
||||
pm.monitor("voltages_lm", (str(sensor),), voltages[voltage][sensor])
|
||||
except:
|
||||
pm.add_monitor("voltages_lm", "V")
|
||||
for sensor in voltages[voltage]:
|
||||
pm.monitor("voltages_lm", (str(sensor),), voltages[voltage][sensor])
|
||||
|
||||
if "intelPower" in sys.modules:
|
||||
try:
|
||||
pm.monitor("intel_cpu_power", ("core",), cores)
|
||||
pm.monitor("intel_cpu_power", ("package",), package)
|
||||
except:
|
||||
pm.add_monitor("intel_cpu_power", "W")
|
||||
pm.monitor("intel_cpu_power", ("core",), cores)
|
||||
pm.monitor("intel_cpu_power", ("package",), package)
|
||||
|
||||
if "RAS" in sys.modules:
|
||||
for item in ras:
|
||||
try:
|
||||
for tag in ras[item]:
|
||||
if tag == "total_errors":
|
||||
pm.monitor("ras_total", (item.replace(' ','_'),), ras[item][tag])
|
||||
else:
|
||||
for error in ras[item][tag]:
|
||||
pm.monitor("ras_"+error, (item.replace(' ','_')+"_"+tag,), ras[item][tag][error])
|
||||
except:
|
||||
for tag in ras[item]:
|
||||
if tag == "total_errors":
|
||||
try:
|
||||
pm.add_monitor("ras_total", "")
|
||||
pm.monitor("ras_total", (item.replace(' ','_'),), ras[item][tag])
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
for error in ras[item][tag]:
|
||||
try:
|
||||
pm.add_monitor("ras_"+error, "")
|
||||
pm.monitor("ras_"+error, (item.replace(' ','_')+"_"+tag,), ras[item][tag][error])
|
||||
except:
|
||||
pass
|
||||
|
||||
if "SMART" in sys.modules:
|
||||
try:
|
||||
pm.add_monitor("smart_bytes_written", "B", tags=("serial","instance"))
|
||||
except:
|
||||
pass
|
||||
|
||||
pm.delete_monitor("smart_attributes")
|
||||
|
||||
for device in smart:
|
||||
serial = smart[device]["serial_number"]
|
||||
|
||||
pm.monitor("smart_bytes_written", (serial,device), smart[device]["bytes_written"])
|
||||
|
||||
for attr in smart[device]["data"]:
|
||||
if smart[device]["type"] == "ATA":
|
||||
try:
|
||||
pm.monitor("smart_raw_"+attr, (serial,device), smart[device]["data"][attr]["raw"])
|
||||
pm.monitor("smart_value_"+attr, (serial,device), smart[device]["data"][attr]["value"])
|
||||
pm.monitor("smart_thr_"+attr, (serial,device), smart[device]["data"][attr]["thr"])
|
||||
pm.monitor("smart_worst_"+attr, (serial,device), smart[device]["data"][attr]["worst"])
|
||||
pm.monitor("smart_attributes", (serial,device,attr,smart[device]["data"][attr]["id"],smart[device]["data"][attr]["value"],smart[device]["data"][attr]["thr"],smart[device]["data"][attr]["worst"],smart[device]["data"][attr]["raw"]), smart[device]["data"][attr]["raw"])
|
||||
except:
|
||||
try:
|
||||
pm.add_monitor("smart_raw_"+attr, "", tags=("serial","instance"))
|
||||
pm.add_monitor("smart_value_"+attr, "", tags=("serial","instance"))
|
||||
pm.add_monitor("smart_thr_"+attr, "", tags=("serial","instance"))
|
||||
pm.add_monitor("smart_worst_"+attr, "", tags=("serial","instance"))
|
||||
pm.monitor("smart_raw_"+attr, (serial,device), smart[device]["data"][attr]["raw"])
|
||||
pm.monitor("smart_value_"+attr, (serial,device), smart[device]["data"][attr]["value"])
|
||||
pm.monitor("smart_thr_"+attr, (serial,device), smart[device]["data"][attr]["thr"])
|
||||
pm.monitor("smart_worst_"+attr, (serial,device), smart[device]["data"][attr]["worst"])
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
pm.monitor("smart_raw_"+attr, (serial,device), smart[device]["data"][attr])
|
||||
pm.monitor("smart_attributes", (serial,device,attr,"","","","",smart[device]["data"][attr]), smart[device]["data"][attr])
|
||||
except:
|
||||
try:
|
||||
pm.add_monitor("smart_raw_"+attr, "", tags=("serial","instance"))
|
||||
pm.monitor("smart_raw_"+attr, (serial,device), smart[device]["data"][attr])
|
||||
except:
|
||||
pass
|
||||
|
||||
if "procMon" in sys.modules:
|
||||
try:
|
||||
pm.add_monitor("proc_summary", "", tags=("PID","CPU","VIRT","RAM","% RAM","THR","STARTTIME","RUNTIME","PARENT","STATE","COMM"))
|
||||
pm.add_monitor("proc_cpu", "%")
|
||||
pm.add_monitor("proc_memory_used", "B")
|
||||
pm.add_monitor("proc_memory_virt", "B")
|
||||
pm.add_monitor("proc_memory_percent", "%")
|
||||
pm.add_monitor("proc_page_fault_minor", "")
|
||||
pm.add_monitor("proc_page_fault_major", "")
|
||||
except:
|
||||
pass
|
||||
|
||||
pm.delete_monitor("proc_summary")
|
||||
pm.delete_monitor("proc_cpu")
|
||||
pm.delete_monitor("proc_memory_used")
|
||||
pm.delete_monitor("proc_memory_virt")
|
||||
pm.delete_monitor("proc_memory_percent")
|
||||
pm.delete_monitor("proc_page_fault_minor")
|
||||
pm.delete_monitor("proc_page_fault_major")
|
||||
|
||||
for pid in proc:
|
||||
try:
|
||||
pm.monitor("proc_summary", (proc[pid]["id"],proc[pid]["cpu"],str(int(proc[pid]["virt"]) / 1000)+" kB",str(int(proc[pid]["memory"]) / 1000)+" kB",round(float(proc[pid]["memory_percent"]),2),proc[pid]["threadcnt"],proc[pid]["starttime"],proc[pid]["runtime_seconds"],proc[pid]["parent_pid"],proc[pid]["state"],proc[pid]["comm"]), 1)
|
||||
pm.monitor("proc_cpu", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["cpu"])
|
||||
pm.monitor("proc_memory_used", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["memory"])
|
||||
pm.monitor("proc_memory_virt", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["virt"])
|
||||
pm.monitor("proc_memory_percent", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["memory_percent"])
|
||||
pm.monitor("proc_page_fault_minor", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["page_fault_minor"])
|
||||
pm.monitor("proc_page_fault_major", (str(proc[pid]["id"])+"_"+proc[pid]["comm"],), proc[pid]["page_fault_major"])
|
||||
except:
|
||||
pass
|
||||
|
||||
if "cpuinfo" in sys.modules:
|
||||
pm.monitor("cpu_info", (cpuinfo["vendor"],cpuinfo["model"],cpuinfo["cpus"]), 1)
|
||||
|
||||
if "gpu" in sys.modules:
|
||||
pm.delete_monitor("gpu_info")
|
||||
pm.delete_monitor("gpu_util")
|
||||
pm.delete_monitor("gpu_throttle")
|
||||
|
||||
for device in gpuinfo["gpu"]:
|
||||
pm.monitor("gpu_info", (gpuinfo["gpu"][device]["product_name"], gpuinfo["gpu"][device]["vbios_version"], gpuinfo["about"]["driver_version"], gpuinfo["gpu"][device]["performance_state"], device), 1)
|
||||
|
||||
for item in gpuinfo["gpu"][device]["util"]:
|
||||
pm.monitor("gpu_util", (gpuinfo["gpu"][device]["product_name"], device, item), gpuinfo["gpu"][device]["util"][item])
|
||||
for item in gpuinfo["gpu"][device]["throttle"]:
|
||||
pm.monitor("gpu_throttle", (gpuinfo["gpu"][device]["product_name"], device, item), gpuinfo["gpu"][device]["throttle"][item])
|
||||
|
||||
pm.monitor("gpu_fan_speed", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["fan_speed"])
|
||||
pm.monitor("gpu_memory_used", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["memory_used"])
|
||||
pm.monitor("gpu_memory_total", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["memory_total"])
|
||||
pm.monitor("gpu_temp", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["temp"])
|
||||
pm.monitor("gpu_power", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["power"])
|
||||
pm.monitor("gpu_power_limit", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["power_limit"])
|
||||
pm.monitor("gpu_gpu_clock", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["gpu_clock"])
|
||||
pm.monitor("gpu_mem_clock", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["mem_clock"])
|
||||
pm.monitor("gpu_sm_clock", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["sm_clock"])
|
||||
pm.monitor("gpu_video_clock", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["video_clock"])
|
||||
pm.monitor("gpu_gpu_clock_max", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["gpu_max_clock"])
|
||||
pm.monitor("gpu_mem_clock_max", (gpuinfo["gpu"][device]["product_name"], device), gpuinfo["gpu"][device]["mem_max_clock"])
|
||||
|
||||
if "packages" in sys.modules:
|
||||
pm.delete_monitor("update_pkg_updatable")
|
||||
pm.delete_monitor("update_pkg_count")
|
||||
pm.delete_monitor("installed_pkg_count")
|
||||
pm.delete_monitor("installed_pkgs")
|
||||
pm.delete_monitor("installed_pkg_size")
|
||||
|
||||
pm.monitor("update_pkg_count", (up["package_mgr"],), len(up["updatable"]))
|
||||
pm.monitor("installed_pkg_count", (up["package_mgr"],), len(up["installed"]))
|
||||
pm.monitor("installed_pkg_size", (up["package_mgr"],), up["total_size"])
|
||||
|
||||
try:
|
||||
for package in up["installed"]:
|
||||
pm.monitor("installed_pkgs", (package, up["installed"][package]["version"], up["installed"][package]["repository"]), up["installed"][package]["size"])
|
||||
except:
|
||||
pass
|
||||
if len(up["updatable"]) > 0:
|
||||
for package in up["updatable"]:
|
||||
pm.monitor("update_pkg_updatable", (package, up["updatable"][package]["version"], up["updatable"][package]["repository"]), 1)
|
||||
|
||||
mon_time = self_monitoring("prom_export", loop_start if mon_time == 0 else mon_time)
|
||||
loop_time_spent = self_monitoring("hw-mon-loop", loop_start)
|
||||
|
||||
# Pace the loop to roughly one iteration per second: sleep only for what is
# left of the 1 s budget after this iteration's work. The elapsed time must be
# "now - loop_start"; the reversed subtraction is negative, which made the
# loop sleep for 1 s plus the elapsed time on every pass.
loop_elapsed_seconds = (datetime.datetime.now() - loop_start).total_seconds()
loop_remaining_seconds = 1 - loop_elapsed_seconds
if loop_remaining_seconds > 0:
    # Cap at 1 s so a wall clock that jumps backwards cannot stall the loop.
    sleep(min(loop_remaining_seconds, 1))
|
||||
46
mon_pkg_update.py
Normal file
46
mon_pkg_update.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import subprocess
|
||||
|
||||
def pullTags():
    """Run ``git fetch --tags`` in the current working directory.

    The command's stdout is captured and discarded, and a non-zero exit
    status is ignored. Returns None.
    """
    fetch_cmd = ['git', 'fetch', '--tags']
    subprocess.run(fetch_cmd, stdout=subprocess.PIPE)
|
||||
|
||||
def getCurrentTag():
    """Return the tag name git associates with the checked-out commit.

    Runs ``git rev-parse HEAD`` and feeds the resulting commit id to
    ``git name-rev --tags --name-only`` in the current working directory.

    Returns:
        str: the name printed by ``git name-rev`` with surrounding whitespace
        removed. A trailing ``^0`` is dropped, because git appends it when the
        commit is reached through an annotated tag, and with it the result
        would never equal the plain names listed by ``git tag``. The string is
        empty when git printed nothing.
    """
    head = subprocess.run(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE)
    # strip() instead of [:-1]: does not assume exactly one trailing newline.
    commit = head.stdout.decode('utf-8').strip()

    named = subprocess.run(['git', 'name-rev', '--tags', '--name-only', commit], stdout=subprocess.PIPE)
    name = named.stdout.decode('utf-8').strip()

    if name.endswith('^0'):
        name = name[:-2]

    return name
|
||||
|
||||
def _tagVersion(tag):
    """Return the number after the one-character prefix of `tag`, or None.

    A trailing ``^0`` (git's suffix for annotated tags) is ignored. Anything
    that does not parse as a float, including an empty string, yields None.
    """
    name = tag.strip()
    if name.endswith('^0'):
        name = name[:-2]
    try:
        return float(name[1:])
    except ValueError:
        return None


def checkTag():
    """Fetch tags and return the highest-numbered one.

    Tags are compared by the float that follows their first character (for
    example ``v1.2`` -> 1.2), starting from the tag of the current commit.

    Returns:
        str: the tag with the largest number that is greater than the current
        one, or the current tag name when no such tag exists. Tags that do
        not parse as a number are skipped. When the current name itself does
        not parse (for example an untagged commit), it is returned unchanged
        so callers see "nothing newer" instead of a ValueError.
    """
    pullTags()

    result = subprocess.run(['git', 'tag'], stdout=subprocess.PIPE)
    # splitlines() yields no entries for empty output, unlike split('\n').
    tags = result.stdout.decode('utf-8').splitlines()

    latest_tag = getCurrentTag()
    latest_version = _tagVersion(latest_tag)
    if latest_version is None:
        return latest_tag

    for tag in tags:
        version = _tagVersion(tag)
        if version is not None and version > latest_version:
            latest_tag = tag
            latest_version = version

    return latest_tag
|
||||
|
||||
def update(recursive=False):
    """Check out the newest tag if needed and restart the monitor service.

    Args:
        recursive: True when the function calls itself after a checkout to
            verify the result; the current and latest tag are printed then.

    NOTE(review): the original indentation was lost in this copy of the
    file. This version assumes the ``systemctl restart`` ran at function
    level (after the if/else), i.e. on every call - confirm against the
    real source.
    """
    current_tag = getCurrentTag()
    latest_tag = checkTag()

    if recursive:
        print(current_tag, latest_tag)

    if current_tag != latest_tag:
        if recursive:
            # The checkout did not land on the latest tag (e.g. blocked by
            # local changes). Stop here instead of recursing without bound.
            print("update failed")
            return
        print("update")
        subprocess.run(['git', 'checkout', latest_tag], stdout=subprocess.PIPE)
        update(recursive=True)
        # The verification call above already restarted the service.
        return
    else:
        print("no update nessesary")

    subprocess.run(['systemctl', 'restart', 'hardware-monitor.service'], stdout=subprocess.PIPE)
|
||||
|
||||
# Script entry point: run one update check when executed directly.
if __name__ == "__main__":
    update()
|
||||
140
packages.py
Normal file
140
packages.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import re
|
||||
import subprocess
|
||||
from time import sleep
|
||||
|
||||
def _getDist():
|
||||
with open("/etc/os-release", "r") as f:
|
||||
release = f.read().split('\n')
|
||||
for line in release:
|
||||
try:
|
||||
s = line.split("=")
|
||||
if s[0] == "ID":
|
||||
os_id = s[1]
|
||||
break
|
||||
except:
|
||||
pass
|
||||
|
||||
return os_id
|
||||
|
||||
def _isProxmox():
|
||||
ispve = {}
|
||||
ispve["ispve"] = False
|
||||
try:
|
||||
result = subprocess.run(['pveversion'], stdout=subprocess.PIPE)
|
||||
result = result.stdout.decode('utf-8')
|
||||
ispve["ispve"] = True
|
||||
ispve["string"] = result
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
return ispve
|
||||
|
||||
def _sizeMultiplier(unit):
|
||||
if unit == "KiB":
|
||||
return 1
|
||||
elif unit == "MiB":
|
||||
return 1024
|
||||
elif unit == "GiB":
|
||||
return 1024**2
|
||||
elif unit == "TiB":
|
||||
return 1024**3
|
||||
else:
|
||||
return 0
|
||||
|
||||
def getPackages():
    """Collect upgradable and installed packages from apt or pacman.

    pacman is used when the distribution id is ``arch``; every other id is
    handled with apt, as before.

    Returns:
        dict with the keys
        ``updatable``   - {name: {"version", "repository"}},
        ``installed``   - {name: {"version", "repository", "size"}} where
                          ``size`` is in KiB (it may be missing for a pacman
                          package whose size could not be parsed),
        ``package_mgr`` - ``"apt"`` or ``"pacman"``,
        ``total_size``  - sum of all known installed sizes.
    """
    dist = _getDist()
    pkgs = {"updatable": {}, "installed": {}}

    if dist == "arch":
        pkgs["package_mgr"] = "pacman"
        _collectPacman(pkgs)
    else:
        pkgs["package_mgr"] = "apt"
        _collectApt(pkgs)

    # Entries without a size simply do not contribute to the total.
    pkgs["total_size"] = sum(info.get("size", 0) for info in pkgs["installed"].values())

    return pkgs


def _parseAptList(text):
    """Parse ``apt list`` output into {name: {"version", "repository"}}."""
    parsed = {}
    for line in text.split("\n"):
        fields = line.split(" ")
        source = fields[0].split("/")
        if len(fields) < 2 or len(source) < 2:
            # "Listing..." banner, blank line or anything else that is not
            # a "name/repo version ..." record
            continue
        parsed[source[0]] = {"version": fields[1], "repository": source[1]}
    return parsed


def _collectApt(pkgs):
    """Fill *pkgs* with upgradable and installed packages from apt/dpkg."""
    # Refresh the package index first; its output is not needed.
    subprocess.run(["apt", "update"], stdout=subprocess.PIPE)

    listing = subprocess.run(["apt", "list", "--upgradable"], stdout=subprocess.PIPE)
    pkgs["updatable"].update(_parseAptList(listing.stdout.decode('utf-8')))

    # Plain "apt list" is filtered down below to the packages dpkg reports
    # a size for.
    listing = subprocess.run(["apt", "list"], stdout=subprocess.PIPE)
    known = _parseAptList(listing.stdout.decode('utf-8'))

    sizes = subprocess.run(["dpkg-query","-W","--showformat='${Package} ${Installed-Size}\n'"], stdout=subprocess.PIPE)
    # The quotes are part of the argument (no shell involved), so they show
    # up in the output and are removed here.
    for line in sizes.stdout.decode('utf-8')[:-2].replace("'", "").split("\n"):
        fields = line.split(" ")
        if fields[0] not in known:
            # dpkg knows a package that apt did not list: skip it
            continue
        try:
            size = float(fields[1])
        except (IndexError, ValueError):
            size = 0.0
        known[fields[0]]["size"] = size

    pkgs["installed"] = {name: info for name, info in known.items() if "size" in info}


def _collectPacman(pkgs):
    """Fill *pkgs* with upgradable and installed packages from pacman."""
    pending = subprocess.run(["pacman", "-Sup", "--print-format", "%r,%n,%v"], stdout=subprocess.PIPE)
    for line in pending.stdout.decode('utf-8').split("\n"):
        fields = line.split(",")
        if len(fields) < 3:
            # empty output (nothing to upgrade) or an informational line
            continue
        pkgs["updatable"][fields[1]] = {"version": fields[2], "repository": fields[0]}

    # -Qn lists native packages, -Qm foreign ones; same labels as before.
    for flag, repository in (("-Qn", "pacman.conf"), ("-Qm", "user/AUR")):
        listing = subprocess.run(["pacman", flag], stdout=subprocess.PIPE)
        for line in listing.stdout.decode('utf-8').split("\n"):
            fields = line.split(" ")
            if len(fields) < 2:
                continue
            pkgs["installed"][fields[0]] = {"version": fields[1], "repository": repository}

    details = subprocess.run(["pacman", "-Qi"], stdout=subprocess.PIPE)
    for block in details.stdout.decode('utf-8').split("\n\n"):
        name = None
        size = None
        for line in block.split("\n"):
            # Compare the field label exactly so that other fields that
            # merely contain the word "Name" are not mistaken for it.
            key, _, value = line.partition(":")
            key = key.strip()
            if key == "Name":
                name = value.strip()
            elif key == "Installed Size":
                size = value.strip()
        if size is None or name not in pkgs["installed"]:
            continue
        amount, _, unit = size.partition(" ")
        try:
            pkgs["installed"][name]["size"] = float(amount) * _sizeMultiplier(unit)
        except ValueError:
            # size not parseable as a float; leave the entry without a size
            pass
|
||||
|
||||
# Manual check: print the collected package data and the installed count.
if __name__ == "__main__":
    # Collect once; every call re-runs the package manager commands.
    packages = getPackages()
    print(packages)
    print(len(packages['installed']))
|
||||
128
procMon.py
Normal file
128
procMon.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import promMon as pm
|
||||
import os
|
||||
import subprocess
|
||||
from time import sleep
|
||||
|
||||
# Clock ticks per second (the unit of the CPU time fields in /proc/<pid>/stat).
# Read through the standard library instead of spawning `getconf CLK_TCK`,
# so importing this module does not depend on an external program.
tikspersec = os.sysconf("SC_CLK_TCK")
|
||||
|
||||
def getPIDs():
    """Return the purely numeric entry names of ``/proc`` as strings."""
    return [entry for entry in os.listdir("/proc") if entry.isdigit()]
|
||||
|
||||
def parseStatus(pid):
    """Parse ``/proc/<pid>/status`` into a dict of strings.

    Args:
        pid: process id (int or str).

    Returns:
        dict mapping each field name to its first two whitespace-separated
        values joined by one space (e.g. ``"VmData": "1234 kB"``), or to
        the single value when the field has only one. Fields with no value
        are left out.

    Raises:
        OSError: if the status file cannot be opened (e.g. the process has
        already exited).
    """
    status_dict = {}
    with open("/proc/"+str(pid)+"/status", "r") as f:
        for line in f:
            key, sep, value = line.partition(":")
            # split() treats tabs as separators; deleting them (as before)
            # glued tab-separated values together, and single values were
            # lost through a swallowed IndexError.
            tokens = value.split()
            if not sep or not tokens:
                continue
            status_dict[key] = " ".join(tokens[:2])

    return status_dict
|
||||
|
||||
def parseMeminfo():
    """Parse ``/proc/meminfo`` into ``{field: size in bytes}``.

    Only lines of the form ``<name>: <integer> <unit>`` whose unit is known
    to ``byteMult`` are included; unit-less counters are left out, as
    before.
    """
    meminfo_dict = {}
    with open("/proc/meminfo", "r") as f:
        for line in f:
            fields = line.replace(":", "").split()
            if len(fields) < 3:
                # blank line or a counter without a unit
                continue
            try:
                amount = int(fields[1])
            except ValueError:
                continue
            multiplier = byteMult(fields[2])
            if multiplier is None:
                # unit not known to byteMult
                continue
            meminfo_dict[fields[0]] = amount * multiplier

    return meminfo_dict
|
||||
|
||||
def byteMult(value):
    """Return the number of bytes per unit for a /proc size suffix.

    The sizes the kernel prints in /proc with a "kB" suffix are multiples
    of 1024 bytes, so the factors are powers of 1024 (the previous decimal
    factors under-reported every size by about 2.4 %).

    Args:
        value: unit suffix - "B", "kB", "MB", "GB" or "TB".

    Returns:
        int factor, or None for an unknown suffix (unchanged behaviour).
    """
    factors = {
        "B": 1,
        "kB": 1024,
        "MB": 1024**2,
        "GB": 1024**3,
        "TB": 1024**4,
    }
    return factors.get(value)
|
||||
|
||||
def exportProcesses(last_proc_stat=0):
    """Snapshot per-process CPU and memory figures from ``/proc``.

    Args:
        last_proc_stat: result of the previous call, used to compute the CPU
            usage since then; ``0`` (default) when there is no previous
            sample.

    Returns:
        dict keyed by pid string. A complete entry holds ``id``, ``comm``,
        ``virt``, ``cpu_tiks_user``, ``cpu_tiks_system``, ``cpu_secs``,
        ``page_fault_major``, ``page_fault_minor``, ``starttime``,
        ``threadcnt``, ``state``, ``parent_pid``, ``runtime_seconds``,
        ``cpu_total`` (percent over the process lifetime), ``cpu`` (percent
        since the previous sample, 0 without one), ``memory`` and
        ``memory_percent`` (both derived from ``VmData``). Collection is
        best effort: when something cannot be read for a process (it
        exited, or it has no ``VmData`` line) its entry keeps only the keys
        filled up to that point.
    """
    processes = {}

    with open("/proc/uptime", "r") as f:
        uptime = float(f.read().split(" ")[0])

    # /proc/meminfo does not depend on the process: read it once, not once
    # per pid.
    meminfo = parseMeminfo()

    for pid in getPIDs():
        try:
            with open("/proc/"+pid+"/stat", "r") as f:
                raw = f.read()

            # Layout: "<pid> (<comm>) <state> ...". comm may itself contain
            # spaces or parentheses, so cut at the first "(" and the LAST ")".
            head, _, tail = raw.rpartition(")")
            pid_field, _, comm = head.partition("(")
            stat = [pid_field.strip(), comm] + tail.split()

            entry = {}
            processes[pid] = entry
            entry["id"] = stat[0]
            entry["comm"] = stat[1]
            entry["virt"] = stat[22]
            entry["cpu_tiks_user"] = int(stat[13])
            entry["cpu_tiks_system"] = int(stat[14])
            entry["cpu_secs"] = (entry["cpu_tiks_user"] + entry["cpu_tiks_system"]) / tikspersec
            entry["page_fault_major"] = stat[11]
            entry["page_fault_minor"] = stat[9]
            entry["starttime"] = int(stat[21]) / tikspersec
            entry["threadcnt"] = stat[19]
            entry["state"] = stat[2]
            entry["parent_pid"] = stat[3]
            entry["runtime_seconds"] = uptime - entry["starttime"]
            if entry["runtime_seconds"] > 0:
                entry["cpu_total"] = 100 * entry["cpu_secs"] / entry["runtime_seconds"]
            else:
                # process started within the current tick: no lifetime yet
                entry["cpu_total"] = 0
            entry["cpu"] = 0

            if last_proc_stat != 0:
                try:
                    previous = last_proc_stat[pid]
                    cpu_secs = entry["cpu_secs"] - previous["cpu_secs"]
                    sampletime = entry["runtime_seconds"] - previous["runtime_seconds"]
                    entry["cpu"] = 100 * cpu_secs / sampletime
                except (KeyError, TypeError, ZeroDivisionError):
                    # pid not in the previous sample, or no time elapsed
                    pass

            status = parseStatus(pid)
            memory = status["VmData"].split(" ")
            if memory[0] == "":
                memory.pop(0)
            entry["memory"] = int(memory[0]) * byteMult(memory[1])
            entry["memory_percent"] = 100 * entry["memory"] / meminfo["MemTotal"]
        except (OSError, KeyError, IndexError, ValueError, TypeError, ZeroDivisionError):
            # best effort per process; keep whatever was collected so far
            pass

    return processes
|
||||
|
||||
|
||||
# Manual check: take ten consecutive samples, two seconds apart, and print
# every process entry of each sample.
# NOTE(review): indentation was lost in this copy; sleep(2) is assumed to run
# once per sample.
if __name__ == "__main__":
    proc = exportProcesses()
    for _ in range(0, 10):
        proc = exportProcesses(proc)
        for entry in proc.values():
            print(entry)
        sleep(2)
|
||||
|
||||
58
promMon.py
Normal file
58
promMon.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# Author: Antonin Kaplan
|
||||
# Date: 2025-12-23
|
||||
#
|
||||
# Prometheus client library wrapper for easier usage
|
||||
#
|
||||
# On path through the deepest forest even the dimmest light shines on your path to enlightenment
|
||||
|
||||
from prometheus_client import start_http_server, Gauge, Counter
|
||||
|
||||
# create prometheus monitoring object
|
||||
# @param name of the monitored app, for example: hw-monitor
|
||||
# @param port of the exported endpoint which can be scraped by prometheus
|
||||
# @retval None
|
||||
class prometheus:
    """Small wrapper around ``prometheus_client`` metrics.

    Creating an instance starts the HTTP endpoint that Prometheus scrapes.
    Metrics are registered with ``add_monitor`` and updated with
    ``monitor``.
    """

    def __init__(self, name="promMon", port=8000):
        """Start the exporter.

        Args:
            name: prefix put in front of every metric name, e.g. ``hw-monitor``.
            port: TCP port of the exported endpoint.
        """
        self.name = name
        self.port = port
        self.monitors = {}

        start_http_server(self.port)

    def add_monitor(self, name, unit, tags=("instance",), type="Gauge"):
        """Register a metric under ``<self.name>_<name>``.

        Args:
            name: name of the monitored value; also the key in ``self.monitors``.
            unit: unit of measurement; passed to the metric as its
                description text.
            tags: label names of the metric (any iterable of strings).
            type: ``"Gauge"`` or ``"Counter"``. Any other value is ignored
                and no metric is created (unchanged behaviour).
        """
        metric_classes = {"Gauge": Gauge, "Counter": Counter}
        metric_class = metric_classes.get(type)
        if metric_class is None:
            return
        self.monitors[name] = metric_class(self.name+"_"+name, unit, list(tags))

    def delete_monitor(self, name):
        """Remove every label set of a metric while keeping the metric itself.

        Useful when data lives in the labels (SMART values, processes) and
        stale label sets must not linger.

        Args:
            name: name of the monitored value.
        """
        self.monitors[name].clear()

    def monitor(self, name, tags, value):
        """Update a metric.

        Args:
            name: name of the monitored value.
            tags: label values, in the order the label names were given
                to ``add_monitor`` (e.g. ``("10.0.0.1",)``).
            value: for a Gauge the new value; for a Counter the amount to
                add (counters cannot be set, only incremented).
        """
        metric = self.monitors[name]
        child = metric.labels(*tags)
        if isinstance(metric, Counter):
            # Counter has no set(); calling it raised AttributeError before.
            child.inc(value)
        else:
            child.set(value)
|
||||
|
||||
# Demo: export a gauge that counts up once per second on port 9339.
if __name__ == '__main__':
    from time import sleep

    pm = prometheus(port=9339, name="test")
    pm.add_monitor("time", "s", tags=["ip"])

    elapsed = 0
    while True:
        pm.monitor("time", ("localhost",), elapsed)
        elapsed += 1
        sleep(1)
|
||||
209
ryzenPower.py
Normal file
209
ryzenPower.py
Normal file
@@ -0,0 +1,209 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# ryzen-power: measure AMD Ryzen CPU power consumption.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# This program is a Python port of rapl-read-ryzen
|
||||
# https://github.com/djselbeck/rapl-read-ryzen
|
||||
|
||||
import logging
|
||||
import os.path
|
||||
import argparse
|
||||
from itertools import count
|
||||
from struct import unpack
|
||||
from time import sleep
|
||||
from warnings import warn
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("ryzen-master")
|
||||
|
||||
|
||||
class RyzenPower:
    """Measure AMD Ryzen package and per-core power through energy MSRs.

    Energy counters are read from ``/dev/cpu/<n>/msr`` and converted to
    watts with the energy unit taken from the power-unit MSR. Reading the
    MSR device files needs root and the ``msr`` kernel module.
    """

    AMD_MSR_PWR_UNIT_OFFSET = 0xC0010299
    AMD_MSR_CORE_ENERGY_OFFSET = 0xC001029A
    AMD_MSR_PACKAGE_ENERGY_OFFSET = 0xC001029B
    AMD_TIME_UNIT_MASK = 0xF0000
    AMD_ENERGY_UNIT_MASK = 0x1F00
    AMD_POWER_UNIT_MASK = 0xF
    # NOTE(review): the energy counters are assumed to be 32 bits wide
    # (confirm against the processor reference). The modulus only matters
    # when a counter wrapped between two reads; non-negative deltas below
    # 2**32 pass through unchanged.
    ENERGY_COUNTER_MODULUS = 1 << 32

    def __init__(self, duration=1.0):
        """Probe the CPU layout and energy unit.

        Args:
            duration: seconds ``measure()`` sleeps between its two reads.
        """
        self._energy_unit = self._get_energy_units()
        self._is_smt = self._detect_smt()
        self._package_topology = self._detect_physical_package_topology()
        self._duration = duration
        self._cores = list(self._package_topology.keys())
        if self._is_smt:
            # presumably SMT siblings are numbered adjacently, so every even
            # logical CPU stands for one physical core - verify on the target
            self._cores = [c for c in self._cores if c % 2 == 0]
        self._cores = sorted(self._cores)
        self._msr_fd_cache = {}
        # State of the previous measure_nonblocking() call; -1 = no sample yet.
        self.timestamp = 0
        self.package_energy = -1
        self.core_energy = -1

    @staticmethod
    def _read(filename):
        """Return the whole content of a text file."""
        with open(filename, "r") as f:
            return f.read()

    def _detect_smt(self):
        """Return True when SMT is reported as "on" (or cannot be detected)."""
        try:
            smt_status = self._read("/sys/devices/system/cpu/smt/control").strip()
            logger.debug("CPU smt status is {}".format(smt_status))
            return smt_status == "on"
        except FileNotFoundError:
            warn("unable to detect CPU SMT status, assume SMT is on")
            return True

    @staticmethod
    def _detect_physical_package_topology():
        """Map each logical CPU id to its physical package (socket) id.

        CPU ids are probed upwards from 0; the scan stops at the first id
        without a topology file.
        """
        cpu_package_mapping = {}
        for cpu_id in count():
            filename = "/sys/devices/system/cpu/cpu{}/topology/physical_package_id".format(cpu_id)
            if os.path.isfile(filename):
                with open(filename, "r") as f:
                    package_id = int(f.read())
                logger.debug("detected cpu {} in socket {}".format(cpu_id, package_id))
                cpu_package_mapping[cpu_id] = package_id
            else:
                return cpu_package_mapping

    def _read_msr(self, cpu_id, offset):
        """Read one 64-bit model-specific register of a logical CPU.

        Raises:
            PermissionError: the MSR device is not readable (needs root).
            FileNotFoundError: the MSR device does not exist (msr module
                not loaded).
        """
        msr_file = "/dev/cpu/{}/msr".format(cpu_id)
        try:
            with open(msr_file, "rb", buffering=8192) as f:
                f.seek(offset)
                # MSR value is always 64 bits
                # https://manpages.debian.org/buster/manpages/msr.4.en.html
                return self._decode_int64(f.read(8))
        except PermissionError:
            raise PermissionError("root privilege is required to read model-specific registers")
        except FileNotFoundError:
            raise FileNotFoundError("msr driver is not loaded, try \"sudo modprobe msr\" to load msr module")

    @staticmethod
    def _decode_int64(buffer):
        """Decode 8 bytes as a native-order signed 64-bit integer."""
        return unpack("q", buffer)[0]

    def _read_all_units(self):
        """Return the raw power-unit MSR, read from CPU 0."""
        return self._read_msr(0, self.AMD_MSR_PWR_UNIT_OFFSET)

    def _get_energy_units(self):
        """Return the size of one energy count in joules (``0.5 ** n``)."""
        energy_unit = (self._read_all_units() & self.AMD_ENERGY_UNIT_MASK) >> 8
        logger.debug("CPU energy unit is 1/2^{}".format(energy_unit))
        energy_unit = 0.5 ** energy_unit
        return energy_unit

    def _read_package_energy(self, cpu_id):
        """Return the raw package energy counter as seen by *cpu_id*."""
        energy = self._read_msr(cpu_id, self.AMD_MSR_PACKAGE_ENERGY_OFFSET)
        # The message used to have two placeholders for three arguments.
        logger.debug("CPU {} current package energy {} * {} J".format(cpu_id, energy, self._energy_unit))
        return energy

    def _read_core_energy(self, cpu_id):
        """Return the raw core energy counter of *cpu_id*."""
        energy = self._read_msr(cpu_id, self.AMD_MSR_CORE_ENERGY_OFFSET)
        logger.debug("CPU {} current core energy {} * {} J".format(cpu_id, energy, self._energy_unit))
        return energy

    def _energy_delta(self, before, after):
        """Return the counts accumulated between two reads, undoing one wrap."""
        return (after - before) % self.ENERGY_COUNTER_MODULUS

    def _calc_power(self, before, after):
        """Return watts for two counter reads taken ``self._duration`` apart."""
        return self._energy_delta(before, after) * self._energy_unit / self._duration

    def _calc_power_wtime(self, before, after, duration):
        """Return watts for two counter reads taken *duration* seconds apart."""
        return self._energy_delta(before, after) * self._energy_unit / duration

    def measure(self):
        """Blocking measurement: read, sleep ``duration``, read again.

        Returns:
            (package_power, core_power): two dicts keyed by core id, in watts.
        """
        package_energy_before = {c: self._read_package_energy(c) for c in self._cores}
        core_energy_before = {c: self._read_core_energy(c) for c in self._cores}
        logger.debug("sleep for {} seconds".format(self._duration))
        sleep(self._duration)
        package_energy_after = {c: self._read_package_energy(c) for c in self._cores}
        core_energy_after = {c: self._read_core_energy(c) for c in self._cores}
        package_power = {c: self._calc_power(package_energy_before[c], package_energy_after[c]) for c in self._cores}
        core_power = {c: self._calc_power(core_energy_before[c], core_energy_after[c]) for c in self._cores}
        return package_power, core_power

    def measure_nonblocking(self):
        """Non-blocking measurement relative to the previous call.

        The first call only stores a baseline and reports zeros.

        Returns:
            (package_power, core_power): ``package_power`` is one number,
            the package power averaged over all measured cores;
            ``core_power`` is a dict keyed by core id. Both in watts.
        """
        timestamp = datetime.now()
        package_energy = {c: self._read_package_energy(c) for c in self._cores}
        core_energy = {c: self._read_core_energy(c) for c in self._cores}

        time_delta = 0
        if self.package_energy != -1:
            time_delta = (timestamp - self.timestamp).total_seconds()

        if time_delta > 0:
            package_power = {c: self._calc_power_wtime(self.package_energy[c], package_energy[c], time_delta) for c in self._cores}
            core_power = {c: self._calc_power_wtime(self.core_energy[c], core_energy[c], time_delta) for c in self._cores}
        else:
            # no baseline yet (or no time elapsed): nothing to report
            package_power = {c: 0 for c in self._cores}
            core_power = {c: 0 for c in self._cores}

        self.package_energy = package_energy
        self.core_energy = core_energy
        self.timestamp = timestamp

        # Average over every core's reading; the old loop added
        # package_power[0] on each iteration instead of package_power[c].
        if self._cores:
            package_power = sum(package_power[c] for c in self._cores) / len(self._cores)
        else:
            package_power = 0

        return package_power, core_power

    @staticmethod
    def _format_table(table, widths, units):
        """Render rows as fixed-width text columns.

        Floats are printed with two decimals followed by the column's unit;
        everything else is printed with ``str()``.
        """
        buffer = []
        for row in table:
            row_buffer = []
            for col, width, unit in zip(row, widths, units):
                if isinstance(col, float):
                    row_buffer.append("{:.2f}{}".format(col, unit).ljust(width))
                else:
                    row_buffer.append(str(col).ljust(width))
            buffer.append("".join(row_buffer))
        return "\n".join(buffer)

    def _format_result(self, package_power, core_power):
        """Format per-socket and per-core power dicts (as returned by
        ``measure()``) as a text table."""
        sockets = sorted(set(self._package_topology.values()))
        table = [["", "Cores Power", "Package Power"]]
        for socket in sockets:
            socket_total_cores_power = 0
            socket_package_power = 0
            # The socket row is inserted before its cores; its totals are
            # appended to the same list object once they are known.
            socket_power_entry = ["SOCKET {: 2}:".format(socket)]
            table.append(socket_power_entry)
            for core in self._cores:
                if self._package_topology[core] == socket:
                    socket_total_cores_power += core_power[core]
                    socket_package_power = package_power[core]
                    table.append([
                        " CORE {: 2}:".format(core // 2 if self._is_smt else core),
                        core_power[core],
                        ""
                    ])
            socket_power_entry.append(socket_total_cores_power)
            socket_power_entry.append(socket_package_power)
        return self._format_table(table, (16, 16, 16), ("", "W", "W"))
|
||||
|
||||
|
||||
# Command-line entry point: one blocking measurement, printed as a table.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Measure power consumption for AMD Ryzen CPU')
    parser.add_argument("--debug", action='store_true', help="show debug messages")
    parser.add_argument("-d", "--duration", type=float, default=0.5,
                        help="the duration of measurement in seconds, default is 0.5 second")
    args = parser.parse_args()
    if args.debug:
        stream_handler = logging.StreamHandler()
        logger.addHandler(stream_handler)
        logger.setLevel(logging.DEBUG)

    # One instance for measuring and formatting; a second one would repeat
    # the MSR and topology probing for nothing.
    meter = RyzenPower(args.duration)
    package, core = meter.measure()
    print(meter._format_result(package, core))
|
||||
65
sensors.py
Normal file
65
sensors.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
def getSensors():
    """Run ``sensors -j -A`` and return its JSON output as a dict."""
    completed = subprocess.run(['sensors', '-j', '-A'], stdout=subprocess.PIPE)
    return json.loads(completed.stdout.decode('utf-8'))
|
||||
|
||||
def getTemps():
    """Return temperature readings of every device whose name lacks "nct".

    Returns:
        dict ``{device: {sensor: value}}`` built from the entries whose key
        contains both "temp" and "input".
    """
    devices = getSensors()

    temps = {}
    for device, sensors in devices.items():
        if "nct" in device:
            continue
        found = temps[device] = {}
        for sensor in sensors:
            readings = sensors[sensor]
            for key in readings:
                if "temp" in key and "input" in key:
                    found[sensor] = readings[key]
    return temps
|
||||
|
||||
def getVoltages():
    """Return voltage readings of the devices whose name contains "nct".

    Returns:
        dict ``{device: {sensor: value}}`` for sensors whose name contains
        "in", taken from the entries whose key contains "input".
    """
    devices = getSensors()

    voltages = {}
    for device, sensors in devices.items():
        if "nct" not in device:
            continue

        found = voltages[device] = {}
        for sensor in sensors:
            if "in" not in sensor:
                continue
            readings = sensors[sensor]
            for key in readings:
                if "input" in key:
                    found[sensor] = readings[key]
    return voltages
|
||||
|
||||
def getFans():
    """Return fan readings of the devices whose name contains "nct".

    Returns:
        dict ``{device: {sensor: value}}`` for sensors whose name contains
        "fan", taken from the entries whose key contains "input".
    """
    devices = getSensors()

    fans = {}
    for device, sensors in devices.items():
        if "nct" not in device:
            continue

        found = fans[device] = {}
        for sensor in sensors:
            if "fan" not in sensor:
                continue
            readings = sensors[sensor]
            for key in readings:
                if "input" in key:
                    found[sensor] = readings[key]
    return fans
|
||||
|
||||
|
||||
# Manual check: print temperatures, voltages and fans, separated by blank lines.
if __name__ == "__main__":
    print(getTemps())
    print("")
    print(getVoltages())
    print("")
    print(getFans())
|
||||
244
sysinfo.py
Normal file
244
sysinfo.py
Normal file
@@ -0,0 +1,244 @@
|
||||
import psutil
|
||||
import platform
|
||||
from datetime import datetime
|
||||
import subprocess
|
||||
import json
|
||||
import socket
|
||||
|
||||
def getBoard():
    """Return the "Product Name" reported by ``dmidecode -t 1``.

    Returns:
        str: the text after the first colon of the first matching line
        (without its leading character), or "" when no line matches.
    """
    completed = subprocess.run(['dmidecode', '-t', '1'], stdout=subprocess.PIPE)
    lines = completed.stdout.decode('utf-8').replace("\t", "").split("\n")

    for line in lines:
        if "Product Name" in line:
            return line.split(":")[1][1:]
    return ""
|
||||
|
||||
def getSysInfo():
    """Return hostname, kernel release, kernel version and board name."""
    uname = platform.uname()

    return {
        "hostname": uname.node,
        "kernel": uname.release,
        "version": uname.version,
        "board": getBoard(),
    }
|
||||
|
||||
def getDisk():
    """Return I/O counters of whole disks.

    A device is included when its name contains "nvme" but no "p", or when
    its name contains no digit at all; everything else is skipped.

    Returns:
        dict ``{disk: {...}}`` with byte counts, operation counts, times,
        merged counts and busy time as reported by psutil.
    """
    counters = psutil.disk_io_counters(perdisk=True)
    disk_io_dict = {}

    for disk, io in counters.items():
        whole_nvme = "nvme" in disk and "p" not in disk
        has_digit = any(char.isdigit() for char in disk)
        if not whole_nvme and has_digit:
            continue

        disk_io_dict[disk] = {
            "read": io.read_bytes,
            "write": io.write_bytes,
            "io_read": io.read_count,
            "io_write": io.write_count,
            "io_read_time": io.read_time,
            "io_write_time": io.write_time,
            "io_read_merged": io.read_merged_count,
            "io_write_merged": io.write_merged_count,
            "busy": io.busy_time,
        }

    return disk_io_dict
|
||||
|
||||
def getCPU():
    """Return CPU time percentages, per-core frequency, usage and count.

    Returns:
        dict with ``time_percent`` (per CPU-time category), ``frequency``
        ({core index: current frequency}), ``usage`` and ``cpu_count``.
    """
    cpu_time = psutil.cpu_times_percent()
    freq = psutil.cpu_freq(percpu=True)

    cpu_dict = {
        "time_percent": {},
        "frequency": {index: core.current for index, core in enumerate(freq)},
        "usage": psutil.cpu_percent(),
        "cpu_count": psutil.cpu_count(),
    }

    categories = ("user", "nice", "system", "idle", "iowait",
                  "irq", "softirq", "steal", "guest", "guest_nice")
    for category in categories:
        cpu_dict["time_percent"][category] = getattr(cpu_time, category)

    return cpu_dict
|
||||
|
||||
def getMemory():
    """Return RAM and swap statistics from psutil as one flat dict.

    RAM fields keep psutil's names; swap fields are prefixed with
    ``swap_`` (``sin``/``sout`` become ``swap_in``/``swap_out``).
    """
    mem = psutil.virtual_memory()
    swap = psutil.swap_memory()

    mem_dict = {}

    ram_fields = ("total", "available", "percent", "used", "free", "active",
                  "inactive", "buffers", "cached", "shared", "slab")
    for field in ram_fields:
        mem_dict[field] = getattr(mem, field)

    swap_fields = (("swap_total", "total"), ("swap_used", "used"),
                   ("swap_free", "free"), ("swap_percent", "percent"),
                   ("swap_in", "sin"), ("swap_out", "sout"))
    for key, field in swap_fields:
        mem_dict[key] = getattr(swap, field)

    return mem_dict
|
||||
|
||||
def getPartitions():
    """Return size, used and free bytes of every mounted partition.

    Partitions whose device name contains "loop" are skipped. A partition
    whose mount point cannot be queried is left out instead of aborting
    the whole collection.

    Returns:
        dict ``{device name: {"size", "used", "free"}}``.
    """
    part_dict = {}

    for part in psutil.disk_partitions():
        name = part.device.split('/')[-1]
        if "loop" in name:
            continue

        try:
            # One call gives a consistent snapshot of all three figures.
            usage = psutil.disk_usage(part.mountpoint)
        except OSError:
            # e.g. permission denied or the mount vanished in the meantime
            continue

        part_dict[name] = {
            "size": usage.total,
            "used": usage.used,
            "free": usage.free,
        }

    return part_dict
|
||||
|
||||
def getZFS():
    """Return state and capacity figures of every ZFS pool.

    Parses the JSON printed by ``zpool list -jHp``.

    Returns:
        dict ``{pool: {"state", "size", "used", "free", "fragmentation",
        "dedup"}}`` where ``state`` is 2 for ONLINE, 1 for DEGRADED and 0
        for anything else.
    """
    completed = subprocess.run(['zpool', 'list', '-jHp'], stdout=subprocess.PIPE)
    zfs = json.loads(completed.stdout.decode('utf-8'))

    state_codes = {"ONLINE": 2, "DEGRADED": 1}

    zfs_dict = {}
    for pool in zfs["pools"]:
        info = zfs["pools"][pool]
        zfs_dict[pool] = {}
        zfs_dict[pool]["state"] = state_codes.get(info["state"], 0)

        properties = info["properties"]
        zfs_dict[pool]["size"] = int(properties["size"]["value"])
        zfs_dict[pool]["used"] = int(properties["allocated"]["value"])
        zfs_dict[pool]["free"] = int(properties["free"]["value"])
        zfs_dict[pool]["fragmentation"] = int(properties["fragmentation"]["value"])
        zfs_dict[pool]["dedup"] = float(properties["dedupratio"]["value"])

    return zfs_dict
|
||||
|
||||
def getUptime():
    """Return the seconds elapsed since boot (now minus psutil's boot time)."""
    return datetime.now().timestamp() - psutil.boot_time()
|
||||
|
||||
def getNetwork():
    """Return traffic counters per network interface.

    Interfaces whose name contains "fw", "lo" or "br" are skipped.

    Returns:
        dict ``{nic: {...}}`` with bytes, errors, drops and packets in both
        directions.
    """
    counters = psutil.net_io_counters(pernic=True)
    excluded = ("fw", "lo", "br")

    net_dict = {}
    for nic, io in counters.items():
        if any(marker in nic for marker in excluded):
            continue
        net_dict[nic] = {
            "rx": io.bytes_recv,
            "tx": io.bytes_sent,
            "err_rx": io.errin,
            "err_tx": io.errout,
            "drop_rx": io.dropin,
            "drop_tx": io.dropout,
            "packet_tx": io.packets_sent,
            "packet_rx": io.packets_recv,
        }

    return net_dict
|
||||
|
||||
def getIP():
    """Return ``{interface: IPv4 address}`` for interfaces that have one.

    When an interface carries several IPv4 addresses the last one reported
    by psutil wins.
    """
    addresses = psutil.net_if_addrs()

    return {
        interface: entry.address
        for interface in addresses
        for entry in addresses[interface]
        if entry.family == socket.AF_INET
    }
|
||||
|
||||
|
||||
def users():
    """Parse the output of ``w`` into logged-in users and their sessions.

    The second output line is taken as the column header. If it has no
    FROM column, ``w -f`` is run instead.

    Returns:
        dict ``{user: {"sessions": count, "session": {n: {"from": origin}}}}``;
        empty when the output has no more than two lines.

    NOTE(review): indentation was lost in this copy of the file. This
    version assumes ``active_user`` is assigned for both new and already
    known users - confirm against the real source.
    """
    lines = subprocess.run(['w'], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')

    if "FROM" not in lines[1]:
        lines = subprocess.run(['w', '-f'], stdout=subprocess.PIPE).stdout.decode('utf-8').split('\n')

    # drop the empty string after the final newline
    lines.pop(-1)

    header = []
    logged_in = {}

    if len(lines) <= 2:
        return logged_in

    for row in range(1, len(lines)):
        tokens = [token for token in lines[row].split(" ") if token != ""]
        if row == 1:
            # header row: remember the column names, no user data yet
            header.extend(tokens)
            user = []
        else:
            user = tokens

        active_user = ""
        for col in range(0, len(header)):
            # Any failure for a column (e.g. the header row having no user
            # fields) is ignored, exactly as in the original.
            try:
                if "USER" in header[col]:
                    login = user[col]
                    if login in logged_in:
                        logged_in[login]["sessions"] = logged_in[login]["sessions"] + 1
                    else:
                        logged_in[login] = {}
                        logged_in[login]["sessions"] = 1
                        logged_in[login]["session"] = {}
                    active_user = login
                if "FROM" in header[col]:
                    record = logged_in[active_user]
                    record["session"][record["sessions"]] = {}
                    origin = user[col]
                    if origin.count(".") == 3 or origin == "-":
                        # dotted address or explicit "no origin" marker
                        record["session"][record["sessions"]]["from"] = origin
                    else:
                        # otherwise the previous field is used as the origin
                        record["session"][record["sessions"]]["from"] = user[col-1]
            except:
                pass

    return logged_in
|
||||
|
||||
# Manual check: print disk I/O counters and partition usage.
# The other collectors (getSysInfo, users, getCPU, getMemory, getZFS,
# getUptime, getNetwork) can be printed the same way when needed.
if __name__ == "__main__":
    print(getDisk())
    print(getPartitions())
|
||||
27
temps.py
Normal file
27
temps.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
def getSensors():
    """Run ``sensors -j -A``, print the raw output and return it parsed as JSON."""
    completed = subprocess.run(['sensors', '-j', '-A'], stdout=subprocess.PIPE)
    raw = completed.stdout.decode('utf-8')

    print(raw)
    return json.loads(raw)
|
||||
|
||||
def getTemps():
    """Return temperature readings of every device.

    Returns:
        dict ``{device: {sensor: value}}`` built from the entries whose key
        contains both "temp" and "input".
    """
    devices = getSensors()

    temps = {}
    for device, sensors in devices.items():
        found = temps[device] = {}
        for sensor in sensors:
            readings = sensors[sensor]
            for key in readings:
                if "temp" in key and "input" in key:
                    found[sensor] = readings[key]
    return temps
|
||||
|
||||
# Manual check: collect the temperatures (getSensors prints the raw output).
if __name__ == "__main__":
    getTemps()
|
||||
Reference in New Issue
Block a user