You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I've been trying to get l0 sysman examples to work with the B580 and zesDeviceGetProperties always returns 0x78000001 when trying to get info on the B580.
I'm running Ubuntu 24.10 with the kobuk PPA. I've also tried using the Ubuntu 24.10 kernel with Jammy and Noble containers and the latest compute stack from repositories.intel.com... I can't get sysman to work :(
I'm using the following code, taken from various ze examples.
#include<iomanip>
#include<iostream>
#include<vector>
#ifdef __linux__
#include<unistd.h>
#include<sys/types.h>
#endif
#include<level_zero/ze_api.h>
#include<level_zero/zes_api.h>
#include"pti_assert.h"
#include"utils.h"
#include"ze_utils.h"
#defineBYTES_IN_MB (1024 * 1024)
intmain()
{
utils::SetEnv("ZES_ENABLE_SYSMAN", "1");
ze_result_t status = ZE_RESULT_SUCCESS;
zeInit(0);
// Get number of Level Zero driversuint32_t driver_count = 0;
zeDriverGet(&driver_count, nullptr);
// Get list of drivers
std::vector<ze_driver_handle_t> driver_list(driver_count);
zeDriverGet(&driver_count, driver_list.data());
// For each driver, get the list of supported devicesfor (uint32_t i = 0; i < driver_count; ++i)
{
// Get number of devices for the driveruint32_t device_count = 0;
zeDeviceGet(driver_list[i], &device_count, nullptr);
// Get list of devices
std::vector<ze_device_handle_t> device_list(device_count);
zeDeviceGet(driver_list[i], &device_count, device_list.data());
// For each device in the list, check if it's a GPUfor (uint32_t j = 0; j < device_count; ++j)
{
ze_device_handle_t device = device_list[j];
ze_device_properties_t device_properties{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
zeDeviceGetProperties(device, &device_properties);
if (device_properties.type == ZE_DEVICE_TYPE_GPU)
{
// Sysman Device Properties
{
zes_device_properties_t device_props{
ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES,
};
status = zesDeviceGetProperties(device, &device_props);
if (status == ZE_RESULT_SUCCESS)
{
std::cout << "Device: " << device_props.core.name << std::endl;
std::cout << "-- Subdevice Count: " << device_props.numSubdevices << std::endl;
std::cout << "-- Driver Version: " << device_props.driverVersion << std::endl;
}
else
{
std::cout << "Device unknown: " << j << std::endl;
}
}
// Sysman PCI Properties
{
zes_pci_properties_t pci_props{
ZES_STRUCTURE_TYPE_PCI_PROPERTIES,
};
status = zesDevicePciGetProperties(device, &pci_props);
if (status == ZE_RESULT_SUCCESS)
{
std::cout << "-- PCI Bus: " << std::hex << std::setfill('0') << std::setw(4) << pci_props.address.domain << ":" << std::setw(2) << pci_props.address.bus << ":" << std::setw(2) << pci_props.address.device << "." << std::setw(1) << pci_props.address.function << std::dec << std::setfill('') << std::endl;
}
}
// Sysman Memory Properties
{
uint32_t module_count = 0;
status = zesDeviceEnumMemoryModules(device, &module_count, nullptr);
if (status == ZE_RESULT_SUCCESS)
{
if (module_count > 0)
{
std::cout << "-- Memory Modules: " << module_count << std::endl;
std::vector<zes_mem_handle_t> module_list(module_count);
status = zesDeviceEnumMemoryModules(
device, &module_count, module_list.data());
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
for (uint32_t i = 0; i < module_count; ++i)
{
zes_mem_properties_t memory_props{
ZES_STRUCTURE_TYPE_MEM_PROPERTIES,
};
status = zesMemoryGetProperties(module_list[i], &memory_props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
std::cout << "---- [" << i << "] Module Capacity (MB): " << memory_props.physicalSize / BYTES_IN_MB << std::endl;
}
}
}
}
// Core Frequency
{
uint32_t domain_count = 0;
status = zesDeviceEnumFrequencyDomains(device, &domain_count, nullptr);
if (status == ZE_RESULT_SUCCESS)
{
if (domain_count > 0)
{
std::cout << "-- Frequency Domains: " << domain_count << std::endl;
std::vector<zes_freq_handle_t> domain_list(domain_count);
status = zesDeviceEnumFrequencyDomains(
device, &domain_count, domain_list.data());
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
for (uint32_t i = 0; i < domain_count; ++i)
{
zes_freq_properties_t domain_props{
ZES_STRUCTURE_TYPE_FREQ_PROPERTIES,
};
status = zesFrequencyGetProperties(domain_list[i], &domain_props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
std::cout << "---- [" << i << "] Clock EU Freq Range (MHz): " << domain_props.min << " - " << domain_props.max << (domain_props.canControl ? " (changeable)" : " (unchangeable)") << std::endl;
zes_freq_state_t state{
ZES_STRUCTURE_TYPE_FREQ_STATE,
};
status = zesFrequencyGetState(domain_list[i], &state);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
std::cout << "---- [" << i << "] Current Clock EU Freq (MHz): " << state.actual << std::endl;
}
}
}
}
// Core Temperature
{
uint32_t sensor_count = 0;
status = zesDeviceEnumTemperatureSensors(
device, &sensor_count, nullptr);
if (status == ZE_RESULT_SUCCESS && sensor_count > 0)
{
#ifdef __linux__
if (geteuid() != 0)
{
std::cout << "Need to be root to see temperature" << std::endl;
return0;
}
#endif
std::cout << "-- Temperature Sensors: " << sensor_count << std::endl;
std::vector<zes_temp_handle_t> sensor_list(sensor_count);
status = zesDeviceEnumTemperatureSensors(
device, &sensor_count, sensor_list.data());
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
for (uint32_t i = 0; i < sensor_count; ++i)
{
zes_temp_properties_t temp_props{
ZES_STRUCTURE_TYPE_TEMP_PROPERTIES,
};
status = zesTemperatureGetProperties(sensor_list[i], &temp_props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
if (temp_props.type == ZES_TEMP_SENSORS_GPU)
{
double temperature = 0.0f;
status = zesTemperatureGetState(sensor_list[i], &temperature);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
std::cout << "---- [" << i << "] Core Temperature (C): " << temperature << std::endl;
}
}
}
}
}
}
}
return0;
}
Compiling after putting above in monitor.cpp and adding the missing header files from the compute-runtime repo:
g++ -o monitor monitor.cpp -lze_loader
./monitor
Output:
./monitor
Device unknown: 0
Device: Intel(R) UHD Graphics 770
-- Subdevice Count: 0
-- Driver Version: 7209A40C3CFCD5142354A9F
-- PCI Bus: 0000:00:02.0
-- Frequency Domains: 1
---- [0] Clock EU Freq Range (MHz): 300 - 1650 (changeable)
---- [0] Current Clock EU Freq (MHz): 700
sycl-ls (when I have a full oneAPI installation) and clinfo both show the GPU, so looks like maybe just a sysman issue?
clinfo | grep "Device Name"
Device Name Intel(R) Graphics [0xe20b]
Device Name Intel(R) UHD Graphics 770
Device Name Intel(R) Graphics [0xe20b]
Device Name Intel(R) Graphics [0xe20b]
Device Name Intel(R) Graphics [0xe20b]
I'd appreciate any pointers -- I'm just trying to monitor GPU usage while running some AI/ML workloads; intel_gpu_top will show me the UHD 770 info (i915), but I'm hoping to track utilization of the B580 as well.
I can run workloads on the GPU (running ipex-llm w/ neuralchat with pretty good results) but can't figure out a way to monitor the GPU usage.
Any hints?
Thanks,
James
The text was updated successfully, but these errors were encountered:
In your source, you seem to be initializing sysman with zeInit plus the ZES_ENABLE_SYSMAN environment variable.
Since the B580 is newer than PVC, we recommend initializing sysman with
zesInit + zesDriverGet + zesDeviceGet instead.
I've been trying to get l0 sysman examples to work with the B580 and zesDeviceGetProperties always returns 0x78000001 when trying to get info on the B580.
I'm running Ubuntu 24.10 with the kobuk PPA. I've also tried using the Ubuntu 24.10 kernel with Jammy and Noble containers and the latest compute stack from repositories.intel.com... I can't get sysman to work :(
I'm using the following code, taken from various ze examples.
Compiling after putting above in monitor.cpp and adding the missing header files from the compute-runtime repo:
Output:
sycl-ls (when I have a full oneAPI installation) and clinfo both show the GPU, so looks like maybe just a sysman issue?
I'd appreciate any pointers -- I'm just trying to monitor GPU usage while running some AI/ML workloads; intel_gpu_top will show me the UHD 770 info (i915), but I'm hoping to track utilization of the B580 as well.
I can run workloads on the GPU (running ipex-llm w/ neuralchat with pretty good results) but can't figure out a way to monitor the GPU usage.
Any hints?
Thanks,
James
The text was updated successfully, but these errors were encountered: