"""This module contains functions that are related to perf stat metrics"""
import re
import logging
import yaml
try:
import importlib.resources as pkg_resources
except ImportError:
import importlib_resources as pkg_resources
from perfmon import perfevents
from perfmon.common.processor import get_cpu_spec
from perfmon.common.utils.execute_cmd import execute_cmd
from perfmon.common.utils.parsing import get_parser
from perfmon.exceptions import PerfEventListNotFoundError
_log = logging.getLogger(__name__)
# pylint: disable=E0401,W0201,C0301
def get_mem_bw_event():
    """
    Build the perf event string used to measure memory bandwidth.

    The processor vendor and micro architecture reported by ``get_cpu_spec`` are handed to
    ``llc_cache_miss_perf_event`` and its result is returned unchanged.

    Returns:
        str: Event string to pass to the perf stat command
    """
    # Identify the processor we are running on
    vendor, uarch = get_cpu_spec()

    # The LLC miss events of that processor serve as the memory bandwidth event
    return llc_cache_miss_perf_event(vendor, uarch)
def llc_cache_miss_perf_event(processor_vendor, micro_architecture):
    """
    This function gives the raw perf event string for LLC cache miss events for different
    architectures

    Args:
        processor_vendor (str): Vendor of the processor
        micro_architecture (str): Name of the micro architecture of the processor

    Returns:
        str: Comma separated perf event codes (``LLC_MISS_LOCAL`` and ``LLC_MISS_REMOTE``).
            An empty string is returned, and a warning is logged, when no event is known for
            the given vendor / micro architecture. This function never raises for an unknown
            processor.
    """
    # Here we measure the "read" bandwidth from LLC to DRAM using offcore requests. They
    # measure core originated requests. Thus they cannot provide any "write" requests.
    # More info can be found on this SOF thread: https://stackoverflow.com/questions/47612854/
    # can-the-intel-performance-monitor-counters-be-used-to-measure-memory-bandwidth/47816066
    # There is a nice piece of work here: https://www.readex.eu/wp-content/uploads/2017/06/
    # ICPE2017_authors_version.pdf
    # There are two events OFFCORE_RESPONSE_0 and OFFCORE_RESPONSE_1. We can use them to measure
    # two events simultaneously. Here we measure L3_MISS_LOCAL_DRAM with OFFCORE_RESPONSE_0 and
    # L3_MISS_REMOTE_DRAM with OFFCORE_RESPONSE_1.
    # These events are obtained using libpfm4 for different Intel micro architectures.
    # Another method is to look at uncore cas_count_read/write events. Typically, they need root
    # access to measure them.
    intel_offcore_events = {
        'broadwell': (
            'cpu/config=0x5301b7,config1=0x3f84008fff,name=LLC_MISS_LOCAL/u,'
            'cpu/config=0x5301bb,config1=0x3fb8008fff,name=LLC_MISS_REMOTE/u'
        ),
        'skylake': (
            'cpu/config=0x5301b7,config1=0x3f840085b7,name=LLC_MISS_LOCAL/u,'
            'cpu/config=0x5301bb,config1=0x3f900085b7,name=LLC_MISS_REMOTE/u'
        ),
        'haswell': (
            'cpu/config=0x5301b7,config1=0x3f80408fff,name=LLC_MISS_LOCAL/u,'
            'cpu/config=0x5301bb,config1=0x3fb8008fff,name=LLC_MISS_REMOTE/u'
        ),
        'sandybridge': (
            'cpu/config=0x5301b7,config1=0x3f80408fff,name=LLC_MISS_LOCAL/u,'
            'cpu/config=0x5301bb,config1=0x3fff808fff,name=LLC_MISS_REMOTE/u'
        ),
        'westmere': (
            'cpu/config=0x5301b7,config1=0x10ff,name=LLC_MISS_LOCAL/u,'
            'cpu/config=0x5301bb,config1=0x20ff,name=LLC_MISS_REMOTE/u'
        ),
    }
    # skylake_avx512 uses the same event codes as skylake
    intel_offcore_events['skylake_avx512'] = intel_offcore_events['skylake']

    if processor_vendor == 'GenuineIntel':
        llc_miss_event = intel_offcore_events.get(micro_architecture, '')
        if not llc_miss_event:
            _log.warning('LLC miss event not found')
    elif processor_vendor == 'AuthenticAMD':
        # On AMD we dont have OFFCORE_RESPONSE events and available events are uncore and they
        # need root access
        llc_miss_event = ''
        _log.warning('LLC miss event not implemented for AMD chips')
    else:
        llc_miss_event = ''
        # Pass the vendor as a logging argument so the message is only built when emitted
        _log.warning('LLC miss event not found for %s processor', processor_vendor)

    return llc_miss_event
def get_working_perf_events():
    """
    Collect the perf events that work on the current micro architecture.

    The event list of the micro architecture reported by ``get_cpu_spec`` is loaded, filtered
    with ``check_perf_events`` and re-indexed with ``derived_perf_event_list``. When
    ``PerfEventListNotFoundError`` is raised along the way, the error is logged and two empty
    dicts are returned; the exception is not propagated.

    Returns:
        dict: Perf events with event name
        dict: Derived perf metrics from event counters
    """
    # Only the micro architecture is needed, the vendor name is discarded
    _, micro_architecture = get_cpu_spec()

    try:
        # Load the event list, then keep only the events that can be counted
        working_events = check_perf_events(perf_event_list(micro_architecture))
        # Index the events by name and gather the derived metrics
        named_events, derived_metrics = derived_perf_event_list(working_events)
    except PerfEventListNotFoundError:
        named_events, derived_metrics = {}, {}
        _log.error('Perf event list not found')

    return named_events, derived_metrics
def check_perf_events(perf_events):
    """
    This function checks if all perf events are actually working. Only the working counters
    are kept so that monitoring probes nothing else.

    Every group except ``hardware_events`` and ``software_events`` is tried once with
    ``perf stat -e <events> sleep 0.1``. An event is kept only when the count parsed from the
    perf output converts to an integer.

    Args:
        perf_events (dict): A dict of found perf events. Each group maps to a dict whose
            ``events`` entry is iterated as event code strings (assumed to be a list).

    Returns:
        dict: The same ``perf_events`` dict, where each checked group has been replaced by a
            copy whose ``events`` list contains only the working event codes
    """
    # Event codes look like '<raw code>,name=<event name>/u'; group 2 is the event name
    name_pattern = re.compile(r'(.*),name=(.*)/u')

    for group, event_list in perf_events.items():
        # Generic hardware and software events are not probed
        if group in ('hardware_events', 'software_events'):
            continue

        events = event_list['events']
        if not events:
            # Nothing to probe, do not run perf with an empty event list
            continue

        cmd_str = 'perf stat -e {} sleep 0.1'.format(','.join(events))
        cmd_out = execute_cmd(cmd_str)
        parse_perf_out = get_parser(cmd_out, reg='perf')

        # Build a fresh list rather than removing items from the list being iterated. The
        # group dict copy below is shallow, so in-place removal would also alter the input.
        working_events = []
        for event_code in events:
            name_match = name_pattern.match(event_code)
            if name_match is None:
                # Without a name the event cannot be looked up in the perf output
                _log.warning('Cannot extract event name from perf event %s', event_code)
                continue
            try:
                # Counts are printed with thousands separators, e.g. 1,234,567
                int(parse_perf_out(name_match.group(2)).rstrip().replace(',', ''))
            except ValueError:
                # The parsed value is not a number, so the event is not working
                continue
            working_events.append(event_code)

        final_event_list = event_list.copy()
        final_event_list['events'] = working_events
        # Re-assigning an existing key is safe while iterating over the dict
        perf_events[group] = final_event_list

    return perf_events
def perf_event_list(micro_architecture):
    """
    This function loads the perf events implemented for a given micro architecture from the
    ``<micro_architecture>.yml`` resource of the ``perfevents`` package

    Args:
        micro_architecture (str): Name of the micro architecture

    Returns:
        dict: Parsed content of the perf events yml file

    Raises:
        PerfEventListNotFoundError: If the perf events yml file cannot be located, read or
            parsed. The underlying error is attached as ``__cause__``.
    """
    perf_event_file = f'{micro_architecture}.yml'

    # Load perf event list file
    try:
        with pkg_resources.path(perfevents, perf_event_file) as evnt_file:
            with open(evnt_file, 'r') as yam:
                perf_event_dict = yaml.full_load(yam)
    except Exception as err:
        # Any failure is reported as the single error type callers handle. Chaining keeps the
        # real cause (missing resource, unreadable file, invalid YAML) in the traceback.
        raise PerfEventListNotFoundError(
            'Perf event list for {} not found'.format(micro_architecture)
        ) from err

    return perf_event_dict
def derived_perf_event_list(perf_events):
    """
    This function indexes the perf events of each group by event name and collects the
    formulae of the derived perf metrics

    For ``hardware_events`` and ``software_events`` the event itself is used as its name. For
    every other group the name is taken from the ``name=<event name>/u`` part of the event
    code. Event codes without such a part are skipped with a warning.

    Args:
        perf_events (dict): Dictionary of perf events. Each group maps to a dict with an
            ``events`` entry and an optional ``formulae`` entry.

    Returns:
        dict: For each group, a dict mapping event name to event code
        dict: For each group that defines ``formulae``, the value of that entry
    """
    # Same pattern as check_perf_events uses, so every '<raw code>,name=<name>/u' event is
    # accepted (cpu/event=..,umask=.. as well as cpu/config=..,config1=..)
    name_pattern = re.compile(r'(.*),name=(.*)/u')

    found_perf_events = {}
    derived_perf_metrics = {}

    for group, event_dict in perf_events.items():
        uses_plain_names = group in ('hardware_events', 'software_events')
        named_events = {}

        for event in event_dict['events']:
            if uses_plain_names:
                named_events[event] = event
                continue
            name_match = name_pattern.search(event)
            if name_match is None:
                _log.warning('Skipping perf event without a name: %s', event)
                continue
            named_events[name_match.group(2)] = event

        found_perf_events[group] = named_events
        if 'formulae' in event_dict:
            derived_perf_metrics[group] = event_dict['formulae']

    return found_perf_events, derived_perf_metrics