# Source code for perfmon.core.metrics.__init__
"""This file contains the class that launches the monitoring processes."""
import os
import logging
import time
import copy
import multiprocessing
import threading
import psutil
from perfmon.common.pid import GetJobPid
from perfmon.common.utils.json_wrappers import dump_json
from perfmon.core.metrics.cpu import MonitorCpuUsage
from perfmon.core.metrics.perfcounters import MonitorPerfCounters
from perfmon.core.metrics.gpu import MonitorNvidiaGpuMetrics
# Module-level logger, named after this module via ``__name__``.
_log = logging.getLogger(__name__)
# pylint: disable=E0401,W0201,C0301
class MonitorPerformanceMetrics(object):
    """Engine to extract performance metrics.

    Resolves the PID(s) of the job to monitor, starts one monitor for every
    metric family requested in ``config['metrics']`` and waits until all of
    them have finished. The UTC start and end timestamps of the collection
    are kept in ``self._extra``.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(self, config):
        """Initialize setup.

        Args:
            config: Mapping holding the monitoring settings. This class reads
                ``config['metrics']`` and writes ``config['pid']``; the same
                object is handed to ``GetJobPid`` and to every monitor.
        """
        self.config = config
        # Bookkeeping filled in by start_collection (start_time / end_time).
        self._extra = {}

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    def get_job_pid(self):
        """Look up the PID(s) of the job to monitor and store them in the config.

        The result of ``GetJobPid(self.config).go()`` is written to
        ``self.config['pid']``; it is iterated below, so it is expected to be
        an iterable of PIDs. Both the PID of the current (monitoring) process
        and the job PID(s) are logged at INFO level.
        """
        # Get pid of the main job to monitor
        pid_getter = GetJobPid(self.config)
        self.config['pid'] = pid_getter.go()
        _log.info('The monitoring process has the PID: %d', os.getpid())
        _log.info(
            'The job to monitor has PID(s): %s',
            ','.join(str(pid) for pid in self.config['pid']),
        )

    def start_collection(self):
        """Start collecting the requested metrics and wait for completion.

        One monitor is created and started per metric family found in
        ``self.config['metrics']``:

        * ``'cpu_metrics'``    -> ``MonitorCpuUsage``
        * ``'perf_metrics'``   -> ``MonitorPerfCounters``
        * ``'nv_gpu_metrics'`` -> ``MonitorNvidiaGpuMetrics``

        Each monitor is started right after it is constructed so that they
        run simultaneously; the method then blocks on ``join()`` for each of
        them. ``self._extra['start_time']`` is stamped before the job PID
        lookup and ``self._extra['end_time']`` after the last monitor has
        been joined.
        """
        self._extra['start_time'] = self._utc_timestamp()

        # Find job pid
        self.get_job_pid()

        requested = self.config['metrics']

        # Spawn processes to monitor the job simultaneously.
        metric_processes = {}
        if 'cpu_metrics' in requested:
            metric_processes['cpu_metrics'] = MonitorCpuUsage(config=self.config)
            metric_processes['cpu_metrics'].start()
        if 'perf_metrics' in requested:
            metric_processes['perf_metrics'] = MonitorPerfCounters(config=self.config)
            metric_processes['perf_metrics'].start()
        if 'nv_gpu_metrics' in requested:
            # Stored under 'nvidia_gpu'; the key is only used locally.
            metric_processes['nvidia_gpu'] = MonitorNvidiaGpuMetrics(config=self.config)
            metric_processes['nvidia_gpu'].start()

        # Wait for the spawned processes to finish
        for spawned_proc in metric_processes.values():
            spawned_proc.join()

        self._extra['end_time'] = self._utc_timestamp()