# Source code for perfmon.core.metrics.__init__

"""This file contains class to launch monitoring process"""

import os
import logging
import time
import copy
import multiprocessing
import threading
import psutil

from perfmon.common.pid import GetJobPid
from perfmon.common.utils.json_wrappers import dump_json
from perfmon.core.metrics.cpu import MonitorCpuUsage
from perfmon.core.metrics.perfcounters import MonitorPerfCounters
from perfmon.core.metrics.gpu import MonitorNvidiaGpuMetrics

_log = logging.getLogger(__name__)

# pylint: disable=E0401,W0201,C0301


class MonitorPerformanceMetrics(object):
    """Engine to extract performance metrics.

    Resolves the PID(s) of the job to monitor, then starts one collector
    per metric family listed in ``config['metrics']`` and waits for all of
    them to finish. The wall-clock start and end of the collection are kept
    in ``self._extra``.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(self, config):
        """Initialize setup.

        Args:
            config: Mapping with the monitoring settings. This class reads
                ``config['metrics']`` and writes ``config['pid']``; the same
                object is passed to ``GetJobPid`` and to every collector.
        """
        self.config = config
        # Bookkeeping filled in by start_collection ('start_time'/'end_time').
        self._extra = {}

    @staticmethod
    def _utc_timestamp():
        """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    def get_job_pid(self):
        """Find the PID(s) of the job to monitor and store them in the config.

        The value returned by ``GetJobPid(self.config).go()`` is saved under
        ``self.config['pid']``. Both the PID of the current (monitoring)
        process and the job PID(s) are logged at INFO level.
        """
        # Get pid of the main job to monitor
        self.config['pid'] = GetJobPid(self.config).go()

        _log.info('The monitoring process has the PID: %d', os.getpid())
        _log.info(
            'The job to monitor has PID(s): %s',
            ','.join(str(pid) for pid in self.config['pid']),
        )

    def start_collection(self):
        """Start collecting the requested metrics and wait for completion.

        One collector is created and started for each of ``cpu_metrics``,
        ``perf_metrics`` and ``nv_gpu_metrics`` that appears in
        ``self.config['metrics']``. According to the original author these
        collectors are spawned through the multiprocessing library so that
        they monitor the job simultaneously; this method only relies on
        their ``start()`` and ``join()`` methods.

        ``self._extra['start_time']`` is set before the job PID lookup and
        ``self._extra['end_time']`` after every collector has been joined.

        Raises:
            KeyError: If ``self.config`` has no ``'metrics'`` entry.
        """
        self._extra['start_time'] = self._utc_timestamp()

        # Find job pid
        self.get_job_pid()

        # (name looked up in config['metrics'], key used locally, collector).
        # The GPU collector is requested as 'nv_gpu_metrics' but tracked as
        # 'nvidia_gpu', exactly as before.
        collectors = (
            ('cpu_metrics', 'cpu_metrics', MonitorCpuUsage),
            ('perf_metrics', 'perf_metrics', MonitorPerfCounters),
            ('nv_gpu_metrics', 'nvidia_gpu', MonitorNvidiaGpuMetrics),
        )

        # Spawn processes to monitor the job simultaneously.
        metric_processes = {}
        for metric_name, process_key, collector_cls in collectors:
            if metric_name in self.config['metrics']:
                metric_processes[process_key] = collector_cls(config=self.config)
                metric_processes[process_key].start()

        # Wait for the spawned processes to finish
        for spawned_proc in metric_processes.values():
            spawned_proc.join()

        self._extra['end_time'] = self._utc_timestamp()