Skip to content

Commit

Permalink
Merge pull request #2278 from wazuh/2258-wazuh-metrics-multiprocesses
Browse files Browse the repository at this point in the history
Adapt `wazuh-metrics` and `data-visualizer` CLIs to handle multiprocessing
  • Loading branch information
snaow authored Nov 30, 2021
2 parents 14b0c2d + 7e84c40 commit 08ded39
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 23 deletions.
11 changes: 7 additions & 4 deletions deps/wazuh_testing/wazuh_testing/scripts/wazuh_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ def main():
logger.info(f'Started new session: {CURRENT_SESSION}')

for process in options.process_list:
monitor = Monitor(process_name=process, value_unit=options.data_unit, time_step=options.sleep_time,
version=options.version, dst_dir=options.store_path)
monitor.start()
MONITOR_LIST.append(monitor)
# Launch a monitor for every possible child process
for i, pid in enumerate(Monitor.get_process_pids(process)):
p_name = process if i == 0 else f'{process}_child_{i}'
monitor = Monitor(process_name=p_name, pid=pid, value_unit=options.data_unit, time_step=options.sleep_time,
version=options.version, dst_dir=options.store_path)
monitor.start()
MONITOR_LIST.append(monitor)


if __name__ == '__main__':
Expand Down
62 changes: 45 additions & 17 deletions deps/wazuh_testing/wazuh_testing/tools/performance/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Monitor:
Args:
process_name (str): name of the process to monitor.
pid (int): PID of the process.
value_unit (str, optional): unit to store the bytes values. Defaults to KB.
time_step (int, optional): time between each scan in seconds. Defaults to 1 second.
version (str, optional): version of the binary. Defaults to None.
Expand All @@ -43,45 +44,68 @@ class Monitor:
thread (thread): thread to scan the data.
csv_file (str): path to the CSV file.
"""
def __init__(self, process_name, value_unit='KB', time_step=1, version=None, dst_dir=gettempdir()):
def __init__(self, process_name, pid, value_unit='KB', time_step=1, version=None, dst_dir=gettempdir()):
self.process_name = process_name
self.value_unit = value_unit
self.time_step = time_step
self.version = version
self.data_units = {'B': 0, 'KB': 1, 'MB': 2}
self.platform = platform
self.dst_dir = dst_dir
self.pid = None
self.pid = pid
self.proc = None
self.event = None
self.thread = None
self.previous_read = None
self.previous_write = None
self.set_pid(self.process_name)
self.set_process()
self.csv_file = join(self.dst_dir, f'{self.process_name}.csv')

def set_pid(self, process_name):
"""Search and set the PID of the process.
@classmethod
def get_process_pids(cls, process_name, check_children=True) -> list:
"""Obtain the PIDs of the process and its children's if there are any.
Raises:
ValueError: if the process is not running.
Args:
process_name (str): name of the process.
check_children (bool): Check for children PIDs.
Returns:
list: List of integers with the PIDs.
"""
ppid = None
for proc in psutil.process_iter():
# These two binaries are executed using the Python interpreter instead of
# directly execute them as daemons. That's why we need to search the .py file in
# the cmdline instead of searching it in the name
if process_name in ['wazuh-clusterd', 'wazuh-apid']:
if any(filter(lambda x: f"{process_name}.py" in x, proc.cmdline())):
ppid = proc.pid
if any(filter(lambda x: f'{process_name}.py' in x, proc.cmdline())):
pid = proc.pid
break
elif process_name in proc.name():
ppid = proc.pid
pid = proc.pid
break
else:
raise ValueError(f'The process {process_name} is not running')

if not check_children:
return [pid]

if ppid is None:
raise ValueError(f"The process {process_name} is not running.")
# Look for all the children PIDs
parent_pid = psutil.Process(pid).parent().pid
if parent_pid == 1:
parent_pid = pid

self.pid = ppid
self.proc = psutil.Process(self.pid)
return [parent_pid] + [child.pid for child in psutil.Process(parent_pid).children(recursive=True)]

def set_process(self):
"""Create process instance and save it.
Raises:
ValueError: if the process is not running.
"""
try:
self.proc = psutil.Process(self.pid)
except psutil.NoSuchProcess:
raise ValueError(f'The process {self.process_name} is not running.')

def get_process_info(self, proc):
"""Collect the data from the process.
Expand Down Expand Up @@ -151,9 +175,13 @@ def unit_conversion(x):
self.previous_read = info[f'Disk_Read({self.value_unit})']
self.previous_write = info[f'Disk_Written({self.value_unit})']
except psutil.NoSuchProcess:
logger.warning(f'Lost PID for {self.process_name}. Trying to obtain a new one')
logger.warning(f'Lost PID for {self.process_name}. Trying to obtain a new one. '
'If the process has child processes, this test will not be valid')
try:
self.set_pid(self.process_name)
# Try to get another PID for the current process name. This could be wrong if there is more than
# one process with the same name (child processes)
self.pid = Monitor.get_process_pids(self.process_name, check_children=False)[0]
self.set_process()
except ValueError:
logger.warning(f'Could not obtain a new PID for {self.process_name}. Trying again in {self.time_step}s')
finally:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _color_palette(size):
Returns:
list: list of colors. The colors are represented as a tuple of float values.
"""
return sns.hls_palette(size - 1 if size > 1 else 1, h=.5)
return sns.hls_palette(size if size > 1 else 1, h=.5)

def _load_dataframes(self):
"""Load the dataframes from dataframes_paths."""
Expand Down Expand Up @@ -219,7 +219,7 @@ def _plot_data(self, elements, title=None, generic_label=None):
nodes = self.dataframe[self.dataframe.activity == element]['node_name'].unique()
current_df = self.dataframe[self.dataframe.activity == element]
current_df.reset_index(drop=True, inplace=True)
for node, color in zip(nodes, self._color_palette(len(nodes) + 1)):
for node, color in zip(nodes, self._color_palette(len(nodes))):
self._basic_plot(ax=ax, dataframe=current_df[current_df.node_name == node]['time_spent(s)'],
label=node, color=color)
self._save_custom_plot(ax, 'time_spent(s)', element.replace(' ', '_').lower(), cluster_log=True,
Expand Down

0 comments on commit 08ded39

Please sign in to comment.