Source code for darshan.tests.test_data_access_by_filesystem

import os

import numpy as np
from numpy.testing import assert_allclose
import pytest
import pandas as pd
from pandas.testing import assert_series_equal
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import darshan
from darshan.experimental.plots import data_access_by_filesystem
from darshan.log_utils import get_log_path

@pytest.mark.parametrize("series, expected_series", [
    # only one of the filesystem roots is present in the input
    (
        pd.Series([1], index=['/yellow']),
        # the absent roots should be added with values of 0
        pd.Series([1, 0, 0],
                  index=['/yellow', '/tmp', '/home'],
                  dtype=np.float64),
    ),
    # two of the three filesystem roots are present in the input
    (
        pd.Series([1, 3], index=['/yellow', '/tmp']),
        # the single absent root should be added with a value of 0
        pd.Series([1, 3, 0],
                  index=['/yellow', '/tmp', '/home'],
                  dtype=np.float64),
    ),
    # every filesystem root is already present in the input
    (
        pd.Series([1, 3, 2], index=['/yellow', '/tmp', '/home']),
        # nothing is missing, so the values are expected to be unchanged
        pd.Series([1, 3, 2],
                  index=['/yellow', '/tmp', '/home'],
                  dtype=np.float64),
    ),
    # only the last filesystem root is present in the input
    (
        pd.Series([2], index=['/home']),
        # index order should follow the provided filesystem_roots,
        # with 0 filled in wherever a root had no entry
        pd.Series([0, 0, 2],
                  index=['/yellow', '/tmp', '/home'],
                  dtype=np.float64),
    ),
])
def test_empty_series_handler(series, expected_series):
    """Check that ``empty_series_handler()`` zero-fills absent roots.

    Any filesystem root missing from the input Series is expected to
    show up in the output with a value of 0 (i.e., no activity on that
    filesystem), which keeps plotting behavior consistent.
    """
    roots = ['/yellow', '/tmp', '/home']
    filled_series = data_access_by_filesystem.empty_series_handler(
        series=series,
        filesystem_roots=roots,
    )
    assert_series_equal(filled_series, expected_series)
@pytest.mark.parametrize("file_path, expected_root_path", [
    ("/scratch1/scratchdirs/glock/testFile.00000046", "/scratch1"),
])
def test_convert_file_path_to_root_path(file_path, expected_root_path):
    """Check the root path derived from a full file path."""
    root_path = data_access_by_filesystem.convert_file_path_to_root_path(
        file_path=file_path,
    )
    assert root_path == expected_root_path
@pytest.mark.parametrize("input_id, file_id_dict, expected_file_path", [
    # an ID that is present in the dictionary
    (
        9.457796068806373e+18,
        {
            210703578647777632: '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out.locktest.0',
            9457796068806373448: '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out',
        },
        '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out',
    ),
    # an ID that is intentionally absent from the dictionary
    (
        9.357796068806371e+18,
        {
            210703578647777632: '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out.locktest.0',
            9457796068806373448: '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out',
        },
        None,
    ),
])
def test_convert_file_id_to_path(input_id, file_id_dict, expected_file_path):
    """Check the ID -> path lookup, including the ``None`` result for a miss."""
    # the lookup operates on the array form of the ID dictionary
    hashes, paths = data_access_by_filesystem.convert_id_dict_to_arrays(
        file_id_dict=file_id_dict,
    )
    resolved_path = data_access_by_filesystem.convert_file_id_to_path(
        input_id=input_id,
        file_hashes=hashes,
        file_paths=paths,
    )
    assert resolved_path == expected_file_path
@pytest.mark.parametrize("verbose", [True, False])
@pytest.mark.parametrize("file_id_dict, expected_root_paths", [
    (
        {
            210703578647777632: '/yellow/usr/projects/eap/users/treddy/simple_dxt_mpi_io_darshan/test.out.locktest.0',
            14388265063268455899: '/tmp/ompi.sn176.28751/jf.29186/1/test.out_cid-0-3400.sm',
        },
        ['/yellow', '/tmp'],
    ),
])
def test_identify_filesystems(capsys, file_id_dict, expected_root_paths, verbose):
    """Check the returned root paths and the ``verbose`` printing behavior."""
    root_paths = data_access_by_filesystem.identify_filesystems(
        file_id_dict=file_id_dict,
        verbose=verbose,
    )
    assert root_paths == expected_root_paths

    stdout_text = capsys.readouterr().out
    if not verbose:
        # nothing should be printed in quiet mode
        assert not stdout_text
        return

    # in verbose mode the same root paths are also printed
    for root_path in root_paths:
        assert root_path in stdout_text
@pytest.mark.parametrize(
    "log_path, expected_df_reads_shape, "
    "expected_df_writes_shape",
    [
        (get_log_path("sample.darshan"), (0, 87), (3, 87)),
        (get_log_path("sample-dxt-simple.darshan"), (0, 73), (2, 73)),
    ],
)
def test_rec_to_rw_counter_dfs_with_cols(log_path,
                                         expected_df_reads_shape,
                                         expected_df_writes_shape):
    """Check the shapes of the read/write dataframes built from a log.

    Only basic shape expectations are enforced on the two dataframes
    returned by ``rec_to_rw_counter_dfs_with_cols()`` for the POSIX module.
    """
    with darshan.DarshanReport(log_path) as report:
        name_records = report.data["name_records"]
        df_reads, df_writes = data_access_by_filesystem.rec_to_rw_counter_dfs_with_cols(
            report=report,
            file_id_dict=name_records,
            mod='POSIX',
        )
        assert df_reads.shape == expected_df_reads_shape
        assert df_writes.shape == expected_df_writes_shape
@pytest.mark.parametrize(
    "read_groups, write_groups, filesystem_roots, expected_read_groups, expected_write_groups",
    [
        (
            pd.Series([0, 1, 7], index=['/root', '/tmp', '/yellow']),
            pd.Series([5, 5], index=['/root', '/tmp']),
            ['/root', '/tmp', '/yellow', '/usr', '/scratch1'],
            pd.Series([0, 1, 7, 0, 0],
                      index=['/root', '/tmp', '/yellow', '/usr', '/scratch1'],
                      dtype=np.float64),
            pd.Series([5, 5, 0, 0, 0],
                      index=['/root', '/tmp', '/yellow', '/usr', '/scratch1'],
                      dtype=np.float64),
        ),
    ],
)
def test_check_empty_series(read_groups, write_groups, filesystem_roots,
                            expected_read_groups, expected_write_groups):
    """Check that both Series get reindexed onto the full list of roots."""
    reindexed_reads, reindexed_writes = data_access_by_filesystem.check_empty_series(
        read_groups=read_groups,
        write_groups=write_groups,
        filesystem_roots=filesystem_roots,
    )
    assert_series_equal(reindexed_reads, expected_read_groups)
    assert_series_equal(reindexed_writes, expected_write_groups)
@pytest.mark.parametrize(
    "df_reads, df_writes, expected_read_groups, expected_write_groups",
    [
        (
            pd.DataFrame({
                'filesystem_root': ['/yellow', '/tmp', '/yellow'],
                'POSIX_BYTES_READ': [3, 5, 90],
                'POSIX_BYTES_WRITTEN': [0, 9, 0],
                'COLUMN3': [np.nan, 5, 8],
                'COLUMN4': ['a', 'b', 'c'],
            }),
            pd.DataFrame({
                'filesystem_root': ['/yellow', '/tmp', '/tmp'],
                'POSIX_BYTES_READ': [1, 11, 17],
                'POSIX_BYTES_WRITTEN': [2098, 9, 20],
                'COLUMN3': [np.nan, 5, 1],
                'COLUMN4': ['a', 'b', 'd'],
            }),
            pd.Series([5, 93],
                      index=pd.Index(['/tmp', '/yellow'], name="filesystem_root"),
                      name='BYTES_READ'),
            pd.Series([29, 2098],
                      index=pd.Index(['/tmp', '/yellow'], name="filesystem_root"),
                      name='BYTES_WRITTEN'),
        ),
    ],
)
def test_process_byte_counts(df_reads, df_writes,
                             expected_read_groups, expected_write_groups):
    """Check the per-filesystem byte totals from ``process_byte_counts()``."""
    bytes_read, bytes_written = data_access_by_filesystem.process_byte_counts(
        df_reads=df_reads,
        df_writes=df_writes,
    )
    assert_series_equal(bytes_read, expected_read_groups)
    assert_series_equal(bytes_written, expected_write_groups)
@pytest.mark.parametrize(
    "df_reads, df_writes, expected_read_groups, expected_write_groups",
    [
        (
            pd.DataFrame({
                'filesystem_root': ['/yellow', '/tmp', '/yellow'],
                'filepath': ['/yellow/file1', '/tmp/file2', '/yellow/file3'],
                'POSIX_BYTES_READ': [3, 5, 90],
                'POSIX_BYTES_WRITTEN': [0, 9, 0],
                'COLUMN3': [np.nan, 5, 8],
                'COLUMN4': ['a', 'b', 'c'],
            }),
            pd.DataFrame({
                'filesystem_root': ['/yellow', '/tmp', '/tmp'],
                'filepath': ['/yellow/file4', '/tmp/file5', '/tmp/file19'],
                'POSIX_BYTES_READ': [1, 11, 17],
                'POSIX_BYTES_WRITTEN': [2098, 9, 20],
                'COLUMN3': [np.nan, 5, 1],
                'COLUMN4': ['a', 'b', 'd'],
            }),
            pd.Series([1, 2],
                      index=pd.Index(['/tmp', '/yellow'], name="filesystem_root"),
                      name='filepath'),
            pd.Series([2, 1],
                      index=pd.Index(['/tmp', '/yellow'], name="filesystem_root"),
                      name='filepath'),
        ),
    ],
)
def test_process_unique_files(df_reads, df_writes,
                              expected_read_groups, expected_write_groups):
    """Check the per-filesystem file counts from ``process_unique_files()``."""
    files_read, files_written = data_access_by_filesystem.process_unique_files(
        df_reads=df_reads,
        df_writes=df_writes,
    )
    assert_series_equal(files_read, expected_read_groups)
    assert_series_equal(files_written, expected_write_groups)
@pytest.mark.parametrize("mod", ["POSIX", "OTHER"])
@pytest.mark.parametrize("verbose", [True, False])
@pytest.mark.parametrize(
    "log_path, processing_func, "
    "expected_read_groups, expected_write_groups",
    [
        (
            get_log_path("sample.darshan"),
            data_access_by_filesystem.process_unique_files,
            pd.Series([0.0, 0.0, 0.0, 0.0],
                      index=pd.Index(['<STDIN>', '<STDOUT>', '<STDERR>', '/scratch2'],
                                     name='filesystem_root'),
                      name='filepath'),
            pd.Series([0.0, 1.0, 1.0, 1.0],
                      index=pd.Index(['<STDIN>', '<STDOUT>', '<STDERR>', '/scratch2'],
                                     name='filesystem_root'),
                      name='filepath'),
        ),
    ],
)
def test_unique_fs_rw_counter(log_path, processing_func, verbose,
                              expected_read_groups, expected_write_groups, mod):
    """Check ``unique_fs_rw_counter()`` results and its module restriction.

    With ``mod="POSIX"`` the returned read/write Series are compared with
    the expected values; any other ``mod`` exercised here is expected to
    raise ``NotImplementedError``.
    """
    with darshan.DarshanReport(log_path) as report:
        file_id_dict = report.data["name_records"]
        filesystem_roots = data_access_by_filesystem.identify_filesystems(
            report.data["name_records"])

        # both branches issue the very same call
        counter_kwargs = dict(
            report=report,
            filesystem_roots=filesystem_roots,
            file_id_dict=file_id_dict,
            processing_func=processing_func,
            mod=mod,
            verbose=verbose,
        )

        if mod != "POSIX":
            with pytest.raises(NotImplementedError):
                data_access_by_filesystem.unique_fs_rw_counter(**counter_kwargs)
            return

        read_groups, write_groups = data_access_by_filesystem.unique_fs_rw_counter(
            **counter_kwargs)
        assert_series_equal(read_groups, expected_read_groups)
        assert_series_equal(write_groups, expected_write_groups)
@pytest.mark.parametrize(
    "file_rd_series, file_wr_series, "
    "bytes_rd_series, bytes_wr_series, "
    "filesystem_roots ",
    [
        (
            pd.Series([3.0],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            pd.Series([14.0],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            pd.Series([2.145206e+09],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_READ'),
            pd.Series([1.010878e+12],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_WRITTEN'),
            ['/p'],
        ),
    ],
)
def test_plot_data(file_rd_series, file_wr_series,
                   bytes_rd_series, bytes_wr_series, filesystem_roots):
    """Check a few basic properties of the main plotting function."""
    fig = plt.figure()
    data_access_by_filesystem.plot_data(
        fig=fig,
        file_rd_series=file_rd_series,
        file_wr_series=file_wr_series,
        bytes_rd_series=bytes_rd_series,
        bytes_wr_series=bytes_wr_series,
        filesystem_roots=filesystem_roots,
    )

    # gather the strings of every Text artist on the current axes
    # (this includes text added via ax.text() by the function)
    texts_in_fig = [
        artist.get_text()
        for artist in fig.gca().get_children()
        if isinstance(artist, matplotlib.text.Text)
    ]

    expected_text_artists = [
        matplotlib.text.Text(0, 1, ' files read: 3'),
        matplotlib.text.Text(0, 0, ' files written: 14'),
    ]
    for expected_artist in expected_text_artists:
        assert expected_artist.get_text() in texts_in_fig

    # enforce an invisible right-side spine so that there is no
    # overlap between value labels and the plot frame on the right
    for ax in fig.axes:
        assert not ax.spines['right'].get_visible()
def test_empty_data_posix_y_axis_annot_position():
    """Regression test (gh-397) for y-axis annotations without POSIX data.

    The y-axis filesystem annotations were observed to cross the left
    side spine and overlap onto the plot proper when using a log file
    that lacks POSIX data. Verify that this is handled by checking that
    the annotation font sizes have been adjusted.
    """
    log_file_path = get_log_path('noposixopens.darshan')
    with darshan.DarshanReport(log_file_path) as report:
        actual_fig = data_access_by_filesystem.plot_with_report(report=report)

        annotation_fontsizes = [
            child.get_fontsize()
            for ax in actual_fig.axes
            for child in ax.get_children()
            if isinstance(child, matplotlib.text.Annotation)
        ]

        # without this guard the test would pass vacuously if the figure
        # had no annotations at all; this log is expected to produce
        # y-axis labels (see test_cat_labels_std_streams)
        assert annotation_fontsizes

        for actual_fontsize in annotation_fontsizes:
            assert actual_fontsize == 18
@pytest.mark.parametrize("log_file_name, expected_text_labels", [
    ('noposixopens.darshan', ['/global', 'anonymized']),
    ('sample.darshan', ['/scratch2', '<STDERR>', '<STDOUT>']),
    # test case for gh-678
    ('mpi-io-test.darshan', ['/global', '<STDOUT>']),
])
def test_cat_labels_std_streams(log_file_name, expected_text_labels):
    """Check the y-axis category labels used for standard streams.

    For an anonymized log file that operates on STDIO, STDERR and STDIN,
    appropriate labels should be used instead of confusing integers on
    the y axis; for the same scenario without anonymization, the STD..
    stream labels seem appropriate.
    """
    log_file_path = get_log_path(log_file_name)
    with darshan.DarshanReport(log_file_path) as report:
        actual_fig = data_access_by_filesystem.plot_with_report(report=report)

        # annotation strings, in axes order then child order
        annotation_labels = [
            child.get_text()
            for ax in actual_fig.axes
            for child in ax.get_children()
            if isinstance(child, matplotlib.text.Annotation)
        ]
        assert annotation_labels == expected_text_labels
def test_empty_data_posix_text_position():
    """Regression test (gh-397) for text label positions without POSIX data.

    The bytes and files read/written text labels were observed to be too
    far to the right in the subplots for a log file lacking POSIX
    activity.
    """
    log_file_path = get_log_path('noposixopens.darshan')
    with darshan.DarshanReport(log_file_path) as report:
        actual_fig = data_access_by_filesystem.plot_with_report(report=report)

        for ax in actual_fig.axes:
            for child in ax.get_children():
                if not isinstance(child, matplotlib.text.Text):
                    continue
                label = child.get_text()
                # check for the correct axis coordinate positions
                if 'read' in label:
                    assert_allclose(child.get_position(), (0, 0.75))
                elif 'written' in label:
                    assert_allclose(child.get_position(), (0, 0.25))
@pytest.mark.parametrize(
    "file_rd_series, file_wr_series, "
    "bytes_rd_series, bytes_wr_series, "
    "filesystem_roots ",
    [
        (
            pd.Series([1],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            pd.Series([1],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            pd.Series([1.049e+6],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_READ'),
            pd.Series([1.049e+6],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_WRITTEN'),
            ['/p'],
        ),
        # test case where files read/written are zero
        (
            pd.Series([0],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            pd.Series([0],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='filepath'),
            # NOTE: very strange to be able to read/write bytes to
            # a filesystem and yet have no files read or written
            # to on that filesystem (this might be an error someday?)
            # see comment:
            # https://github.com/darshan-hpc/darshan/pull/397#discussion_r683621305
            pd.Series([1.049e+6],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_READ'),
            pd.Series([1.049e+6],
                      index=pd.Index(['/p'], name='filesystem_root'),
                      name='POSIX_BYTES_WRITTEN'),
            ['/p'],
        ),
    ],
)
def test_plot_data_labels(file_rd_series, file_wr_series,
                          bytes_rd_series, bytes_wr_series, filesystem_roots):
    """Regression test for label spacing in the plot (review comment, gh-397)."""
    fig = plt.figure()
    data_access_by_filesystem.plot_data(
        fig=fig,
        file_rd_series=file_rd_series,
        file_wr_series=file_wr_series,
        bytes_rd_series=bytes_rd_series,
        bytes_wr_series=bytes_wr_series,
        filesystem_roots=filesystem_roots,
    )

    for ax in fig.axes:
        for child in ax.get_children():
            if not isinstance(child, matplotlib.text.Text):
                continue
            label = child.get_text()
            # the filesystem label and empty strings are not checked
            if label in ['/p', '']:
                continue
            # every remaining label must start with exactly one space
            n_leading_spaces = len(label) - len(label.lstrip(' '))
            assert n_leading_spaces == 1
def test_plot_data_shared_x_axis():
    """Regression test for shared, log-scaled x axes within each column.

    For the case described here:
    https://github.com/darshan-hpc/darshan/pull/397#pullrequestreview-717403104
    https://github.com/darshan-hpc/darshan/pull/397#issuecomment-889504530
    """
    filesystem_roots = ['/usr', '/yellow', '/green', '/global']
    bytes_rd_series = pd.Series(data=[1e7, 1e8, 1e9, 1e10], index=filesystem_roots)
    bytes_wr_series = pd.Series(data=[1e8, 1e9, 1e10, 1e11], index=filesystem_roots)
    file_rd_series = pd.Series(data=[1e3, 1e4, 1e5, 1e6], index=filesystem_roots)
    file_wr_series = pd.Series(data=[1e2, 1e3, 1e4, 1e5], index=filesystem_roots)

    fig = plt.figure()
    data_access_by_filesystem.plot_data(fig,
                                        file_rd_series,
                                        file_wr_series,
                                        bytes_rd_series,
                                        bytes_wr_series,
                                        filesystem_roots)

    # enforce shared log x axes in a given column; even-indexed axes
    # are collected as the bytes column, odd-indexed as the files column
    bytes_column_x_axis_limits = []
    files_column_x_axis_limits = []
    for idx, ax in enumerate(fig.axes):
        is_bytes_column = (idx % 2 == 0)
        target = bytes_column_x_axis_limits if is_bytes_column else files_column_x_axis_limits
        target.append(ax.get_xlim())

        # also check for absence of ticklabels
        for label in ax.get_xticklabels(which='both'):
            assert not label.get_text()
        for label in ax.get_yticklabels(which='both'):
            assert not label.get_text()

    # matching axes: within a column, the limits must not differ
    for column_limits in (bytes_column_x_axis_limits, files_column_x_axis_limits):
        assert_allclose(np.diff(column_limits, axis=0), 0)

    # log scale values:
    assert_allclose(np.array(bytes_column_x_axis_limits)[..., 1], 3.89496945e+11)
    assert_allclose(np.array(files_column_x_axis_limits)[..., 1], 2190302.282682)

    # check for log scaling in both columns; only the axes at
    # index 6 and 7 are expected to carry the x label
    for idx, axis in enumerate(fig.axes):
        xlabel = axis.get_xlabel()
        if idx in (6, 7):
            assert 'symmetric log scaled' in xlabel
        else:
            assert xlabel == ''
@pytest.mark.parametrize('filename', ['imbalanced-io.darshan'])
def test_log_scale_display(filename):
    """Check that log scaled plot columns are labelled appropriately."""
    log_path = get_log_path(filename)
    with darshan.DarshanReport(log_path) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report)

        # only index 8 should have the log axis label
        for idx, axis in enumerate(fig.axes):
            xlabel = axis.get_xlabel()
            if idx != 8:
                assert xlabel == ''
            else:
                assert 'symmetric log scaled' in xlabel
@pytest.mark.parametrize('filename, expected_dims, num_cats', [
    ('imbalanced-io.darshan', [12, 16], None),
    ('imbalanced-io.darshan', [12, 16], 3),
    ('imbalanced-io.darshan', [12, 16], 1),
    ('snyder_acme.exe_id1253318_9-27-24239-1515303144625770178_2.darshan',
     [12, 16],
     None),
])
def test_vertical_resize(filename, expected_dims, num_cats):
    """Check the figure dimensions (inches) returned by ``plot_with_report()``.

    Plots should be expanded vertically to match the number of
    filesystems plotted.
    """
    log_path = get_log_path(filename)
    with darshan.DarshanReport(log_path) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report,
                                                         num_cats=num_cats)
        assert_allclose(fig.get_size_inches(), expected_dims)
@pytest.mark.parametrize("logname", [
    "mpi-io-test.darshan",
    "treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan",
])
def test_annotate_center_align(logname):
    """Check that every annotation is vertically center-aligned.

    For the review comment here:
    https://github.com/darshan-hpc/darshan/pull/397#discussion_r690847889
    """
    logpath = get_log_path(logname)
    with darshan.DarshanReport(logpath) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report)

        for ax in fig.axes:
            annotations = (
                child for child in ax.get_children()
                if isinstance(child, matplotlib.text.Annotation)
            )
            for annotation in annotations:
                assert annotation.get_verticalalignment() == "center"
@pytest.mark.parametrize("logname", [
    "imbalanced-io.darshan",
    "mpi-io-test.darshan",
])
def test_text_center_align(logname):
    """Check that read/written text labels are vertically center-aligned.

    For the review comment here:
    https://github.com/darshan-hpc/darshan/pull/397#discussion_r690755364
    """
    logpath = get_log_path(logname)
    with darshan.DarshanReport(logpath) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report)

        for ax in fig.axes:
            for child in ax.get_children():
                if not isinstance(child, matplotlib.text.Text):
                    continue
                label = child.get_text()
                # only the "read"/"written" labels are checked
                if not ("read" in label or "written" in label):
                    continue
                assert child.get_verticalalignment() == "center"
@pytest.mark.parametrize("logname", [
    "nonmpi_dxt_anonymized.darshan",
    "partial_data_stdio.darshan",
    "treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan",
])
@pytest.mark.parametrize("num_cats", [2, 8])
def test_subplot_restriction(logname, num_cats):
    """Check the subplot count against ``num_cats`` and the vertical spread.

    For the review comment here:
    https://github.com/darshan-hpc/darshan/pull/397#discussion_r779176814

    The number of subplots in a figure should be consistent (<=) with
    the number of category rows requested, and a collapsed layout
    should be avoided.
    """
    log_path = get_log_path(logname)
    with darshan.DarshanReport(log_path) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report,
                                                         num_cats=num_cats)
        actual_axes = fig.get_axes()
        # the limit enforced here is two axes per requested category
        assert len(actual_axes) <= num_cats * 2

        top_edges = [ax.get_position().y1 for ax in actual_axes]
        # same seeds as a running max/min starting from 0 and inf
        max_y1 = max([0, *top_edges])
        min_y1 = min([np.inf, *top_edges])

        # this spread helps ensure avoidance of
        # a collapsed layout of subplots
        assert (max_y1 - min_y1) > 0.2
@pytest.mark.parametrize("logname", [
    "partial_data_dxt.darshan",
    "partial_data_stdio.darshan",
])
def test_plot_with_report_no_file(tmpdir, logname):
    """Check that ``plot_with_report()`` leaves no file behind.

    It should only return a figure, and not generate a `.png` file;
    see review comment:
    https://github.com/darshan-hpc/darshan/pull/397#discussion_r689859765
    """
    # run from inside the temporary directory so that anything written
    # to the current working directory would be detected
    with tmpdir.as_cwd():
        log_path = get_log_path(logname)
        with darshan.DarshanReport(log_path) as report:
            data_access_by_filesystem.plot_with_report(report=report,
                                                       num_cats=6)
            assert not os.listdir(".")
@pytest.mark.parametrize("logname, top_cat_name, third_cat_name", [
    # spot check the 1st and 3rd most active
    # categories for each case
    ("imbalanced-io.darshan", "/lus", "anonymized"),
    ("nonmpi_dxt_anonymized.darshan", "/", "anonymized"),
])
def test_plot_with_report_proper_sort(logname, top_cat_name, third_cat_name):
    """Check that categories are sorted by descending activity.

    Categories should be sorted in descending order of activity
    (bytes read + bytes written), which is especially important when
    using `num_cats` for `plot_with_report()`; otherwise only inactive
    categories/filesystems could end up displayed.
    See review comment:
    https://github.com/darshan-hpc/darshan/pull/397#discussion_r769186581
    """
    log_path = get_log_path(logname)
    with darshan.DarshanReport(log_path) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report,
                                                         num_cats=6)
        for idx, ax in enumerate(fig.get_axes()):
            # only the axes at index 0 and 4 are spot checked
            if idx == 0:
                expected_name = top_cat_name
            elif idx == 4:
                expected_name = third_cat_name
            else:
                continue
            for child in ax.get_children():
                if isinstance(child, matplotlib.text.Annotation):
                    assert child.get_text() == expected_name
@pytest.mark.parametrize("logname", [
    "imbalanced-io.darshan",
    "nonmpi_dxt_anonymized.darshan",
])
def test_plot_with_report_root_files(logname):
    """Regression test for category names starting with "//".

    A bug resulted in several categories that started with "//" for
    root-mounted files.
    """
    log_path = get_log_path(logname)
    with darshan.DarshanReport(log_path) as report:
        fig = data_access_by_filesystem.plot_with_report(report=report)

        for ax in fig.get_axes():
            annotation_texts = [
                child.get_text()
                for child in ax.get_children()
                if isinstance(child, matplotlib.text.Annotation)
            ]
            for text in annotation_texts:
                assert not text.startswith("//")
@pytest.mark.parametrize(
    "logname, expected_file_rd_series, "
    "expected_file_wr_series, expected_bytes_rd_series, "
    "expected_bytes_wr_series",
    [
        (
            "ior_hdf5_example.darshan",
            pd.Series({"<STDIN>": 0.0, "<STDOUT>": 0.0, "<STDERR>": 0.0, "/global": 1.0}),
            pd.Series({"<STDIN>": 0.0, "<STDOUT>": 1.0, "<STDERR>": 0.0, "/global": 1.0}),
            pd.Series({"<STDIN>": 0.0, "<STDOUT>": 0.0, "<STDERR>": 0.0, "/global": 4202504.0}),
            pd.Series({"<STDIN>": 0.0, "<STDOUT>": 2421.0, "<STDERR>": 0.0, "/global": 4195800.0}),
        ),
    ],
)
def test_stdio_basic_inclusion(logname,
                               expected_file_rd_series,
                               expected_file_wr_series,
                               expected_bytes_rd_series,
                               expected_bytes_wr_series):
    """Test for the inclusion of STDIO module data in files/bytes accounting.

    The original "data access by category" implementation was
    POSIX-only; the files and bytes data structures are now expected
    to properly account for STDIO + POSIX data.
    """
    # attach the expected Series/index names on fresh copies rather than
    # mutating the objects created at parametrize (collection) time
    expected_file_rd_series = (
        expected_file_rd_series.rename("filepath").rename_axis("filesystem_root"))
    expected_file_wr_series = (
        expected_file_wr_series.rename("filepath").rename_axis("filesystem_root"))
    expected_bytes_rd_series = (
        expected_bytes_rd_series.rename("BYTES_READ").rename_axis("filesystem_root"))
    expected_bytes_wr_series = (
        expected_bytes_wr_series.rename("BYTES_WRITTEN").rename_axis("filesystem_root"))

    # follow the basic setup in plot_with_report()
    log_path = get_log_path(logname)
    with darshan.DarshanReport(log_path) as report:
        file_id_dict = report.data["name_records"]
        filesystem_roots = data_access_by_filesystem.identify_filesystems(
            file_id_dict=file_id_dict)

        file_rd_series, file_wr_series = data_access_by_filesystem.unique_fs_rw_counter(
            report=report,
            filesystem_roots=filesystem_roots,
            file_id_dict=file_id_dict,
            processing_func=data_access_by_filesystem.process_unique_files,
            mod='POSIX',
        )
        bytes_rd_series, bytes_wr_series = data_access_by_filesystem.unique_fs_rw_counter(
            report=report,
            filesystem_roots=filesystem_roots,
            file_id_dict=file_id_dict,
            processing_func=data_access_by_filesystem.process_byte_counts,
            mod='POSIX',
        )

        assert_series_equal(file_rd_series, expected_file_rd_series)
        assert_series_equal(file_wr_series, expected_file_wr_series)
        assert_series_equal(bytes_rd_series, expected_bytes_rd_series)
        assert_series_equal(bytes_wr_series, expected_bytes_wr_series)
def test_plot_with_empty_data():
    """Check that plotting a report with no records returns ``None``.

    A report object is generated that filters out all contained records,
    to ensure the data access by category plot properly returns None
    instead of failing.
    """
    logpath = get_log_path("ior_hdf5_example.darshan")
    # use a bogus regex with the "include" filter mode
    # to ensure no records are included
    with darshan.DarshanReport(logpath,
                               filter_patterns=["bogus-regex"],
                               filter_mode="include") as report:
        fig = data_access_by_filesystem.plot_with_report(report=report)
        # identity check: comparison with None should use `is`, not `==`
        assert fig is None
def test_with_filtered_data():
    """Check ``rec_to_rw_counter_dfs_with_cols()`` on filtered reports.

    The read/write dataframes should not include data for modules
    with no records.
    """
    logpath = get_log_path("sample-badost.darshan")

    filter_cases = [
        # all STDIO module records filtered out;
        # POSIX records should still remain
        ("include", 0, 2048),
        # all POSIX module records filtered out;
        # STDIO records should still remain
        ("exclude", 1, 2),
    ]
    for filter_mode, expected_n_reads, expected_n_writes in filter_cases:
        with darshan.DarshanReport(logpath,
                                   filter_patterns=["ior-posix"],
                                   filter_mode=filter_mode) as report:
            name_records = report.data["name_records"]
            df_reads, df_writes = data_access_by_filesystem.rec_to_rw_counter_dfs_with_cols(
                report=report,
                file_id_dict=name_records,
            )
            assert len(df_reads) == expected_n_reads
            assert len(df_writes) == expected_n_writes