Cleanup pass

pitt-crc · Dec 7, 2023 · bed939f · bed939f
1 parent 1f0ed63
commit bed939f
Show file tree

Hide file tree

Showing 7 changed files with 53 additions and 61 deletions.
diff --git a/shinigami/__init__.py b/shinigami/__init__.py
@@ -1,10 +1,6 @@
 """Shinigami is a command-line application for killing errant processes
 on Slurm based compute nodes. The application scans for and terminates any
 running processes not associated with a currently running Slurm job.
-
-Individual users and groups can be whitelisted in the application settings file
-via UID and GID values. Specific compute nodes can also be ignored using basic
-string matching. See the `settings` module for more details.
 """
 
 import importlib.metadata

diff --git a/shinigami/cli.py b/shinigami/cli.py
@@ -1,4 +1,4 @@
-"""The executable application and its command-line interface."""
+"""The executable application and its command line interface."""
 
 import asyncio
 import inspect

diff --git a/shinigami/utils.py b/shinigami/utils.py
@@ -10,15 +10,18 @@
 import asyncssh
 import pandas as pd
 
+# Technically the init process ID may vary with the system
+# architecture, but 1 is an almost universal default
 INIT_PROCESS_ID = 1
 
+# Custom type hints
 Whitelist = Collection[Union[int, Tuple[int, int]]]
 
 
-def id_in_whitelist(id_value: int, whitelist: Whitelist) -> bool:
+def _id_in_whitelist(id_value: int, whitelist: Whitelist) -> bool:
     """Return whether an ID is in a list of ID value definitions
 
-    The `whitelist`  of ID values can contain a mix of integers and tuples
+    The `whitelist` of ID values can contain a mix of integers and tuples
     of integer ranges. For example, [0, 1, (2, 9), 10] includes all IDs from
     zero through ten.
 
@@ -58,11 +61,14 @@ def get_nodes(cluster: str, ignore_nodes: Collection[str] = tuple()) -> set:
         raise RuntimeError(stderr)
 
     all_nodes = stdout.decode().strip().split('\n')
-    return set(node for node in all_nodes if node not in ignore_nodes)
+    return set(all_nodes) - set(ignore_nodes)
 
 
 async def get_remote_processes(conn: asyncssh.SSHClientConnection) -> pd.DataFrame:
-    """Fetch running process data from the remote machine
+    """Fetch running process data from a remote machine
+
+    The returned DataFrame is guaranteed to have columns `PID`, `PPID`, `PGID`,
+    `UID`, and `CND`.
 
     Args:
         conn: Open SSH connection to the machine
@@ -82,8 +88,10 @@ def include_orphaned_processes(df: pd.DataFrame) -> pd.DataFrame:
     Given a DataFrame with system process data, return a subset of the data
     containing processes parented by `INIT_PROCESS_ID`.
 
+    See the `get_remote_processes` function for the assumed DataFrame data model.
+
     Args:
-        df: A DataFrame with a `PPID` column
+        df: A DataFrame with process data
 
     Returns:
         A copy of the given DataFrame
@@ -98,23 +106,30 @@ def include_user_whitelist(df: pd.DataFrame, uid_whitelist: Whitelist) -> pd.Dat
     Given a DataFrame with system process data, return a subset of the data
     containing processes owned by the given user IDs.
 
+    See the `get_remote_processes` function for the assumed DataFrame data model.
+
     Args:
-        df: A DataFrame with a `UID` column
+        df: A DataFrame with process data
         uid_whitelist: List of user IDs to whitelist
 
     Returns:
         A copy of the given DataFrame
     """
 
-    whitelist_index = df['UID'].apply(id_in_whitelist, whitelist=uid_whitelist)
+    whitelist_index = df['UID'].apply(_id_in_whitelist, whitelist=uid_whitelist)
     return df[whitelist_index]
 
 
 def exclude_active_slurm_users(df: pd.DataFrame) -> pd.DataFrame:
     """Filter a DataFrame to exclude user IDs tied to a running slurm job
 
+    Given a DataFrame with system process data, return a subset of the data
+    that excludes processes owned by users running a `slurmd` command.
+
+    See the `get_remote_processes` function for the assumed DataFrame data model.
+
     Args:
-        df: A DataFrame with `UID` and `CMD` columns
+        df: A DataFrame with process data
 
     Returns:
         A copy of the given DataFrame
@@ -136,7 +151,7 @@ async def terminate_errant_processes(
 
     Args:
         node: The DNS resolvable name of the node to terminate processes on
-        uid_whitelist: Do not terminate processes owned by the given UID
+        uid_whitelist: Do not terminate processes owned by the given UIDs
         ssh_limit: Semaphore object used to limit concurrent SSH connections
         ssh_options: Options for configuring the outbound SSH connection
         debug: Log which process to terminate but do not terminate them

diff --git a/tests/cli/test_base_parser.py b/tests/cli/test_base_parser.py
@@ -6,7 +6,7 @@
 
 
 class BaseParsing(TestCase):
-    """Test custom parsing login encapsulated by the `BaseParser`  class"""
+    """Test custom parsing logic encapsulated by the `BaseParser`  class"""
 
     def test_errors_raise_system_exit(self) -> None:
         """Test error messages are raised as `SystemExit` instances"""

diff --git a/tests/utils/test_exclude_active_slurm_users.py b/tests/utils/test_exclude_active_slurm_users.py
@@ -11,31 +11,34 @@ class ExcludeSlurmUsers(unittest.TestCase):
     """Test the identification of slurm users from a DataFrame of process data"""
 
     def test_exclude_slurm_users(self) -> None:
+        """Test slurm processes are excluded from the returned DataFrame"""
+
+        # User 1002 has two processes.
+        # They BOTH should be excluded because ONE of them is a slurm process.
         input_df = pd.DataFrame({
-            'UID': [1001, 1002, 1003, 1004, 1005],
-            'CMD': ['process_1', 'slurmd', 'process_3', 'process_4', 'process_5']})
+            'UID': [1001, 1002, 1002, 1003, 1004],
+            'CMD': ['process 1', 'slurmd ...', 'process 3', 'process 4', 'process5']})
 
-        expected_df = input_df.loc[[0, 2, 3, 4]]
+        expected_df = input_df.loc[[0, 3, 4]]
         returned_df = exclude_active_slurm_users(input_df)
-
         pd.testing.assert_frame_equal(returned_df, expected_df)
-        self.assertIsNot(returned_df, input_df)
 
     def test_no_slurm_users(self) -> None:
+        """Test the returned dataframe matches the input dataframe when there are no slurm processes"""
+
         input_df = pd.DataFrame({
             'UID': [1001, 1002, 1003, 1004, 1005],
-            'CMD': ['process_1', 'process_2', 'process_3', 'process_4', 'process_5']})
+            'CMD': ['process1', 'process2', 'process3', 'process4', 'process5']})
 
         returned_df = exclude_active_slurm_users(input_df)
-
         pd.testing.assert_frame_equal(returned_df, input_df)
-        self.assertIsNot(returned_df, input_df)
 
     def test_all_slurm_users(self) -> None:
+        """Test the returned dataframe is empty when all process container `slurmd`"""
+
         input_df = pd.DataFrame({
-            'UID': [1001, 1002, 1003, 1004, 1005],
-            'CMD': ['slurmd', 'slurmd', 'slurmd', 'slurmd', 'slurmd']})
+            'UID': [1001, 1002, 1003],
+            'CMD': ['slurmd', 'prefix slurmd', 'slurmd postfix']})
 
         returned_df = exclude_active_slurm_users(input_df)
         self.assertTrue(returned_df.empty)
-        self.assertIsNot(returned_df, input_df)
diff --git a/tests/utils/test_include_orphaned_processes.py b/tests/utils/test_include_orphaned_processes.py
@@ -15,48 +15,31 @@ def test_orphaned_processes_are_returned(self) -> None:
 
         input_df = pd.DataFrame({
             'PID': [1, 2, 3, 4, 5],
-            'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 2000, 2000],
-            'PGID': [0, 0, 0, 0, 0],
-            'UID': [0, 1, 1, 2, 2],
-            'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})
-
-        # Manually define the data subset (including the index) expected in the function return
-        expected_output_df = pd.DataFrame({
-            'PID': [2, 3],
-            'PPID': [INIT_PROCESS_ID, INIT_PROCESS_ID],
-            'PGID': [0, 0],
-            'UID': [1, 1],
-            'CMD': ['process_1', 'process_2']},
-            index=[1, 2])
-
-        pd.testing.assert_frame_equal(
-            include_orphaned_processes(input_df),
-            expected_output_df)
+            'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 2000, 2000]
+        })
+
+        expected_df = input_df.loc[[1, 2]]
+        returned_df = include_orphaned_processes(input_df)
+        pd.testing.assert_frame_equal(returned_df, expected_df)
 
     def test_no_orphaned_processes(self) -> None:
         """Test the returned DataFrame is empty when no orphaned processes are present"""
 
         input_df = pd.DataFrame({
             'PID': [1, 2, 3, 4, 5],
-            'PPID': [0, 2, 3, 4, 4],
-            'PGID': [0, 0, 0, 0, 0],
-            'UID': [0, 1, 1, 2, 2],
-            'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})
+            'PPID': [0, 2, 3, 4, 4]
+        })
 
-        self.assertTrue(include_orphaned_processes(input_df).empty)
+        returned_df = include_orphaned_processes(input_df)
+        self.assertTrue(returned_df.empty)
 
     def test_all_orphaned_processes(self) -> None:
         """Test the returned DataFrame matches the input DataFrame when all processes are orphaned"""
 
         input_df = pd.DataFrame({
             'PID': [2, 3, 4, 5],
             'PPID': [INIT_PROCESS_ID, INIT_PROCESS_ID, INIT_PROCESS_ID, INIT_PROCESS_ID],
-            'PGID': [0, 0, 0, 0],
-            'UID': [1, 1, 2, 2],
-            'CMD': ['process_1', 'process_2', 'process_3', 'process_4']})
+        })
 
         returned_df = include_orphaned_processes(input_df)
         pd.testing.assert_frame_equal(returned_df, input_df)
-
-        # Make sure the return value is an equivalent copy and not just the original DataFrame
-        self.assertIsNot(returned_df, input_df)
diff --git a/tests/utils/test_include_user_whitelist.py b/tests/utils/test_include_user_whitelist.py
@@ -4,7 +4,7 @@
 
 import pandas as pd
 
-from shinigami.utils import INIT_PROCESS_ID, include_user_whitelist
+from shinigami.utils import include_user_whitelist
 
 
 class UserIDs(TestCase):
@@ -17,12 +17,7 @@ def setUp(self) -> None:
         processes, and multiple user IDs.
         """
 
-        self.testing_data = pd.DataFrame({
-            'PID': [INIT_PROCESS_ID, 100, 101, 102, 103],
-            'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 100, 200],
-            'PGID': [0, 1, 1, 2, 3],
-            'UID': [0, 123, 123, 456, 789],
-            'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})
+        self.testing_data = pd.DataFrame({'UID': [0, 123, 123, 456, 789]})
 
     def test_empty_whitelist(self) -> None:
         """Test the returned DataFrame is empty when the whitelist is empty"""