Skip to content

Commit

Permalink
Cleanup pass
Browse files Browse the repository at this point in the history
  • Loading branch information
djperrefort committed Dec 7, 2023
1 parent 1f0ed63 commit bed939f
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 61 deletions.
4 changes: 0 additions & 4 deletions shinigami/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
"""Shinigami is a command-line application for killing errant processes
on Slurm based compute nodes. The application scans for and terminates any
running processes not associated with a currently running Slurm job.
Individual users and groups can be whitelisted in the application settings file
via UID and GID values. Specific compute nodes can also be ignored using basic
string matching. See the `settings` module for more details.
"""

import importlib.metadata
Expand Down
2 changes: 1 addition & 1 deletion shinigami/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""The executable application and its command-line interface."""
"""The executable application and its command line interface."""

import asyncio
import inspect
Expand Down
33 changes: 24 additions & 9 deletions shinigami/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@
import asyncssh
import pandas as pd

# Technically the init process ID may vary with the system
# architecture, but 1 is an almost universal default
INIT_PROCESS_ID = 1

# Custom type hints
Whitelist = Collection[Union[int, Tuple[int, int]]]


def id_in_whitelist(id_value: int, whitelist: Whitelist) -> bool:
def _id_in_whitelist(id_value: int, whitelist: Whitelist) -> bool:
"""Return whether an ID is in a list of ID value definitions
The `whitelist` of ID values can contain a mix of integers and tuples
The `whitelist` of ID values can contain a mix of integers and tuples
of integer ranges. For example, [0, 1, (2, 9), 10] includes all IDs from
zero through ten.
Expand Down Expand Up @@ -58,11 +61,14 @@ def get_nodes(cluster: str, ignore_nodes: Collection[str] = tuple()) -> set:
raise RuntimeError(stderr)

all_nodes = stdout.decode().strip().split('\n')
return set(node for node in all_nodes if node not in ignore_nodes)
return set(all_nodes) - set(ignore_nodes)


async def get_remote_processes(conn: asyncssh.SSHClientConnection) -> pd.DataFrame:
"""Fetch running process data from the remote machine
"""Fetch running process data from a remote machine
The returned DataFrame is guaranteed to have columns `PID`, `PPID`, `PGID`,
`UID`, and `CND`.
Args:
conn: Open SSH connection to the machine
Expand All @@ -82,8 +88,10 @@ def include_orphaned_processes(df: pd.DataFrame) -> pd.DataFrame:
Given a DataFrame with system process data, return a subset of the data
containing processes parented by `INIT_PROCESS_ID`.
See the `get_remote_processes` function for the assumed DataFrame data model.
Args:
df: A DataFrame with a `PPID` column
df: A DataFrame with process data
Returns:
A copy of the given DataFrame
Expand All @@ -98,23 +106,30 @@ def include_user_whitelist(df: pd.DataFrame, uid_whitelist: Whitelist) -> pd.Dat
Given a DataFrame with system process data, return a subset of the data
containing processes owned by the given user IDs.
See the `get_remote_processes` function for the assumed DataFrame data model.
Args:
df: A DataFrame with a `UID` column
df: A DataFrame with process data
uid_whitelist: List of user IDs to whitelist
Returns:
A copy of the given DataFrame
"""

whitelist_index = df['UID'].apply(id_in_whitelist, whitelist=uid_whitelist)
whitelist_index = df['UID'].apply(_id_in_whitelist, whitelist=uid_whitelist)
return df[whitelist_index]


def exclude_active_slurm_users(df: pd.DataFrame) -> pd.DataFrame:
"""Filter a DataFrame to exclude user IDs tied to a running slurm job
Given a DataFrame with system process data, return a subset of the data
that excludes processes owned by users running a `slurmd` command.
See the `get_remote_processes` function for the assumed DataFrame data model.
Args:
df: A DataFrame with `UID` and `CMD` columns
df: A DataFrame with process data
Returns:
A copy of the given DataFrame
Expand All @@ -136,7 +151,7 @@ async def terminate_errant_processes(
Args:
node: The DNS resolvable name of the node to terminate processes on
uid_whitelist: Do not terminate processes owned by the given UID
uid_whitelist: Do not terminate processes owned by the given UIDs
ssh_limit: Semaphore object used to limit concurrent SSH connections
ssh_options: Options for configuring the outbound SSH connection
debug: Log which process to terminate but do not terminate them
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class BaseParsing(TestCase):
"""Test custom parsing login encapsulated by the `BaseParser` class"""
"""Test custom parsing logic encapsulated by the `BaseParser` class"""

def test_errors_raise_system_exit(self) -> None:
"""Test error messages are raised as `SystemExit` instances"""
Expand Down
25 changes: 14 additions & 11 deletions tests/utils/test_exclude_active_slurm_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,34 @@ class ExcludeSlurmUsers(unittest.TestCase):
"""Test the identification of slurm users from a DataFrame of process data"""

def test_exclude_slurm_users(self) -> None:
"""Test slurm processes are excluded from the returned DataFrame"""

# User 1002 has two processes.
# They BOTH should be excluded because ONE of them is a slurm process.
input_df = pd.DataFrame({
'UID': [1001, 1002, 1003, 1004, 1005],
'CMD': ['process_1', 'slurmd', 'process_3', 'process_4', 'process_5']})
'UID': [1001, 1002, 1002, 1003, 1004],
'CMD': ['process 1', 'slurmd ...', 'process 3', 'process 4', 'process5']})

expected_df = input_df.loc[[0, 2, 3, 4]]
expected_df = input_df.loc[[0, 3, 4]]
returned_df = exclude_active_slurm_users(input_df)

pd.testing.assert_frame_equal(returned_df, expected_df)
self.assertIsNot(returned_df, input_df)

def test_no_slurm_users(self) -> None:
"""Test the returned dataframe matches the input dataframe when there are no slurm processes"""

input_df = pd.DataFrame({
'UID': [1001, 1002, 1003, 1004, 1005],
'CMD': ['process_1', 'process_2', 'process_3', 'process_4', 'process_5']})
'CMD': ['process1', 'process2', 'process3', 'process4', 'process5']})

returned_df = exclude_active_slurm_users(input_df)

pd.testing.assert_frame_equal(returned_df, input_df)
self.assertIsNot(returned_df, input_df)

def test_all_slurm_users(self) -> None:
"""Test the returned dataframe is empty when all process container `slurmd`"""

input_df = pd.DataFrame({
'UID': [1001, 1002, 1003, 1004, 1005],
'CMD': ['slurmd', 'slurmd', 'slurmd', 'slurmd', 'slurmd']})
'UID': [1001, 1002, 1003],
'CMD': ['slurmd', 'prefix slurmd', 'slurmd postfix']})

returned_df = exclude_active_slurm_users(input_df)
self.assertTrue(returned_df.empty)
self.assertIsNot(returned_df, input_df)
39 changes: 11 additions & 28 deletions tests/utils/test_include_orphaned_processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,48 +15,31 @@ def test_orphaned_processes_are_returned(self) -> None:

input_df = pd.DataFrame({
'PID': [1, 2, 3, 4, 5],
'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 2000, 2000],
'PGID': [0, 0, 0, 0, 0],
'UID': [0, 1, 1, 2, 2],
'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})

# Manually define the data subset (including the index) expected in the function return
expected_output_df = pd.DataFrame({
'PID': [2, 3],
'PPID': [INIT_PROCESS_ID, INIT_PROCESS_ID],
'PGID': [0, 0],
'UID': [1, 1],
'CMD': ['process_1', 'process_2']},
index=[1, 2])

pd.testing.assert_frame_equal(
include_orphaned_processes(input_df),
expected_output_df)
'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 2000, 2000]
})

expected_df = input_df.loc[[1, 2]]
returned_df = include_orphaned_processes(input_df)
pd.testing.assert_frame_equal(returned_df, expected_df)

def test_no_orphaned_processes(self) -> None:
"""Test the returned DataFrame is empty when no orphaned processes are present"""

input_df = pd.DataFrame({
'PID': [1, 2, 3, 4, 5],
'PPID': [0, 2, 3, 4, 4],
'PGID': [0, 0, 0, 0, 0],
'UID': [0, 1, 1, 2, 2],
'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})
'PPID': [0, 2, 3, 4, 4]
})

self.assertTrue(include_orphaned_processes(input_df).empty)
returned_df = include_orphaned_processes(input_df)
self.assertTrue(returned_df.empty)

def test_all_orphaned_processes(self) -> None:
"""Test the returned DataFrame matches the input DataFrame when all processes are orphaned"""

input_df = pd.DataFrame({
'PID': [2, 3, 4, 5],
'PPID': [INIT_PROCESS_ID, INIT_PROCESS_ID, INIT_PROCESS_ID, INIT_PROCESS_ID],
'PGID': [0, 0, 0, 0],
'UID': [1, 1, 2, 2],
'CMD': ['process_1', 'process_2', 'process_3', 'process_4']})
})

returned_df = include_orphaned_processes(input_df)
pd.testing.assert_frame_equal(returned_df, input_df)

# Make sure the return value is an equivalent copy and not just the original DataFrame
self.assertIsNot(returned_df, input_df)
9 changes: 2 additions & 7 deletions tests/utils/test_include_user_whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pandas as pd

from shinigami.utils import INIT_PROCESS_ID, include_user_whitelist
from shinigami.utils import include_user_whitelist


class UserIDs(TestCase):
Expand All @@ -17,12 +17,7 @@ def setUp(self) -> None:
processes, and multiple user IDs.
"""

self.testing_data = pd.DataFrame({
'PID': [INIT_PROCESS_ID, 100, 101, 102, 103],
'PPID': [0, INIT_PROCESS_ID, INIT_PROCESS_ID, 100, 200],
'PGID': [0, 1, 1, 2, 3],
'UID': [0, 123, 123, 456, 789],
'CMD': ['init', 'process_1', 'process_2', 'process_3', 'process_4']})
self.testing_data = pd.DataFrame({'UID': [0, 123, 123, 456, 789]})

def test_empty_whitelist(self) -> None:
"""Test the returned DataFrame is empty when the whitelist is empty"""
Expand Down

0 comments on commit bed939f

Please sign in to comment.