From e1afb248715039a18477111b61f51a377013b5fe Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Wed, 20 Nov 2024 17:48:08 -0800 Subject: [PATCH 1/2] Improves stellar-core-debug-info script and adds docs --- scripts/README.md | 12 +++ scripts/stellar-core-debug-info | 125 ++++++++++++++++++++++++++------ 2 files changed, 116 insertions(+), 21 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index bd88c41ff3..20da2a0f11 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -56,6 +56,18 @@ This folder is for storing any scripts that may be helpful for using stellar-cor ./src/stellar-core(+0x34f0c1) [0x55c7cd1000c1]" ``` +### Stellar Core Debug Info + +- Name - `stellar-core-debug-info` +- Description - Gathers useful information about core state in order to help debug crashes. This includes collecting log files, bucket directories, +SQL DB state, status reported by `offline-info`, and OS information for the given node. +- Usage - Ex. `stellar-core-debug-info /tmp/stellarCoreDumpOutputDirectory`. This script requires a destination directory to write temporary files to and the resulting +`.tar.gz` archive of the collected debug information. Note that secret seeds from config files are automatically redacted. +If the given output directory does not exist, the script will attempt to create it. By default, the script checks +the `stellar-core.service` file to determine correct paths of the stellar-core executable and config file. From the config file, the script will +then parse the path of log files, bucket directory, and SQL DB. All these fields can be manually overridden as well, see +`stellar-core-debug-info --help` for specific flags. 
+ ### Soroban Settings Helper - Name - `settings-helper.sh` - Prequisites - `stellar-xdr` and `stellar-core` diff --git a/scripts/stellar-core-debug-info b/scripts/stellar-core-debug-info index 1b67788379..14744844c1 100755 --- a/scripts/stellar-core-debug-info +++ b/scripts/stellar-core-debug-info @@ -15,25 +15,78 @@ import time def parse_args(): parser = argparse.ArgumentParser(description='Gathers information about host and stellar-core') - parser.add_argument('-d', '--dest', required=False, type=str, help='Pre-existing path to use for scratch space and.' - 'storing results. The script will create new subdirectory under this path.', - default='/var/lib/stellar/') - parser.add_argument('-c', '--core-config', required=False, type=str, help='Path to the stellar-core config file', - default='/etc/stellar/stellar-core.cfg') - parser.add_argument('-l', '--log-dir', required=False, type=str, help='Path where logs are written to.' - 'If not set we will try to find it in the config or use /var/log/stellar/ location.' + parser.add_argument('outputDir', type=str, help='Path to directory to use for scratch space and ' + 'storing results. The script will create the directory if it does not exist and a new subdirectory under this path.') + parser.add_argument('-c', '--core-config', required=False, type=str, help='Path to the stellar-core config file. ' + 'If not set we will try to find it in the service file.') + parser.add_argument('-l', '--log-dir', required=False, type=str, help='Path where logs are written to. ' + 'If not set we will try to find it in the config. ' 'Set to string "disabled" to exclude logs.') - parser.add_argument('-b', '--bucket-dir', required=False, type=str, help='Path where buckets are written to.' - 'If not set we will try to find it in the config or use /var/lib/stellar/buckets location.' + parser.add_argument('-b', '--bucket-dir', required=False, type=str, help='Path where buckets are written to. 
' + 'If not set we will try to find it in the config. ' 'Set to string "disabled" to exclude buckets directory.') - parser.add_argument('-p', '--core-path', required=False, type=str, help='Path to the stellar-core binary' - 'If not set "stellar-core" will be used.', - default='stellar-core') - parser.add_argument('-s', '--sqlite-path', required=False, type=str, help='Path to the sqlite database.' - 'If not set we will try to find it in the config or use /var/lib/stellar/stellar.db location.' + parser.add_argument('-p', '--core-path', required=False, type=str, help='Path to the stellar-core binary. ' + 'If not set "stellar-core" will be used.', default='stellar-core') + parser.add_argument('-s', '--sqlite-path', required=False, type=str, help='Path to the sqlite database. ' + 'If not set we will try to find it in the config. ' 'Set to string "disabled" to exclude sqlite.') return parser.parse_args() +def get_service_exec_start(): + service_name = "stellar-core.service" + try: + # Use systemctl to retrieve the service file content + result = subprocess.run( + ["systemctl", "cat", service_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + if result.returncode != 0: + raise Exception(result.stderr.strip()) + + # Parse the service file content + exec_start = None + for line in result.stdout.splitlines(): + if line.strip().startswith("ExecStart="): + exec_start = line.split("=", 1)[1].strip() + break + + if exec_start: + return exec_start + else: + raise ValueError(f"No 'ExecStart' found in {service_name} service file.") + except Exception as e: + return f"Error: {e}" + +def extract_paths(exec_start): + try: + # Extract the first path (the command) + first_path = re.search(r"^([^\s]+)", exec_start).group(1) + + # Extract the config file path after the --conf flag + conf_path = re.search(r"--conf\s+([^\s]+\.cfg)", exec_start) + conf_path = conf_path.group(1) if conf_path else None + + return first_path, conf_path + except Exception as e: + 
return f"Error: {e}", None + +def get_full_path_for_file(file): + # If the file is a relative or absolute path + if file.startswith("./") or file.startswith("../") or os.path.sep in file or file.startswith("~"): + return os.path.abspath(os.path.expanduser(file)) + + return file + +def get_full_path_for_command(command): + # If the file is a relative or absolute path + if command.startswith("./") or command.startswith("../") or os.path.sep in command or command.startswith("~"): + return os.path.abspath(os.path.expanduser(command)) + else: + # If it's just a command, search for it in PATH + return shutil.which(command) class Gatherer(object): def catch_errors(func): @@ -48,8 +101,8 @@ class Gatherer(object): def __init__(self, args): timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") - self.base_dir = args.dest - self.scratch_dir = os.path.join(args.dest, f'stellar-core-debug-info-{timestamp}') + self.base_dir = get_full_path_for_file(args.outputDir) + self.scratch_dir = os.path.join(self.base_dir, f'stellar-core-debug-info-{timestamp}') self.tgz_file = f'{self.scratch_dir}.tar.gz' self.core_config = args.core_config self.core_path = args.core_path @@ -59,8 +112,34 @@ class Gatherer(object): self.header_template = '#####################\n# {}\n#####################\n' def pre_flight(self): - if not os.path.isdir(self.base_dir) or not os.access(self.base_dir, os.W_OK): - print(f"Error: destination directory must exist and be writable: {self.scratch_dir}") + if not self.core_config: + try: + exec_start = get_service_exec_start() + self.core_path, self.core_config = extract_paths(exec_start) + except Exception as e: + print(f"Could not parse stellar-core config file from service file, please provide it with --core-config flag.") + return False + else: + # If the paths are not absolute, make them absolute + self.core_config = get_full_path_for_file(self.core_config) + self.core_path = get_full_path_for_command(self.core_path) + if not self.core_path: + 
print("Error: stellar-core command not found, please specify executable with --core-path flag") + return False + + if os.path.exists(self.base_dir) and not os.path.isdir(self.base_dir): + print(f"Error: destination path {self.base_dir} exists but is not a directory") + return False + + if not os.path.exists(self.base_dir): + try: + os.mkdir(self.base_dir, mode=0o755) + except: # noqa: E722 + print(f'Error: failed to create destination directory {self.base_dir}') + return False + + if not os.access(self.base_dir, os.W_OK): + print(f"Error: destination directory must be writable: {self.scratch_dir}") return False try: @@ -77,9 +156,13 @@ class Gatherer(object): print(f"Error: can't read core config file: {self.core_config}. Maybe you need --core-config flag?") return False - user = pwd.getpwuid(os.getuid()).pw_name - if user not in ['root', 'stellar']: - print(f'Warning: the script should normaly be run as stellar or root user. Running as {user}') + # Check if stellar-core executable exists and is executable + if not os.path.isfile(self.core_path): + print(f"Error: stellar-core binary not found, have you specified a full path?: {self.core_path}") + return False + + if not os.access(self.core_path, os.X_OK): + print("Warning: user does not have permission to run stellar-core, debug info will be limited!") return True From 0f7fe887928f75973ae14dd95adc6057e96dbc00 Mon Sep 17 00:00:00 2001 From: Garand Tyson Date: Thu, 21 Nov 2024 13:55:51 -0800 Subject: [PATCH 2/2] Added docker support for debug script --- scripts/stellar-core-debug-info | 87 ++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/scripts/stellar-core-debug-info b/scripts/stellar-core-debug-info index 14744844c1..fdd9e0ad30 100755 --- a/scripts/stellar-core-debug-info +++ b/scripts/stellar-core-debug-info @@ -26,39 +26,44 @@ def parse_args(): 'If not set we will try to find it in the config. 
' 'Set to string "disabled" to exclude buckets directory.') parser.add_argument('-p', '--core-path', required=False, type=str, help='Path to the stellar-core binary. ' - 'If not set "stellar-core" will be used.', default='stellar-core') + 'If not set "stellar-core" will be used.') parser.add_argument('-s', '--sqlite-path', required=False, type=str, help='Path to the sqlite database. ' 'If not set we will try to find it in the config. ' 'Set to string "disabled" to exclude sqlite.') return parser.parse_args() +def is_docker(): + def text_in_file(text, filename): + try: + with open(filename, encoding='utf-8') as lines: + return any(text in line for line in lines) + except OSError: + return False + cgroup = '/proc/self/cgroup' + return os.path.exists('/.dockerenv') or text_in_file('docker', cgroup) + def get_service_exec_start(): + # Use systemctl to retrieve the service file content service_name = "stellar-core.service" - try: - # Use systemctl to retrieve the service file content - result = subprocess.run( - ["systemctl", "cat", service_name], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True - ) - - if result.returncode != 0: - raise Exception(result.stderr.strip()) - - # Parse the service file content - exec_start = None - for line in result.stdout.splitlines(): - if line.strip().startswith("ExecStart="): - exec_start = line.split("=", 1)[1].strip() - break - - if exec_start: - return exec_start - else: - raise ValueError(f"No 'ExecStart' found in {service_name} service file.") - except Exception as e: - return f"Error: {e}" + result = subprocess.check_output( + ["systemctl", "cat", service_name], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + # Parse the service file content + exec_start = None + for line in result: + print(f'line: {line}') + if line.strip().startswith("ExecStart="): + exec_start = line.split("=", 1)[1].strip() + break + + if exec_start: + return exec_start + else: + raise ValueError(f"No 'ExecStart' found 
in {service_name} service file.") def extract_paths(exec_start): try: @@ -113,19 +118,35 @@ class Gatherer(object): def pre_flight(self): if not self.core_config: + # First try to get the stellar-core config from the service file try: exec_start = get_service_exec_start() self.core_path, self.core_config = extract_paths(exec_start) except Exception as e: - print(f"Could not parse stellar-core config file from service file, please provide it with --core-config flag.") - return False + pass + + # Couldn't find service file, check if we're running in docker + if not self.core_config or not self.core_path: + # If script is run in docker, try default docker paths + if is_docker(): + self.core_config = '/etc/stellar/stellar-core.cfg' + self.core_path = '/usr/bin/stellar-core' + else: + print("Error: could not find stellar-core config file in service file or docker container, please specify with --core-config flag") + return False + else: + # Default to stellar-core if path not specified + if not self.core_path or self.core_path == 'stellar-core': + self.core_path = get_full_path_for_command('stellar-core') + if not self.core_path: + print("Error: stellar-core command not found, please specify executable with --core-path flag") + return False + else: + self.core_path = get_full_path_for_command(self.core_path) + # If the paths are not absolute, make them absolute self.core_config = get_full_path_for_file(self.core_config) - self.core_path = get_full_path_for_command(self.core_path) - if not self.core_path: - print("Error: stellar-core command not found, please specify executable with --core-path flag") - return False if os.path.exists(self.base_dir) and not os.path.isdir(self.base_dir): print(f"Error: destination path {self.base_dir} exists but is not a directory") @@ -158,7 +179,7 @@ class Gatherer(object): # Check if stellar-core executable exists and is executable if not os.path.isfile(self.core_path): - print(f"Error: stellar-core binary not found, have you specified 
a full path?: {self.core_path}") + print(f"Error: stellar-core binary not found at {self.core_path}, have you specified a full path?") return False if not os.access(self.core_path, os.X_OK):