diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..5c3f7cae --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,25 @@ +--- +version: "2.1" +services: + openssh-server: + image: lscr.io/linuxserver/openssh-server:latest + container_name: openssh-server + hostname: openssh-server #optional + environment: + - PUID=1000 + - PGID=1000 + - TZ=Etc/UTC + # - PUBLIC_KEY=yourpublickey #optional + # - PUBLIC_KEY_FILE=/path/to/file #optional + # - PUBLIC_KEY_DIR=/path/to/directory/containing/_only_/pubkeys #optional + # - PUBLIC_KEY_URL=https://github.com/username.keys #optional + - SUDO_ACCESS=true #optional + - PASSWORD_ACCESS=true #optional + - USER_PASSWORD=datashuttle #optional + # - USER_PASSWORD_FILE=/path/to/file #optional + - USER_NAME=linuxserver.io #optional + volumes: + - /path/to/appdata/config:/config + ports: + - 2222:2222 + restart: unless-stopped diff --git a/tests/configs_for_ssh_tests.yaml b/tests/configs_for_ssh_tests.yaml new file mode 100644 index 00000000..ddbb6ad6 --- /dev/null +++ b/tests/configs_for_ssh_tests.yaml @@ -0,0 +1,10 @@ +'test_ssh': + false +'username': + 'joeziminski' +'remote_host_id': + 'localhost' +'test_data_filesystem_path': + false +'test_data_server_path': + false diff --git a/tests/conftest.py b/tests/conftest.py index 90a121b6..770c4a20 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,46 +1,128 @@ -""" -Test configs, used for setting up SSH tests. - -Before running these tests, it is necessary to setup -an SSH key. This can be done through datashuttle -ssh.setup_ssh_key(project.cfg, log=False). - -Store this path somewhere outside of the test environment, -and it will be copied to the project test folder before testing. 
- -FILESYSTEM_PATH and SERVER_PATH these must point -to the same folder on the HPC, filesystem, -as a moutned drive and server as the linux path to -connect through SSH -""" -import platform +import os +from pathlib import Path from types import SimpleNamespace import pytest import test_utils +import yaml -test_ssh = False -username = "jziminski" -remote_host_id = "hpc-gw1.hpc.swc.ucl.ac.uk" -server_path = r"/ceph/neuroinformatics/neuroinformatics/scratch/datashuttle_tests/fake_data" +from datashuttle.utils import utils -if platform.system() == "Windows": - ssh_key_path = r"C:\Users\Joe\.datashuttle\test_file_conflicts_ssh_key" - filesystem_path = "X:/neuroinformatics/scratch/datashuttle_tests/fake_data" +def get_canonical_test_dir_names(initialise=False): + """ + SSH tests are run by copying data from a `local` project folder to a + `remote` project folder through SSH. At present, the folder that is + SSH'd to must also be accessible through the local filesystem (e.g. if + SSH to a HPC, then the target directory must also be accessible through + the local filesystem e.g. through a mounted drive). + `test_data_filesystem_path` gives the local filesystem path to the folder, + `test_data_server_path` gives the path after SSH connection to the folder. + If SSH to `localhost`, both these paths will be the same. -else: - ssh_key_path = "/home/joe/test_file_conflicts_ssh_key" - filesystem_path = "/home/joe/ceph_mount/neuroinformatics/scratch/datashuttle_tests/fake_data" + Returns + ------- + + save_ssh_key_project_name : Canonical project name used to store SSH key. + The test-project configs are deleted during + tear-down and so any pre-set SSH key pair must + be stored outside of this project. + `save_ssh_key_project_name` stores the project + name used internally for storing the SSH key + setup with `run_to_setup_ssh_test_key.py` + which is copied to the test-project configs + on test set up.
+ + test_data_filesystem_path : The path to the testing folder on the + filesystem where the `local` and `remote` + project folders be created during tests. + For SSH testing, this should be the local + filesystem path to the server path i.e. a + mounted drive. e.g. + W:/home/user/datashuttle_tests By default + (e.g. used for SSH tests to `localhost` this + is /home/.datashuttle_tests) + + test_data_server_path : Path to the testing folder once SSH connected to + the target server. For example + /home/user/datashuttle_tests. By default (e.g. + used for SSH tests to `localhost` this is + /home/.datashuttle_tests) + + ssh_key_path : Path to the SSH key setup by `run_to_setup_ssh_test_key.py`. + This is copied to the test-project during setup. + + config["test_ssh"] : bool indicating whether SSH tests should be run. + + config["username"] : Username for the account to connect with via SSH. + + config["remote_host_id"] : Target for SSH connection e.g. HPC address, + `localhost` for SSH to host machine. Note + sshing to `localhost` is only available on + macOS and linux. + """ + # Load SSH setup configs + config_filepath = ( + Path(os.path.dirname(os.path.realpath(__file__))) + / "configs_for_ssh_tests.yaml" + ) + + with open(config_filepath) as file: + config = yaml.full_load(file) + + # Check pre-setup SSH key-pair is set up + save_ssh_key_project_name = "tests_ssh_key_holding_project" + ssh_key_path = ( + utils.get_datashuttle_path(save_ssh_key_project_name)[0] + / f"{save_ssh_key_project_name}_ssh_key" + ) + + if not initialise and config["test_ssh"] and not ssh_key_path.is_file(): + raise FileNotFoundError( + f"Must run `run_to_setup_ssh_test_key` before running " + f"SSH tests. 
{ssh_key_path}" + ) + + # Load filesystem and remote data paths + if not config["test_data_filesystem_path"]: + test_data_filesystem_path = Path.home() / ".datashuttle_tests" + else: + test_data_filesystem_path = config["test_data_filesystem_path"] + + if not config["test_data_server_path"]: + test_data_server_path = Path.home() / ".datashuttle_tests" + else: + test_data_server_path = config["test_data_server_path"] + + return ( + save_ssh_key_project_name, + test_data_filesystem_path, + test_data_server_path, + ssh_key_path, + config["test_ssh"], + config["username"], + config["remote_host_id"], + ) def pytest_configure(config): + + ( + __, + test_data_filesystem_path, + test_data_server_path, + ssh_key_path, + test_ssh, + username, + remote_host_id, + ) = get_canonical_test_dir_names() + pytest.ssh_config = SimpleNamespace( TEST_SSH=test_ssh, SSH_KEY_PATH=ssh_key_path, USERNAME=username, REMOTE_HOST_ID=remote_host_id, - FILESYSTEM_PATH=filesystem_path, # FILESYSTEM_PATH and SERVER_PATH these must point to the same folder on the HPC, filesystem - SERVER_PATH=server_path, # as a mounted drive and server as the linux path to connect through SSH + FILESYSTEM_PATH=str(test_data_filesystem_path), + SERVER_PATH=str(test_data_server_path), ) test_utils.set_datashuttle_loggers(disable=True) diff --git a/tests/setup_ssh_test_keys.py b/tests/setup_ssh_test_keys.py new file mode 100644 index 00000000..9d3a944f --- /dev/null +++ b/tests/setup_ssh_test_keys.py @@ -0,0 +1,38 @@ +""" +This script will set up an SSH key pair between the SSH target +specified in `configs_for_ssh_tests.yaml`. This will require +one-time password entry. + +The SSH key pair is saved to a holding datashuttle project +specified in `conftest.py`. During setup of an SSH test project, +this SSH key pair is copied from the holding datashuttle +project to the SSH test-project.
This means that when the test +project is deleted during tear-down, the SSH key is still available +for future testing +""" +import conftest + +from datashuttle.datashuttle import DataShuttle + +( + save_ssh_key_project_name, + test_data_filesystem_path, + test_data_server_path, + ssh_key_path, + __, + username, + remote_host_id, +) = conftest.get_canonical_test_dir_names(initialise=True) + +project = DataShuttle(save_ssh_key_project_name) + +project.make_config_file( + test_data_filesystem_path / "local", + test_data_server_path / "remote", + "ssh", + remote_host_id=remote_host_id, + remote_host_username=username, + use_behav=True, +) + +project.setup_ssh_connection_to_remote_server() diff --git a/tests/ssh_test_utils.py b/tests/tests_integration/ssh_tests/ssh_test_utils.py similarity index 67% rename from tests/ssh_test_utils.py rename to tests/tests_integration/ssh_tests/ssh_test_utils.py index 07e095ee..7051c04f 100644 --- a/tests/ssh_test_utils.py +++ b/tests/tests_integration/ssh_tests/ssh_test_utils.py @@ -1,11 +1,12 @@ import builtins import copy +import shutil from datashuttle.utils import rclone, ssh def setup_project_for_ssh( - project, remote_path, remote_host_id, remote_host_username + project, remote_path, ssh_config, setup_ssh_connection=True ): """ Setup the project configs to use SSH connection @@ -15,16 +16,21 @@ def setup_project_for_ssh( "remote_path", remote_path, ) - project.update_config("remote_host_id", remote_host_id) - project.update_config("remote_host_username", remote_host_username) + project.update_config("remote_host_id", ssh_config.REMOTE_HOST_ID) + project.update_config("remote_host_username", ssh_config.USERNAME) project.update_config("connection_method", "ssh") - rclone.setup_remote_as_rclone_target( - "ssh", - project.cfg, - project.cfg.get_rclone_config_name("ssh"), - project.cfg.ssh_key_path, - ) + if setup_ssh_connection: + setup_hostkeys(project) + + shutil.copy(ssh_config.SSH_KEY_PATH, project.cfg.ssh_key_path) + + 
rclone.setup_remote_as_rclone_target( + "ssh", + project.cfg, + project.cfg.get_rclone_config_name("ssh"), + project.cfg.ssh_key_path, + ) def setup_mock_input(input_): diff --git a/tests/tests_integration/test_file_conflicts_pathtable.py b/tests/tests_integration/ssh_tests/test_file_conflicts_pathtable.py similarity index 100% rename from tests/tests_integration/test_file_conflicts_pathtable.py rename to tests/tests_integration/ssh_tests/test_file_conflicts_pathtable.py diff --git a/tests/tests_integration/test_ssh_file_transfer.py b/tests/tests_integration/ssh_tests/test_ssh_file_transfer.py similarity index 83% rename from tests/tests_integration/test_ssh_file_transfer.py rename to tests/tests_integration/ssh_tests/test_ssh_file_transfer.py index 22dd8c58..69e36044 100644 --- a/tests/tests_integration/test_ssh_file_transfer.py +++ b/tests/tests_integration/ssh_tests/test_ssh_file_transfer.py @@ -1,9 +1,6 @@ -""" -""" import copy import glob import shutil -import time from pathlib import Path import pandas as pd @@ -17,12 +14,12 @@ class TestFileTransfer: @pytest.fixture( scope="class", - params=[ # Set running SSH or local filesystem + params=[ False, pytest.param( True, marks=pytest.mark.skipif( - ssh_config.TEST_SSH is False, + not ssh_config.TEST_SSH, reason="TEST_SSH is set to False.", ), ), @@ -41,28 +38,13 @@ def pathtable_and_project(self, request, tmpdir_factory): SSH to must also be mounted and the path supplied to the location SSH'd to. - For speed, create the project once, - and all files to transfer. Then in the - test function, the folder are transferred. - Partial cleanup is done in the test function - i.e. deleting the remote_path to which the - items have been transferred. This is achieved + For speed, create the project once (including + all files to transfer) once. This is achieved by using "class" scope. - - pathtable is a convenient way to represent - file paths for testing against. 
- - NOTE: for convenient, files are transferred - with SSH and then checked through the local filesystem - mount. This is significantly easier than checking - everything through SFTP. However, on Windows the - mounted filesystem is quite slow to update, taking - a few seconds after SSH transfer. This makes the - tests run very slowly. We can get rid - of this limitation on linux. """ testing_ssh = request.param tmp_path = tmpdir_factory.mktemp("test") + test_project_name = "test_file_conflicts" if testing_ssh: base_path = ssh_config.FILESYSTEM_PATH @@ -70,7 +52,6 @@ def pathtable_and_project(self, request, tmpdir_factory): else: base_path = tmp_path / "test with space" remote_path = base_path - test_project_name = "test_file_conflicts" project, cwd = test_utils.setup_project_fixture( base_path, test_project_name @@ -82,14 +63,9 @@ def pathtable_and_project(self, request, tmpdir_factory): test_utils.make_test_path( remote_path, test_project_name, "remote" ), - ssh_config.REMOTE_HOST_ID, - ssh_config.USERNAME, + ssh_config, ) - # Initialise the SSH connection - ssh_test_utils.setup_hostkeys(project) - shutil.copy(ssh_config.SSH_KEY_PATH, project.cfg.file_path.parent) - pathtable = get_pathtable(project.cfg["local_path"]) self.create_all_pathtable_files(pathtable) project.testing_ssh = testing_ssh @@ -206,11 +182,6 @@ def test_all_data_transfer_options( ) expected_transferred_paths = remote_base_paths / expected_paths.path - # When transferring with SSH, there is a delay before - # filesystem catches up - if project.testing_ssh: - time.sleep(10) - # Check what paths were actually moved # (through the local filesystem), and test path_to_search = ( @@ -231,9 +202,9 @@ def test_all_data_transfer_options( except FileNotFoundError: pass - # --------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- # Utils - # 
--------------------------------------------------------------------------------------------------------------- + # ------------------------------------------------------------------------- def query_table(self, pathtable, arguments): """ @@ -293,7 +264,9 @@ def make_pathtable_search_filter(self, sub_names, ses_names, data_type): else: if "histology" in data_type: sub_ses_dtype_arguments += [ - f"(parent_sub == '{sub}' & (parent_data_type == 'histology' | parent_data_type == 'histology'))" + f"(parent_sub == '{sub}' & " + f"(parent_data_type == 'histology' | " + f"parent_data_type == 'histology'))" ] for ses in ses_names: @@ -307,11 +280,16 @@ def make_pathtable_search_filter(self, sub_names, ses_names, data_type): for dtype in data_type: if dtype == "all_ses_level_non_data_type": extra_arguments += [ - f"(parent_sub == '{sub}' & parent_ses == '{ses}' & is_ses_level_non_data_type == True)" + f"(parent_sub == '{sub}' & " + f"parent_ses == '{ses}' & " + f"is_ses_level_non_data_type == True)" ] else: sub_ses_dtype_arguments += [ - f"(parent_sub == '{sub}' & parent_ses == '{ses}' & (parent_data_type == '{dtype}' | parent_data_type == '{dtype}'))" + f"(parent_sub == '{sub}' & " + f"parent_ses == '{ses}' & " + f"(parent_data_type == '{dtype}' | " + f"parent_data_type == '{dtype}'))" ] return sub_ses_dtype_arguments, extra_arguments diff --git a/tests/tests_integration/test_ssh_setup.py b/tests/tests_integration/ssh_tests/test_ssh_setup.py similarity index 73% rename from tests/tests_integration/test_ssh_setup.py rename to tests/tests_integration/ssh_tests/test_ssh_setup.py index 67c31c96..7e5c966e 100644 --- a/tests/tests_integration/test_ssh_setup.py +++ b/tests/tests_integration/ssh_tests/test_ssh_setup.py @@ -1,8 +1,5 @@ -""" -SSH configs are set in conftest.py . 
The password -should be stored in a file called test_ssh_password.txt located -in the same folder as test_ssh.py -""" +import getpass + import pytest import ssh_test_utils import test_utils @@ -16,8 +13,9 @@ class TestSSH: @pytest.fixture(scope="function") def project(test, tmp_path): """ - Make a project as per usual, but now add - in test ssh configurations + Setup a test project with the ssh options specified in + ssh_config. During setup, the SSH setup routine for a + project is not run, as it is tested here. """ tmp_path = tmp_path / "test with space" @@ -29,16 +27,16 @@ def project(test, tmp_path): ssh_test_utils.setup_project_for_ssh( project, ssh_config.FILESYSTEM_PATH, - ssh_config.REMOTE_HOST_ID, - ssh_config.USERNAME, + ssh_config, + setup_ssh_connection=False, ) yield project test_utils.teardown_project(cwd, project) - # ----------------------------------------------------------------- + # ------------------------------------------------------------------------- # Test Setup SSH Connection - # ----------------------------------------------------------------- + # ------------------------------------------------------------------------- @pytest.mark.parametrize("input_", ["n", "o", "@"]) def test_verify_ssh_remote_host_do_not_accept( @@ -97,3 +95,26 @@ def test_generate_and_write_ssh_key(self, project): first_line = file.readlines()[0] assert first_line == "-----BEGIN RSA PRIVATE KEY-----\n" + + def test_setup_ssh_key_failure(self, project): + """ + Enter the wrong password and check failure is gracefully handled. + Successful password entry is not tested as would require + password stored locally. + """ + ssh_test_utils.setup_hostkeys(project) + + getpass.getpass = lambda _: "wrong_password" # type: ignore + + with pytest.raises(BaseException) as e: + ssh.setup_ssh_key( + project.cfg, + log=False, + ) + + assert ( + "Could not connect to server. 
Ensure that " + "\n1) You have run setup_ssh_connection_to_remote_server() " + "\n2) You are on VPN network if required. " + "\n3) The remote_host_id:" in str(e.value) + )