From 2c623189c5d979157976248b041059546ab9156c Mon Sep 17 00:00:00 2001 From: Oleg Kalashev Date: Thu, 1 Feb 2024 21:52:29 +0500 Subject: [PATCH] fixing issue #116 cleanup fail in case of exception in nbadapter.py:429 (#117) * #116 fix * check for git-lfs error added * advanced check for git clone error added * using GitPython for git clone * fixing import error * git-lfs error message was modified * Update tests/test_nbadapter.py --------- Co-authored-by: Volodymyr --- nb2workflow/nbadapter.py | 24 ++++++++++++++---------- requirements.txt | 3 ++- setup.py | 3 ++- tests/conftest.py | 12 ++++++++++++ tests/test_nbadapter.py | 23 +++++++++++++++++++++++ 5 files changed, 53 insertions(+), 12 deletions(-) diff --git a/nb2workflow/nbadapter.py b/nb2workflow/nbadapter.py index 211976d4..68634f31 100644 --- a/nb2workflow/nbadapter.py +++ b/nb2workflow/nbadapter.py @@ -31,7 +31,7 @@ from nb2workflow.json import CustomJSONEncoder from nb2workflow.semantics import understand_comment_references, oda_ontology_prefix - +from git import Repo, InvalidGitRepositoryError, GitCommandError import logging logger=logging.getLogger(__name__) @@ -431,21 +431,25 @@ def execute(self, parameters, progress_bar = True, log_output = True, inplace=Fa return exceptions def _execute(self, parameters, progress_bar = True, log_output = True, inplace=False, callback_url=None, tmpdir_key=None): - if not inplace : tmpdir = self.new_tmpdir(tmpdir_key) logger.info("new tmpdir: %s", tmpdir) - + repo_dir = os.path.dirname(os.path.realpath(self.notebook_fn)) try: - output = subprocess.check_output(["git","clone", "--recurse-submodules", os.path.dirname(os.path.realpath(self.notebook_fn)), tmpdir]) - # output = subprocess.check_output(["git","clone", "--depth", "1", "file://" + os.path.dirname(os.path.realpath(self.notebook_fn)), tmpdir]) - logger.info("git clone output: %s", output) - except Exception as e: - logger.warning("git clone failed: %s, will attempt copytree", e) - + repo = Repo(repo_dir) + repo.clone(tmpdir, multi_options=["--recurse-submodules"]) + except InvalidGitRepositoryError: + logger.warning(f"repository {repo_dir} is invalid, will attempt copytree") os.rmdir(tmpdir) - shutil.copytree(os.path.dirname(os.path.realpath(self.notebook_fn)), tmpdir) + except GitCommandError as e: + logger.warning(f"git command error: {e}") + if 'git-lfs' in str(e): + # this error may occur if the repo was originally cloned by the different version of git utility + # e.g. when repo is mounted with docker run -v + raise Exception("We got some problem cloning the repository, the problem seems to be related to git-lfs. You might want to try reinitializing git-lfs with 'git-lfs install; git-lfs pull'") + else: + raise e else: tmpdir =os.path.dirname(os.path.realpath(self.notebook_fn)) logger.info("executing inplace, no tmpdir is input dir: %s", tmpdir) diff --git a/requirements.txt b/requirements.txt index 11fe3244..a6587803 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,4 +24,5 @@ scrapbook matplotlib pytest werkzeug==2.0.3 -nbconvert>=7.0.0 \ No newline at end of file +nbconvert>=7.0.0 +GitPython \ No newline at end of file diff --git a/setup.py b/setup.py index 83fe244a..37a3b2cb 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,8 @@ 'scrapbook', 'werkzeug==2.0.3', 'sentry_sdk', - 'rdflib' + 'rdflib', + 'GitPython' ], diff --git a/tests/conftest.py b/tests/conftest.py index 1013dc6f..493b4098 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -32,6 +32,18 @@ def test_notebook_repo(): return path +@pytest.fixture +def test_notebook_lfs_repo(): + path = os.environ.get('TEST_NOTEBOOK_LFS_REPO', None) + + if path is None: + path = os.path.join(os.getcwd(), 'tests/testlfsrepo/') + if os.path.exists(path): + shutil.rmtree(path) + subprocess.check_call(["git", "clone", "https://gitlab.renkulab.io/astronomy/mmoda/crbeam.git", path]) + + return path + @pytest.fixture def app(test_notebook): app = nb2workflow.service.app diff --git a/tests/test_nbadapter.py b/tests/test_nbadapter.py index 60c50061..fc1497e5 100644 --- a/tests/test_nbadapter.py +++ b/tests/test_nbadapter.py @@ -123,6 +123,29 @@ def test_nbadapter_repo(test_notebook_repo): validate_oda_dispatcher(nba) +@pytest.mark.skip(reason="Reproducing this condition in the test is difficult") +def test_nbadapter_lfs_repo(test_notebook_lfs_repo): + from nb2workflow.nbadapter import NotebookAdapter, find_notebooks, validate_oda_dispatcher + + nbas = find_notebooks(test_notebook_lfs_repo) + + assert len(nbas) >= 1 + + for nba_name, nba in nbas.items(): + print("notebook", nba_name) + + if os.path.exists(nba.output_notebook_fn): + os.remove(nba.output_notebook_fn) + + if os.path.exists(nba.preproc_notebook_fn): + os.remove(nba.preproc_notebook_fn) + + try: + nba.execute(dict()) + assert False, 'nba.execute is expected to fail' + except Exception as ex: + assert ex.message == "git-lfs is not initialized" + break def test_nbreduce(test_notebook): from nb2workflow.nbadapter import NotebookAdapter, nbreduce, setup_logging