From 40c210628f21e897ac3c1ec5f4b92e0c45cff1c4 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 09:03:00 -0500 Subject: [PATCH 001/118] Adjusted various URLs now that PR #492 merged. Expanded testing for the variations supported by `setup_fuse_install(...)` function. --- .github/actions/scala_build/action.yml | 6 +- python/mosaic/api/fuse.py | 14 +-- python/setup.cfg | 2 +- python/test/test_fuse_install.py | 114 ++++++++++++++++++ python/test/test_gdal_install.py | 2 + python/test/utils/setup_fuse.py | 49 ++++++++ python/test/utils/setup_gdal.py | 8 +- .../sql/test/SharedSparkSessionGDAL.scala | 4 - 8 files changed, 181 insertions(+), 18 deletions(-) create mode 100644 python/test/test_fuse_install.py create mode 100644 python/test/utils/setup_fuse.py diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index a25a35f26..651232edb 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -33,9 +33,9 @@ runs: sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30 - #sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip tests is false if: 
inputs.skip_tests == 'false' shell: bash diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index eb39609a6..76e8b7591 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -29,7 +29,7 @@ class SetupMgr: """ to_fuse_dir: str script_in_name: str = 'mosaic-gdal-init.sh' - script_out_name: str = 'mosaic-gdal-init.sh' + script_out_name: str = 'mosaic-fuse-init.sh' with_mosaic_pip: bool = False with_gdal: bool = True with_ubuntugis: bool = False @@ -56,12 +56,10 @@ def configure(self) -> None: elif mosaic_version is None: github_version = 'main' - # TODOS AFTER PR MERGED: - # [1] CHANGE URL TO ACTUAL MOSAIC (not 'mjohns-databricks'): - # 'https://raw.githubusercontent.com/databrickslabs/mosaic' - # [2] USE f'{GITHUB_CONTENT_URL_BASE}/{github_version}' (not 'gdal-jammy-1') - GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/mjohns-databricks/mosaic' - GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/gdal-jammy-1' + GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/databrickslabs/mosaic' + GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/v_{github_version}' + if github_version == 'main': + GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/{github_version}' # - generate fuse dir path os.makedirs(self.to_fuse_dir, exist_ok=True) @@ -145,7 +143,7 @@ def configure(self) -> None: # url and version details GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version - if github_version is None: + if github_version == 'main': latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] diff --git a/python/setup.cfg b/python/setup.cfg index e8d0eb2fe..b734e5fd1 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -18,7 +18,7 @@ classifiers = packages = find: python_requires = >=3.10.0 setup_requires = - pyspark==3.4.0 + pyspark==3.4.1 ipython>=7.22.0 install_requires 
= diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py new file mode 100644 index 000000000..8caa16c6e --- /dev/null +++ b/python/test/test_fuse_install.py @@ -0,0 +1,114 @@ +from .utils import SparkTestCase, FuseInstaller + + +class TestFuseInstall(SparkTestCase): + + def test_setup_no_op(self): + installer = FuseInstaller(False, False, jar_copy=False, jni_so_copy=False) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + self.assertEquals(len(installer.list_files()), 0) + + def test_setup_jar_only(self): + installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + files = installer.list_files() + self.assertEquals(len(files), 1) + self.assertEquals(files[0][-4:].lower(), '.jar') + + def test_setup_sh_pip_only(self): + installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + try: + installer_result = installer.run_init_script() + except Exception: + self.fail("Running fuse init script raised an exception.") + self.assertEqual(installer_result, 0) + + files = installer.list_files() + self.assertEquals(len(files), 1) + self.assertEquals(files[0][-3:].lower(), '.sh') + + def test_setup_sh_gdal(self): + installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + try: + installer_result = installer.run_init_script() + except Exception: + self.fail("Running fuse init script raised an exception.") + self.assertEqual(installer_result, 0) + + files = installer.list_files() + self.assertEquals(len(files), 1) + self.assertEquals(files[0][-3:].lower(), '.sh') + + def 
test_setup_sh_gdal_jni(self): + installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + try: + installer_result = installer.run_init_script() + except Exception: + self.fail("Running fuse init script raised an exception.") + self.assertEqual(installer_result, 0) + + files = installer.list_files() + self.assertEqual(len(files), 4) + + found_sh = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + self.assertEqual(so_cnt, 3) + + def test_setup_sh_all(self): + installer = FuseInstaller(True, True, jar_copy=True, jni_so_copy=True) + try: + installer.do_op() + except Exception: + self.fail("Executing `setup_fuse_install()` raised an exception.") + + try: + installer_result = installer.run_init_script() + except Exception: + self.fail("Running fuse init script raised an exception.") + self.assertEqual(installer_result, 0) + + files = installer.list_files() + self.assertEqual(len(files), 5) + + found_sh = False + found_jar = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif f.lower().endswith('.jar'): + found_jar = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + self.assertTrue(found_jar) + self.assertEqual(so_cnt, 3) diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index 3d2125fb2..79cbde1e5 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -18,3 +18,5 @@ def test_setup_gdal(self): gdalinfo_result = installer.test_gdalinfo() self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") + + self.assertEquals(len(installer.list_files()), 1) # <- just init script \ No newline at end of file diff --git a/python/test/utils/setup_fuse.py 
b/python/test/utils/setup_fuse.py new file mode 100644 index 000000000..7fcebda4c --- /dev/null +++ b/python/test/utils/setup_fuse.py @@ -0,0 +1,49 @@ +import os +import tempfile +import subprocess +from pkg_resources import working_set, Requirement + +from test.context import api + +FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" + +class FuseInstaller: + def __init__( + self, with_mosaic_pip, with_gdal, + jar_copy = False, jni_so_copy = False + ): + self._site_packages = working_set.find(Requirement("keplergl")).location + self._temp_dir = tempfile.TemporaryDirectory() + self.with_mosaic_pip = with_mosaic_pip + self.with_gdal = with_gdal + self.jar_copy = jar_copy + self.jni_so_copy = jni_so_copy + + def __del__(self): + self._temp_dir.cleanup() + + def do_op(self): + api.setup_fuse_install( + self._temp_dir.name, + self.with_mosaic_pip, + self.with_gdal, + self.jar_copy, + self.jni_so_copy, + script_out_name=FUSE_INIT_SCRIPT_FILENAME + ) + + def run_init_script(self): + fuse_install_script_target = os.path.join( + self._temp_dir.name, FUSE_INIT_SCRIPT_FILENAME + ) + os.chmod(fuse_install_script_target, mode=0x744) + result = subprocess.run( + [fuse_install_script_target], + stdout=subprocess.PIPE, + env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), + ) + print(result.stdout.decode()) + return result.returncode + + def list_files(self): + return os.listdir(self._temp_dir.name) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 4bd577a89..66b647db2 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -5,6 +5,7 @@ from test.context import api +GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" class GDALInstaller: def __init__(self): @@ -15,11 +16,11 @@ def __del__(self): self._temp_dir.cleanup() def copy_objects(self): - api.setup_gdal(self._temp_dir.name) + api.setup_gdal(self._temp_dir.name, script_out_name=GDAL_INIT_SCRIPT_FILENAME) def run_init_script(self): 
gdal_install_script_target = os.path.join( - self._temp_dir.name, "mosaic-gdal-init.sh" + self._temp_dir.name, GDAL_INIT_SCRIPT_FILENAME ) os.chmod(gdal_install_script_target, mode=0x744) result = subprocess.run( @@ -29,6 +30,9 @@ def run_init_script(self): ) print(result.stdout.decode()) return result.returncode + + def list_files(self): + return os.listdir(self._temp_dir.name) def test_gdalinfo(self): result = subprocess.run(["gdalinfo", "--version"], stdout=subprocess.PIPE) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 36da49694..7576e79c5 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -23,10 +23,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { val session = new TestSparkSession(conf) session.sparkContext.setLogLevel("FATAL") Try { - val tempPath = Files.createTempDirectory("mosaic-gdal") - // prepareEnvironment no longer exists - // - only have python calls now - //MosaicGDAL.prepareEnvironment(session, tempPath.toAbsolutePath.toString) MosaicGDAL.enableGDAL(session) } session From c0a1df1d8131fb358956c9bada8d739c5a0b51bf Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 09:42:21 -0500 Subject: [PATCH 002/118] Adjusted from https://raw.githubusercontent.com to https://github.com/../raw url pattern to avoid needing an additional github username header token.
--- .github/actions/scala_build/action.yml | 6 +++--- CHANGELOG.md | 6 +++++- python/mosaic/api/fuse.py | 4 ++-- scripts/mosaic-gdal-init.sh | 12 +++++------- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 651232edb..5bc4cd657 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -33,9 +33,9 @@ runs: sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 + sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so + sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30 + sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash diff --git a/CHANGELOG.md b/CHANGELOG.md index eff0a47fe..8ba2a3aa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ -## v0.3.14 +## v0.4.0 [DBR 13.3 LTS] +- First release for DBR 13.3 LTS which is Ubuntu Jammy and Spark 3.4.1. Not backwards compatible, meaning it will not run on prior DBRs; requires either a Photon DBR or a ML Runtime (Standard DBR not allowed). 
+- New `setup_fuse_install` function to meet various requirements arising with Unity Catalog + Shared Access clusters; removed the scala equivalent function, making artifact setup and install python-first for scala and Spark SQL. +- +## v0.3.14 [DBR < 13] - Fixes for Warning and Error messages on mosaic_enable call. - Performance improvements for raster functions. - Fix support for GDAL configuration via spark config (use 'spark.databricks.labs.mosaic.gdal.' prefix). diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 76e8b7591..1376d93a2 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -56,10 +56,10 @@ def configure(self) -> None: elif mosaic_version is None: github_version = 'main' - GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/databrickslabs/mosaic' + GITHUB_CONTENT_URL_BASE = 'https://github.com/databrickslabs/mosaic/raw' GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/v_{github_version}' if github_version == 'main': - GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/{github_version}' + GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/main' # - generate fuse dir path os.makedirs(self.to_fuse_dir, exist_ok=True) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 27f8a2b0c..4ae6cf30b 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 20 NOV, 2023 +# Last Modified: 20 DEC, 2023 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -72,12 +72,10 @@ then cp $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github - # - !!! TODO: MODIFY PATH ONCE PR MERGES !!! 
- # - THIS WILL USE GITHUB_VERSION - GITHUB_REPO_PATH=databrickslabs/mosaic/main/src/main/resources/gdal/ubuntu + GITHUB_REPO_PATH=databrickslabs/mosaic/raw/main/resources/gdal/jammy - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 - #sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 + sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so + sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 + sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 fi fi From 6c34ac9b151e837e2ac0aaf7dd97161a2f131494 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 10:25:49 -0500 Subject: [PATCH 003/118] Fixed broken import for testing. Added error vs warning for standard DBR. --- CHANGELOG.md | 5 +++-- python/test/utils/__init__.py | 1 + .../labs/mosaic/functions/MosaicContext.scala | 15 +++++++-------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ba2a3aa4..1b17bbcf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ ## v0.4.0 [DBR 13.3 LTS] -- First release for DBR 13.3 LTS which is Ubuntu Jammy and Spark 3.4.1. Not backwards compatible, meaning it will not run on prior DBRs; requires either a Photon DBR or a ML Runtime (Standard DBR not allowed). +- First release for DBR 13.3 LTS which is Ubuntu Jammy and Spark 3.4.1. Not backwards compatible, meaning it will not run on prior DBRs; requires either a Photon DBR or a ML Runtime (__Standard, non-Photon DBR no longer allowed__). - New `setup_fuse_install` function to meet various requirements arising with Unity Catalog + Shared Access clusters; removed the scala equivalent function, making artifact setup and install python-first for scala and Spark SQL. 
-- +- Removed OSS ESRI Geometry API for 0.4 series, JTS now the only vector provider. + ## v0.3.14 [DBR < 13] - Fixes for Warning and Error messages on mosaic_enable call. - Performance improvements for raster functions. diff --git a/python/test/utils/__init__.py b/python/test/utils/__init__.py index c46240505..cdcf84086 100644 --- a/python/test/utils/__init__.py +++ b/python/test/utils/__init__.py @@ -1,3 +1,4 @@ from .mosaic_test_case import * from .mosaic_test_case_with_gdal import * from .setup_gdal import GDALInstaller +from .setup_fuse import FuseInstaller diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 8e483c702..049366704 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -1004,7 +1004,7 @@ object MosaicContext extends Logging { (dbrMajor > 12 && mosaicVersion < "0.4.0") ) { val msg = """|DEPRECATION ERROR: - | Mosaic v0.3.x series only supports Databricks Runtime 12 and below. + | Mosaic v0.4.x series only supports Databricks Runtime 13. 
| You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13.""".stripMargin logError(msg) @@ -1013,17 +1013,16 @@ object MosaicContext extends Logging { } if (!isML && !isPhoton && !isTest) { - val msg = """|DEPRECATION WARNING: + val msg = """|DEPRECATION ERROR: | Please use a Databricks: | - Photon-enabled Runtime for performance benefits | - Runtime ML for spatial AI benefits - | Mosaic will stop working on this cluster after v0.3.x.""".stripMargin - logWarning(msg) + | Mosaic 0.4.x series restricts executing this cluster.""".stripMargin + logError(msg) println(msg) - false - } else { - true - } + throw new Exception(msg) + } + true } } From 309e68e22669271f22c68cdb02af7d0890158a78 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 11:35:33 -0500 Subject: [PATCH 004/118] - updated scala checks since `*.clusterType` no longer a tag. - `*.sparkVersion` works for (1) DBR major, (2) isML and (3) isPhoton. - verified on all flavors of DBR 13.3 and 12.2 --- CHANGELOG.md | 2 ++ python/mosaic/api/enable.py | 10 ++----- .../labs/mosaic/functions/MosaicContext.scala | 7 ++--- .../functions/MosaicContextBehaviors.scala | 28 +++++++++++++------ 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b17bbcf8..b41fbce18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ - First release for DBR 13.3 LTS which is Ubuntu Jammy and Spark 3.4.1. Not backwards compatible, meaning it will not run on prior DBRs; requires either a Photon DBR or a ML Runtime (__Standard, non-Photon DBR no longer allowed__). - New `setup_fuse_install` function to meet various requirements arising with Unity Catalog + Shared Access clusters; removed the scala equivalent function, making artifact setup and install python-first for scala and Spark SQL. - Removed OSS ESRI Geometry API for 0.4 series, JTS now the only vector provider. 
+- MosaicAnalyzer functions now accept Spark DataFrames instead of MosaicFrame, which has been removed. +- Docs for 0.3.x have been archived and linked from current docs; notebooks for 0.3.x have been separated from current notebooks. ## v0.3.14 [DBR < 13] - Fixes for Warning and Error messages on mosaic_enable call. diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index 3aea9ff7c..a6c3c0285 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -56,14 +56,8 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: isSupported = config.mosaic_context._context.checkDBR(spark._jsparkSession) if not isSupported: - print( - """ - DEPRECATION WARNING: - Please use a Databricks: - - Photon-enabled Runtime for performance benefits - - Runtime ML for spatial AI benefits - Mosaic will stop working on this cluster after v0.3.x.""" - ) + # unexpected - checkDBR returns true or throws exception + print("""WARNING: checkDBR returned False.""") # Not yet added to the pyspark API with warnings.catch_warnings(): diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 049366704..9cad7250c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -995,14 +995,11 @@ object MosaicContext extends Logging { def checkDBR(spark: SparkSession): Boolean = { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") val isML = sparkVersion.contains("-ml-") - val isPhoton = spark.conf.getOption("spark.databricks.photon.enabled").getOrElse("false").toBoolean + val isPhoton = sparkVersion.contains("-photon-") val isTest = !spark.conf.getAll.exists(_._1.startsWith("spark.databricks.clusterUsageTags.")) val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt - if ( - (dbrMajor < 13 && 
mosaicVersion >= "0.4.0") || - (dbrMajor > 12 && mosaicVersion < "0.4.0") - ) { + if (dbrMajor != 13) { val msg = """|DEPRECATION ERROR: | Mosaic v0.4.x series only supports Databricks Runtime 13. | You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13.""".stripMargin diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 35bf435ba..9b11ef17b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -248,17 +248,29 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { method.apply(1).asInstanceOf[Int] shouldBe 2 } - def printWarnings(): Unit = { - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "1-x") - spark.conf.set("spark.databricks.photon.enabled", "false") - spark.conf.set("spark.databricks.clusterUsageTags.clusterType", "1-x") - noException should be thrownBy MosaicContext.checkDBR(spark) - } + def throwErrors(): Unit = { + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) - def throwError(): Unit = { spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "14-x") - spark.conf.set("spark.databricks.clusterUsageTags.clusterType", "14-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "14-photon-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-photon-x") + an[Exception] should be thrownBy MosaicContext.checkDBR(spark) + } + + def noErrors(): Unit = { + 
spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-photon-x") + noException should be thrownBy MosaicContext.checkDBR(spark) + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") + noException should be thrownBy MosaicContext.checkDBR(spark) } } From ce5414b734108c08efbcf4adf471f5eb8c38d0ef Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 11:46:09 -0500 Subject: [PATCH 005/118] Missed a change in test harness. --- .../databricks/labs/mosaic/functions/MosaicContextTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala index a06a43bcd..612846787 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala @@ -17,7 +17,7 @@ class MosaicContextTest extends MosaicSpatialQueryTest with SharedSparkSession w test("MosaicContext lookup correct sql functions") { sqlFunctionLookup() } test("MosaicContext should use databricks h3") { callDatabricksH3() } test("MosaicContext should correctly reflect functions") { reflectedMethods() } - test("MosaicContext should printWarning") { printWarnings() } - test("MosaicContext should throwError") { throwError() } + test("MosaicContext should throw errors") { throwErrors() } + test("MosaicContext should not throw errors") { noErrors() } } From 5d7c2b5e2f57a847be1dfb45b36f894b16eedc26 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 12:49:22 -0500 Subject: [PATCH 006/118] Adjustments to spark testing vs DBR. Resetting spark conf per test. 
--- .../labs/mosaic/functions/MosaicContext.scala | 7 +++++-- .../scala/com/databricks/labs/mosaic/package.scala | 2 ++ .../labs/mosaic/functions/MosaicContextBehaviors.scala | 10 +++++++--- .../labs/mosaic/functions/MosaicContextTest.scala | 5 ++--- .../apache/spark/sql/test/SharedSparkSessionGDAL.scala | 3 +++ 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 9cad7250c..6c6face87 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -996,10 +996,13 @@ object MosaicContext extends Logging { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") val isML = sparkVersion.contains("-ml-") val isPhoton = sparkVersion.contains("-photon-") - val isTest = !spark.conf.getAll.exists(_._1.startsWith("spark.databricks.clusterUsageTags.")) + val isTest = ( + !spark.conf.getAll.keySet.contains("spark.databricks.clusterUsageTags.clusterId") && + spark.conf.get(MOSAIC_TEST, "false").toBoolean == true + ) val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt - if (dbrMajor != 13) { + if (dbrMajor != 13 && !isTest) { val msg = """|DEPRECATION ERROR: | Mosaic v0.4.x series only supports Databricks Runtime 13. 
| You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13.""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index 543592fbc..3997c7165 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -28,6 +28,8 @@ package object mosaic { val MOSAIC_RASTER_READ_AS_PATH = "as_path" val MOSAIC_RASTER_RE_TILE_ON_READ = "retile_on_read" + val MOSAIC_TEST = "spark.databricks.labs.mosaic.is.test" + def read: MosaicDataFrameReader = new MosaicDataFrameReader(SparkSession.builder().getOrCreate()) } diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 9b11ef17b..227ac70a9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -249,6 +249,8 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def throwErrors(): Unit = { + spark.conf.set("spark.databricks.clusterUsageTags.clusterId", "my-cluster") + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) @@ -261,15 +263,17 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-photon-x") + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "12-photon-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) } def noErrors(): Unit = { - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-photon-x") + 
spark.conf.set("spark.databricks.clusterUsageTags.clusterId", "my-cluster") + + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") noException should be thrownBy MosaicContext.checkDBR(spark) - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-photon-x") noException should be thrownBy MosaicContext.checkDBR(spark) } diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala index 612846787..c6e773244 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextTest.scala @@ -17,7 +17,6 @@ class MosaicContextTest extends MosaicSpatialQueryTest with SharedSparkSession w test("MosaicContext lookup correct sql functions") { sqlFunctionLookup() } test("MosaicContext should use databricks h3") { callDatabricksH3() } test("MosaicContext should correctly reflect functions") { reflectedMethods() } - test("MosaicContext should throw errors") { throwErrors() } - test("MosaicContext should not throw errors") { noErrors() } - + test("MosaicContext should throw error for wrong DBRs") { throwErrors() } + test("MosaicContext should not throw error for correct DBRs") { noErrors() } } diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 7576e79c5..dd194ba0c 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -14,6 +14,8 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def sparkConf: SparkConf = { super.sparkConf .set(MOSAIC_GDAL_NATIVE, "true") + super.sparkConf + set(MOSAIC_TEST, "true") } override def createSparkSession: 
TestSparkSession = { @@ -30,6 +32,7 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def beforeEach(): Unit = { super.beforeEach() + this.spark.conf = super.sparkConf MosaicGDAL.enableGDAL(this.spark) gdal.AllRegister() } From 293caba39551edbad9a0d8a2030717ccb264112e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 13:02:20 -0500 Subject: [PATCH 007/118] missed a '.' --- .../org/apache/spark/sql/test/SharedSparkSessionGDAL.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index dd194ba0c..a3849a90a 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -1,7 +1,7 @@ package org.apache.spark.sql.test import com.databricks.labs.mosaic.gdal.MosaicGDAL -import com.databricks.labs.mosaic.{MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} +import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.gdal.gdal.gdal @@ -15,7 +15,7 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { super.sparkConf .set(MOSAIC_GDAL_NATIVE, "true") super.sparkConf - set(MOSAIC_TEST, "true") + .set(MOSAIC_TEST, "true") } override def createSparkSession: TestSparkSession = { From c2cc8af16c63984d9f31f4bd34042edd16de9e6c Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 13:25:19 -0500 Subject: [PATCH 008/118] Streamlining isTest check further. 
--- .../com/databricks/labs/mosaic/functions/MosaicContext.scala | 5 +---- .../labs/mosaic/functions/MosaicContextBehaviors.scala | 4 ---- .../org/apache/spark/sql/test/SharedSparkSessionGDAL.scala | 1 - 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 6c6face87..4c82b8cf8 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -996,10 +996,7 @@ object MosaicContext extends Logging { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") val isML = sparkVersion.contains("-ml-") val isPhoton = sparkVersion.contains("-photon-") - val isTest = ( - !spark.conf.getAll.keySet.contains("spark.databricks.clusterUsageTags.clusterId") && - spark.conf.get(MOSAIC_TEST, "false").toBoolean == true - ) + val isTest = spark.conf.get(MOSAIC_TEST, "false").toBoolean == true val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt if (dbrMajor != 13 && !isTest) { diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 227ac70a9..dad781ab6 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -249,8 +249,6 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def throwErrors(): Unit = { - spark.conf.set("spark.databricks.clusterUsageTags.clusterId", "my-cluster") - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) @@ -268,8 +266,6 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def noErrors(): Unit = { - 
spark.conf.set("spark.databricks.clusterUsageTags.clusterId", "my-cluster") - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") noException should be thrownBy MosaicContext.checkDBR(spark) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index a3849a90a..e299587fa 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -32,7 +32,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def beforeEach(): Unit = { super.beforeEach() - this.spark.conf = super.sparkConf MosaicGDAL.enableGDAL(this.spark) gdal.AllRegister() } From 010f114d0fe0a51d90e27e2c963daf0a53833ff9 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 13:57:55 -0500 Subject: [PATCH 009/118] spark conf standardization for testing. --- .../databricks/labs/mosaic/functions/MosaicContext.scala | 7 +++++-- src/main/scala/com/databricks/labs/mosaic/package.scala | 1 + .../labs/mosaic/functions/MosaicContextBehaviors.scala | 4 ++++ .../org/apache/spark/sql/test/SharedSparkSessionGDAL.scala | 6 ++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 4c82b8cf8..98fed6477 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -996,8 +996,11 @@ object MosaicContext extends Logging { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") val isML = sparkVersion.contains("-ml-") val isPhoton = sparkVersion.contains("-photon-") - val isTest = spark.conf.get(MOSAIC_TEST, "false").toBoolean == true - + val isTest = ( + spark.conf.get(MOSAIC_TEST, 
"false").toBoolean == true && + spark.conf.get(MOSAIC_TEST_DBR, "false").toBoolean == false + ) + val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt if (dbrMajor != 13 && !isTest) { val msg = """|DEPRECATION ERROR: diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index 3997c7165..bb2bec3a7 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -29,6 +29,7 @@ package object mosaic { val MOSAIC_RASTER_RE_TILE_ON_READ = "retile_on_read" val MOSAIC_TEST = "spark.databricks.labs.mosaic.is.test" + val MOSAIC_TEST_DBR = "spark.databricks.labs.mosaic.is.test.dbr" def read: MosaicDataFrameReader = new MosaicDataFrameReader(SparkSession.builder().getOrCreate()) diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index dad781ab6..8e02a6f1a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -249,6 +249,8 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def throwErrors(): Unit = { + spark.conf.set(MOSAIC_TEST_DBR, "true") + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) @@ -266,6 +268,8 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def noErrors(): Unit = { + spark.conf.set(MOSAIC_TEST_DBR, "true") + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") noException should be thrownBy MosaicContext.checkDBR(spark) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index e299587fa..02be8c870 100644 --- 
a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -1,7 +1,7 @@ package org.apache.spark.sql.test import com.databricks.labs.mosaic.gdal.MosaicGDAL -import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} +import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_TEST_DBR, MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.gdal.gdal.gdal @@ -32,8 +32,10 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def beforeEach(): Unit = { super.beforeEach() + this.spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "0") + this.spark.conf.set(MOSAIC_TEST_DBR, "false") MosaicGDAL.enableGDAL(this.spark) gdal.AllRegister() } - + } From 2fdfe5e86f3a3df06e193d7c64c71e57fd42fcf0 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 15:56:38 -0500 Subject: [PATCH 010/118] spark.conf.set(MOSAIC_TEST, "true") where needed. 
--- .../labs/mosaic/datasource/OGRFileFormatTest.scala | 4 +++- .../multiread/RasterAsGridReaderTest.scala | 10 +++++++++- .../expressions/format/ConvertToBehaviors.scala | 4 ++++ .../geometry/FlattenPolygonsBehaviors.scala | 7 +++++++ .../expressions/geometry/ST_BufferBehaviors.scala | 4 ++++ .../geometry/ST_ConvexHullBehaviors.scala | 4 ++++ .../expressions/geometry/ST_DistanceBehaviors.scala | 4 ++++ .../expressions/geometry/ST_HaversineBehaviors.scala | 5 +++++ .../geometry/ST_IntersectionBehaviors.scala | 6 ++++++ .../geometry/ST_IntersectsBehaviors.scala | 6 ++++++ .../expressions/geometry/ST_RotateBehaviors.scala | 6 +++++- .../expressions/geometry/ST_ScaleBehaviors.scala | 6 +++++- .../expressions/geometry/ST_SetSRIDBehaviors.scala | 5 ++++- .../expressions/geometry/ST_SimplifyBehaviors.scala | 6 +++++- .../expressions/geometry/ST_TransformBehaviors.scala | 6 +++++- .../expressions/geometry/ST_TranslateBehaviors.scala | 6 +++++- .../expressions/geometry/ST_UnionBehaviors.scala | 11 +++++++++-- .../raster/RST_BandMetadataBehaviors.scala | 3 +++ .../raster/RST_BoundingBoxBehaviors.scala | 3 +++ .../expressions/raster/RST_ClipBehaviors.scala | 3 +++ .../raster/RST_CombineAvgAggBehaviors.scala | 3 +++ .../expressions/raster/RST_CombineAvgBehaviors.scala | 3 +++ .../raster/RST_DerivedBandAggBehaviors.scala | 3 +++ .../raster/RST_DerivedBandBehaviors.scala | 3 +++ .../expressions/raster/RST_FromBandsBehaviors.scala | 3 +++ .../expressions/raster/RST_FromFileBehaviors.scala | 3 +++ .../raster/RST_GeoReferenceBehaviors.scala | 3 +++ .../expressions/raster/RST_GetNoDataBehaviors.scala | 3 +++ .../raster/RST_GetSubdatasetBehaviors.scala | 3 +++ .../expressions/raster/RST_HeightBehaviors.scala | 3 +++ .../expressions/raster/RST_InitNoDataBehaviors.scala | 3 +++ .../expressions/raster/RST_IsEmptyBehaviors.scala | 3 +++ .../expressions/raster/RST_MapAlgebraBehaviors.scala | 3 +++ .../expressions/raster/RST_MemSizeBehaviors.scala | 3 +++ 
.../expressions/raster/RST_MergeAggBehaviors.scala | 3 +++ .../expressions/raster/RST_MergeBehaviors.scala | 3 +++ .../expressions/raster/RST_MetadataBehaviors.scala | 3 +++ .../expressions/raster/RST_NDVIBehaviors.scala | 3 +++ .../expressions/raster/RST_NumBandsBehaviors.scala | 3 +++ .../raster/RST_PixelHeightBehaviors.scala | 3 +++ .../expressions/raster/RST_PixelWidthBehaviors.scala | 3 +++ .../raster/RST_RasterToGridAvgBehaviors.scala | 3 +++ .../raster/RST_RasterToGridCountBehaviors.scala | 3 +++ .../raster/RST_RasterToGridMaxBehaviors.scala | 3 +++ .../raster/RST_RasterToGridMedianBehaviors.scala | 3 +++ .../raster/RST_RasterToGridMinBehaviors.scala | 3 +++ .../raster/RST_RasterToWorldCoordBehaviors.scala | 3 +++ .../raster/RST_RasterToWorldCoordXBehaviors.scala | 3 +++ .../raster/RST_RasterToWorldCoordYBehaviors.scala | 3 +++ .../expressions/raster/RST_ReTileBehaviors.scala | 3 +++ .../expressions/raster/RST_RotationBehaviors.scala | 3 +++ .../expressions/raster/RST_SRIDBehaviors.scala | 3 +++ .../expressions/raster/RST_ScaleXBehaviors.scala | 3 +++ .../expressions/raster/RST_ScaleYBehaviors.scala | 3 +++ .../expressions/raster/RST_SetNoDataBehaviors.scala | 3 +++ .../expressions/raster/RST_SkewXBehaviors.scala | 3 +++ .../expressions/raster/RST_SkewYBehaviors.scala | 3 +++ .../raster/RST_SubdatasetsBehaviors.scala | 3 +++ .../expressions/raster/RST_SummaryBehaviors.scala | 3 +++ .../expressions/raster/RST_TessellateBehaviors.scala | 3 +++ .../raster/RST_ToOverlappingTilesBehaviors.scala | 3 +++ .../expressions/raster/RST_TryOpenBehaviors.scala | 3 +++ .../expressions/raster/RST_UpperLeftXBehaviors.scala | 3 +++ .../expressions/raster/RST_UpperLeftYBehaviors.scala | 3 +++ .../expressions/raster/RST_WidthBehaviors.scala | 3 +++ .../raster/RST_WorldToRasterCoordBehaviors.scala | 3 +++ .../raster/RST_WorldToRasterCoordXBehaviors.scala | 3 +++ .../raster/RST_WorldToRasterCoordYBehaviors.scala | 3 +++ .../mosaic/functions/MosaicContextBehaviors.scala | 4 ++-- 
.../mosaic/functions/MosaicRegistryBehaviors.scala | 2 ++ .../labs/mosaic/models/knn/SpatialKNNTest.scala | 5 +++++ .../mosaic/sql/extensions/TestSQLExtensions.scala | 12 +++++++++++- .../labs/mosaic/test/MosaicSpatialQueryTest.scala | 3 +++ .../labs/mosaic/test/SparkCodeGenSuite.scala | 3 ++- .../com/databricks/labs/mosaic/test/SparkSuite.scala | 3 ++- .../spark/sql/test/SharedSparkSessionGDAL.scala | 2 +- 76 files changed, 271 insertions(+), 16 deletions(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala index 6ed735d1f..11feabf0c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.datasource +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.expressions.util.OGRReadeWithOffset import com.databricks.labs.mosaic.functions.MosaicContext import com.databricks.labs.mosaic.utils.PathUtils @@ -144,7 +145,8 @@ class OGRFileFormatTest extends QueryTest with SharedSparkSessionGDAL { test("OGRFileFormat should handle partial schema: ISSUE 351") { assume(System.getProperty("os.name") == "Linux") - + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(H3, JTS) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala index 1f7b4008b..0b1d095e8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala @@ -1,7 +1,7 @@ package com.databricks.labs.mosaic.datasource.multiread import 
com.databricks.labs.mosaic.functions.MosaicContext -import com.databricks.labs.mosaic.JTS +import com.databricks.labs.mosaic.{JTS, MOSAIC_TEST} import com.databricks.labs.mosaic.core.index.H3IndexSystem import com.databricks.labs.mosaic.test.MosaicSpatialQueryTest import org.apache.spark.sql.test.SharedSparkSessionGDAL @@ -14,6 +14,8 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read netcdf with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val netcdf = "/binary/netcdf-coral/" @@ -35,6 +37,8 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read grib with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val grib = "/binary/grib-cams/" @@ -55,6 +59,8 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read tif with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val tif = "/modis/" @@ -73,6 +79,8 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read zarr with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val zarr = "/binary/zarr-example/" diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala index 614d860f9..d83c594a9 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.format +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,6 +14,7 @@ trait ConvertToBehaviors extends QueryTest { def checkInputTypeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -59,6 +61,7 @@ trait ConvertToBehaviors extends QueryTest { def passthroughBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -68,6 +71,7 @@ trait ConvertToBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) val wkts = getWKTRowsDf().select("wkt") diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala index 53d4c24cd..3fc135248 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import 
com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.core.types.{HexType, InternalGeometryType} @@ -17,6 +18,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenWKBPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -67,6 +69,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenWKTPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -117,6 +120,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenCOORDSPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -178,6 +182,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenHEXPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -239,6 +244,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def failDataTypeCheck(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -257,6 +263,7 @@ trait FlattenPolygonsBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, 
geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala index 674238e43..bec55c0cc 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -16,6 +17,7 @@ trait ST_BufferBehaviors extends QueryTest { def bufferBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -57,6 +59,7 @@ trait ST_BufferBehaviors extends QueryTest { def bufferCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -86,6 +89,7 @@ trait ST_BufferBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala index 607e0f3e5..8b06a4e33 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -14,6 +15,7 @@ trait ST_ConvexHullBehaviors extends QueryTest { def convexHullBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -38,6 +40,7 @@ trait ST_ConvexHullBehaviors extends QueryTest { def convexHullCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -71,6 +74,7 @@ trait ST_ConvexHullBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala index 54de04600..e7ad99e29 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala @@ -1,5 +1,6 @@ 
package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.geometry.MosaicGeometry import com.databricks.labs.mosaic.core.index._ @@ -19,6 +20,7 @@ trait ST_DistanceBehaviors extends QueryTest { def distanceBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -52,6 +54,7 @@ trait ST_DistanceBehaviors extends QueryTest { def distanceCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -88,6 +91,7 @@ trait ST_DistanceBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala index 611be6f31..e6cd3184a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,6 +14,8 @@ import 
org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper trait ST_HaversineBehaviors extends QueryTest { def haversineBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -25,6 +28,7 @@ trait ST_HaversineBehaviors extends QueryTest { def haversineCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -45,6 +49,7 @@ trait ST_HaversineBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala index c7ec70f36..e2605776a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum @@ -21,6 +22,7 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = 
MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -72,6 +74,7 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionAggBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -167,6 +170,7 @@ trait ST_IntersectionBehaviors extends QueryTest { def selfIntersectionBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -213,6 +217,7 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -239,6 +244,7 @@ trait ST_IntersectionBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala index 4a480dfd3..496d015d7 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import 
com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum @@ -18,6 +19,7 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -93,6 +95,7 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsAggBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -148,6 +151,7 @@ trait ST_IntersectsBehaviors extends QueryTest { def selfIntersectsBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -192,6 +196,7 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -218,6 +223,7 @@ trait ST_IntersectsBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala index 4741eedff..2d45cbfa4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,8 +16,9 @@ trait ST_RotateBehaviors extends QueryTest { def rotateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) @@ -36,6 +38,7 @@ trait ST_RotateBehaviors extends QueryTest { def rotateCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -65,6 +68,7 @@ trait ST_RotateBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala index 4901a0f04..7b901d7a0 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala @@ 
-1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -14,8 +15,9 @@ trait ST_ScaleBehaviors extends QueryTest { def scaleBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) @@ -35,6 +37,7 @@ trait ST_ScaleBehaviors extends QueryTest { def scaleCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -64,6 +67,7 @@ trait ST_ScaleBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala index 61fd4de70..33be242bd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,8 +14,9 @@ trait 
ST_SetSRIDBehaviors extends QueryTest { def setSRIDBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) @@ -64,6 +66,7 @@ trait ST_SetSRIDBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala index a5e10e13d..47f4dc458 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -16,9 +17,10 @@ trait ST_SimplifyBehaviors extends QueryTest { def simplifyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) - import mc.functions._ val sc = spark + import mc.functions._ import sc.implicits._ mc.register(spark) @@ -52,6 +54,7 @@ trait ST_SimplifyBehaviors extends QueryTest { def simplifyCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = 
MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -81,6 +84,7 @@ trait ST_SimplifyBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala index 1f2e8b1a5..4b4576d56 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -20,8 +21,10 @@ trait ST_TransformBehaviors extends QueryTest { val geomFactory = new GeometryFactory() def reprojectGeometries(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - val sc = spark + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) @@ -116,6 +119,7 @@ trait ST_TransformBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala index 58c7215db..aaca52282 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -14,8 +15,9 @@ trait ST_TranslateBehaviors extends QueryTest { def translateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - val sc = spark + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ import sc.implicits._ mc.register(spark) @@ -36,6 +38,7 @@ trait ST_TranslateBehaviors extends QueryTest { def translateCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -65,6 +68,7 @@ trait ST_TranslateBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala index 435677c64..a5ac7ad96 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.geometry +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,9 +16,11 @@ import org.scalatest.matchers.should.Matchers.{an, be, convertToAnyShouldWrapper trait ST_UnionBehaviors extends QueryTest { def unionBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) + val sc = spark import mc.functions._ - val sc = spark import sc.implicits._ mc.register(spark) @@ -39,9 +42,11 @@ trait ST_UnionBehaviors extends QueryTest { } def unionAggBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) - import mc.functions._ val sc = spark + import mc.functions._ import sc.implicits._ mc.register(spark) @@ -77,6 +82,7 @@ trait ST_UnionBehaviors extends QueryTest { def unionCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -104,6 +110,7 @@ trait ST_UnionBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala index 6051ccc8e..bec094343 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_BandMetadataBehaviors extends QueryTest { def bandMetadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { val sc = spark + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala index 9478411bd..444fd3662 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_BoundingBoxBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = 
spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala index dbc0b35e9..a59e2a4d3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_ClipBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala index 5ed81f8f1..8788b0636 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_CombineAvgAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + 
spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala index b0f1225d2..07f07735f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_CombineAvgBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala index c3668bd83..db4526f0c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_DerivedBandAggBehaviors extends QueryTest { // noinspection 
MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala index bd2ded02a..0c4e1eec6 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_DerivedBandBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala index 3a7f7f4a2..c90f56eb2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import 
com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_FromBandsBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala index f61fe174d..0b0260d46 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_FromFileBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala index 6e698426d..86ee3803a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import 
com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_GeoReferenceBehaviors extends QueryTest { //noinspection MapGetGet def geoReferenceBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala index b1154f55e..01d555f26 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_GetNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala index cc572e475..52f307ab3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_GetSubdatasetBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala index 7effc2e14..b9ae2f98b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_HeightBehaviors extends QueryTest { def heightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala index 
cb00638e1..d1659e98f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_InitNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala index 0db36ec39..490de50ff 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_IsEmptyBehaviors extends QueryTest { // noinspection AccessorLikeMethodIsUnit def isEmptyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala index fd15f8102..364613f93 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_MapAlgebraBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala index 741fad613..51a54470a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_MemSizeBehaviors extends QueryTest { def memSizeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + 
spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala index 0533eafee..060795d4e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_MergeAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala index f4b17ce83..491210593 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_MergeBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, 
geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala index d6869fce7..6785a783d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_MetadataBehaviors extends QueryTest { def metadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala index b433ccd79..fd1c51913 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait 
RST_NDVIBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala index 711cab7ce..9509acf38 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_NumBandsBehaviors extends QueryTest { def numBandsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala index d9f0c66f1..ff287b7f2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import 
com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_PixelHeightBehaviors extends QueryTest { def pixelHeightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala index 895c12a52..449e9f03c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_PixelWidthBehaviors extends QueryTest { def pixelWidthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala index 2a08fe559..5d9501476 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridAvgBehaviors extends QueryTest { def rasterToGridAvgBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala index 2d1eca342..5cffe5073 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridCountBehaviors extends QueryTest { def rasterToGridCountBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala index f150abdf9..be2fdea25 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMaxBehaviors extends QueryTest { def rasterToGridMaxBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala index 49ca59dd3..664ab7280 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMedianBehaviors extends QueryTest { def 
rasterToGridMedianBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala index 134f0bfa4..921b17334 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToGridMinBehaviors extends QueryTest { def rasterToGridMinBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala index 8265e745a..e7b59a03d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import 
com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordBehaviors extends QueryTest { def rasterToWorldCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala index 079e0839b..4af151c6a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordXBehaviors extends QueryTest { def rasterToWorldCoordX(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala index c27722e8b..66f27fdb6 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RasterToWorldCoordYBehaviors extends QueryTest { def rasterToWorldCoordY(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala index 608c3de85..adab09401 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ReTileBehaviors extends QueryTest { def retileBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala index 6469d7292..3810fecab 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_RotationBehaviors extends QueryTest { def rotationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala index debe3d0a1..1285b6c09 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SRIDBehaviors extends QueryTest { def sridBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + 
spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala index e12dca7fe..c389856cf 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ScaleXBehaviors extends QueryTest { def scaleXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala index e264199b1..b989158ce 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_ScaleYBehaviors extends QueryTest { 
def scaleYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala index c28403817..d13e17783 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_SetNoDataBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala index 2a5b5e3db..61ef921ff 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ 
-9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SkewXBehaviors extends QueryTest { def skewXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala index 294157065..dd4b80e0e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SkewYBehaviors extends QueryTest { def skewYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala index ad713f17c..df1d7ebd2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import 
com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SubdatasetsBehaviors extends QueryTest { def subdatasetsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala index 1d53cdb4a..85ba51f2c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_SummaryBehaviors extends QueryTest { def summaryBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala index daad95af0..a7b7965c9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_TessellateBehaviors extends QueryTest { // noinspection MapGetGet def tessellateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala index d51f26891..cae81678e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,6 +12,8 @@ trait RST_ToOverlappingTilesBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala index 3e5669614..78dff0745 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ trait RST_TryOpenBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala index 88e5ecd3a..c8b4474ee 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_UpperLeftXBehaviors extends QueryTest { def upperLeftXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) 
mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala index fc83d11d2..35bd61d66 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_UpperLeftYBehaviors extends QueryTest { def upperLeftYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala index 885a3e05a..c5cc5d22f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -9,6 +10,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WidthBehaviors extends QueryTest { def widthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): 
Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala index 4aaf86b3e..0700617bd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordBehaviors extends QueryTest { def worldToRasterCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala index 9dc26422a..d698b18a0 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import 
com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordXBehaviors extends QueryTest { def worldToRasterCoordXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala index e2a259b55..3f4c276b2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.expressions.raster +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -10,6 +11,8 @@ import org.scalatest.matchers.should.Matchers._ trait RST_WorldToRasterCoordYBehaviors extends QueryTest { def worldToRasterCoordYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 8e02a6f1a..448af4d4f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -1,8 +1,8 @@ package com.databricks.labs.mosaic.functions +import com.databricks.labs.mosaic._ import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.test._ -import com.databricks.labs.mosaic._ import org.apache.spark.sql.adapters.Column import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder @@ -269,7 +269,7 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { def noErrors(): Unit = { spark.conf.set(MOSAIC_TEST_DBR, "true") - + spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") noException should be thrownBy MosaicContext.checkDBR(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala index e47f3c797..6a0158151 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala @@ -38,6 +38,8 @@ object MosaicRegistryBehaviors extends MockFactory { ix.name _ when () returns H3.name val gapi = stub[GeometryAPI] gapi.name _ when () returns JTS.name + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(ix, gapi) } diff --git a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala index f9d133b7e..495d14bd5 100644 --- a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.models.knn +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.JTS import 
com.databricks.labs.mosaic.core.index.{BNGIndexSystem, H3IndexSystem} import com.databricks.labs.mosaic.functions.MosaicContext @@ -17,20 +18,24 @@ class SpatialKNNTest extends AnyFlatSpec with SpatialKNNBehaviors with SparkSuit .set(MOSAIC_INDEX_SYSTEM, "H3") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", "uncompressed") var spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like noApproximation(MosaicContext.build(H3IndexSystem, JTS), spark) conf = new SparkConf(false) .set(MOSAIC_INDEX_SYSTEM, "BNG") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", "uncompressed") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like noApproximation(MosaicContext.build(BNGIndexSystem, JTS), spark) } diff --git a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala index db100a53d..07412eaa3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala +++ b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala @@ -16,31 +16,39 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_INDEX_SYSTEM, "H3") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") var spark = withConf(conf) + 
spark.sparkContext.setLogLevel("FATAL") it should behave like sqlRegister(MosaicContext.build(H3IndexSystem, JTS), spark) conf = new SparkConf(false) .set(MOSAIC_INDEX_SYSTEM, "BNG") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like sqlRegister(MosaicContext.build(BNGIndexSystem, JTS), spark) conf = new SparkConf(false) .set(MOSAIC_INDEX_SYSTEM, "DummyIndex") .set(MOSAIC_GEOMETRY_API, "DummyAPI") .set(MOSAIC_RASTER_API, "GDAL") + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like { an[Error] should be thrownBy spark.sql("""show functions""").collect() } conf = new SparkConf(false) + .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQLDefault") spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like sqlRegister(MosaicContext.build(H3IndexSystem, JTS), spark) } @@ -49,9 +57,11 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa assume(System.getProperty("os.name") == "Linux") val conf = new SparkConf(loadDefaults = false) - .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicGDAL") + .set(MOSAIC_TEST, "true") .set(MOSAIC_GDAL_NATIVE, "true") + .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicGDAL") val spark = withConf(conf) + spark.sparkContext.setLogLevel("FATAL") it should behave like mosaicGDAL(MosaicContext.build(H3IndexSystem, JTS), spark) } diff --git a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala index d9e5fd534..64ba5734e 100644 
--- a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala @@ -1,5 +1,6 @@ package com.databricks.labs.mosaic.test +import com.databricks.labs.mosaic.{MOSAIC_TEST} import com.databricks.labs.mosaic.core.geometry.api.{GeometryAPI, JTS} import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -179,6 +180,8 @@ trait MosaicHelper extends BeforeAndAfterEach { self: Suite => /** Constructs the MosaicContext from its parts and calls `f`. */ protected def withMosaicContext(geometry: GeometryAPI, indexSystem: IndexSystem)(f: MosaicContext => Unit): Unit = { + spark.sparkContext.setLogLevel("FATAL") + spark.conf.set(MOSAIC_TEST, "true") val mc: MosaicContext = MosaicContext.build(indexSystem, geometry) f(mc) diff --git a/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala b/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala index 730af9d9d..6870da193 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala @@ -1,11 +1,12 @@ package com.databricks.labs.mosaic.test -import com.databricks.labs.mosaic.MOSAIC_GDAL_NATIVE +import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_GDAL_NATIVE} trait SparkCodeGenSuite extends SparkSuite { override def beforeAll(): Unit = { super.beforeAll() + spark.conf.set(MOSAIC_TEST, "true") spark.conf.set(MOSAIC_GDAL_NATIVE, "false") spark.conf.set("spark.sql.codegen.factoryMode", "CODEGEN_ONLY") spark.conf.set("spark.sql.codegen.fallback", "false") diff --git a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala index 6e40e5350..38340783b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala @@ -1,14 +1,15 @@ package com.databricks.labs.mosaic.test +import com.databricks.labs.mosaic.MOSAIC_TEST import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, TestSuite} - import org.apache.spark.sql._ trait SparkSuite extends TestSuite with BeforeAndAfterAll { var sparkConf: SparkConf = new SparkConf(false) + .set(MOSAIC_TEST, "true") .set("spark.executor.extraLibraryPath", "/usr/lib/gdal") .set("spark.sql.parquet.compression.codec", "uncompressed") @transient private var _sc: SparkContext = _ diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 02be8c870..06ab1f1df 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -1,7 +1,7 @@ package org.apache.spark.sql.test +import com.databricks.labs.mosaic._ import com.databricks.labs.mosaic.gdal.MosaicGDAL -import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_TEST_DBR, MOSAIC_GDAL_NATIVE, MOSAIC_RASTER_CHECKPOINT} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.gdal.gdal.gdal From dfdfdff1625f02d090ed1ee95b8524b84f555087 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 16:21:08 -0500 Subject: [PATCH 011/118] added additional system property since spark not always used in tests involving MosaicContext. 
--- .../com/databricks/labs/mosaic/functions/MosaicContext.scala | 5 +++-- .../labs/mosaic/functions/MosaicRegistryBehaviors.scala | 3 +-- .../databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 98fed6477..f18ade96d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -997,8 +997,9 @@ object MosaicContext extends Logging { val isML = sparkVersion.contains("-ml-") val isPhoton = sparkVersion.contains("-photon-") val isTest = ( - spark.conf.get(MOSAIC_TEST, "false").toBoolean == true && - spark.conf.get(MOSAIC_TEST_DBR, "false").toBoolean == false + (spark.conf.get(MOSAIC_TEST, "false").toBoolean == true && + spark.conf.get(MOSAIC_TEST_DBR, "false").toBoolean == false) || + (System.getProperty(MOSAIC_TEST, "false").toBoolean == true) ) val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala index 6a0158151..4b78b9d53 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala @@ -38,8 +38,7 @@ object MosaicRegistryBehaviors extends MockFactory { ix.name _ when () returns H3.name val gapi = stub[GeometryAPI] gapi.name _ when () returns JTS.name - spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") + System.setProperty(MOSAIC_TEST, "true") MosaicContext.build(ix, gapi) } diff --git a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala 
b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala index 64ba5734e..3abc9e6de 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala @@ -1,6 +1,6 @@ package com.databricks.labs.mosaic.test -import com.databricks.labs.mosaic.{MOSAIC_TEST} +import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.{GeometryAPI, JTS} import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -180,8 +180,7 @@ trait MosaicHelper extends BeforeAndAfterEach { self: Suite => /** Constructs the MosaicContext from its parts and calls `f`. */ protected def withMosaicContext(geometry: GeometryAPI, indexSystem: IndexSystem)(f: MosaicContext => Unit): Unit = { - spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") + System.setProperty(MOSAIC_TEST, "true") val mc: MosaicContext = MosaicContext.build(indexSystem, geometry) f(mc) From 63a0aa65b296cbc594c7f4a6062c042a8940374b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 17:46:35 -0500 Subject: [PATCH 012/118] Simplified isTest logic in MosaicContext. 
--- .../databricks/labs/mosaic/functions/MosaicContext.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index f18ade96d..29447c347 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -994,15 +994,15 @@ object MosaicContext extends Logging { // noinspection ScalaStyle,ScalaWeakerAccess def checkDBR(spark: SparkSession): Boolean = { val sparkVersion = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0") + val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt + val isML = sparkVersion.contains("-ml-") val isPhoton = sparkVersion.contains("-photon-") val isTest = ( - (spark.conf.get(MOSAIC_TEST, "false").toBoolean == true && - spark.conf.get(MOSAIC_TEST_DBR, "false").toBoolean == false) || - (System.getProperty(MOSAIC_TEST, "false").toBoolean == true) + dbrMajor == 0 + && !spark.conf.getAll.exists(_._1.startsWith("spark.databricks.clusterUsageTags.")) ) - val dbrMajor = sparkVersion.split("-").head.split("\\.").head.toInt if (dbrMajor != 13 && !isTest) { val msg = """|DEPRECATION ERROR: | Mosaic v0.4.x series only supports Databricks Runtime 13. From e5e4176af294ba474dc6d3183cbedb48e634cde1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 18:30:48 -0500 Subject: [PATCH 013/118] streamline isTest logic. 
--- src/main/scala/com/databricks/labs/mosaic/package.scala | 1 - .../labs/mosaic/functions/MosaicContextBehaviors.scala | 4 ---- .../labs/mosaic/functions/MosaicRegistryBehaviors.scala | 1 - .../databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala | 1 - .../org/apache/spark/sql/test/SharedSparkSessionGDAL.scala | 4 ---- 5 files changed, 11 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index bb2bec3a7..3997c7165 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -29,7 +29,6 @@ package object mosaic { val MOSAIC_RASTER_RE_TILE_ON_READ = "retile_on_read" val MOSAIC_TEST = "spark.databricks.labs.mosaic.is.test" - val MOSAIC_TEST_DBR = "spark.databricks.labs.mosaic.is.test.dbr" def read: MosaicDataFrameReader = new MosaicDataFrameReader(SparkSession.builder().getOrCreate()) diff --git a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala index 448af4d4f..a4afc3a1a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicContextBehaviors.scala @@ -249,8 +249,6 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def throwErrors(): Unit = { - spark.conf.set(MOSAIC_TEST_DBR, "true") - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-x") an[Exception] should be thrownBy MosaicContext.checkDBR(spark) @@ -268,8 +266,6 @@ trait MosaicContextBehaviors extends MosaicSpatialQueryTest { } def noErrors(): Unit = { - spark.conf.set(MOSAIC_TEST_DBR, "true") - spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "13-ml-x") noException should be thrownBy MosaicContext.checkDBR(spark) diff --git 
a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala index 4b78b9d53..e47f3c797 100644 --- a/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/functions/MosaicRegistryBehaviors.scala @@ -38,7 +38,6 @@ object MosaicRegistryBehaviors extends MockFactory { ix.name _ when () returns H3.name val gapi = stub[GeometryAPI] gapi.name _ when () returns JTS.name - System.setProperty(MOSAIC_TEST, "true") MosaicContext.build(ix, gapi) } diff --git a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala index 3abc9e6de..d68fd9aab 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala @@ -180,7 +180,6 @@ trait MosaicHelper extends BeforeAndAfterEach { self: Suite => /** Constructs the MosaicContext from its parts and calls `f`. 
*/ protected def withMosaicContext(geometry: GeometryAPI, indexSystem: IndexSystem)(f: MosaicContext => Unit): Unit = { - System.setProperty(MOSAIC_TEST, "true") val mc: MosaicContext = MosaicContext.build(indexSystem, geometry) f(mc) diff --git a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala index 06ab1f1df..a720b3f9b 100644 --- a/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala +++ b/src/test/scala/org/apache/spark/sql/test/SharedSparkSessionGDAL.scala @@ -14,8 +14,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def sparkConf: SparkConf = { super.sparkConf .set(MOSAIC_GDAL_NATIVE, "true") - super.sparkConf - .set(MOSAIC_TEST, "true") } override def createSparkSession: TestSparkSession = { @@ -32,8 +30,6 @@ trait SharedSparkSessionGDAL extends SharedSparkSession { override def beforeEach(): Unit = { super.beforeEach() - this.spark.conf.set("spark.databricks.clusterUsageTags.sparkVersion", "0") - this.spark.conf.set(MOSAIC_TEST_DBR, "false") MosaicGDAL.enableGDAL(this.spark) gdal.AllRegister() } From 304b688e58356df6ef05cbb4b069ce8a4d3a6eb7 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 19:17:52 -0500 Subject: [PATCH 014/118] cleanup scala tests from exploratory isTest logic. 
--- src/main/scala/com/databricks/labs/mosaic/package.scala | 1 - .../labs/mosaic/datasource/OGRFileFormatTest.scala | 2 -- .../datasource/multiread/RasterAsGridReaderTest.scala | 6 +----- .../mosaic/expressions/format/ConvertToBehaviors.scala | 4 ---- .../expressions/geometry/FlattenPolygonsBehaviors.scala | 7 ------- .../mosaic/expressions/geometry/ST_BufferBehaviors.scala | 4 ---- .../expressions/geometry/ST_ConvexHullBehaviors.scala | 4 ---- .../mosaic/expressions/geometry/ST_DistanceBehaviors.scala | 4 ---- .../expressions/geometry/ST_HaversineBehaviors.scala | 4 ---- .../expressions/geometry/ST_IntersectionBehaviors.scala | 6 ------ .../expressions/geometry/ST_IntersectsBehaviors.scala | 6 ------ .../mosaic/expressions/geometry/ST_RotateBehaviors.scala | 4 ---- .../mosaic/expressions/geometry/ST_ScaleBehaviors.scala | 4 ---- .../mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala | 3 --- .../mosaic/expressions/geometry/ST_SimplifyBehaviors.scala | 4 ---- .../expressions/geometry/ST_TransformBehaviors.scala | 3 --- .../expressions/geometry/ST_TranslateBehaviors.scala | 4 ---- .../mosaic/expressions/geometry/ST_UnionBehaviors.scala | 5 ----- .../expressions/raster/RST_BandMetadataBehaviors.scala | 2 -- .../expressions/raster/RST_BoundingBoxBehaviors.scala | 2 -- .../labs/mosaic/expressions/raster/RST_ClipBehaviors.scala | 2 -- .../expressions/raster/RST_CombineAvgAggBehaviors.scala | 2 -- .../expressions/raster/RST_CombineAvgBehaviors.scala | 2 -- .../expressions/raster/RST_DerivedBandAggBehaviors.scala | 2 -- .../expressions/raster/RST_DerivedBandBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_FromBandsBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_FromFileBehaviors.scala | 2 -- .../expressions/raster/RST_GeoReferenceBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_GetNoDataBehaviors.scala | 2 -- .../expressions/raster/RST_GetSubdatasetBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_HeightBehaviors.scala | 2 -- 
.../expressions/raster/RST_InitNoDataBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_IsEmptyBehaviors.scala | 2 -- .../expressions/raster/RST_MapAlgebraBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_MemSizeBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_MergeAggBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_MergeBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_MetadataBehaviors.scala | 2 -- .../labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_NumBandsBehaviors.scala | 2 -- .../expressions/raster/RST_PixelHeightBehaviors.scala | 2 -- .../expressions/raster/RST_PixelWidthBehaviors.scala | 2 -- .../expressions/raster/RST_RasterToGridAvgBehaviors.scala | 2 -- .../raster/RST_RasterToGridCountBehaviors.scala | 2 -- .../expressions/raster/RST_RasterToGridMaxBehaviors.scala | 2 -- .../raster/RST_RasterToGridMedianBehaviors.scala | 2 -- .../expressions/raster/RST_RasterToGridMinBehaviors.scala | 2 -- .../raster/RST_RasterToWorldCoordBehaviors.scala | 2 -- .../raster/RST_RasterToWorldCoordXBehaviors.scala | 2 -- .../raster/RST_RasterToWorldCoordYBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_ReTileBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_RotationBehaviors.scala | 2 -- .../labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_ScaleXBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_ScaleYBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_SetNoDataBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_SkewXBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_SkewYBehaviors.scala | 2 -- .../expressions/raster/RST_SubdatasetsBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_SummaryBehaviors.scala | 2 -- .../expressions/raster/RST_TessellateBehaviors.scala | 2 -- .../raster/RST_ToOverlappingTilesBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_TryOpenBehaviors.scala | 2 -- 
.../expressions/raster/RST_UpperLeftXBehaviors.scala | 2 -- .../expressions/raster/RST_UpperLeftYBehaviors.scala | 2 -- .../mosaic/expressions/raster/RST_WidthBehaviors.scala | 2 -- .../raster/RST_WorldToRasterCoordBehaviors.scala | 2 -- .../raster/RST_WorldToRasterCoordXBehaviors.scala | 2 -- .../raster/RST_WorldToRasterCoordYBehaviors.scala | 2 -- .../databricks/labs/mosaic/models/knn/SpatialKNNTest.scala | 3 --- .../labs/mosaic/sql/extensions/TestSQLExtensions.scala | 5 ----- .../labs/mosaic/test/MosaicSpatialQueryTest.scala | 1 - .../databricks/labs/mosaic/test/SparkCodeGenSuite.scala | 3 +-- .../scala/com/databricks/labs/mosaic/test/SparkSuite.scala | 2 -- 74 files changed, 2 insertions(+), 189 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/package.scala b/src/main/scala/com/databricks/labs/mosaic/package.scala index 3997c7165..58ee2f98e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/package.scala +++ b/src/main/scala/com/databricks/labs/mosaic/package.scala @@ -28,7 +28,6 @@ package object mosaic { val MOSAIC_RASTER_READ_AS_PATH = "as_path" val MOSAIC_RASTER_RE_TILE_ON_READ = "retile_on_read" - val MOSAIC_TEST = "spark.databricks.labs.mosaic.is.test" def read: MosaicDataFrameReader = new MosaicDataFrameReader(SparkSession.builder().getOrCreate()) diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala index 11feabf0c..f37bef949 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/OGRFileFormatTest.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.datasource -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.expressions.util.OGRReadeWithOffset import com.databricks.labs.mosaic.functions.MosaicContext import com.databricks.labs.mosaic.utils.PathUtils @@ -146,7 +145,6 @@ class 
OGRFileFormatTest extends QueryTest with SharedSparkSessionGDAL { test("OGRFileFormat should handle partial schema: ISSUE 351") { assume(System.getProperty("os.name") == "Linux") spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(H3, JTS) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala index 0b1d095e8..5e3a95bc1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/datasource/multiread/RasterAsGridReaderTest.scala @@ -1,7 +1,7 @@ package com.databricks.labs.mosaic.datasource.multiread import com.databricks.labs.mosaic.functions.MosaicContext -import com.databricks.labs.mosaic.{JTS, MOSAIC_TEST} +import com.databricks.labs.mosaic.JTS import com.databricks.labs.mosaic.core.index.H3IndexSystem import com.databricks.labs.mosaic.test.MosaicSpatialQueryTest import org.apache.spark.sql.test.SharedSparkSessionGDAL @@ -15,7 +15,6 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read netcdf with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val netcdf = "/binary/netcdf-coral/" @@ -38,7 +37,6 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read grib with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val grib = "/binary/grib-cams/" @@ -60,7 +58,6 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read tif with Raster As Grid Reader") { 
assume(System.getProperty("os.name") == "Linux") spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val tif = "/modis/" @@ -80,7 +77,6 @@ class RasterAsGridReaderTest extends MosaicSpatialQueryTest with SharedSparkSess test("Read zarr with Raster As Grid Reader") { assume(System.getProperty("os.name") == "Linux") spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") MosaicContext.build(H3IndexSystem, JTS) val zarr = "/binary/zarr-example/" diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala index d83c594a9..614d860f9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/format/ConvertToBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.format -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -14,7 +13,6 @@ trait ConvertToBehaviors extends QueryTest { def checkInputTypeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -61,7 +59,6 @@ trait ConvertToBehaviors extends QueryTest { def passthroughBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -71,7 +68,6 @@ trait ConvertToBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, 
geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) val wkts = getWKTRowsDf().select("wkt") diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala index 3fc135248..53d4c24cd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/FlattenPolygonsBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.core.types.{HexType, InternalGeometryType} @@ -18,7 +17,6 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenWKBPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -69,7 +67,6 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenWKTPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -120,7 +117,6 @@ trait FlattenPolygonsBehaviors extends QueryTest { def flattenCOORDSPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -182,7 +178,6 @@ trait 
FlattenPolygonsBehaviors extends QueryTest { def flattenHEXPolygon(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -244,7 +239,6 @@ trait FlattenPolygonsBehaviors extends QueryTest { def failDataTypeCheck(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -263,7 +257,6 @@ trait FlattenPolygonsBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala index bec55c0cc..674238e43 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_BufferBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -17,7 +16,6 @@ trait ST_BufferBehaviors extends QueryTest { def bufferBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -59,7 +57,6 @@ trait 
ST_BufferBehaviors extends QueryTest { def bufferCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -89,7 +86,6 @@ trait ST_BufferBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala index 8b06a4e33..607e0f3e5 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ConvexHullBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,7 +14,6 @@ trait ST_ConvexHullBehaviors extends QueryTest { def convexHullBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -40,7 +38,6 @@ trait ST_ConvexHullBehaviors extends QueryTest { def convexHullCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -74,7 +71,6 @@ trait 
ST_ConvexHullBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala index e7ad99e29..54de04600 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_DistanceBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.geometry.MosaicGeometry import com.databricks.labs.mosaic.core.index._ @@ -20,7 +19,6 @@ trait ST_DistanceBehaviors extends QueryTest { def distanceBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ val sc = spark @@ -54,7 +52,6 @@ trait ST_DistanceBehaviors extends QueryTest { def distanceCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -91,7 +88,6 @@ trait ST_DistanceBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala index e6cd3184a..f9fbd8c4b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_HaversineBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,7 +14,6 @@ trait ST_HaversineBehaviors extends QueryTest { def haversineBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -28,7 +26,6 @@ trait ST_HaversineBehaviors extends QueryTest { def haversineCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -49,7 +46,6 @@ trait ST_HaversineBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala index e2605776a..c7ec70f36 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum @@ -22,7 +21,6 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -74,7 +72,6 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionAggBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -170,7 +167,6 @@ trait ST_IntersectionBehaviors extends QueryTest { def selfIntersectionBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -217,7 +213,6 @@ trait ST_IntersectionBehaviors extends QueryTest { def intersectionCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -244,7 +239,6 @@ trait ST_IntersectionBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = 
MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala index 496d015d7..4a480dfd3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.core.types.model.GeometryTypeEnum @@ -19,7 +18,6 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -95,7 +93,6 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsAggBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -151,7 +148,6 @@ trait ST_IntersectsBehaviors extends QueryTest { def selfIntersectsBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI, resolution: Int): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) import mc.functions._ mc.register(spark) @@ -196,7 +192,6 @@ trait ST_IntersectsBehaviors extends QueryTest { def intersectsCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { 
spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -223,7 +218,6 @@ trait ST_IntersectsBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala index 2d45cbfa4..3ff0d38b0 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_RotateBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -16,7 +15,6 @@ trait ST_RotateBehaviors extends QueryTest { def rotateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -38,7 +36,6 @@ trait ST_RotateBehaviors extends QueryTest { def rotateCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -68,7 +65,6 @@ trait ST_RotateBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - 
spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala index 7b901d7a0..8cd3962ba 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_ScaleBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,7 +14,6 @@ trait ST_ScaleBehaviors extends QueryTest { def scaleBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -37,7 +35,6 @@ trait ST_ScaleBehaviors extends QueryTest { def scaleCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -67,7 +64,6 @@ trait ST_ScaleBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala index 33be242bd..062c5bcc9 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SetSRIDBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -14,7 +13,6 @@ trait ST_SetSRIDBehaviors extends QueryTest { def setSRIDBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -66,7 +64,6 @@ trait ST_SetSRIDBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala index 47f4dc458..672ca0efa 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_SimplifyBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -17,7 +16,6 @@ trait ST_SimplifyBehaviors extends QueryTest { def simplifyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - 
spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -54,7 +52,6 @@ trait ST_SimplifyBehaviors extends QueryTest { def simplifyCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -84,7 +81,6 @@ trait ST_SimplifyBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) import mc.functions._ diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala index 4b4576d56..6c11580ec 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TransformBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -22,7 +21,6 @@ trait ST_TransformBehaviors extends QueryTest { def reprojectGeometries(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -119,7 +117,6 @@ trait ST_TransformBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { 
spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala index aaca52282..39eb08ef6 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_TranslateBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -15,7 +14,6 @@ trait ST_TranslateBehaviors extends QueryTest { def translateBehaviour(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -38,7 +36,6 @@ trait ST_TranslateBehaviors extends QueryTest { def translateCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -68,7 +65,6 @@ trait ST_TranslateBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala 
index a5ac7ad96..3c51530e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_UnionBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.geometry -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext @@ -17,7 +16,6 @@ trait ST_UnionBehaviors extends QueryTest { def unionBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -43,7 +41,6 @@ trait ST_UnionBehaviors extends QueryTest { def unionAggBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -82,7 +79,6 @@ trait ST_UnionBehaviors extends QueryTest { def unionCodegen(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) val sc = spark import mc.functions._ @@ -110,7 +106,6 @@ trait ST_UnionBehaviors extends QueryTest { def auxiliaryMethods(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register(spark) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala index bec094343..6d570c757 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetadataBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_BandMetadataBehaviors extends QueryTest { def bandMetadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { val sc = spark spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala index 444fd3662..e8c7cc214 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBoxBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_BoundingBoxBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala index a59e2a4d3..397f02a95 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ClipBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_ClipBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala index 8788b0636..7756d0ff8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAggBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_CombineAvgAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff 
--git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala index 07f07735f..8ce57f5b8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_CombineAvgBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala index db4526f0c..03d82b955 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAggBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_DerivedBandAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") 
- spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala index 0c4e1eec6..753392b01 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_DerivedBandBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala index c90f56eb2..24e5897f7 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBandsBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_FromBandsBehaviors extends QueryTest { // noinspection MapGetGet def 
behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala index 0b0260d46..625123bad 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFileBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_FromFileBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala index 86ee3803a..e5ab3c159 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReferenceBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import 
com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_GeoReferenceBehaviors extends QueryTest { //noinspection MapGetGet def geoReferenceBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala index 01d555f26..69bf7f0f9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoDataBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_GetNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala index 52f307ab3..99307a158 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdatasetBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST 
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_GetSubdatasetBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala index b9ae2f98b..a39c02ca3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_HeightBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_HeightBehaviors extends QueryTest { def heightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala index d1659e98f..1af81d15d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoDataBehaviors.scala @@ -1,6 +1,5 @@ package 
com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_InitNoDataBehaviors extends QueryTest { //noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala index 490de50ff..6ce9771e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmptyBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_IsEmptyBehaviors extends QueryTest { // noinspection AccessorLikeMethodIsUnit def isEmptyBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala index 364613f93..0f896ad4e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_MapAlgebraBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala index 51a54470a..316482c1c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSizeBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_MemSizeBehaviors extends QueryTest { def memSizeBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala index 060795d4e..38d5d3ed9 100644 --- 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_MergeAggBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala index 491210593..fae8e5913 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_MergeBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala index 6785a783d..c335ee0e9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetadataBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_MetadataBehaviors extends QueryTest { def metadataBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala index fd1c51913..0ef995280 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_NDVIBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala index 9509acf38..49800ed40 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBandsBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_NumBandsBehaviors extends QueryTest { def numBandsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala index ff287b7f2..0d6565300 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeightBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_PixelHeightBehaviors extends QueryTest { def pixelHeightBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = 
MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala index 449e9f03c..346172bfb 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidthBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_PixelWidthBehaviors extends QueryTest { def pixelWidthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala index 5d9501476..d41882506 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvgBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToGridAvgBehaviors extends QueryTest { def rasterToGridAvgBehavior(indexSystem: IndexSystem, geometryAPI: 
GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala index 5cffe5073..5d10766f3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCountBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToGridCountBehaviors extends QueryTest { def rasterToGridCountBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala index be2fdea25..df5d8ee5e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMaxBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import 
com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToGridMaxBehaviors extends QueryTest { def rasterToGridMaxBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala index 664ab7280..ade27ed78 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedianBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToGridMedianBehaviors extends QueryTest { def rasterToGridMedianBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala index 921b17334..38ab49db1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMinBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import 
com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToGridMinBehaviors extends QueryTest { def rasterToGridMinBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala index e7b59a03d..cbd00572c 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToWorldCoordBehaviors extends QueryTest { def rasterToWorldCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala index 4af151c6a..2fab5d5ec 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordXBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToWorldCoordXBehaviors extends QueryTest { def rasterToWorldCoordX(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala index 66f27fdb6..9da7b5ec9 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordYBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_RasterToWorldCoordYBehaviors extends QueryTest { def rasterToWorldCoordY(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala index 
adab09401..118a64e2a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTileBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_ReTileBehaviors extends QueryTest { def retileBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala index 3810fecab..9bdbb7ef1 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_RotationBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_RotationBehaviors extends QueryTest { def rotationBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala index 1285b6c09..31fde84c4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRIDBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_SRIDBehaviors extends QueryTest { def sridBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala index c389856cf..3543fb469 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleXBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_ScaleXBehaviors extends QueryTest { def scaleXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git 
a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala index b989158ce..1251f660a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleYBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_ScaleYBehaviors extends QueryTest { def scaleYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala index d13e17783..2d35388fd 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoDataBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ trait RST_SetNoDataBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = 
MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala index 61ef921ff..f7e745613 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewXBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_SkewXBehaviors extends QueryTest { def skewXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala index dd4b80e0e..ef444858f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewYBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_SkewYBehaviors extends QueryTest { def skewYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, 
"true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala index df1d7ebd2..c8f7d435b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SubdatasetsBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_SubdatasetsBehaviors extends QueryTest { def subdatasetsBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala index 85ba51f2c..610e7c657 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_SummaryBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_SummaryBehaviors extends QueryTest { def summaryBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { 
spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala index a7b7965c9..c346e82db 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TessellateBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_TessellateBehaviors extends QueryTest { // noinspection MapGetGet def tessellateBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala index cae81678e..560e54dee 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTilesBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -13,7 +12,6 @@ 
trait RST_ToOverlappingTilesBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala index 78dff0745..257d1cfc3 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpenBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_TryOpenBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala index c8b4474ee..e997a5c87 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftXBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import 
com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_UpperLeftXBehaviors extends QueryTest { def upperLeftXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala index 35bd61d66..d98824f3d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftYBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_UpperLeftYBehaviors extends QueryTest { def upperLeftYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala index c5cc5d22f..f3fd3c416 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WidthBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import 
com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -11,7 +10,6 @@ trait RST_WidthBehaviors extends QueryTest { def widthBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala index 0700617bd..baef5fa90 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_WorldToRasterCoordBehaviors extends QueryTest { def worldToRasterCoordBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala index d698b18a0..f9fd82dff 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordXBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_WorldToRasterCoordXBehaviors extends QueryTest { def worldToRasterCoordXBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala index 3f4c276b2..bffba09c8 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordYBehaviors.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.expressions.raster -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI import com.databricks.labs.mosaic.core.index.IndexSystem import com.databricks.labs.mosaic.functions.MosaicContext @@ -12,7 +11,6 @@ trait RST_WorldToRasterCoordYBehaviors extends QueryTest { def worldToRasterCoordYBehavior(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { spark.sparkContext.setLogLevel("FATAL") - spark.conf.set(MOSAIC_TEST, "true") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala index 
495d14bd5..e3096ba84 100644 --- a/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/models/knn/SpatialKNNTest.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.models.knn -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.JTS import com.databricks.labs.mosaic.core.index.{BNGIndexSystem, H3IndexSystem} import com.databricks.labs.mosaic.functions.MosaicContext @@ -18,7 +17,6 @@ class SpatialKNNTest extends AnyFlatSpec with SpatialKNNBehaviors with SparkSuit .set(MOSAIC_INDEX_SYSTEM, "H3") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") - .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", "uncompressed") @@ -30,7 +28,6 @@ class SpatialKNNTest extends AnyFlatSpec with SpatialKNNBehaviors with SparkSuit .set(MOSAIC_INDEX_SYSTEM, "BNG") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") - .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .set("spark.sql.parquet.compression.codec", "uncompressed") diff --git a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala index 07412eaa3..55bdcf1e2 100644 --- a/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala +++ b/src/test/scala/com/databricks/labs/mosaic/sql/extensions/TestSQLExtensions.scala @@ -16,7 +16,6 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_INDEX_SYSTEM, "H3") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") - .set(MOSAIC_TEST, "true") 
.set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") var spark = withConf(conf) spark.sparkContext.setLogLevel("FATAL") @@ -26,7 +25,6 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_INDEX_SYSTEM, "BNG") .set(MOSAIC_GEOMETRY_API, "JTS") .set(MOSAIC_RASTER_API, "GDAL") - .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) spark.sparkContext.setLogLevel("FATAL") @@ -36,7 +34,6 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa .set(MOSAIC_INDEX_SYSTEM, "DummyIndex") .set(MOSAIC_GEOMETRY_API, "DummyAPI") .set(MOSAIC_RASTER_API, "GDAL") - .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQL") spark = withConf(conf) spark.sparkContext.setLogLevel("FATAL") @@ -45,7 +42,6 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa } conf = new SparkConf(false) - .set(MOSAIC_TEST, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicSQLDefault") spark = withConf(conf) spark.sparkContext.setLogLevel("FATAL") @@ -57,7 +53,6 @@ class TestSQLExtensions extends AnyFlatSpec with SQLExtensionsBehaviors with Spa assume(System.getProperty("os.name") == "Linux") val conf = new SparkConf(loadDefaults = false) - .set(MOSAIC_TEST, "true") .set(MOSAIC_GDAL_NATIVE, "true") .set("spark.sql.extensions", "com.databricks.labs.mosaic.sql.extensions.MosaicGDAL") val spark = withConf(conf) diff --git a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala index d68fd9aab..d9e5fd534 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/MosaicSpatialQueryTest.scala @@ -1,6 +1,5 @@ package 
com.databricks.labs.mosaic.test -import com.databricks.labs.mosaic.MOSAIC_TEST import com.databricks.labs.mosaic.core.geometry.api.{GeometryAPI, JTS} import com.databricks.labs.mosaic.core.index._ import com.databricks.labs.mosaic.functions.MosaicContext diff --git a/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala b/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala index 6870da193..730af9d9d 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/SparkCodeGenSuite.scala @@ -1,12 +1,11 @@ package com.databricks.labs.mosaic.test -import com.databricks.labs.mosaic.{MOSAIC_TEST, MOSAIC_GDAL_NATIVE} +import com.databricks.labs.mosaic.MOSAIC_GDAL_NATIVE trait SparkCodeGenSuite extends SparkSuite { override def beforeAll(): Unit = { super.beforeAll() - spark.conf.set(MOSAIC_TEST, "true") spark.conf.set(MOSAIC_GDAL_NATIVE, "false") spark.conf.set("spark.sql.codegen.factoryMode", "CODEGEN_ONLY") spark.conf.set("spark.sql.codegen.fallback", "false") diff --git a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala index 38340783b..2346ad05f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala +++ b/src/test/scala/com/databricks/labs/mosaic/test/SparkSuite.scala @@ -1,6 +1,5 @@ package com.databricks.labs.mosaic.test -import com.databricks.labs.mosaic.MOSAIC_TEST import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, TestSuite} import org.apache.spark.sql._ @@ -9,7 +8,6 @@ trait SparkSuite extends TestSuite with BeforeAndAfterAll { var sparkConf: SparkConf = new SparkConf(false) - .set(MOSAIC_TEST, "true") .set("spark.executor.extraLibraryPath", "/usr/lib/gdal") .set("spark.sql.parquet.compression.codec", "uncompressed") @transient private var _sc: SparkContext = _ From b2c31bd5d8ac0c8738cfd0df79e30a57b889d3cc Mon Sep 
17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 19:30:05 -0500 Subject: [PATCH 015/118] Python setup_ test related. --- python/test/utils/setup_fuse.py | 7 +++---- python/test/utils/setup_gdal.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 7fcebda4c..1e91c51ab 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -5,8 +5,6 @@ from test.context import api -FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" - class FuseInstaller: def __init__( self, with_mosaic_pip, with_gdal, @@ -18,6 +16,7 @@ def __init__( self.with_gdal = with_gdal self.jar_copy = jar_copy self.jni_so_copy = jni_so_copy + self.FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" def __del__(self): self._temp_dir.cleanup() @@ -29,12 +28,12 @@ def do_op(self): self.with_gdal, self.jar_copy, self.jni_so_copy, - script_out_name=FUSE_INIT_SCRIPT_FILENAME + script_out_name=self.FUSE_INIT_SCRIPT_FILENAME ) def run_init_script(self): fuse_install_script_target = os.path.join( - self._temp_dir.name, FUSE_INIT_SCRIPT_FILENAME + self._temp_dir.name, self.FUSE_INIT_SCRIPT_FILENAME ) os.chmod(fuse_install_script_target, mode=0x744) result = subprocess.run( diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 66b647db2..d244bcf45 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -5,22 +5,21 @@ from test.context import api -GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" - class GDALInstaller: def __init__(self): self._site_packages = working_set.find(Requirement("keplergl")).location self._temp_dir = tempfile.TemporaryDirectory() + self.GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" def __del__(self): self._temp_dir.cleanup() def copy_objects(self): - api.setup_gdal(self._temp_dir.name, script_out_name=GDAL_INIT_SCRIPT_FILENAME) + api.setup_gdal(self._temp_dir.name, 
script_out_name=self.GDAL_INIT_SCRIPT_FILENAME) def run_init_script(self): gdal_install_script_target = os.path.join( - self._temp_dir.name, GDAL_INIT_SCRIPT_FILENAME + self._temp_dir.name, self.GDAL_INIT_SCRIPT_FILENAME ) os.chmod(gdal_install_script_target, mode=0x744) result = subprocess.run( From 15ff5b938567e885e637a9f681b0e78a4d7e0e45 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 19:58:04 -0500 Subject: [PATCH 016/118] adjust do_op. --- python/test/utils/setup_fuse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 1e91c51ab..17c4ed245 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -26,8 +26,8 @@ def do_op(self): self._temp_dir.name, self.with_mosaic_pip, self.with_gdal, - self.jar_copy, - self.jni_so_copy, + jar_copy=self.jar_copy, + jni_so_copy=self.jni_so_copy, script_out_name=self.FUSE_INIT_SCRIPT_FILENAME ) From ccaeda4343caf06ace6cfc312225c195f4786b70 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 21:33:47 -0500 Subject: [PATCH 017/118] Modify to use branch script temporarily. 
--- python/mosaic/api/fuse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 1376d93a2..6cecf081c 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -68,7 +68,9 @@ def configure(self) -> None: script_out_path = f'{self.to_fuse_dir}/{self.script_out_name}' if with_script: # - start with the unconfigured script - script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' + # TODO: MODIFY AFTER PR MERGE + # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' + script_url = f'https://github.com/mjohns-databricks/mosaic/raw/gdal-jammy-3/scripts/{self.script_in_name}' script = requests.get(script_url, allow_redirects=True).text # - tokens used in script From 851384e1471dd016b798b3a0f3d5992ecd997a76 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 20 Dec 2023 22:24:21 -0500 Subject: [PATCH 018/118] Adding sudo to cp in script. --- scripts/mosaic-gdal-init.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 4ae6cf30b..c5395ef9f 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -67,9 +67,9 @@ then if [ $WITH_FUSE_SO == 1 ] then # copy from fuse dir - cp $FUSE_DIR/libgdalalljni.so /usr/lib - cp $FUSE_DIR/libgdalalljni.so.30 /usr/lib - cp $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib + sudo cp $FUSE_DIR/libgdalalljni.so /usr/lib + sudo cp $FUSE_DIR/libgdalalljni.so.30 /usr/lib + sudo cp $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github GITHUB_REPO_PATH=databrickslabs/mosaic/raw/main/resources/gdal/jammy From 6ec91ab3f690ecbcced680183a6a1469d2d7ab7a Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 21 Dec 2023 07:07:06 -0500 Subject: [PATCH 019/118] Adjusting Fuse cp for so files to be no-clobber. 
--- scripts/mosaic-gdal-init.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index c5395ef9f..9c57fa8b9 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -66,10 +66,10 @@ then # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] then - # copy from fuse dir - sudo cp $FUSE_DIR/libgdalalljni.so /usr/lib - sudo cp $FUSE_DIR/libgdalalljni.so.30 /usr/lib - sudo cp $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib + # copy from fuse dir with no-clobber + sudo cp -n $FUSE_DIR/libgdalalljni.so /usr/lib + sudo cp -n $FUSE_DIR/libgdalalljni.so.30 /usr/lib + sudo cp -n $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github GITHUB_REPO_PATH=databrickslabs/mosaic/raw/main/resources/gdal/jammy From a82ec461cf1ecaf14ac1b2a10c9ba3ba447d5f2a Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 21 Dec 2023 08:39:13 -0500 Subject: [PATCH 020/118] - override to main for tests. - use sudo to download resources (for tests). 
--- python/mosaic/api/fuse.py | 13 ++++--------- python/test/utils/setup_fuse.py | 1 + python/test/utils/setup_gdal.py | 6 +++++- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 6cecf081c..adee1f39b 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,7 +3,7 @@ import os import pkg_resources import requests -import shutil +import subprocess __all__ = ["SetupMgr", "setup_fuse_install"] @@ -139,7 +139,7 @@ def configure(self) -> None: # --- end of script config --- with_resources = self.jar_copy or self.jni_so_copy - if with_resources: + if with_resources: # - handle jar copy if self.jar_copy: # url and version details @@ -148,21 +148,16 @@ def configure(self) -> None: if github_version == 'main': latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] - # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' - jar_request = requests.get(jar_url, allow_redirects=True, stream=True) - with open(f'{self.to_fuse_dir}/{jar_filename}', 'wb') as jar_file: - shutil.copyfileobj(jar_request.raw, jar_file) + subprocess.run(['sudo', 'wget', '-P', self.to_fuse_dir, jar_url]) # - handle so copy if self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_url = f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}' - so_request = requests.get(so_url, allow_redirects=True, stream=True) - with open(f'{self.to_fuse_dir}/{so_filename}', 'wb') as so_file: - shutil.copyfileobj(so_request.raw, so_file) + subprocess.run(['sudo', 'wget', '-P', self.to_fuse_dir, so_url]) # - echo status print(f"::: Install setup complete :::") diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 
17c4ed245..9097dbd34 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -28,6 +28,7 @@ def do_op(self): self.with_gdal, jar_copy=self.jar_copy, jni_so_copy=self.jni_so_copy, + override_mosaic_version="main", script_out_name=self.FUSE_INIT_SCRIPT_FILENAME ) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index d244bcf45..c4f082280 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -15,7 +15,11 @@ def __del__(self): self._temp_dir.cleanup() def copy_objects(self): - api.setup_gdal(self._temp_dir.name, script_out_name=self.GDAL_INIT_SCRIPT_FILENAME) + api.setup_gdal( + self._temp_dir.name, + override_mosaic_version="main", + script_out_name=self.GDAL_INIT_SCRIPT_FILENAME + ) def run_init_script(self): gdal_install_script_target = os.path.join( From f1a1ab907566002e5885db762af8194f5019a2c2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 21 Dec 2023 12:39:52 -0500 Subject: [PATCH 021/118] - Adjusted test to avoid repeat installs. - better mosaic + github version handling. 
--- python/mosaic/api/fuse.py | 26 +++++++++++++++++++------- python/test/test_fuse_install.py | 24 ------------------------ 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index adee1f39b..cf7bb1b6f 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -53,6 +53,11 @@ def configure(self) -> None: set(self.override_mosaic_version).issubset(set('=0123456789.')) ): github_version = self.override_mosaic_version.replace('=','') + elif ( + self.override_mosaic_version is not None and + self.override_mosaic_version == 'main' + ): + github_version = 'main' elif mosaic_version is None: github_version = 'main' @@ -74,7 +79,7 @@ def configure(self) -> None: script = requests.get(script_url, allow_redirects=True).text # - tokens used in script - SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added + SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added SCRIPT_GITHUB_VERSION_TOKEN = 'GITHUB_VERSION=__GITHUB_VERSION__' SCRIPT_MOSAIC_PIP_VERSION_TOKEN = "MOSAIC_PIP_VERSION='__MOSAIC_PIP_VERSION__'" # <- ' added SCRIPT_WITH_MOSAIC_TOKEN = 'WITH_MOSAIC=0' @@ -120,16 +125,21 @@ def configure(self) -> None: # - set the mosaic version for pip pip_str='' - if self.override_mosaic_version is not None: + if ( + self.override_mosaic_version is not None and + not self.override_mosaic_version == 'main' + ): pip_str = f'=={self.override_mosaic_version}' - if any(c in self.override_mosaic_version for c in ['=','<','<']): - pip_str = self.override_mosaic_version + if any(c in self.override_mosaic_version for c in ['=','>','<']): + pip_str = f"""{self.override_mosaic_version.replace("'","").replace('"','')}""" + else: + pip_str = f"=={self.override_mosaic_version}" elif mosaic_version is not None: - pip_str = f'=={mosaic_version}' + pip_str = f"=={mosaic_version}" script = script.replace( SCRIPT_MOSAIC_PIP_VERSION_TOKEN, SCRIPT_MOSAIC_PIP_VERSION_TOKEN.replace( - '__MOSAIC_PIP_VERSION__', 
pip_str) + "__MOSAIC_PIP_VERSION__", pip_str) ) # - write the configured init script @@ -139,6 +149,7 @@ def configure(self) -> None: # --- end of script config --- with_resources = self.jar_copy or self.jni_so_copy + release_version = None if with_resources: # - handle jar copy if self.jar_copy: @@ -162,7 +173,8 @@ def configure(self) -> None: # - echo status print(f"::: Install setup complete :::") print(f"- Settings: 'with_mosaic_pip'? {self.with_mosaic_pip}, 'with_gdal'? {self.with_gdal}, 'with_ubuntugis'? {self.with_ubuntugis}") - print(f" 'override_mosaic_version'? {self.override_mosaic_version}, 'jar_copy'? {self.jar_copy}, 'jni_so_copy'? {self.jni_so_copy}") + print(f" 'jar_copy'? {self.jar_copy}, 'jni_so_copy'? {self.jni_so_copy}, 'override_mosaic_version'? {self.override_mosaic_version}") + print(f"- Derived: 'mosaic_version'? {mosaic_version}, 'github_version'? {github_version}, 'release_version'? {release_version}, 'pip_str'? {pip_str}") print(f"- Fuse Dir: '{self.to_fuse_dir}'") if with_script: print(f"- Init Script: configured and stored at '{self.script_out_name}'; ", end='') diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index 8caa16c6e..dd63754be 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -30,12 +30,6 @@ def test_setup_sh_pip_only(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - try: - installer_result = installer.run_init_script() - except Exception: - self.fail("Running fuse init script raised an exception.") - self.assertEqual(installer_result, 0) - files = installer.list_files() self.assertEquals(len(files), 1) self.assertEquals(files[0][-3:].lower(), '.sh') @@ -47,12 +41,6 @@ def test_setup_sh_gdal(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - try: - installer_result = installer.run_init_script() - except Exception: - self.fail("Running fuse init script raised an 
exception.") - self.assertEqual(installer_result, 0) - files = installer.list_files() self.assertEquals(len(files), 1) self.assertEquals(files[0][-3:].lower(), '.sh') @@ -64,12 +52,6 @@ def test_setup_sh_gdal_jni(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - try: - installer_result = installer.run_init_script() - except Exception: - self.fail("Running fuse init script raised an exception.") - self.assertEqual(installer_result, 0) - files = installer.list_files() self.assertEqual(len(files), 4) @@ -90,12 +72,6 @@ def test_setup_sh_all(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - try: - installer_result = installer.run_init_script() - except Exception: - self.fail("Running fuse init script raised an exception.") - self.assertEqual(installer_result, 0) - files = installer.list_files() self.assertEqual(len(files), 5) From d6e5fd818664ccca87801d7fe88750c46d6fe9f7 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 21 Dec 2023 13:59:49 -0500 Subject: [PATCH 022/118] - reduce logging for tests. - wait for downloads. 
--- python/mosaic/api/fuse.py | 17 +++++++++++++---- python/test/utils/setup_fuse.py | 3 +-- python/test/utils/setup_gdal.py | 3 +-- python/test/utils/spark_test_case.py | 4 ++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index cf7bb1b6f..3cba08701 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -48,6 +48,9 @@ def configure(self) -> None: # may be used in pip install mosaic_version = get_install_mosaic_version() github_version = mosaic_version # <- valid or None + pip_str = '' + release_version = None + if ( self.override_mosaic_version is not None and set(self.override_mosaic_version).issubset(set('=0123456789.')) @@ -124,7 +127,6 @@ def configure(self) -> None: ) # - set the mosaic version for pip - pip_str='' if ( self.override_mosaic_version is not None and not self.override_mosaic_version == 'main' @@ -149,7 +151,6 @@ def configure(self) -> None: # --- end of script config --- with_resources = self.jar_copy or self.jni_so_copy - release_version = None if with_resources: # - handle jar copy if self.jar_copy: @@ -162,13 +163,21 @@ def configure(self) -> None: # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' - subprocess.run(['sudo', 'wget', '-P', self.to_fuse_dir, jar_url]) + jar_result = subprocess.run( + ['sudo', 'wget', '-P', self.to_fuse_dir, jar_url], + stdout=subprocess.DEVNULL + ) + print(f"jar '{jar_url}' download done... returncode? 
{jar_result.returncode}") # <- wait for return # - handle so copy if self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_url = f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}' - subprocess.run(['sudo', 'wget', '-P', self.to_fuse_dir, so_url]) + so_result = subprocess.run( + ['sudo', 'wget', '-P', self.to_fuse_dir, so_url], + stdout=subprocess.DEVNULL + ) + print(f"so '{so_url}' download done... returncode {so_result.returncode}") # <- wait for return # - echo status print(f"::: Install setup complete :::") diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 9097dbd34..72ac8d13e 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -39,10 +39,9 @@ def run_init_script(self): os.chmod(fuse_install_script_target, mode=0x744) result = subprocess.run( [fuse_install_script_target], - stdout=subprocess.PIPE, + stdout=subprocess.DEVNULL, env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), ) - print(result.stdout.decode()) return result.returncode def list_files(self): diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index c4f082280..90e4e3e25 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -28,10 +28,9 @@ def run_init_script(self): os.chmod(gdal_install_script_target, mode=0x744) result = subprocess.run( [gdal_install_script_target], - stdout=subprocess.PIPE, + stdout=subprocess.DEVNULL, env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), ) - print(result.stdout.decode()) return result.returncode def list_files(self): diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 2c92b5758..fc5bb8d67 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -23,9 +23,9 @@ def setUpClass(cls) -> None: .getOrCreate() ) 
cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") - cls.spark.sparkContext.setLogLevel("WARN") + cls.spark.sparkContext.setLogLevel("FATAL") @classmethod def tearDownClass(cls) -> None: - cls.spark.sparkContext.setLogLevel("warn") + cls.spark.sparkContext.setLogLevel("FATAL") cls.spark.stop() From 36582577059646f9f0d01de057f130b10e4d1ceb Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 15:21:23 -0500 Subject: [PATCH 023/118] wait for downloads for tests. --- python/mosaic/api/fuse.py | 37 ++++++++++++++++++++++------ python/test/test_fuse_install.py | 14 +++++------ python/test/utils/spark_test_case.py | 6 +++-- scripts/mosaic-gdal-init.sh | 8 +++--- 4 files changed, 44 insertions(+), 21 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 3cba08701..1f87263dc 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -4,6 +4,7 @@ import pkg_resources import requests import subprocess +import time __all__ = ["SetupMgr", "setup_fuse_install"] @@ -64,7 +65,7 @@ def configure(self) -> None: elif mosaic_version is None: github_version = 'main' - GITHUB_CONTENT_URL_BASE = 'https://github.com/databrickslabs/mosaic/raw' + GITHUB_CONTENT_URL_BASE = 'https://raw.githubusercontent.com/databrickslabs/mosaic' GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/v_{github_version}' if github_version == 'main': GITHUB_CONTENT_TAG_URL = f'{GITHUB_CONTENT_URL_BASE}/main' @@ -78,7 +79,7 @@ def configure(self) -> None: # - start with the unconfigured script # TODO: MODIFY AFTER PR MERGE # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' - script_url = f'https://github.com/mjohns-databricks/mosaic/raw/gdal-jammy-3/scripts/{self.script_in_name}' + script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' script = requests.get(script_url, allow_redirects=True).text # - tokens used in script @@ -161,23 +162,43 @@ 
def configure(self) -> None: latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar + jar_timeout = 30 jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' jar_result = subprocess.run( - ['sudo', 'wget', '-P', self.to_fuse_dir, jar_url], - stdout=subprocess.DEVNULL + ['sudo', 'wget', '-nv', '-P', self.to_fuse_dir, jar_url], + stdout=subprocess.DEVNULL, + timeout=jar_timeout ) - print(f"jar '{jar_url}' download done... returncode? {jar_result.returncode}") # <- wait for return + num_jar_waits = 0 + while ( + num_jar_waits <= jar_timeout and + not os.path.exists(f"{self.to_fuse_dir}/{jar_filename}") and + jar_result.returncode == 0 + ): + time.sleep(1) + num_jar_waits += 1 + print(f"jar '{jar_url}' download... returncode? {jar_result.returncode}, wait time? {num_jar_waits}") # - handle so copy if self.jni_so_copy: + so_timeout = 30 for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_url = f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}' so_result = subprocess.run( - ['sudo', 'wget', '-P', self.to_fuse_dir, so_url], - stdout=subprocess.DEVNULL + ['sudo', 'wget', '-nv', '-P', self.to_fuse_dir, so_url], + stdout=subprocess.DEVNULL, + timeout=so_timeout ) - print(f"so '{so_url}' download done... returncode {so_result.returncode}") # <- wait for return + num_so_waits = 0 + while ( + num_so_waits <= so_timeout and + not os.path.exists(f"{self.to_fuse_dir}/{so_filename}") and + so_result.returncode == 0 + ): + time.sleep(1) + num_so_waits += 1 + print(f"so '{so_url}' download... returncode {so_result.returncode}, wait time? 
{num_so_waits}") # <- wait for return # - echo status print(f"::: Install setup complete :::") diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index dd63754be..3594c9bca 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -10,7 +10,7 @@ def test_setup_no_op(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - self.assertEquals(len(installer.list_files()), 0) + self.assertEqual(len(installer.list_files()), 0) def test_setup_jar_only(self): installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) @@ -20,8 +20,8 @@ def test_setup_jar_only(self): self.fail("Executing `setup_fuse_install()` raised an exception.") files = installer.list_files() - self.assertEquals(len(files), 1) - self.assertEquals(files[0][-4:].lower(), '.jar') + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-4:].lower(), '.jar') def test_setup_sh_pip_only(self): installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) @@ -31,8 +31,8 @@ def test_setup_sh_pip_only(self): self.fail("Executing `setup_fuse_install()` raised an exception.") files = installer.list_files() - self.assertEquals(len(files), 1) - self.assertEquals(files[0][-3:].lower(), '.sh') + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') def test_setup_sh_gdal(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) @@ -42,8 +42,8 @@ def test_setup_sh_gdal(self): self.fail("Executing `setup_fuse_install()` raised an exception.") files = installer.list_files() - self.assertEquals(len(files), 1) - self.assertEquals(files[0][-3:].lower(), '.sh') + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') def test_setup_sh_gdal_jni(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 
fc5bb8d67..d26afbd1a 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -23,9 +23,11 @@ def setUpClass(cls) -> None: .getOrCreate() ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") - cls.spark.sparkContext.setLogLevel("FATAL") + @classmethod + def setUp(self) -> None: + self.spark.sparkContext.setLogLevel("FATAL") + @classmethod def tearDownClass(cls) -> None: - cls.spark.sparkContext.setLogLevel("FATAL") cls.spark.stop() diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 9c57fa8b9..dcc56ae47 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -72,10 +72,10 @@ then sudo cp -n $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github - GITHUB_REPO_PATH=databrickslabs/mosaic/raw/main/resources/gdal/jammy + GITHUB_REPO_PATH=databrickslabs/mosaic/main/resources/gdal/jammy - sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so - sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 - sudo wget -P /usr/lib -nc https://github.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 fi fi From c793c383f5982464ae75216216d2b8d9bbd4c3a2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 16:38:58 -0500 Subject: [PATCH 024/118] download back to requests. 
--- .github/actions/scala_build/action.yml | 6 +-- python/mosaic/api/fuse.py | 50 +++++-------------- python/test/test_gdal_install.py | 2 +- python/test/utils/mosaic_test_case.py | 3 ++ .../test/utils/mosaic_test_case_with_gdal.py | 3 ++ python/test/utils/spark_test_case.py | 18 ++++--- 6 files changed, 33 insertions(+), 49 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 5bc4cd657..651232edb 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -33,9 +33,9 @@ runs: sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30 - sudo wget -P /usr/lib -nc https://github.com/databrickslabs/mosaic/raw/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 + sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 1f87263dc..0fe2463a5 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,8 +3,6 @@ import os import pkg_resources import requests -import subprocess -import time __all__ = ["SetupMgr", "setup_fuse_install"] @@ -152,6 +150,7 @@ def configure(self) -> None: # --- end of script config --- with_resources 
= self.jar_copy or self.jni_so_copy + resource_statuses = {} if with_resources: # - handle jar copy if self.jar_copy: @@ -161,44 +160,20 @@ def configure(self) -> None: if github_version == 'main': latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] - # download jar - jar_timeout = 30 + # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' - jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' - jar_result = subprocess.run( - ['sudo', 'wget', '-nv', '-P', self.to_fuse_dir, jar_url], - stdout=subprocess.DEVNULL, - timeout=jar_timeout - ) - num_jar_waits = 0 - while ( - num_jar_waits <= jar_timeout and - not os.path.exists(f"{self.to_fuse_dir}/{jar_filename}") and - jar_result.returncode == 0 - ): - time.sleep(1) - num_jar_waits += 1 - print(f"jar '{jar_url}' download... returncode? {jar_result.returncode}, wait time? {num_jar_waits}") - - # - handle so copy + with open(f"{self.to_fuse_dir}/{jar_filename}", 'wb') as f: + jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' + r = requests.get(jar_url) + f.write(r.content) + resource_statuses['JAR'] = r.status_code + # - handle so copy if self.jni_so_copy: - so_timeout = 30 for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: - so_url = f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}' - so_result = subprocess.run( - ['sudo', 'wget', '-nv', '-P', self.to_fuse_dir, so_url], - stdout=subprocess.DEVNULL, - timeout=so_timeout - ) - num_so_waits = 0 - while ( - num_so_waits <= so_timeout and - not os.path.exists(f"{self.to_fuse_dir}/{so_filename}") and - so_result.returncode == 0 - ): - time.sleep(1) - num_so_waits += 1 - print(f"so '{so_url}' download... returncode {so_result.returncode}, wait time? 
{num_so_waits}") # <- wait for return + with open(f"{self.to_fuse_dir}/{so_filename}", 'wb') as f: + r = requests.get(f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}') + f.write(r.content) + resource_statuses[so_filename] = r.status_code # - echo status print(f"::: Install setup complete :::") @@ -212,6 +187,7 @@ def configure(self) -> None: print(f" more at https://docs.databricks.com/en/init-scripts/cluster-scoped.html") if with_resources: print(f"- Resource(s): copied") + print(resource_statuses) print("\n") def setup_fuse_install( diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index 79cbde1e5..bd617adc1 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -19,4 +19,4 @@ def test_setup_gdal(self): gdalinfo_result = installer.test_gdalinfo() self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") - self.assertEquals(len(installer.list_files()), 1) # <- just init script \ No newline at end of file + self.assertEqual(len(installer.list_files()), 1) # <- just init script \ No newline at end of file diff --git a/python/test/utils/mosaic_test_case.py b/python/test/utils/mosaic_test_case.py index 44f5a81df..f2b9a5693 100644 --- a/python/test/utils/mosaic_test_case.py +++ b/python/test/utils/mosaic_test_case.py @@ -13,6 +13,9 @@ class MosaicTestCase(SparkTestCase): def setUpClass(cls) -> None: super().setUpClass() api.enable_mosaic(cls.spark) + + def setUp(self) -> None: + return super().setUp() def generate_input_single_linestring(self) -> DataFrame: return self.spark.createDataFrame( diff --git a/python/test/utils/mosaic_test_case_with_gdal.py b/python/test/utils/mosaic_test_case_with_gdal.py index 233698bab..eb2d93c31 100644 --- a/python/test/utils/mosaic_test_case_with_gdal.py +++ b/python/test/utils/mosaic_test_case_with_gdal.py @@ -18,3 +18,6 @@ def generate_singleband_raster_df(self) -> DataFrame: .option("raster.read.strategy", "in_memory") 
.load("test/data/MCD43A4.A2018185.h10v07.006.2018194033728_B04.TIF") ) + + def setUp(self) -> None: + return super().setUp() diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index d26afbd1a..90b097964 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -1,15 +1,15 @@ -import unittest -import os from importlib.metadata import version - from pyspark.sql import SparkSession +import logging import mosaic - +import os +import unittest class SparkTestCase(unittest.TestCase): spark = None library_location = None + @classmethod def setUpClass(cls) -> None: @@ -23,11 +23,13 @@ def setUpClass(cls) -> None: .getOrCreate() ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") - - @classmethod - def setUp(self) -> None: - self.spark.sparkContext.setLogLevel("FATAL") @classmethod def tearDownClass(cls) -> None: cls.spark.stop() + + def setUp(self) -> None: + logging.getLogger("log4j").setLevel(logging.ERROR) + logging.getLogger("pyspark").setLevel(logging.ERROR) + logging.getLogger("py4j").setLevel(logging.ERROR) + self.spark.sparkContext.setLogLevel("FATAL") From 805a08fe5f743b8c6143cc2d463a681f22afbaae Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 17:38:09 -0500 Subject: [PATCH 025/118] don't cleanup temp dir until finished. 
--- python/mosaic/api/fuse.py | 36 ++++++++++++++++++++-------- python/test/test_fuse_install.py | 4 ++++ python/test/test_gdal_install.py | 5 ++++ python/test/utils/setup_fuse.py | 2 +- python/test/utils/setup_gdal.py | 2 +- python/test/utils/spark_test_case.py | 3 +++ 6 files changed, 40 insertions(+), 12 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 0fe2463a5..ee646a735 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,6 +3,7 @@ import os import pkg_resources import requests +import time __all__ = ["SetupMgr", "setup_fuse_install"] @@ -152,28 +153,43 @@ def configure(self) -> None: with_resources = self.jar_copy or self.jni_so_copy resource_statuses = {} if with_resources: + CHUNK_SIZE = 1024 * 1024 * 64 # 64MB + s = requests.Session() # - handle jar copy if self.jar_copy: # url and version details GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version if github_version == 'main': - latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + latest = str(s.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' - with open(f"{self.to_fuse_dir}/{jar_filename}", 'wb') as f: - jar_url = f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}' - r = requests.get(jar_url) - f.write(r.content) - resource_statuses['JAR'] = r.status_code + jar_path = f'{self.to_fuse_dir}/{jar_filename}' + r = s.get( + f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', + stream=True + ) + with open(jar_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + while not os.path.exists(jar_path) and r.status_code == 200: + time.wait(1) + resource_statuses[jar_filename] = r.status_code # - handle so copy if 
self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: - with open(f"{self.to_fuse_dir}/{so_filename}", 'wb') as f: - r = requests.get(f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}') - f.write(r.content) - resource_statuses[so_filename] = r.status_code + so_path = f'{self.to_fuse_dir}/{so_filename}' + r = s.get( + f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', + stream=True + ) + with open(so_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + while not os.path.exists(so_path) and r.status_code == 200: + time.wait(1) + resource_statuses[so_filename] = r.status_code # - echo status print(f"::: Install setup complete :::") diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index 3594c9bca..495a5e50a 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -3,6 +3,10 @@ class TestFuseInstall(SparkTestCase): + def tearDown(self) -> None: + super.tearDown() + self.installer._temp_dir.cleanup() + def test_setup_no_op(self): installer = FuseInstaller(False, False, jar_copy=False, jni_so_copy=False) try: diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index bd617adc1..b67ec65d4 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -2,6 +2,11 @@ class TestGDALInstall(SparkTestCase): + + def tearDown(self) -> None: + super.tearDown() + self.installer._temp_dir.cleanup() + def test_setup_gdal(self): installer = GDALInstaller() try: diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 72ac8d13e..8cb3dbb9a 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -11,7 +11,7 @@ def __init__( jar_copy = False, jni_so_copy = False ): self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.TemporaryDirectory() + self._temp_dir = 
tempfile.TemporaryDirectory(delete=False) self.with_mosaic_pip = with_mosaic_pip self.with_gdal = with_gdal self.jar_copy = jar_copy diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 90e4e3e25..35110bdd0 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -8,7 +8,7 @@ class GDALInstaller: def __init__(self): self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.TemporaryDirectory() + self._temp_dir = tempfile.TemporaryDirectory(delete=False) self.GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" def __del__(self): diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 90b097964..b21a84100 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -33,3 +33,6 @@ def setUp(self) -> None: logging.getLogger("pyspark").setLevel(logging.ERROR) logging.getLogger("py4j").setLevel(logging.ERROR) self.spark.sparkContext.setLogLevel("FATAL") + + def tearDown(self) -> None: + return super().tearDown() From 8c99872d1399a1ee785ff62310e4a2d335632541 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 19:16:01 -0500 Subject: [PATCH 026/118] logging silencing. 
--- python/mosaic/api/fuse.py | 4 - python/test/test_display_handler.py | 3 + python/test/test_fuse_install.py | 85 +++++++++---------- python/test/test_gdal_install.py | 17 ++-- python/test/test_library_handler.py | 3 + python/test/test_mosaic.py | 3 + python/test/test_raster_functions.py | 3 + python/test/test_vector_functions.py | 3 + python/test/utils/mosaic_test_case.py | 4 + .../test/utils/mosaic_test_case_with_gdal.py | 6 +- python/test/utils/spark_test_case.py | 13 +-- 11 files changed, 71 insertions(+), 73 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index ee646a735..1f3b89eaf 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -173,8 +173,6 @@ def configure(self) -> None: with open(jar_path, 'wb') as f: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) - while not os.path.exists(jar_path) and r.status_code == 200: - time.wait(1) resource_statuses[jar_filename] = r.status_code # - handle so copy if self.jni_so_copy: @@ -187,8 +185,6 @@ def configure(self) -> None: with open(so_path, 'wb') as f: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) - while not os.path.exists(so_path) and r.status_code == 200: - time.wait(1) resource_statuses[so_filename] = r.status_code # - echo status diff --git a/python/test/test_display_handler.py b/python/test/test_display_handler.py index 89e69324c..ed1661f12 100644 --- a/python/test/test_display_handler.py +++ b/python/test/test_display_handler.py @@ -4,6 +4,9 @@ class TestDisplayHandler(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + def test_display(self): df = self.wkt_boroughs() poly_df = df.select(st_makepolygon(st_geomfromwkt("wkt")).alias("polygon_geom")) diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index 495a5e50a..d6a922e38 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -2,10 +2,8 @@ class TestFuseInstall(SparkTestCase): - - def 
tearDown(self) -> None: - super.tearDown() - self.installer._temp_dir.cleanup() + def setUp(self) -> None: + return super().setUp() def test_setup_no_op(self): installer = FuseInstaller(False, False, jar_copy=False, jni_so_copy=False) @@ -20,75 +18,70 @@ def test_setup_jar_only(self): installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) try: installer.do_op() + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-4:].lower(), '.jar') except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-4:].lower(), '.jar') - def test_setup_sh_pip_only(self): installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) try: installer.do_op() + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') - def test_setup_sh_gdal(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) try: installer.do_op() + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') - def test_setup_sh_gdal_jni(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) try: installer.do_op() + files = installer.list_files() + self.assertEqual(len(files), 4) + + found_sh = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + 
self.assertEqual(so_cnt, 3) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - files = installer.list_files() - self.assertEqual(len(files), 4) - - found_sh = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif 'libgdalall.jni.so' in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertEqual(so_cnt, 3) - def test_setup_sh_all(self): installer = FuseInstaller(True, True, jar_copy=True, jni_so_copy=True) try: installer.do_op() + files = installer.list_files() + self.assertEqual(len(files), 5) + + found_sh = False + found_jar = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif f.lower().endswith('.jar'): + found_jar = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + self.assertTrue(found_jar) + self.assertEqual(so_cnt, 3) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - files = installer.list_files() - self.assertEqual(len(files), 5) - - found_sh = False - found_jar = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif f.lower().endswith('.jar'): - found_jar = True - elif 'libgdalall.jni.so' in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertTrue(found_jar) - self.assertEqual(so_cnt, 3) diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index b67ec65d4..d56fe0d4e 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -2,26 +2,21 @@ class TestGDALInstall(SparkTestCase): - - def tearDown(self) -> None: - super.tearDown() - self.installer._temp_dir.cleanup() + def setUp(self) -> None: + return super().setUp() def test_setup_gdal(self): installer = GDALInstaller() try: installer.copy_objects() + self.assertEqual(len(installer.list_files()), 1) # <- just init script except Exception: self.fail("Copying objects with `setup_gdal()` raised an 
exception.") try: installer_result = installer.run_init_script() + self.assertEqual(installer_result, 0) + gdalinfo_result = installer.test_gdalinfo() + self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") except Exception: self.fail("Execution of GDAL init script raised an exception.") - - self.assertEqual(installer_result, 0) - - gdalinfo_result = installer.test_gdalinfo() - self.assertEqual(gdalinfo_result, "GDAL 3.4.1, released 2021/12/27\n") - - self.assertEqual(len(installer.list_files()), 1) # <- just init script \ No newline at end of file diff --git a/python/test/test_library_handler.py b/python/test/test_library_handler.py index 6b782dc41..8c275e8c2 100644 --- a/python/test/test_library_handler.py +++ b/python/test/test_library_handler.py @@ -5,6 +5,9 @@ class TestMosaicLibraryHandler(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + def test_auto_attach_enabled(self): handler = MosaicLibraryHandler(self.spark) self.assertFalse(handler.auto_attach_enabled) diff --git a/python/test/test_mosaic.py b/python/test/test_mosaic.py index 163e8f7fc..149afd06c 100644 --- a/python/test/test_mosaic.py +++ b/python/test/test_mosaic.py @@ -5,6 +5,9 @@ class TestMosaicContext(SparkTestCase): + def setUp(self) -> None: + return super().setUp() + def test_invoke_function(self): _ = MosaicLibraryHandler(self.spark) context = MosaicContext(self.spark) diff --git a/python/test/test_raster_functions.py b/python/test/test_raster_functions.py index cfbcb39ee..f8e09b6f5 100644 --- a/python/test/test_raster_functions.py +++ b/python/test/test_raster_functions.py @@ -5,6 +5,9 @@ class TestRasterFunctions(MosaicTestCaseWithGDAL): + def setUp(self) -> None: + return super().setUp() + def test_read_raster(self): result = self.generate_singleband_raster_df().first() self.assertEqual(result.length, 1067862) diff --git a/python/test/test_vector_functions.py b/python/test/test_vector_functions.py index 2d189caba..67cfc3cf2 100644 --- 
a/python/test/test_vector_functions.py +++ b/python/test/test_vector_functions.py @@ -7,6 +7,9 @@ class TestVectorFunctions(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + def test_st_point(self): expected = [ "POINT (0 0)", diff --git a/python/test/utils/mosaic_test_case.py b/python/test/utils/mosaic_test_case.py index f2b9a5693..0d62c4378 100644 --- a/python/test/utils/mosaic_test_case.py +++ b/python/test/utils/mosaic_test_case.py @@ -9,6 +9,10 @@ class MosaicTestCase(SparkTestCase): + + def setUp(self) -> None: + return super.setUp() + @classmethod def setUpClass(cls) -> None: super().setUpClass() diff --git a/python/test/utils/mosaic_test_case_with_gdal.py b/python/test/utils/mosaic_test_case_with_gdal.py index eb2d93c31..cbcf1aa13 100644 --- a/python/test/utils/mosaic_test_case_with_gdal.py +++ b/python/test/utils/mosaic_test_case_with_gdal.py @@ -6,6 +6,9 @@ class MosaicTestCaseWithGDAL(MosaicTestCase): + def setUp(self) -> None: + return super().setUp() + @classmethod def setUpClass(cls) -> None: super().setUpClass() @@ -18,6 +21,3 @@ def generate_singleband_raster_df(self) -> DataFrame: .option("raster.read.strategy", "in_memory") .load("test/data/MCD43A4.A2018185.h10v07.006.2018194033728_B04.TIF") ) - - def setUp(self) -> None: - return super().setUp() diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index b21a84100..6f788b0ed 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -1,7 +1,6 @@ from importlib.metadata import version from pyspark.sql import SparkSession -import logging import mosaic import os import unittest @@ -10,13 +9,13 @@ class SparkTestCase(unittest.TestCase): spark = None library_location = None - @classmethod def setUpClass(cls) -> None: cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-jar-with-dependencies.jar" if not os.path.exists(cls.library_location): cls.library_location = 
f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-SNAPSHOT-jar-with-dependencies.jar" + cls.spark = ( SparkSession.builder.master("local") .config("spark.jars", cls.library_location) @@ -29,10 +28,6 @@ def tearDownClass(cls) -> None: cls.spark.stop() def setUp(self) -> None: - logging.getLogger("log4j").setLevel(logging.ERROR) - logging.getLogger("pyspark").setLevel(logging.ERROR) - logging.getLogger("py4j").setLevel(logging.ERROR) - self.spark.sparkContext.setLogLevel("FATAL") - - def tearDown(self) -> None: - return super().tearDown() + log4j = self.spark.sparkContext._jvm.org.apache.log4j + log4j.LogManager.getRootLogger().setLevel(log4j.Level.FATAL) + return super().setUp() \ No newline at end of file From d2176760c39db2b5392d8952d55fbefb40d11f1c Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 20:07:43 -0500 Subject: [PATCH 027/118] controlling tempdir for fuse install tests. --- python/mosaic/api/fuse.py | 1 - python/test/utils/setup_fuse.py | 13 +++++++------ python/test/utils/setup_gdal.py | 13 +++++++------ python/test/utils/spark_test_case.py | 2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 1f3b89eaf..df15103c5 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,7 +3,6 @@ import os import pkg_resources import requests -import time __all__ = ["SetupMgr", "setup_fuse_install"] diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 8cb3dbb9a..06c5a0ca1 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -1,17 +1,18 @@ -import os -import tempfile -import subprocess from pkg_resources import working_set, Requirement - from test.context import api +import os +import shutil +import subprocess +import tempfile + class FuseInstaller: def __init__( self, with_mosaic_pip, with_gdal, jar_copy = False, jni_so_copy = False ): self._site_packages = 
working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.TemporaryDirectory(delete=False) + self._temp_dir = tempfile.mkdtemp() self.with_mosaic_pip = with_mosaic_pip self.with_gdal = with_gdal self.jar_copy = jar_copy @@ -19,7 +20,7 @@ def __init__( self.FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" def __del__(self): - self._temp_dir.cleanup() + shutil.rmtree(self._temp_dir) def do_op(self): api.setup_fuse_install( diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 35110bdd0..2ccbf6e87 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -1,18 +1,19 @@ -import os -import tempfile -import subprocess from pkg_resources import working_set, Requirement - from test.context import api +import os +import shutil +import subprocess +import tempfile + class GDALInstaller: def __init__(self): self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.TemporaryDirectory(delete=False) + self._temp_dir = tempfile.mkdtemp() self.GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" def __del__(self): - self._temp_dir.cleanup() + shutil.rmtree(self._temp_dir) def copy_objects(self): api.setup_gdal( diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 6f788b0ed..c94138a82 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -29,5 +29,5 @@ def tearDownClass(cls) -> None: def setUp(self) -> None: log4j = self.spark.sparkContext._jvm.org.apache.log4j - log4j.LogManager.getRootLogger().setLevel(log4j.Level.FATAL) + log4j.LogManager.getLogger("log4j.logger.org.apache.spark.api.python.PythonGatewayServer").setLevel(log4j.Level.FATAL) return super().setUp() \ No newline at end of file From 6c9c0d49c82f6ea67fa0fa9ea2976f219823e023 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 20:56:36 -0500 Subject: [PATCH 028/118] temp dir obj now a string. 
--- python/test/utils/mosaic_test_case.py | 1 - python/test/utils/setup_fuse.py | 6 +++--- python/test/utils/setup_gdal.py | 6 +++--- python/test/utils/spark_test_case.py | 8 ++++---- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/python/test/utils/mosaic_test_case.py b/python/test/utils/mosaic_test_case.py index 0d62c4378..986f9dc79 100644 --- a/python/test/utils/mosaic_test_case.py +++ b/python/test/utils/mosaic_test_case.py @@ -9,7 +9,6 @@ class MosaicTestCase(SparkTestCase): - def setUp(self) -> None: return super.setUp() diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 06c5a0ca1..516a3defb 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -24,7 +24,7 @@ def __del__(self): def do_op(self): api.setup_fuse_install( - self._temp_dir.name, + self._temp_dir, self.with_mosaic_pip, self.with_gdal, jar_copy=self.jar_copy, @@ -35,7 +35,7 @@ def do_op(self): def run_init_script(self): fuse_install_script_target = os.path.join( - self._temp_dir.name, self.FUSE_INIT_SCRIPT_FILENAME + self._temp_dir, self.FUSE_INIT_SCRIPT_FILENAME ) os.chmod(fuse_install_script_target, mode=0x744) result = subprocess.run( @@ -46,4 +46,4 @@ def run_init_script(self): return result.returncode def list_files(self): - return os.listdir(self._temp_dir.name) + return os.listdir(self._temp_dir) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 2ccbf6e87..748f939f6 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -17,14 +17,14 @@ def __del__(self): def copy_objects(self): api.setup_gdal( - self._temp_dir.name, + self._temp_dir, override_mosaic_version="main", script_out_name=self.GDAL_INIT_SCRIPT_FILENAME ) def run_init_script(self): gdal_install_script_target = os.path.join( - self._temp_dir.name, self.GDAL_INIT_SCRIPT_FILENAME + self._temp_dir, self.GDAL_INIT_SCRIPT_FILENAME ) os.chmod(gdal_install_script_target, mode=0x744) result = 
subprocess.run( @@ -35,7 +35,7 @@ def run_init_script(self): return result.returncode def list_files(self): - return os.listdir(self._temp_dir.name) + return os.listdir(self._temp_dir) def test_gdalinfo(self): result = subprocess.run(["gdalinfo", "--version"], stdout=subprocess.PIPE) diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index c94138a82..31532d81a 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -3,6 +3,7 @@ import mosaic import os +import pyspark import unittest class SparkTestCase(unittest.TestCase): @@ -14,8 +15,9 @@ def setUpClass(cls) -> None: cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-jar-with-dependencies.jar" if not os.path.exists(cls.library_location): cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-SNAPSHOT-jar-with-dependencies.jar" - - + + cls.sc=pyspark.SparkContext() + cls.sc.setSystemProperty("spark.log.level", "FATAL") cls.spark = ( SparkSession.builder.master("local") .config("spark.jars", cls.library_location) @@ -28,6 +30,4 @@ def tearDownClass(cls) -> None: cls.spark.stop() def setUp(self) -> None: - log4j = self.spark.sparkContext._jvm.org.apache.log4j - log4j.LogManager.getLogger("log4j.logger.org.apache.spark.api.python.PythonGatewayServer").setLevel(log4j.Level.FATAL) return super().setUp() \ No newline at end of file From 76bcf1ac3fbe10c4f16fdb24ec8548f881849236 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 26 Dec 2023 22:29:39 -0500 Subject: [PATCH 029/118] removing sc conflict. 
--- python/test/utils/spark_test_case.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 31532d81a..0706981fe 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -9,25 +9,31 @@ class SparkTestCase(unittest.TestCase): spark = None library_location = None + log4jref = None @classmethod def setUpClass(cls) -> None: cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-jar-with-dependencies.jar" if not os.path.exists(cls.library_location): cls.library_location = f"{mosaic.__path__[0]}/lib/mosaic-{version('databricks-mosaic')}-SNAPSHOT-jar-with-dependencies.jar" - - cls.sc=pyspark.SparkContext() - cls.sc.setSystemProperty("spark.log.level", "FATAL") cls.spark = ( SparkSession.builder.master("local") .config("spark.jars", cls.library_location) .getOrCreate() ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") + cls.spark.sparkContext.setLogLevel("FATAL") + cls.log4jref = cls.spark.sparkContext._jvm.org.apache.log4j + cls.log4jref.LogManager.getLogger("org.apache.spark.repl.Main").setLogLevel(cls.log4jref.Level.FATAL) + cls.log4jref.LogManager.getRootLogger().setLogLevel(cls.log4jref.Level.FATAL) + + @classmethod def tearDownClass(cls) -> None: cls.spark.stop() def setUp(self) -> None: - return super().setUp() \ No newline at end of file + self.spark.sparkContext.setLogLevel("FATAL") + self.log4jref.LogManager.getLogger("org.apache.spark.repl.Main").setLogLevel(self.log4jref.Level.FATAL) + self.log4jref.LogManager.getRootLogger().setLogLevel(self.log4jref.Level.FATAL) From 298dce57b2d27aa0b4a77521df745b9b5699a2ec Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 07:56:54 -0500 Subject: [PATCH 030/118] quiet log4j2-defaults.properties --- .github/actions/scala_build/action.yml | 7 ++++++- python/setup.cfg | 6 ++---- 
python/test/utils/spark_test_case.py | 7 ------- scripts/mosaic-gdal-init.sh | 8 ++++---- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 651232edb..faeb0476b 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -39,7 +39,12 @@ runs: - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash - run: sudo mvn -q clean install + run: | + # - quiet log4j2 + sed -i -e 's/info/error/g' org/apache/spark/log4j2-defaults.properties + sed -i -e 's/warn/error/g' org/apache/spark/log4j2-defaults.properties + # - tests + sudo mvn -q clean install - name: Build the scala JAR - skip tests is true if: inputs.skip_tests == 'true' shell: bash diff --git a/python/setup.cfg b/python/setup.cfg index b734e5fd1..06df8facc 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -17,13 +17,11 @@ classifiers = [options] packages = find: python_requires = >=3.10.0 -setup_requires = - pyspark==3.4.1 - ipython>=7.22.0 - install_requires = keplergl==0.3.2 h3==3.7.0 + pyspark<3.5,>=3.4 + ipython>=7.22.0 [options.package_data] mosaic = diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 0706981fe..bab355a25 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -23,11 +23,6 @@ def setUpClass(cls) -> None: ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") cls.spark.sparkContext.setLogLevel("FATAL") - cls.log4jref = cls.spark.sparkContext._jvm.org.apache.log4j - cls.log4jref.LogManager.getLogger("org.apache.spark.repl.Main").setLogLevel(cls.log4jref.Level.FATAL) - cls.log4jref.LogManager.getRootLogger().setLogLevel(cls.log4jref.Level.FATAL) - - @classmethod def tearDownClass(cls) -> None: @@ -35,5 +30,3 @@ def tearDownClass(cls) -> None: def setUp(self) -> None: 
self.spark.sparkContext.setLogLevel("FATAL") - self.log4jref.LogManager.getLogger("org.apache.spark.repl.Main").setLogLevel(self.log4jref.Level.FATAL) - self.log4jref.LogManager.getRootLogger().setLogLevel(self.log4jref.Level.FATAL) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index dcc56ae47..9c8647722 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 20 DEC, 2023 +# Last Modified: 27 DEC, 2023 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -74,8 +74,8 @@ then # copy from github GITHUB_REPO_PATH=databrickslabs/mosaic/main/resources/gdal/jammy - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 fi fi From 1c898241d0550cd71eaa42811fbd45e3e24f789e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 08:07:37 -0500 Subject: [PATCH 031/118] update location of log4j2.properties --- .github/actions/scala_build/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index faeb0476b..5f2eb3f3f 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -41,8 +41,9 @@ runs: shell: bash run: | # - quiet log4j2 - sed -i -e 
's/info/error/g' org/apache/spark/log4j2-defaults.properties - sed -i -e 's/warn/error/g' org/apache/spark/log4j2-defaults.properties + cp org/apache/spark/conf/log4j2.properties.template org/apache/spark/conf/log4j2.properties + sed -i -e 's/info/error/g' org/apache/spark/conf/log4j2.properties + sed -i -e 's/warn/error/g' org/apache/spark/conf/log4j2.properties # - tests sudo mvn -q clean install - name: Build the scala JAR - skip tests is true From 9cb3ac07409422bc1f5d5a5a092cfbed184ae9fc Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 08:27:35 -0500 Subject: [PATCH 032/118] find and resolve spark config path. --- .github/actions/scala_build/action.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 5f2eb3f3f..e9dceacd9 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -33,17 +33,19 @@ runs: sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 - sudo wget -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 + sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip 
tests is false if: inputs.skip_tests == 'false' shell: bash run: | # - quiet log4j2 - cp org/apache/spark/conf/log4j2.properties.template org/apache/spark/conf/log4j2.properties - sed -i -e 's/info/error/g' org/apache/spark/conf/log4j2.properties - sed -i -e 's/warn/error/g' org/apache/spark/conf/log4j2.properties + SPARK_LOG4J2_TEMPLATE=$(readlink -f $(find . -name "log4j2.properties.template")) + SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") + cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties + sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties + sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties # - tests sudo mvn -q clean install - name: Build the scala JAR - skip tests is true From d85cf25d6e798ec0dbae5aa8c98771b2a2c38d6b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 08:41:37 -0500 Subject: [PATCH 033/118] using realpath instead of readlink. --- .github/actions/scala_build/action.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index e9dceacd9..056451b0f 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -41,8 +41,9 @@ runs: shell: bash run: | # - quiet log4j2 - SPARK_LOG4J2_TEMPLATE=$(readlink -f $(find . -name "log4j2.properties.template")) + SPARK_LOG4J2_TEMPLATE=$(realpath $(find . -name "log4j2.properties.template")) SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") + echo "...configuring $SPARK_CONFIG_DIR/log4j2.properties" cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties From f1ec369597f400f16fd18573307b4accea07e9d1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 08:58:39 -0500 Subject: [PATCH 034/118] separate mvn clean from install for logging. 
--- .github/actions/scala_build/action.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 056451b0f..c8b5d8c43 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -40,6 +40,8 @@ runs: if: inputs.skip_tests == 'false' shell: bash run: | + # - + sudo mvn -q clean test-compile # - quiet log4j2 SPARK_LOG4J2_TEMPLATE=$(realpath $(find . -name "log4j2.properties.template")) SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") @@ -48,7 +50,7 @@ runs: sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties # - tests - sudo mvn -q clean install + sudo mvn -q install - name: Build the scala JAR - skip tests is true if: inputs.skip_tests == 'true' shell: bash From 2afab8840d89eb8c5dbd63bbdd86ee354262d018 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 09:00:16 -0500 Subject: [PATCH 035/118] comments. --- .github/actions/scala_build/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index c8b5d8c43..f791e8962 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -40,7 +40,7 @@ runs: if: inputs.skip_tests == 'false' shell: bash run: | - # - + # - setup tests sudo mvn -q clean test-compile # - quiet log4j2 SPARK_LOG4J2_TEMPLATE=$(realpath $(find . 
-name "log4j2.properties.template")) @@ -49,7 +49,7 @@ runs: cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties - # - tests + # - run tests sudo mvn -q install - name: Build the scala JAR - skip tests is true if: inputs.skip_tests == 'true' From e27602d29d19fac6a4f00d1a4932710482b8e19d Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 10:02:04 -0500 Subject: [PATCH 036/118] install always for scala. --- .github/actions/scala_build/action.yml | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index f791e8962..6d451aaf8 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -37,24 +37,19 @@ runs: sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - name: Test and build the scala JAR - skip tests is false - if: inputs.skip_tests == 'false' shell: bash run: | - # - setup tests - sudo mvn -q clean test-compile + sudo mvn -q clean install -DskipTests -Dscoverage.skip # - quiet log4j2 SPARK_LOG4J2_TEMPLATE=$(realpath $(find . 
-name "log4j2.properties.template")) SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") echo "...configuring $SPARK_CONFIG_DIR/log4j2.properties" - cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties - sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties - sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties - # - run tests - sudo mvn -q install - - name: Build the scala JAR - skip tests is true - if: inputs.skip_tests == 'true' + sudo cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties + sudo sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties + sudo sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties + if: inputs.skip_tests == 'false' shell: bash - run: sudo mvn -q clean install -DskipTests -Dscoverage.skip + run: sudo mvn -q install - name: Publish test coverage if: inputs.skip_tests == 'false' uses: codecov/codecov-action@v1 From 9b8c1207dc77e699e65617e59e12b0ff51af3706 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 10:08:08 -0500 Subject: [PATCH 037/118] always modify log4j2.properties. 
--- .github/actions/scala_build/action.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 6d451aaf8..87845a62f 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -36,9 +36,11 @@ runs: sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - - name: Test and build the scala JAR - skip tests is false + - name: Test and build the scala JAR - skip tests is false + if: inputs.skip_tests == 'false' shell: bash run: | + # - clean and install (no tests) sudo mvn -q clean install -DskipTests -Dscoverage.skip # - quiet log4j2 SPARK_LOG4J2_TEMPLATE=$(realpath $(find . -name "log4j2.properties.template")) @@ -47,9 +49,21 @@ runs: sudo cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties sudo sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties sudo sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties - if: inputs.skip_tests == 'false' + # - install with tests + sudo mvn -q install + - name: Build the scala JAR - skip tests is true + if: inputs.skip_tests == 'true' shell: bash - run: sudo mvn -q install + run: | + # - clean and install (no tests) + sudo mvn -q clean install -DskipTests -Dscoverage.skip + # - quiet log4j2 + SPARK_LOG4J2_TEMPLATE=$(realpath $(find . 
-name "log4j2.properties.template")) + SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") + echo "...configuring $SPARK_CONFIG_DIR/log4j2.properties" + sudo cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties + sudo sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties + sudo sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties - name: Publish test coverage if: inputs.skip_tests == 'false' uses: codecov/codecov-action@v1 From ba0bd4bb4757ac9fad9085a12d983cd5f5d3ae4e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 10:11:42 -0500 Subject: [PATCH 038/118] fix yml. --- .github/actions/scala_build/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 87845a62f..34cfb4601 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -36,7 +36,7 @@ runs: sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 - - name: Test and build the scala JAR - skip tests is false + - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash run: | From 4c824e70e54b73dd308748ed6433c8c91a2a0778 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 10:27:33 -0500 Subject: [PATCH 039/118] remove attempted logging changes. 
--- .github/actions/scala_build/action.yml | 24 ++---------------------- python/test/utils/spark_test_case.py | 1 - 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 34cfb4601..5ced8b753 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -39,31 +39,11 @@ runs: - name: Test and build the scala JAR - skip tests is false if: inputs.skip_tests == 'false' shell: bash - run: | - # - clean and install (no tests) - sudo mvn -q clean install -DskipTests -Dscoverage.skip - # - quiet log4j2 - SPARK_LOG4J2_TEMPLATE=$(realpath $(find . -name "log4j2.properties.template")) - SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") - echo "...configuring $SPARK_CONFIG_DIR/log4j2.properties" - sudo cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties - sudo sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties - sudo sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties - # - install with tests - sudo mvn -q install + run: sudo mvn -q clean install - name: Build the scala JAR - skip tests is true if: inputs.skip_tests == 'true' shell: bash - run: | - # - clean and install (no tests) - sudo mvn -q clean install -DskipTests -Dscoverage.skip - # - quiet log4j2 - SPARK_LOG4J2_TEMPLATE=$(realpath $(find . 
-name "log4j2.properties.template")) - SPARK_CONFIG_DIR=$(dirname "$SPARK_LOG4J2_TEMPLATE") - echo "...configuring $SPARK_CONFIG_DIR/log4j2.properties" - sudo cp $SPARK_LOG4J2_TEMPLATE $SPARK_CONFIG_DIR/log4j2.properties - sudo sed -i -e 's/info/error/g' $SPARK_CONFIG_DIR/log4j2.properties - sudo sed -i -e 's/warn/error/g' $SPARK_CONFIG_DIR/log4j2.properties + run: sudo mvn -q clean install -DskipTests -Dscoverage.skip - name: Publish test coverage if: inputs.skip_tests == 'false' uses: codecov/codecov-action@v1 diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index bab355a25..1bd92e08b 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -3,7 +3,6 @@ import mosaic import os -import pyspark import unittest class SparkTestCase(unittest.TestCase): From 3e1546c3cc3efe473b8ea60bf6d23c2aca2884fa Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 11:57:34 -0500 Subject: [PATCH 040/118] Add return to setup fuse functions. 
--- python/mosaic/api/fuse.py | 55 +++++++++++++++---- python/mosaic/api/gdal.py | 6 +-- python/test/test_fuse_install.py | 91 +++++++++++++++++--------------- python/test/test_gdal_install.py | 5 +- python/test/utils/setup_fuse.py | 8 +-- python/test/utils/setup_gdal.py | 10 ++-- 6 files changed, 107 insertions(+), 68 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index df15103c5..bb9616668 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -1,8 +1,9 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field import os import pkg_resources import requests +import time __all__ = ["SetupMgr", "setup_fuse_install"] @@ -35,12 +36,20 @@ class SetupMgr: override_mosaic_version: str = None jar_copy: bool = False jni_so_copy: bool = False + session = field(init=False) - def configure(self) -> None: + def __post_init__(self): + self.session = requests.Session() + + def __del__(self): + self.session.close() + + def configure(self) -> bool: """ Handle various config options. - if `with_mosaic_pip` or `with_gdal` or `with_ubuntugis`, script will be configured and written. + Returns True unless resources fail to download. 
""" # - set the mosaic and github versions # will be used in downloading resources @@ -78,7 +87,7 @@ def configure(self) -> None: # TODO: MODIFY AFTER PR MERGE # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' - script = requests.get(script_url, allow_redirects=True).text + script = self.session.get(script_url, allow_redirects=True).text # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -151,21 +160,22 @@ def configure(self) -> None: with_resources = self.jar_copy or self.jni_so_copy resource_statuses = {} + jar_download_status = False + so_download_status = False if with_resources: CHUNK_SIZE = 1024 * 1024 * 64 # 64MB - s = requests.Session() # - handle jar copy if self.jar_copy: # url and version details GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version if github_version == 'main': - latest = str(s.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + latest = str(self.session.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_path = f'{self.to_fuse_dir}/{jar_filename}' - r = s.get( + r = self.session.get( f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', stream=True ) @@ -173,11 +183,14 @@ def configure(self) -> None: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[jar_filename] = r.status_code + jar_download_status = True + else: + jar_download_status = True # - handle so copy if self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_path = f'{self.to_fuse_dir}/{so_filename}' - r = s.get( + r = self.session.get( 
f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', stream=True ) @@ -185,7 +198,19 @@ def configure(self) -> None: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[so_filename] = r.status_code - + so_download_status = True + else: + so_download_status = True + else: + jar_download_status = True + so_download_status = True + + while ( + not jar_download_status or + not so_download_status + ): + time.sleep(1.0) + # - echo status print(f"::: Install setup complete :::") print(f"- Settings: 'with_mosaic_pip'? {self.with_mosaic_pip}, 'with_gdal'? {self.with_gdal}, 'with_ubuntugis'? {self.with_ubuntugis}") @@ -200,12 +225,20 @@ def configure(self) -> None: print(f"- Resource(s): copied") print(resource_statuses) print("\n") + + if ( + not any(resource_statuses) or + all(value == 200 for value in resource_statuses.values()) + ): + return True + else: + return False def setup_fuse_install( to_fuse_dir: str, with_mosaic_pip: bool, with_gdal: bool, with_ubuntugis: bool = False, script_out_name: str = 'mosaic-fuse-init.sh', override_mosaic_version: str = None, jar_copy: bool = True, jni_so_copy: bool = True -) -> None: +) -> bool: """ [1] Copies Mosaic "fat" JAR (with dependencies) into `to_fuse_dir` - by default, version will match the current mosaic version executing the command, @@ -257,7 +290,7 @@ def setup_fuse_install( jni_so_copy: bool Whether to copy the GDAL JNI shared objects; default is True. - Returns + Returns True unless resources fail to download. 
------- """ setup_mgr = SetupMgr( @@ -270,4 +303,4 @@ def setup_fuse_install( jar_copy = jar_copy, jni_so_copy = jni_so_copy ) - setup_mgr.configure() + return setup_mgr.configure() diff --git a/python/mosaic/api/gdal.py b/python/mosaic/api/gdal.py index 024002e89..be620ef71 100644 --- a/python/mosaic/api/gdal.py +++ b/python/mosaic/api/gdal.py @@ -11,7 +11,7 @@ def setup_gdal( with_mosaic_pip: bool = False, with_ubuntugis: bool = False, script_out_name: str = 'mosaic-gdal-init.sh', override_mosaic_version: str = None -) -> None: +) -> bool: """ Prepare GDAL init script and shared objects required for GDAL to run on spark. This function will generate the init script that will install GDAL on each worker node. @@ -43,7 +43,7 @@ def setup_gdal( e.g. '==0.4.0' or '<0.5,>=0.4'; default is None. - Returns + Returns True unless resources fail to download. ------- """ setup_mgr = SetupMgr( @@ -53,7 +53,7 @@ def setup_gdal( script_out_name = script_out_name, override_mosaic_version = override_mosaic_version, ) - setup_mgr.configure() + return setup_mgr.configure() def enable_gdal(spark: SparkSession) -> None: diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index d6a922e38..6aa6831e6 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -8,7 +8,7 @@ def setUp(self) -> None: def test_setup_no_op(self): installer = FuseInstaller(False, False, jar_copy=False, jni_so_copy=False) try: - installer.do_op() + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") @@ -17,71 +17,76 @@ def test_setup_no_op(self): def test_setup_jar_only(self): installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) try: - installer.do_op() - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-4:].lower(), '.jar') + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` 
raised an exception.") + + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-4:].lower(), '.jar') def test_setup_sh_pip_only(self): installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) try: - installer.do_op() - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") + + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') def test_setup_sh_gdal(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) try: - installer.do_op() - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") + + files = installer.list_files() + self.assertEqual(len(files), 1) + self.assertEqual(files[0][-3:].lower(), '.sh') def test_setup_sh_gdal_jni(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) try: - installer.do_op() - files = installer.list_files() - self.assertEqual(len(files), 4) - - found_sh = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif 'libgdalall.jni.so' in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertEqual(so_cnt, 3) + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") + + files = installer.list_files() + self.assertEqual(len(files), 4) + + found_sh = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + self.assertEqual(so_cnt, 3) def test_setup_sh_all(self): installer = FuseInstaller(True, 
True, jar_copy=True, jni_so_copy=True) try: - installer.do_op() - files = installer.list_files() - self.assertEqual(len(files), 5) - - found_sh = False - found_jar = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif f.lower().endswith('.jar'): - found_jar = True - elif 'libgdalall.jni.so' in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertTrue(found_jar) - self.assertEqual(so_cnt, 3) + self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") + + files = installer.list_files() + self.assertEqual(len(files), 5) + + found_sh = False + found_jar = False + so_cnt = 0 + for f in files: + if f.lower().endswith('.sh'): + found_sh = True + elif f.lower().endswith('.jar'): + found_jar = True + elif 'libgdalall.jni.so' in f.lower(): + so_cnt += 1 + self.assertTrue(found_sh) + self.assertTrue(found_jar) + self.assertEqual(so_cnt, 3) diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py index d56fe0d4e..340b96841 100644 --- a/python/test/test_gdal_install.py +++ b/python/test/test_gdal_install.py @@ -8,10 +8,11 @@ def setUp(self) -> None: def test_setup_gdal(self): installer = GDALInstaller() try: - installer.copy_objects() - self.assertEqual(len(installer.list_files()), 1) # <- just init script + self.assertTrue(installer.do_op()) except Exception: self.fail("Copying objects with `setup_gdal()` raised an exception.") + + self.assertEqual(len(installer.list_files()), 1) # <- just init script try: installer_result = installer.run_init_script() diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 516a3defb..4a57b6380 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -22,8 +22,8 @@ def __init__( def __del__(self): shutil.rmtree(self._temp_dir) - def do_op(self): - api.setup_fuse_install( + def do_op(self) -> bool: + return api.setup_fuse_install( self._temp_dir, 
self.with_mosaic_pip, self.with_gdal, @@ -33,7 +33,7 @@ def do_op(self): script_out_name=self.FUSE_INIT_SCRIPT_FILENAME ) - def run_init_script(self): + def run_init_script(self) -> int: fuse_install_script_target = os.path.join( self._temp_dir, self.FUSE_INIT_SCRIPT_FILENAME ) @@ -45,5 +45,5 @@ def run_init_script(self): ) return result.returncode - def list_files(self): + def list_files(self) -> list[str]: return os.listdir(self._temp_dir) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index 748f939f6..d700d3b60 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -15,14 +15,14 @@ def __init__(self): def __del__(self): shutil.rmtree(self._temp_dir) - def copy_objects(self): - api.setup_gdal( + def do_op(self) -> bool: + return api.setup_gdal( self._temp_dir, override_mosaic_version="main", script_out_name=self.GDAL_INIT_SCRIPT_FILENAME ) - def run_init_script(self): + def run_init_script(self) -> int: gdal_install_script_target = os.path.join( self._temp_dir, self.GDAL_INIT_SCRIPT_FILENAME ) @@ -34,9 +34,9 @@ def run_init_script(self): ) return result.returncode - def list_files(self): + def list_files(self) -> list[str]: return os.listdir(self._temp_dir) - def test_gdalinfo(self): + def test_gdalinfo(self) -> str: result = subprocess.run(["gdalinfo", "--version"], stdout=subprocess.PIPE) return result.stdout.decode() From 6f1994f46bc294f17152c4a1b4aa35f2b0a0c557 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 12:29:12 -0500 Subject: [PATCH 041/118] modify requests session handling. 
--- python/mosaic/api/fuse.py | 14 +++++++------- python/test/utils/spark_test_case.py | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index bb9616668..01f29f4bf 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -36,13 +36,13 @@ class SetupMgr: override_mosaic_version: str = None jar_copy: bool = False jni_so_copy: bool = False - session = field(init=False) + _session:requests.Session = field(init=False) def __post_init__(self): - self.session = requests.Session() + self._session = requests.Session() def __del__(self): - self.session.close() + self._session.close() def configure(self) -> bool: """ @@ -87,7 +87,7 @@ def configure(self) -> bool: # TODO: MODIFY AFTER PR MERGE # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' - script = self.session.get(script_url, allow_redirects=True).text + script = self._session.get(script_url, allow_redirects=True).text # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -170,12 +170,12 @@ def configure(self) -> bool: GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version if github_version == 'main': - latest = str(self.session.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + latest = str(self._session.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_path = f'{self.to_fuse_dir}/{jar_filename}' - r = self.session.get( + r = self._session.get( f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', stream=True ) @@ -190,7 +190,7 @@ def configure(self) -> bool: if self.jni_so_copy: for 
so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_path = f'{self.to_fuse_dir}/{so_filename}' - r = self.session.get( + r = self._session.get( f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', stream=True ) diff --git a/python/test/utils/spark_test_case.py b/python/test/utils/spark_test_case.py index 1bd92e08b..98d2743bf 100644 --- a/python/test/utils/spark_test_case.py +++ b/python/test/utils/spark_test_case.py @@ -18,6 +18,8 @@ def setUpClass(cls) -> None: cls.spark = ( SparkSession.builder.master("local") .config("spark.jars", cls.library_location) + .config("spark.driver.extraJavaOptions", "-Dorg.apache.logging.log4j.level=FATAL") + .config("spark.executor.extraJavaOptions", "-Dorg.apache.logging.log4j.level=FATAL") .getOrCreate() ) cls.spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") From e06e202e1239bce278d99bbaef2c9e478e1269b3 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 13:01:09 -0500 Subject: [PATCH 042/118] removed test JAR / SO downloads. 
--- python/mosaic/api/fuse.py | 32 +++------------------ python/test/test_fuse_install.py | 48 ++++---------------------------- 2 files changed, 9 insertions(+), 71 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 01f29f4bf..81e9e5749 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -36,13 +36,6 @@ class SetupMgr: override_mosaic_version: str = None jar_copy: bool = False jni_so_copy: bool = False - _session:requests.Session = field(init=False) - - def __post_init__(self): - self._session = requests.Session() - - def __del__(self): - self._session.close() def configure(self) -> bool: """ @@ -87,7 +80,7 @@ def configure(self) -> bool: # TODO: MODIFY AFTER PR MERGE # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' - script = self._session.get(script_url, allow_redirects=True).text + script = requests.get(script_url, allow_redirects=True).text # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -160,8 +153,6 @@ def configure(self) -> bool: with_resources = self.jar_copy or self.jni_so_copy resource_statuses = {} - jar_download_status = False - so_download_status = False if with_resources: CHUNK_SIZE = 1024 * 1024 * 64 # 64MB # - handle jar copy @@ -170,12 +161,12 @@ def configure(self) -> bool: GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version if github_version == 'main': - latest = str(self._session.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_path = f'{self.to_fuse_dir}/{jar_filename}' - r = 
self._session.get( + r = requests.get( f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', stream=True ) @@ -183,14 +174,11 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[jar_filename] = r.status_code - jar_download_status = True - else: - jar_download_status = True # - handle so copy if self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_path = f'{self.to_fuse_dir}/{so_filename}' - r = self._session.get( + r = requests.get( f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', stream=True ) @@ -198,18 +186,6 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[so_filename] = r.status_code - so_download_status = True - else: - so_download_status = True - else: - jar_download_status = True - so_download_status = True - - while ( - not jar_download_status or - not so_download_status - ): - time.sleep(1.0) # - echo status print(f"::: Install setup complete :::") diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py index 6aa6831e6..7c2208b18 100644 --- a/python/test/test_fuse_install.py +++ b/python/test/test_fuse_install.py @@ -11,8 +11,8 @@ def test_setup_no_op(self): self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - self.assertEqual(len(installer.list_files()), 0) + + self.assertEqual(len(installer.list_files()), 0) # <- nothing generated def test_setup_jar_only(self): installer = FuseInstaller(False, False, jar_copy=True, jni_so_copy=False) @@ -20,10 +20,6 @@ def test_setup_jar_only(self): self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-4:].lower(), '.jar') def 
test_setup_sh_pip_only(self): installer = FuseInstaller(True, False, jar_copy=False, jni_so_copy=False) @@ -31,10 +27,8 @@ def test_setup_sh_pip_only(self): self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') + + self.assertEqual(len(installer.list_files()), 1) # <- just init script def test_setup_sh_gdal(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=False) @@ -43,9 +37,7 @@ def test_setup_sh_gdal(self): except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - files = installer.list_files() - self.assertEqual(len(files), 1) - self.assertEqual(files[0][-3:].lower(), '.sh') + self.assertEqual(len(installer.list_files()), 1) # <- just init script def test_setup_sh_gdal_jni(self): installer = FuseInstaller(False, True, jar_copy=False, jni_so_copy=True) @@ -53,19 +45,6 @@ def test_setup_sh_gdal_jni(self): self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - files = installer.list_files() - self.assertEqual(len(files), 4) - - found_sh = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif 'libgdalall.jni.so' in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertEqual(so_cnt, 3) def test_setup_sh_all(self): installer = FuseInstaller(True, True, jar_copy=True, jni_so_copy=True) @@ -73,20 +52,3 @@ def test_setup_sh_all(self): self.assertTrue(installer.do_op()) except Exception: self.fail("Executing `setup_fuse_install()` raised an exception.") - - files = installer.list_files() - self.assertEqual(len(files), 5) - - found_sh = False - found_jar = False - so_cnt = 0 - for f in files: - if f.lower().endswith('.sh'): - found_sh = True - elif f.lower().endswith('.jar'): - found_jar = True - elif 'libgdalall.jni.so' 
in f.lower(): - so_cnt += 1 - self.assertTrue(found_sh) - self.assertTrue(found_jar) - self.assertEqual(so_cnt, 3) From b91b6cf4c0d00e5f0d4e92e9dd884de17e9f5bba Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 13:41:50 -0500 Subject: [PATCH 043/118] wrapping requests in `with` command. --- python/mosaic/api/fuse.py | 43 ++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 81e9e5749..5ce38e6c6 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -3,7 +3,6 @@ import os import pkg_resources import requests -import time __all__ = ["SetupMgr", "setup_fuse_install"] @@ -80,7 +79,9 @@ def configure(self) -> bool: # TODO: MODIFY AFTER PR MERGE # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' - script = requests.get(script_url, allow_redirects=True).text + script = None + with requests.Session() as s: + script = s.get(script_url, allow_redirects=True).text # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -161,31 +162,35 @@ def configure(self) -> bool: GITHUB_RELEASE_URL_BASE = 'https://github.com/databrickslabs/mosaic/releases' resource_version = github_version if github_version == 'main': - latest = str(requests.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) + latest = None + with requests.Session() as s: + latest = str(s.get(f'{GITHUB_RELEASE_URL_BASE}/latest', allow_redirects=True).content) resource_version = latest.split("/tag/v_")[1].split('"')[0] # download jar jar_filename = f'mosaic-{resource_version}-jar-with-dependencies.jar' jar_path = f'{self.to_fuse_dir}/{jar_filename}' - r = requests.get( - f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', - stream=True - ) - with open(jar_path, 
'wb') as f: - for ch in r.iter_content(chunk_size=CHUNK_SIZE): - f.write(ch) - resource_statuses[jar_filename] = r.status_code + with requests.Session() as s: + r = s.get( + f'{GITHUB_RELEASE_URL_BASE}/download/v_{resource_version}/{jar_filename}', + stream=True + ) + with open(jar_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + resource_statuses[jar_filename] = r.status_code # - handle so copy if self.jni_so_copy: for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: so_path = f'{self.to_fuse_dir}/{so_filename}' - r = requests.get( - f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', - stream=True - ) - with open(so_path, 'wb') as f: - for ch in r.iter_content(chunk_size=CHUNK_SIZE): - f.write(ch) - resource_statuses[so_filename] = r.status_code + with requests.Session() as s: + r = s.get( + f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', + stream=True + ) + with open(so_path, 'wb') as f: + for ch in r.iter_content(chunk_size=CHUNK_SIZE): + f.write(ch) + resource_statuses[so_filename] = r.status_code # - echo status print(f"::: Install setup complete :::") From 3d62272510057a4deeff910ba4601ad144c4b3d1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 27 Dec 2023 14:10:07 -0500 Subject: [PATCH 044/118] closing requests sessions. 
--- python/mosaic/api/fuse.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 5ce38e6c6..bb9b0a10f 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass import os import pkg_resources @@ -82,6 +82,7 @@ def configure(self) -> bool: script = None with requests.Session() as s: script = s.get(script_url, allow_redirects=True).text + s.close() # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -178,11 +179,12 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[jar_filename] = r.status_code + s.close() # - handle so copy if self.jni_so_copy: - for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: - so_path = f'{self.to_fuse_dir}/{so_filename}' - with requests.Session() as s: + with requests.Session() as s: + for so_filename in ['libgdalalljni.so', 'libgdalalljni.so.30', 'libgdalalljni.so.30.0.3']: + so_path = f'{self.to_fuse_dir}/{so_filename}' r = s.get( f'{GITHUB_CONTENT_TAG_URL}/resources/gdal/jammy/{so_filename}', stream=True @@ -191,6 +193,7 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[so_filename] = r.status_code + s.close() # - echo status print(f"::: Install setup complete :::") From b0edb58ea8844d6d27fae1db122063097eef464f Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 13:22:10 -0500 Subject: [PATCH 045/118] update mosaic version, Workspace file as default. 
--- python/mosaic/__init__.py | 2 +- python/mosaic/api/fuse.py | 13 +++++-------- python/mosaic/api/gdal.py | 4 ++-- python/setup.cfg | 2 +- python/test/utils/setup_fuse.py | 2 +- python/test/utils/setup_gdal.py | 2 +- scripts/mosaic-gdal-init.sh | 11 +++++++---- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/python/mosaic/__init__.py b/python/mosaic/__init__.py index fd0d83624..23287839e 100644 --- a/python/mosaic/__init__.py +++ b/python/mosaic/__init__.py @@ -4,4 +4,4 @@ from .models import SpatialKNN from .readers import read -__version__ = "0.3.14" +__version__ = "0.4.0" diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index bb9b0a10f..77c5ba033 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -52,15 +52,15 @@ def configure(self) -> bool: release_version = None if ( - self.override_mosaic_version is not None and - set(self.override_mosaic_version).issubset(set('=0123456789.')) - ): - github_version = self.override_mosaic_version.replace('=','') - elif ( self.override_mosaic_version is not None and self.override_mosaic_version == 'main' ): github_version = 'main' + elif ( + self.override_mosaic_version is not None and + set(self.override_mosaic_version).issubset(set('=0123456789.')) + ): + github_version = self.override_mosaic_version.replace('=','') elif mosaic_version is None: github_version = 'main' @@ -82,7 +82,6 @@ def configure(self) -> bool: script = None with requests.Session() as s: script = s.get(script_url, allow_redirects=True).text - s.close() # - tokens used in script SCRIPT_FUSE_DIR_TOKEN= "FUSE_DIR='__FUSE_DIR__'" # <- ' added @@ -179,7 +178,6 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) resource_statuses[jar_filename] = r.status_code - s.close() # - handle so copy if self.jni_so_copy: with requests.Session() as s: @@ -193,7 +191,6 @@ def configure(self) -> bool: for ch in r.iter_content(chunk_size=CHUNK_SIZE): f.write(ch) 
resource_statuses[so_filename] = r.status_code - s.close() # - echo status print(f"::: Install setup complete :::") diff --git a/python/mosaic/api/gdal.py b/python/mosaic/api/gdal.py index be620ef71..9a44c446b 100644 --- a/python/mosaic/api/gdal.py +++ b/python/mosaic/api/gdal.py @@ -7,7 +7,7 @@ def setup_gdal( - to_fuse_dir: str = '/dbfs/FileStore/geospatial/mosaic/gdal/jammy', + to_fuse_dir: str = '/Workspace/Shared/geospatial/mosaic/gdal/jammy', with_mosaic_pip: bool = False, with_ubuntugis: bool = False, script_out_name: str = 'mosaic-gdal-init.sh', override_mosaic_version: str = None @@ -28,7 +28,7 @@ def setup_gdal( ---------- to_fuse_dir : str Path to write out the init script for GDAL installation; - default is '/dbfs/FileStore/geospatial/mosaic/gdal/jammy'. + default is '/Workspace/Shared/geospatial/mosaic/gdal/jammy'. with_mosaic_pip : bool Whether to configure a script that pip installs databricks-mosaic, fixed to the current version; default is False. diff --git a/python/setup.cfg b/python/setup.cfg index 06df8facc..01ca109a2 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -12,7 +12,7 @@ classifiers = Topic :: Scientific/Engineering :: GIS Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.10 [options] packages = find: diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py index 4a57b6380..a10a4db66 100644 --- a/python/test/utils/setup_fuse.py +++ b/python/test/utils/setup_fuse.py @@ -45,5 +45,5 @@ def run_init_script(self) -> int: ) return result.returncode - def list_files(self) -> list[str]: + def list_files(self) ->list[str]: return os.listdir(self._temp_dir) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py index d700d3b60..097ee77e2 100644 --- a/python/test/utils/setup_gdal.py +++ b/python/test/utils/setup_gdal.py @@ -17,7 +17,7 @@ def __del__(self): def do_op(self) -> bool: return 
api.setup_gdal( - self._temp_dir, + to_fuse_dir = self._temp_dir, override_mosaic_version="main", script_out_name=self.GDAL_INIT_SCRIPT_FILENAME ) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 9c8647722..c3258ce4f 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 27 DEC, 2023 +# Last Modified: 02 JAN, 2024 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -51,11 +51,13 @@ then GDAL_VERSION=3.4.3 # <- update gdal version fi sudo apt-get update -y - - # - install numpy first + + # - install specific numpy version + # - install scipy version (dep conflict) pip install --upgrade pip + pip uninstall scipy pip install --no-cache-dir --force-reinstall numpy==$NUMPY_VERSION - pip install "scipy$SCIPY_VERSION" + pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" # - install natives sudo apt-get install -y gdal-bin libgdal-dev python3-gdal @@ -72,6 +74,7 @@ then sudo cp -n $FUSE_DIR/libgdalalljni.so.30.0.3 /usr/lib else # copy from github + # TODO: in v0.4.1, include $GITHUB_VERSION GITHUB_REPO_PATH=databrickslabs/mosaic/main/resources/gdal/jammy sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so From f332f143717bd71f1476a3b23e1f78acaf017076 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 13:54:46 -0500 Subject: [PATCH 046/118] Mosaic version in POM to 0.4.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ea9d6d4a1..4ffed0118 100644 --- a/pom.xml +++ b/pom.xml @@ -278,7 +278,7 @@ 2.12.10 2.12 3.4.0 - 0.3.14 + 0.4.0 From b08f806101d72306bdd1ceb59c60ab46d2b25e12 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 17:22:20 -0500 Subject: [PATCH 047/118] adjusting enable_mosaic for UC + Volumes. 
--- python/mosaic/api/enable.py | 14 ++++++++++++-- python/mosaic/api/fuse.py | 3 --- python/mosaic/core/library_handler.py | 6 +++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index a6c3c0285..53323b3e3 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -10,7 +10,7 @@ from mosaic.utils.notebook_utils import NotebookUtils -def enable_mosaic(spark: SparkSession, dbutils=None) -> None: +def enable_mosaic(spark: SparkSession, dbutils=None, jar_autoattach=True) -> None: """ Enable Mosaic functions. @@ -23,7 +23,13 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: The active SparkSession. dbutils : dbruntime.dbutils.DBUtils The dbutils object used for `display` and `displayHTML` functions. - Optional, only applicable to Databricks users. + Optional, only applicable to Databricks workspace users. + jar_autoattach : bool + Convenience when you need to turn off JAR auto-attach for Unity + Catalog Volumes with Shared Access clusters. + - False will not registers the JAR + - True will register the JAR; Default is True + Returns ------- @@ -43,6 +49,10 @@ def enable_mosaic(spark: SparkSession, dbutils=None) -> None: Explicitly specify the index system to use for optimized spatial joins. (Optional) """ + # Set spark session + # - also set conf for jar autoattach + if not jar_autoattach: + spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") config.mosaic_spark = spark _ = MosaicLibraryHandler(config.mosaic_spark) config.mosaic_context = MosaicContext(config.mosaic_spark) diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index 77c5ba033..cc9513b00 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -23,9 +23,6 @@ def get_install_mosaic_version() -> str: @dataclass class SetupMgr: - """ - Defaults mirror setup_gdal. 
- """ to_fuse_dir: str script_in_name: str = 'mosaic-gdal-init.sh' script_out_name: str = 'mosaic-fuse-init.sh' diff --git a/python/mosaic/core/library_handler.py b/python/mosaic/core/library_handler.py index 90b7eb5de..9b8fb18a2 100644 --- a/python/mosaic/core/library_handler.py +++ b/python/mosaic/core/library_handler.py @@ -15,7 +15,7 @@ class MosaicLibraryHandler: def __init__(self, spark): self.spark = spark self.sc = spark.sparkContext - self.sc.setLogLevel("info") + self.spark.setLogLevel("info") log4jLogger = self.sc._jvm.org.apache.log4j LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) @@ -30,11 +30,11 @@ def __init__(self, spark): @property def auto_attach_enabled(self) -> bool: - if not self._auto_attached_enabled: + if self._auto_attached_enabled is None: try: result = ( self.spark.conf.get("spark.databricks.labs.mosaic.jar.autoattach") - == "true" + == 'true' ) except Py4JJavaError as e: result = True From e355fc38ce764418435cefd0a8eabba2107408b4 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 18:13:51 -0500 Subject: [PATCH 048/118] try set spark log, optional jar path. --- python/mosaic/api/enable.py | 26 ++++++++++++++++++-------- python/mosaic/core/library_handler.py | 16 ++++++++++------ 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index 53323b3e3..c380bae8f 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -10,7 +10,7 @@ from mosaic.utils.notebook_utils import NotebookUtils -def enable_mosaic(spark: SparkSession, dbutils=None, jar_autoattach=True) -> None: +def enable_mosaic(spark: SparkSession, dbutils=None, jar_path:str=None, jar_autoattach:bool=True) -> None: """ Enable Mosaic functions. @@ -22,15 +22,20 @@ def enable_mosaic(spark: SparkSession, dbutils=None, jar_autoattach=True) -> Non spark : pyspark.sql.SparkSession The active SparkSession. 
dbutils : dbruntime.dbutils.DBUtils - The dbutils object used for `display` and `displayHTML` functions. - Optional, only applicable to Databricks workspace users. + Optional, specify dbutils object used for `display` and `displayHTML` functions. + jar_path : str + Convenience when you need to change the JAR path for Unity Catalog + Volumes with Shared Access clusters + - Default is None; if provided, sets + "spark.databricks.labs.mosaic.jar.path" jar_autoattach : bool Convenience when you need to turn off JAR auto-attach for Unity - Catalog Volumes with Shared Access clusters. - - False will not registers the JAR + Catalog Volumes with Shared Access clusters. + - False will not register the JAR; sets + "spark.databricks.labs.mosaic.jar.autoattach" to "false" - True will register the JAR; Default is True - + Returns ------- @@ -40,7 +45,7 @@ def enable_mosaic(spark: SparkSession, dbutils=None, jar_autoattach=True) -> Non - `spark.databricks.labs.mosaic.jar.autoattach`: 'true' (default) or 'false' Automatically attach the Mosaic JAR to the Databricks cluster? (Optional) - - `spark.databricks.labs.mosaic.jar.location` + - `spark.databricks.labs.mosaic.jar.path` Explicitly specify the path to the Mosaic JAR. (Optional and not required at all in a standard Databricks environment). 
- `spark.databricks.labs.mosaic.geometry.api`: 'JTS' @@ -50,9 +55,14 @@ def enable_mosaic(spark: SparkSession, dbutils=None, jar_autoattach=True) -> Non """ # Set spark session - # - also set conf for jar autoattach + # - also set confs for jar autoattach + # and jar path if not jar_autoattach: spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") + print("...set 'spark.databricks.labs.mosaic.jar.autoattach' to false") + if jar_path is not None: + spark.conf.set("spark.databricks.labs.mosaic.jar.path", jar_path) + print(f"...set 'spark.databricks.labs.mosaic.jar.path' to '{jar_path}'") config.mosaic_spark = spark _ = MosaicLibraryHandler(config.mosaic_spark) config.mosaic_context = MosaicContext(config.mosaic_spark) diff --git a/python/mosaic/core/library_handler.py b/python/mosaic/core/library_handler.py index 9b8fb18a2..a19907964 100644 --- a/python/mosaic/core/library_handler.py +++ b/python/mosaic/core/library_handler.py @@ -15,15 +15,20 @@ class MosaicLibraryHandler: def __init__(self, spark): self.spark = spark self.sc = spark.sparkContext - self.spark.setLogLevel("info") + try: + spark.sparkContext.setLogLevel("info") + except Exception as e: + print("...environment disallows adjusting Log Level (not setting to 'info').") + pass log4jLogger = self.sc._jvm.org.apache.log4j LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) if self.auto_attach_enabled: - LOGGER.info(f"Looking for Mosaic JAR at {self.mosaic_library_location}.") - if not os.path.exists(self.mosaic_library_location): + jar_path = self.mosaic_library_location + LOGGER.info(f"Looking for Mosaic JAR at {jar_path}.") + if not os.path.exists(jar_path): raise FileNotFoundError( - f"Mosaic JAR package {self._jar_filename} could not be located at {self.mosaic_library_location}." + f"Mosaic JAR package {self._jar_filename} could not be located at {jar_path}." 
) LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.") self.auto_attach() @@ -43,7 +48,7 @@ def auto_attach_enabled(self) -> bool: @property def mosaic_library_location(self): - if not self._jar_path: + if self._jar_path is None: try: self._jar_path = self.spark.conf.get( "spark.databricks.labs.mosaic.jar.path" @@ -81,7 +86,6 @@ def auto_attach(self): converters = self.sc._jvm.scala.collection.JavaConverters JarURI = JavaURI.create("file:" + self._jar_path) - dbr_version = self.spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion").split("-")[0] try: # This will fix the exception when running on Databricks Runtime 13.x+ From 6efdd15115457735fb2fcf7d28235ebf3c1f4a50 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 18:58:53 -0500 Subject: [PATCH 049/118] remove setLogLevel due to Py4JSecurity restrictions --- python/mosaic/core/library_handler.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/mosaic/core/library_handler.py b/python/mosaic/core/library_handler.py index a19907964..61218387d 100644 --- a/python/mosaic/core/library_handler.py +++ b/python/mosaic/core/library_handler.py @@ -15,11 +15,6 @@ class MosaicLibraryHandler: def __init__(self, spark): self.spark = spark self.sc = spark.sparkContext - try: - spark.sparkContext.setLogLevel("info") - except Exception as e: - print("...environment disallows adjusting Log Level (not setting to 'info').") - pass log4jLogger = self.sc._jvm.org.apache.log4j LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) From e622be2525ba7d65a0f223a548e958a9ed27ced0 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 2 Jan 2024 21:36:31 -0500 Subject: [PATCH 050/118] Conditionally LOG during enable_mosaic. 
--- python/mosaic/api/enable.py | 21 ++++++++++++++++----- python/mosaic/core/library_handler.py | 14 ++++++++------ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/python/mosaic/api/enable.py b/python/mosaic/api/enable.py index c380bae8f..9cbc52136 100644 --- a/python/mosaic/api/enable.py +++ b/python/mosaic/api/enable.py @@ -10,7 +10,10 @@ from mosaic.utils.notebook_utils import NotebookUtils -def enable_mosaic(spark: SparkSession, dbutils=None, jar_path:str=None, jar_autoattach:bool=True) -> None: +def enable_mosaic( + spark: SparkSession, dbutils = None, log_info: bool = False, + jar_path: str = None, jar_autoattach: bool = True +) -> None: """ Enable Mosaic functions. @@ -23,6 +26,11 @@ def enable_mosaic(spark: SparkSession, dbutils=None, jar_path:str=None, jar_auto The active SparkSession. dbutils : dbruntime.dbutils.DBUtils Optional, specify dbutils object used for `display` and `displayHTML` functions. + log_info : bool + Logging cannot be adjusted with Unity Catalog Shared Access clusters; + if you try to do so, it will throw a Py4JSecurityException. + - True will try to setLogLevel to 'info' + - False will not; Default is False jar_path : str Convenience when you need to change the JAR path for Unity Catalog Volumes with Shared Access clusters @@ -54,17 +62,20 @@ def enable_mosaic(spark: SparkSession, dbutils=None, jar_path:str=None, jar_auto Explicitly specify the index system to use for optimized spatial joins. 
(Optional) """ - # Set spark session - # - also set confs for jar autoattach - # and jar path + # Set spark session, conditionally: + # - set conf for jar autoattach + # - set conf for jar path + # - set log level to 'info' if not jar_autoattach: spark.conf.set("spark.databricks.labs.mosaic.jar.autoattach", "false") print("...set 'spark.databricks.labs.mosaic.jar.autoattach' to false") if jar_path is not None: spark.conf.set("spark.databricks.labs.mosaic.jar.path", jar_path) print(f"...set 'spark.databricks.labs.mosaic.jar.path' to '{jar_path}'") + if log_info: + spark.sparkContext.setLogLevel('info') config.mosaic_spark = spark - _ = MosaicLibraryHandler(config.mosaic_spark) + _ = MosaicLibraryHandler(config.mosaic_spark, log_info = log_info) config.mosaic_context = MosaicContext(config.mosaic_spark) # Register SQL functions diff --git a/python/mosaic/core/library_handler.py b/python/mosaic/core/library_handler.py index 61218387d..6568fd042 100644 --- a/python/mosaic/core/library_handler.py +++ b/python/mosaic/core/library_handler.py @@ -11,21 +11,23 @@ class MosaicLibraryHandler: _jar_path = None _jar_filename = None _auto_attached_enabled = None - - def __init__(self, spark): + + def __init__(self, spark, log_info: bool = True): self.spark = spark self.sc = spark.sparkContext - log4jLogger = self.sc._jvm.org.apache.log4j - LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) + LOGGER = None + if log_info: + log4jLogger = self.sc._jvm.org.apache.log4j + LOGGER = log4jLogger.LogManager.getLogger(__class__.__name__) if self.auto_attach_enabled: jar_path = self.mosaic_library_location - LOGGER.info(f"Looking for Mosaic JAR at {jar_path}.") + LOGGER and LOGGER.info(f"Looking for Mosaic JAR at {jar_path}.") if not os.path.exists(jar_path): raise FileNotFoundError( f"Mosaic JAR package {self._jar_filename} could not be located at {jar_path}." 
) - LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.") + LOGGER and LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.") self.auto_attach() @property From b05f3423f802cb27ac7f7bf8d563c3040a5e1221 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 14:36:26 -0500 Subject: [PATCH 051/118] PathUtils handle dbfs:/Volumes paths. --- python/mosaic/config/config.py | 1 - .../labs/mosaic/utils/PathUtils.scala | 26 +++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/python/mosaic/config/config.py b/python/mosaic/config/config.py index bc5f80c9f..a59979be6 100644 --- a/python/mosaic/config/config.py +++ b/python/mosaic/config/config.py @@ -10,4 +10,3 @@ display_handler: DisplayHandler ipython_hook: InteractiveShell notebook_utils = None -default_gdal_init_script_path: str = "/dbfs/FileStore/geospatial/mosaic/gdal/" diff --git a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala index f3fb9d7b9..965e3fdde 100644 --- a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala +++ b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala @@ -9,7 +9,18 @@ import java.nio.file.{Files, Paths} object PathUtils { def getCleanPath(path: String): String = { - val cleanPath = path.replace("file:/", "/").replace("dbfs:/", "/dbfs/") + //val cleanPath = path.replace("file:/", "/").replace("dbfs:/", "/dbfs/") + val cleanPath = { + if (path.startsWith("file:/")) { + path.replace("file:/", "/") + } else if (path.startsWith("dbfs:/Volumes")) { + path.replace("dbfs:/Volumes", "/Volumes") + } else if (path.startsWith("dbfs:/")) { + path.replace("dbfs:/", "/dbfs/") + } else { + path + } + } if (cleanPath.endsWith(".zip") || cleanPath.contains(".zip:")) { getZipPath(cleanPath) } else { @@ -59,7 +70,18 @@ object PathUtils { def copyToTmp(inPath: String): String = { val cleanPath = getCleanPath(inPath) - val copyFromPath = 
inPath.replace("file:/", "/").replace("dbfs:/", "/dbfs/") + //val copyFromPath = inPath.replace("file:/", "/").replace("dbfs:/", "/dbfs/") + val copyFromPath = { + if (inPath.startsWith("file:/")) { + inPath.replace("file:/", "/") + } else if (inPath.startsWith("dbfs:/Volumes")) { + inPath.replace("dbfs:/Volumes", "/Volumes") + } else if (inPath.startsWith("dbfs:/")) { + inPath.replace("dbfs:/", "/dbfs/") + } else { + inPath + } + } val driver = MosaicRasterGDAL.identifyDriver(cleanPath) val extension = if (inPath.endsWith(".zip")) "zip" else GDAL.getExtension(driver) val tmpPath = createTmpFilePath(extension) From 943aceac57be9be37dd91e3d89944f65fcd540d9 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 15:45:10 -0500 Subject: [PATCH 052/118] tweaks to init script. --- scripts/mosaic-gdal-init.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index c3258ce4f..8674edf27 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) 
# [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 02 JAN, 2024 +# Last Modified: 03 JAN, 2024 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -27,7 +27,6 @@ WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS # - may be changed by conditional logic -GDAL_VERSION=3.4.1 # <- matches Jammy (default) NUMPY_VERSION=1.26.2 # <- for GDAL SCIPY_VERSION='<1.12,>=1.11' # <- adjusted for numpy @@ -48,23 +47,18 @@ then if [ $WITH_UBUNTUGIS == 1 ] then sudo add-apt-repository ppa:ubuntugis/ppa - GDAL_VERSION=3.4.3 # <- update gdal version fi sudo apt-get update -y # - install specific numpy version # - install scipy version (dep conflict) pip install --upgrade pip - pip uninstall scipy pip install --no-cache-dir --force-reinstall numpy==$NUMPY_VERSION pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" - # - install natives + # - install natives + python gdal sudo apt-get install -y gdal-bin libgdal-dev python3-gdal - # - install gdal with numpy - pip install --no-cache-dir --force-reinstall GDAL[numpy]==$GDAL_VERSION - # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] then From a871c8b195a622873716452e77f676576cf96e47 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 18:29:45 -0500 Subject: [PATCH 053/118] initial rst_fromcontent (no tests yet). 
--- python/mosaic/api/raster.py | 17 +++ .../expressions/raster/RST_FromContent.scala | 132 ++++++++++++++++++ .../labs/mosaic/functions/MosaicContext.scala | 15 ++ 3 files changed, 164 insertions(+) create mode 100644 src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index ffc29cebe..bbe31526e 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -15,6 +15,7 @@ "rst_combineavg", "rst_derivedband", "rst_frombands", + "rst_fromcontent", "rst_fromfile", "rst_georeference", "rst_getnodata", @@ -916,6 +917,22 @@ def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column: ) +def rst_fromcontent(raster: ColumnOrName, driver: ColumnOrName, parentPath: ColumnOrName, sizeInMB: ColumnOrName) -> Column: + """ + Tiles the raster binary into tiles of the given size. + :param raster: + :param driver: + :param parentPath: + :param sizeInMB: + :return: + """ + + return config.mosaic_context.invoke_function( + "rst_fromcontent", pyspark_to_java_column(raster), pyspark_to_java_column(driver), + pyspark_to_java_column(parentPath), pyspark_to_java_column(sizeInMB) + ) + + def rst_fromfile(raster: ColumnOrName, sizeInMB: ColumnOrName) -> Column: """ Tiles the raster into tiles of the given size. 
diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala new file mode 100644 index 000000000..ad9a47bdd --- /dev/null +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -0,0 +1,132 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.{IndexSystem, IndexSystemFactory} +import com.databricks.labs.mosaic.core.raster.api.GDAL +import com.databricks.labs.mosaic.core.raster.gdal.MosaicRasterGDAL +import com.databricks.labs.mosaic.core.raster.io.RasterCleaner +import com.databricks.labs.mosaic.core.types.RasterTileType +import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile +import com.databricks.labs.mosaic.datasource.gdal.ReTileOnRead +import com.databricks.labs.mosaic.expressions.base.{GenericExpressionFactory, WithExpressionInfo} +import com.databricks.labs.mosaic.functions.MosaicExpressionConfig +import com.databricks.labs.mosaic.utils.PathUtils +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression, Literal, NullIntolerant} +import org.apache.spark.sql.types.{DataType, IntegerType, StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String + +import java.nio.file.{Files, Paths, StandardCopyOption, StandardOpenOption} + +/** + * The raster for construction of a raster tile. This should be the first + * expression in the expression tree for a raster tile. 
+ */ +case class RST_FromContent( + rasterExpr: Expression, + driverExpr: Expression, + parentPathExpr: Expression, + sizeInMB: Expression, + expressionConfig: MosaicExpressionConfig +) extends CollectionGenerator + with Serializable + with NullIntolerant + with CodegenFallback { + + override def dataType: DataType = RasterTileType(expressionConfig.getCellIdType) + + protected val geometryAPI: GeometryAPI = GeometryAPI.apply(expressionConfig.getGeometryAPI) + + protected val indexSystem: IndexSystem = IndexSystemFactory.getIndexSystem(expressionConfig.getIndexSystem) + + protected val cellIdDataType: DataType = indexSystem.getCellIdDataType + + override def position: Boolean = false + + override def inline: Boolean = false + + override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, parentPathExpr, sizeInMB) + + override def elementSchema: StructType = StructType(Array(StructField("tile", dataType))) + + /** + * subdivides raster binary content into tiles of the specified size (in MB). + * @param input + * The input file path. + * @return + * The tiles. 
+ */ + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { + GDAL.enable(expressionConfig) + //parentPath may be null (it is not used here as content may be different) + val parentPath = { + try { + parentPathExpr.eval(input).asInstanceOf[UTF8String].toString + } catch { + case _: Any => null + } + } + val driver = driverExpr.eval(input).asInstanceOf[UTF8String].toString + val ext = GDAL.getExtension(driver) + val raster = rasterExpr.eval(input).asInstanceOf[Array[Byte]] + val targetSize = sizeInMB.eval(input).asInstanceOf[Int] + if (targetSize <= 0 && raster.size <= Integer.MAX_VALUE) { + var tile = MosaicRasterTile(null, raster, parentPath, driver) + val row = tile.formatCellId(indexSystem).serialize() + RasterCleaner.dispose(raster) + RasterCleaner.dispose(tile) + raster = null + tile = null + Seq(InternalRow.fromSeq(Seq(row))) + } else { + // If target size is <0 and we are here that means the file is too big to fit in memory + // - write the initial raster to file (unsplit) + val rasterPath = PathUtils.createTmpFilePath(ext) + Files.write(Paths.get(rasterPath), raster, StandardOpenOption.TRUNCATE_EXISTING) + + // We split to tiles of size 64MB + val size = if (targetSize <= 0) 64 else targetSize + var tiles = ReTileOnRead.localSubdivide(rasterPath, parentPath, size) + val rows = tiles.map(_.formatCellId(indexSystem).serialize()) + tiles.foreach(RasterCleaner.dispose(_)) + Files.deleteIfExists(Paths.get(rasterPath)) + tiles = null + rows.map(row => InternalRow.fromSeq(Seq(row))) + } + } + + override def makeCopy(newArgs: Array[AnyRef]): Expression = + GenericExpressionFactory.makeCopyImpl[RST_FromContent](this, newArgs, children.length, expressionConfig) + + override def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = makeCopy(newChildren.toArray) + +} + +/** Expression info required for the expression registration for spark SQL. 
*/ +object RST_FromContent extends WithExpressionInfo { + + override def name: String = "rst_fromcontent" + + override def usage: String = + """ + |_FUNC_(expr1, expr2, expr3, expr4) - Returns raster tiles from binary content within threshold in MBs. + |""".stripMargin + + override def example: String = + """ + | Examples: + | > SELECT _FUNC_(raster, driver, parentPath, sizeInMB); + | {index_id, raster, parent_path, driver} + | ... + | """.stripMargin + + override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { + (children: Seq[Expression]) => { + val sizeExpr = if (children.length == 3) new Literal(-1, IntegerType) else children(3) + RST_FromContent(children(0), children(1), children(2), sizeExpr, expressionConfig) + } + } + +} diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 29447c347..2222a6c98 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -294,6 +294,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends mosaicRegistry.registerExpression[RST_Subdatasets](expressionConfig) mosaicRegistry.registerExpression[RST_Summary](expressionConfig) mosaicRegistry.registerExpression[RST_Tessellate](expressionConfig) + mosaicRegistry.registerExpression[RST_FromContent](expressionConfig) mosaicRegistry.registerExpression[RST_FromFile](expressionConfig) mosaicRegistry.registerExpression[RST_ToOverlappingTiles](expressionConfig) mosaicRegistry.registerExpression[RST_TryOpen](expressionConfig) @@ -703,6 +704,20 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, 
expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, sizeInMB.expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, parentPath:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, parentPath:String, sizeInMB: Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, parentPath:String): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) From 41a1a56a31f74706cd463bd3dc4aaa6ac2365a96 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 19:10:30 -0500 Subject: [PATCH 054/118] adjusted raster type to MosaicRasterGDAL. 
--- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index ad9a47bdd..18dfd4165 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -70,13 +70,15 @@ case class RST_FromContent( } val driver = driverExpr.eval(input).asInstanceOf[UTF8String].toString val ext = GDAL.getExtension(driver) - val raster = rasterExpr.eval(input).asInstanceOf[Array[Byte]] + val rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] if (targetSize <= 0 && raster.size <= Integer.MAX_VALUE) { + raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) var tile = MosaicRasterTile(null, raster, parentPath, driver) val row = tile.formatCellId(indexSystem).serialize() RasterCleaner.dispose(raster) RasterCleaner.dispose(tile) + rasterArr = null raster = null tile = null Seq(InternalRow.fromSeq(Seq(row))) @@ -84,7 +86,7 @@ case class RST_FromContent( // If target size is <0 and we are here that means the file is too big to fit in memory // - write the initial raster to file (unsplit) val rasterPath = PathUtils.createTmpFilePath(ext) - Files.write(Paths.get(rasterPath), raster, StandardOpenOption.TRUNCATE_EXISTING) + Files.write(Paths.get(rasterPath), rasterArr, StandardOpenOption.TRUNCATE_EXISTING) // We split to tiles of size 64MB val size = if (targetSize <= 0) 64 else targetSize @@ -92,6 +94,7 @@ case class RST_FromContent( val rows = tiles.map(_.formatCellId(indexSystem).serialize()) tiles.foreach(RasterCleaner.dispose(_)) Files.deleteIfExists(Paths.get(rasterPath)) + rasterArr = null tiles = null rows.map(row => InternalRow.fromSeq(Seq(row))) } 
From 932c60ce17a191bf233d066a2bda4d2cdbc79bb2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 19:17:55 -0500 Subject: [PATCH 055/118] val to var for nulling out. --- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 18dfd4165..7fc5fa32b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -70,10 +70,10 @@ case class RST_FromContent( } val driver = driverExpr.eval(input).asInstanceOf[UTF8String].toString val ext = GDAL.getExtension(driver) - val rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] + var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] if (targetSize <= 0 && raster.size <= Integer.MAX_VALUE) { - raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) + var raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) var tile = MosaicRasterTile(null, raster, parentPath, driver) val row = tile.formatCellId(indexSystem).serialize() RasterCleaner.dispose(raster) From 608f401d25efd141cf0e031694e6f0cefe005d1e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 19:21:29 -0500 Subject: [PATCH 056/118] fix variable name after change. 
--- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 7fc5fa32b..54c9b71ce 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -72,7 +72,7 @@ case class RST_FromContent( val ext = GDAL.getExtension(driver) var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] - if (targetSize <= 0 && raster.size <= Integer.MAX_VALUE) { + if (targetSize <= 0 && rasterArr.size <= Integer.MAX_VALUE) { var raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) var tile = MosaicRasterTile(null, raster, parentPath, driver) val row = tile.formatCellId(indexSystem).serialize() From 7ca7d72f90b18f6f141d4f991b055e44517ebf35 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 22:01:59 -0500 Subject: [PATCH 057/118] Simplifying registered signature for initial test. 
--- .github/actions/scala_build/action.yml | 5 +-- .../expressions/raster/RST_FromContent.scala | 9 +++-- .../labs/mosaic/functions/MosaicContext.scala | 39 +++++++++++++------ 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 5ced8b753..b569d73ec 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -28,11 +28,10 @@ runs: # - install numpy first pip install --upgrade pip pip install 'numpy>=${{ matrix.numpy }}' - # - install natives + # - install natives + gdal python sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev python3-gdal - # - install gdal with numpy - pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' + # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30.0.3 diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 54c9b71ce..db32d00ee 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -18,7 +18,7 @@ import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expressio import org.apache.spark.sql.types.{DataType, IntegerType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String -import java.nio.file.{Files, Paths, StandardCopyOption, StandardOpenOption} +import 
java.nio.file.{Files, Paths, StandardOpenOption} /** * The raster for construction of a raster tile. This should be the first @@ -72,7 +72,7 @@ case class RST_FromContent( val ext = GDAL.getExtension(driver) var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] - if (targetSize <= 0 && rasterArr.size <= Integer.MAX_VALUE) { + if (targetSize <= 0 && rasterArr.length <= Integer.MAX_VALUE) { var raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) var tile = MosaicRasterTile(null, raster, parentPath, driver) val row = tile.formatCellId(indexSystem).serialize() @@ -127,8 +127,9 @@ object RST_FromContent extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { - val sizeExpr = if (children.length == 3) new Literal(-1, IntegerType) else children(3) - RST_FromContent(children(0), children(1), children(2), sizeExpr, expressionConfig) + val sizeExpr = if (children.length < 4) new Literal(-1, IntegerType) else children(3) + val pathExpr = if (children.length < 3) new Literal(null, StringType) else children(2) + RST_FromContent(children(0), children(1), pathExpr, sizeExpr, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 2222a6c98..2e853b85b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,20 +704,35 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column, 
parentPath:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(None).cast('string').expr, lit(-1).expr, expressionConfig)) + // -- uncomment after initial testing + // def rst_fromcontent(raster: Column, driver:String): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(None).cast('string').expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, parentPath:Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column, parentPath:String): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, parentPath:String): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, sizeInMB.expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) - def 
rst_fromcontent(raster: Column, driver:String, parentPath:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(-1).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, parentPath:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, parentPath:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column, parentPath:String, sizeInMB: Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, sizeInMB.expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, sizeInMB.expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, parentPath:String, sizeInMB: Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, sizeInMB.expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Int): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column, parentPath:String, sizeInMB: Int): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:String, 
parentPath:String, sizeInMB: Int): Column = + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) From 1cd0201a17ded5d9302ba6f4b3197adc46278229 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 22:31:57 -0500 Subject: [PATCH 058/118] commenting out more for tests. --- .../databricks/labs/mosaic/functions/MosaicContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 2e853b85b..1a346b92d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,11 +704,11 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(None).cast('string').expr, lit(-1).expr, expressionConfig)) + // def rst_fromcontent(raster: Column, driver:Column): Column = + // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(null).cast('string').expr, lit(-1).expr, expressionConfig)) // -- uncomment after initial testing // def rst_fromcontent(raster: Column, driver:String): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(None).cast('string').expr, lit(-1).expr, 
expressionConfig)) + // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(null).cast('string').expr, lit(-1).expr, expressionConfig)) // def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) // def rst_fromcontent(raster: Column, driver:String, parentPath:Column): Column = From e6bf978743b1e7d7c07b42b333dbea89d6f7fdea Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 22:36:17 -0500 Subject: [PATCH 059/118] Missing import. --- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index db32d00ee..0706e0faa 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -15,7 +15,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expression, Literal, NullIntolerant} -import org.apache.spark.sql.types.{DataType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{DataType, IntegerType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String import java.nio.file.{Files, Paths, StandardOpenOption} From 330e1d4037636593c6539ec55b687460489fe150 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 23:09:36 -0500 Subject: [PATCH 060/118] adjusting bash build. 
--- .github/actions/scala_build/action.yml | 2 +- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index b569d73ec..31f481480 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,7 +27,7 @@ runs: sudo apt-get update -y # - install numpy first pip install --upgrade pip - pip install 'numpy>=${{ matrix.numpy }}' + pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} # - install natives + gdal python sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev python3-gdal diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 0706e0faa..ffe670ee6 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -120,7 +120,7 @@ object RST_FromContent extends WithExpressionInfo { override def example: String = """ | Examples: - | > SELECT _FUNC_(raster, driver, parentPath, sizeInMB); + | > SELECT _FUNC_(raster, driver, parent_path, sizeInMB); | {index_id, raster, parent_path, driver} | ... | """.stripMargin From 2aa77c0a8ef3bb64531a090bcdb39e67ffb4bcf5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 3 Jan 2024 23:32:10 -0500 Subject: [PATCH 061/118] adjust test logging. 
--- .../mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala | 2 +- .../labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala index 0f896ad4e..328d4c6ef 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala @@ -11,7 +11,7 @@ trait RST_MapAlgebraBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - spark.sparkContext.setLogLevel("FATAL") + spark.sparkContext.setLogLevel("ERROR") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala index 0ef995280..5e7915b46 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVIBehaviors.scala @@ -11,7 +11,7 @@ trait RST_NDVIBehaviors extends QueryTest { // noinspection MapGetGet def behaviors(indexSystem: IndexSystem, geometryAPI: GeometryAPI): Unit = { - spark.sparkContext.setLogLevel("FATAL") + spark.sparkContext.setLogLevel("ERROR") val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark From 611cbac32fc7f984d3162ddfbf1be66783acbf36 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 00:10:28 -0500 Subject: [PATCH 062/118] Adjust /Volumes logic. 
--- .../labs/mosaic/utils/PathUtils.scala | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala index 965e3fdde..1a902260d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala +++ b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala @@ -9,18 +9,10 @@ import java.nio.file.{Files, Paths} object PathUtils { def getCleanPath(path: String): String = { - //val cleanPath = path.replace("file:/", "/").replace("dbfs:/", "/dbfs/") - val cleanPath = { - if (path.startsWith("file:/")) { - path.replace("file:/", "/") - } else if (path.startsWith("dbfs:/Volumes")) { - path.replace("dbfs:/Volumes", "/Volumes") - } else if (path.startsWith("dbfs:/")) { - path.replace("dbfs:/", "/dbfs/") - } else { - path - } - } + val cleanPath = path + .replace("file:/", "/") + .replace("dbfs:/Volumes", "/Volumes") + .replace("dbfs:/","/dbfs/") if (cleanPath.endsWith(".zip") || cleanPath.contains(".zip:")) { getZipPath(cleanPath) } else { @@ -68,21 +60,12 @@ object PathUtils { result } - def copyToTmp(inPath: String): String = { - val cleanPath = getCleanPath(inPath) - //val copyFromPath = inPath.replace("file:/", "/").replace("dbfs:/", "/dbfs/") - val copyFromPath = { - if (inPath.startsWith("file:/")) { - inPath.replace("file:/", "/") - } else if (inPath.startsWith("dbfs:/Volumes")) { - inPath.replace("dbfs:/Volumes", "/Volumes") - } else if (inPath.startsWith("dbfs:/")) { - inPath.replace("dbfs:/", "/dbfs/") - } else { - inPath - } - } - val driver = MosaicRasterGDAL.identifyDriver(cleanPath) + def copyToTmp(inPath: String): String = { + val copyFromPath = inPath + .replace("file:/", "/") + .replace("dbfs:/Volumes", "/Volumes") + .replace("dbfs:/","/dbfs/") + val driver = MosaicRasterGDAL.identifyDriver(getCleanPath(inPath)) val extension = if (inPath.endsWith(".zip")) "zip" else 
GDAL.getExtension(driver) val tmpPath = createTmpFilePath(extension) Files.copy(Paths.get(copyFromPath), Paths.get(tmpPath)) From 01f83de2a8d1ac7bd6a07de7b8addbd14c8cdaa6 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 00:19:25 -0500 Subject: [PATCH 063/118] set numpy to gdal required 1.26.2. --- .github/workflows/build_main.yml | 2 +- .github/workflows/build_python.yml | 2 +- .github/workflows/build_r.yml | 2 +- .github/workflows/build_scala.yml | 2 +- .github/workflows/pypi-release.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 7380897b7..af82a0900 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.26.2 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index fd2024669..a82b62405 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.26.2 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_r.yml b/.github/workflows/build_r.yml index c0953eb66..59cba5d95 100644 --- a/.github/workflows/build_r.yml +++ b/.github/workflows/build_r.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.26.2 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_scala.yml b/.github/workflows/build_scala.yml index 9505b71e6..c40f4b7b1 100644 --- a/.github/workflows/build_scala.yml +++ b/.github/workflows/build_scala.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.26.2 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/pypi-release.yml 
b/.github/workflows/pypi-release.yml index c844c44a7..c5a11c4f0 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.21.5 ] + numpy: [ 1.26.2 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] From 19f898bc8a3f7ef1581331bfcc55e34bcdf07acd Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 00:40:04 -0500 Subject: [PATCH 064/118] revert pip install gdal (added). --- .github/actions/scala_build/action.yml | 6 ++++-- scripts/mosaic-gdal-init.sh | 9 +++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 31f481480..7130fd7f2 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -28,9 +28,11 @@ runs: # - install numpy first pip install --upgrade pip pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} - # - install natives + gdal python + # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal + sudo apt-get install -y gdal-bin libgdal-dev + # - install gdal with numpy + pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 8674edf27..407950c74 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -29,6 +29,7 @@ WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # - may be changed by conditional logic NUMPY_VERSION=1.26.2 # <- for GDAL SCIPY_VERSION='<1.12,>=1.11' # <- adjusted 
for numpy +GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 # - optional: install Mosaic if [ $WITH_MOSAIC == 1 ] @@ -47,6 +48,7 @@ then if [ $WITH_UBUNTUGIS == 1 ] then sudo add-apt-repository ppa:ubuntugis/ppa + GDAL_VERSION=3.4.3 fi sudo apt-get update -y @@ -56,8 +58,11 @@ then pip install --no-cache-dir --force-reinstall numpy==$NUMPY_VERSION pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" - # - install natives + python gdal - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal + # - install natives + sudo apt-get install -y gdal-bin libgdal-dev + + # - install gdal with numpy + pip install --no-cache-dir --force-reinstall GDAL[numpy]==$GDAL_VERSION # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] From 312daab6d2def984400fdade3c5403c902bd49a5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 01:09:16 -0500 Subject: [PATCH 065/118] temp comment out r build skip_tests:true --- .github/workflows/build_main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index af82a0900..b6fdf73d2 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - - name: build R - uses: ./.github/actions/r_build + # - name: build R + # uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts From 075a9e5a3ce0535c05c0879d5ae5ed1578cf4a59 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 14:25:46 -0500 Subject: [PATCH 066/118] improved numpy handling, byte write fix. 
--- .github/actions/python_build/action.yml | 5 ++--- .github/actions/scala_build/action.yml | 4 ++-- .github/workflows/build_main.yml | 2 +- .github/workflows/build_python.yml | 2 +- .github/workflows/build_r.yml | 2 +- .github/workflows/build_scala.yml | 2 +- scripts/mosaic-gdal-init.sh | 11 ++++++----- .../mosaic/expressions/raster/RST_FromContent.scala | 6 ++++-- .../labs/mosaic/functions/MosaicContext.scala | 2 +- 9 files changed, 19 insertions(+), 17 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 97d9b3af2..b7a36a3a2 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -11,9 +11,8 @@ runs: shell: bash run: | cd python - pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }} - pip install numpy==${{ matrix.numpy }} - pip install --no-build-isolation --no-cache-dir --force-reinstall gdal==${{ matrix.gdal }} + pip install build wheel pyspark==${{ matrix.spark }} numpy~=${{ matrix.numpy }} + pip install GDAL[numpy]==${{ matrix.gdal }} pip install . 
- name: Test and build python package shell: bash diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 7130fd7f2..401dcffe7 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,12 +27,12 @@ runs: sudo apt-get update -y # - install numpy first pip install --upgrade pip - pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} + pip install --force-reinstall numpy~=${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev # - install gdal with numpy - pip install --no-cache-dir --force-reinstall 'GDAL[numpy]==${{ matrix.gdal }}' + pip install GDAL[numpy]>=${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index b6fdf73d2..419c05bad 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.2 ] + numpy: [ 1.26.3 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index a82b62405..9da376e0b 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.2 ] + numpy: [ 1.26.3 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_r.yml b/.github/workflows/build_r.yml index 59cba5d95..ee25dacf5 100644 --- a/.github/workflows/build_r.yml +++ b/.github/workflows/build_r.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: python: [ 
3.10.12 ] - numpy: [ 1.26.2 ] + numpy: [ 1.26.3 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_scala.yml b/.github/workflows/build_scala.yml index c40f4b7b1..e8da8373e 100644 --- a/.github/workflows/build_scala.yml +++ b/.github/workflows/build_scala.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.2 ] + numpy: [ 1.26.3 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 407950c74..bfd975fb8 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 03 JAN, 2024 +# Last Modified: 04 JAN, 2024 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -27,7 +27,8 @@ WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS # - may be changed by conditional logic -NUMPY_VERSION=1.26.2 # <- for GDAL +# - https://docs.scipy.org/doc/scipy/dev/toolchain.html#toolchain-roadmap +NUMPY_VERSION='<1.27,>=1.26' # <- for GDAL SCIPY_VERSION='<1.12,>=1.11' # <- adjusted for numpy GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 @@ -55,14 +56,14 @@ then # - install specific numpy version # - install scipy version (dep conflict) pip install --upgrade pip - pip install --no-cache-dir --force-reinstall numpy==$NUMPY_VERSION - pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" + pip install --force-reinstall "numpy$NUMPY_VERSION" + pip install --force-reinstall "scipy$SCIPY_VERSION" # - install natives sudo apt-get install -y gdal-bin libgdal-dev # - install gdal with numpy - pip install --no-cache-dir --force-reinstall GDAL[numpy]==$GDAL_VERSION + pip install GDAL[numpy]==$GDAL_VERSION # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] diff --git 
a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index ffe670ee6..fa5b1f59b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -18,7 +18,7 @@ import org.apache.spark.sql.catalyst.expressions.{CollectionGenerator, Expressio import org.apache.spark.sql.types.{DataType, IntegerType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String -import java.nio.file.{Files, Paths, StandardOpenOption} +import java.nio.file.{Files, Paths} /** * The raster for construction of a raster tile. This should be the first @@ -85,8 +85,10 @@ case class RST_FromContent( } else { // If target size is <0 and we are here that means the file is too big to fit in memory // - write the initial raster to file (unsplit) + // - repeating the createDirectories for context isolation val rasterPath = PathUtils.createTmpFilePath(ext) - Files.write(Paths.get(rasterPath), rasterArr, StandardOpenOption.TRUNCATE_EXISTING) + Files.createDirectories(Paths.get(rasterPath).getParent) + Files.write(Paths.get(rasterPath), rasterArr) // We split to tiles of size 64MB val size = if (targetSize <= 0) 64 else targetSize diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 1a346b92d..157ba8c69 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -997,7 +997,7 @@ object MosaicContext extends Logging { val tmpDir: String = Files.createTempDirectory("mosaic").toAbsolutePath.toString - val mosaicVersion: String = "0.3.14" + val mosaicVersion: String = "0.4.0" private var instance: Option[MosaicContext] = None From 
b5cbf6ed1ce43d686e0f61329a3bffeb10c0fb1d Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 14:43:23 -0500 Subject: [PATCH 067/118] numpy install mods. --- .github/actions/python_build/action.yml | 3 ++- .github/actions/scala_build/action.yml | 2 +- scripts/mosaic-gdal-init.sh | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index b7a36a3a2..42fca1bbe 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -11,7 +11,8 @@ runs: shell: bash run: | cd python - pip install build wheel pyspark==${{ matrix.spark }} numpy~=${{ matrix.numpy }} + pip install build wheel pyspark==${{ matrix.spark }} + pip install --no-cache-dir --force-reinstall numpy~=${{ matrix.numpy }} pip install GDAL[numpy]==${{ matrix.gdal }} pip install . - name: Test and build python package diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 401dcffe7..3c800aa5d 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,7 +27,7 @@ runs: sudo apt-get update -y # - install numpy first pip install --upgrade pip - pip install --force-reinstall numpy~=${{ matrix.numpy }} + pip install --no-cache-dir --force-reinstall numpy~=${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index bfd975fb8..b97524545 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -56,8 +56,8 @@ then # - install specific numpy version # - install scipy version (dep conflict) pip install --upgrade pip - pip install --force-reinstall "numpy$NUMPY_VERSION" - pip install --force-reinstall "scipy$SCIPY_VERSION" + pip install --no-cache-dir --force-reinstall "numpy$NUMPY_VERSION" + 
pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" # - install natives sudo apt-get install -y gdal-bin libgdal-dev From 1ff7fe87da6b58f9f0cb6c7930d1e22e5672599f Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 14:51:40 -0500 Subject: [PATCH 068/118] apt install python3-gdal (reverted). --- .github/actions/python_build/action.yml | 2 +- .github/actions/scala_build/action.yml | 4 ++-- scripts/mosaic-gdal-init.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 42fca1bbe..8ad37cc7a 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -12,7 +12,7 @@ runs: run: | cd python pip install build wheel pyspark==${{ matrix.spark }} - pip install --no-cache-dir --force-reinstall numpy~=${{ matrix.numpy }} + pip install numpy~=${{ matrix.numpy }} pip install GDAL[numpy]==${{ matrix.gdal }} pip install . - name: Test and build python package diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 3c800aa5d..5478d4737 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,10 +27,10 @@ runs: sudo apt-get update -y # - install numpy first pip install --upgrade pip - pip install --no-cache-dir --force-reinstall numpy~=${{ matrix.numpy }} + pip install numpy~=${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev + sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install GDAL[numpy]>=${{ matrix.gdal }} # - add the so files diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index b97524545..d1cecd07f 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -60,7 +60,7 @@ then pip install --no-cache-dir --force-reinstall 
"scipy$SCIPY_VERSION" # - install natives - sudo apt-get install -y gdal-bin libgdal-dev + sudo apt-get install -y gdal-bin libgdal-dev python3-gdal # - install gdal with numpy pip install GDAL[numpy]==$GDAL_VERSION From 5c384d8874e04b93c92790f8cec37e60c73a92cd Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 15:00:06 -0500 Subject: [PATCH 069/118] removed flexible gdal version. --- .github/actions/scala_build/action.yml | 4 ++-- scripts/mosaic-gdal-init.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 5478d4737..e94c5c7db 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -30,9 +30,9 @@ runs: pip install numpy~=${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal + sudo apt-get install -y gdal-bin libgdal-dev # - install gdal with numpy - pip install GDAL[numpy]>=${{ matrix.gdal }} + pip install GDAL[numpy]==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index d1cecd07f..b97524545 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -60,7 +60,7 @@ then pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" # - install natives - sudo apt-get install -y gdal-bin libgdal-dev python3-gdal + sudo apt-get install -y gdal-bin libgdal-dev # - install gdal with numpy pip install GDAL[numpy]==$GDAL_VERSION From 481aa2464633d151c239648a1973b95858445f3a Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 15:25:02 
-0500 Subject: [PATCH 070/118] force reinstall for gdal in python build. --- .github/actions/python_build/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 8ad37cc7a..72306a29b 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -13,7 +13,7 @@ runs: cd python pip install build wheel pyspark==${{ matrix.spark }} pip install numpy~=${{ matrix.numpy }} - pip install GDAL[numpy]==${{ matrix.gdal }} + pip install --no-cache-dir --force-reinstall GDAL[numpy]==${{ matrix.gdal }} pip install . - name: Test and build python package shell: bash From c4ce19b60da24fa02b9321d4a17781fecbaebd06 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 19:31:08 -0500 Subject: [PATCH 071/118] Shorter MapAlgebra + RST_FromContent tests. --- .../expressions/raster/RST_MapAlgebra.scala | 6 +- .../labs/mosaic/functions/MosaicContext.scala | 33 ++------- .../raster/RST_FromContentBehaviors.scala | 70 +++++++++++++++++++ .../raster/RST_FromContentTest.scala | 31 ++++++++ .../raster/RST_MapAlgebraBehaviors.scala | 1 + 5 files changed, 111 insertions(+), 30 deletions(-) create mode 100644 src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala create mode 100644 src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala index 9cb744128..53e84d96b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala @@ -13,7 +13,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, 
NullIntolerant} import org.apache.spark.unsafe.types.UTF8String -/** The expression for computing NDVI index. */ +/** The expression for map algebra. */ case class RST_MapAlgebra( rastersExpr: Expression, jsonSpecExpr: Expression, @@ -29,13 +29,13 @@ case class RST_MapAlgebra( with CodegenFallback { /** - * Computes NDVI index. + * Map Algebra. * @param tiles * The raster to be used. * @param arg1 * The red band index. * @return - * The raster contains NDVI index. + * The raster (tile) from the calculation. */ override def rasterTransform(tiles: Seq[MosaicRasterTile], arg1: Any): Any = { val jsonSpec = arg1.asInstanceOf[UTF8String].toString diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 157ba8c69..55145e843 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,35 +704,14 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(null).cast('string').expr, lit(-1).expr, expressionConfig)) - // -- uncomment after initial testing - // def rst_fromcontent(raster: Column, driver:String): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(null).cast('string').expr, lit(-1).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, 
driver:String, parentPath:Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(-1).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column, parentPath:String): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:String, parentPath:String): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(null).cast("string").expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, sizeInMB.expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column, parentPath:String, sizeInMB: Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, sizeInMB.expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, sizeInMB.expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:String, parentPath:String, sizeInMB: Column): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, sizeInMB.expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Int): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, 
lit(sizeInMB).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:Column, parentPath:String, sizeInMB: Int): Column = - // ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, parentPath.expr, lit(sizeInMB).expr, expressionConfig)) - // def rst_fromcontent(raster: Column, driver:String, parentPath:String, sizeInMB: Int): Column = - // ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala new file mode 100644 index 000000000..25185f23a --- /dev/null +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -0,0 +1,70 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI +import com.databricks.labs.mosaic.core.index.IndexSystem +import com.databricks.labs.mosaic.functions.MosaicContext +import org.apache.spark.sql.QueryTest +import org.scalatest.matchers.should.Matchers._ + +trait RST_FromContentBehaviors extends QueryTest { + + // noinspection MapGetGet + def behaviors(indexSystem: 
IndexSystem, geometryAPI: GeometryAPI): Unit = { + spark.sparkContext.setLogLevel("ERROR") + val mc = MosaicContext.build(indexSystem, geometryAPI) + mc.register() + val sc = spark + import mc.functions._ + import sc.implicits._ + + val rastersInMemory = spark.read + .format("binaryFile") + .load("src/test/resources/modis") + + val gridTiles = rastersInMemory + .withColumn("tile", rst_fromcontent($"content"), lit("GTiff")) + .withColumn("bbox", rst_boundingbox($"tile")) + .withColumn("cent", st_centroid($"bbox")) + .withColumn("clip_region", st_buffer($"cent", 0.1)) + .withColumn("clip", rst_clip($"tile", $"clip_region")) + .withColumn("bbox2", rst_boundingbox($"clip")) + .withColumn("result", st_area($"bbox") =!= st_area($"bbox2")) + .select("result") + .as[Boolean] + .collect() + + gridTiles.forall(identity) should be(true) + + rastersInMemory.createOrReplaceTempView("source") + + val gridTilesSQL = spark + .sql(""" + |with subquery as ( + | select rst_fromcontent(content) as tile from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() + + gridTilesSQL.forall(identity) should be(true) + + + val gridTilesSQL2 = spark + .sql( + """ + |with subquery as ( + | select rst_fromcontent(content, 4) as tile from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() + + gridTilesSQL2.forall(identity) should be(true) + + } + +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala new file mode 100644 index 000000000..01b350973 --- /dev/null +++ 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentTest.scala @@ -0,0 +1,31 @@ +package com.databricks.labs.mosaic.expressions.raster + +import com.databricks.labs.mosaic.core.geometry.api.JTS +import com.databricks.labs.mosaic.core.index.H3IndexSystem +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSessionGDAL + +import scala.util.Try + +class RST_FromContentTest extends QueryTest with SharedSparkSessionGDAL with RST_FromContentBehaviors { + + private val noCodegen = + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString + ) _ + + // Hotfix for SharedSparkSession afterAll cleanup. + override def afterAll(): Unit = Try(super.afterAll()) + + // These tests are not index system nor geometry API specific. + // Only testing one pairing is sufficient. 
+ test("Testing RST_FromContent with manual GDAL registration (H3, JTS).") { + noCodegen { + assume(System.getProperty("os.name") == "Linux") + behaviors(H3IndexSystem, JTS) + } + } +} diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala index 328d4c6ef..e16e41d6a 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebraBehaviors.scala @@ -21,6 +21,7 @@ trait RST_MapAlgebraBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val gridTiles = rastersInMemory From b141e0f28ff101268344288891e6ca0117efbedf Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:00:53 -0500 Subject: [PATCH 072/118] tweak param order for rst_fromcontent. 
--- .../expressions/raster/RST_FromContent.scala | 14 +++++++------- .../labs/mosaic/functions/MosaicContext.scala | 18 +++++++++++------- .../raster/RST_FromContentBehaviors.scala | 4 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index fa5b1f59b..12ef1401c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -27,8 +27,8 @@ import java.nio.file.{Files, Paths} case class RST_FromContent( rasterExpr: Expression, driverExpr: Expression, - parentPathExpr: Expression, sizeInMB: Expression, + parentPathExpr: Expression, expressionConfig: MosaicExpressionConfig ) extends CollectionGenerator with Serializable @@ -47,7 +47,7 @@ case class RST_FromContent( override def inline: Boolean = false - override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, parentPathExpr, sizeInMB) + override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, sizeInMB, parentPathExpr) override def elementSchema: StructType = StructType(Array(StructField("tile", dataType))) @@ -122,16 +122,16 @@ object RST_FromContent extends WithExpressionInfo { override def example: String = """ | Examples: - | > SELECT _FUNC_(raster, driver, parent_path, sizeInMB); - | {index_id, raster, parent_path, driver} + | > SELECT _FUNC_(raster, driver, sizeInMB, parentPath); + | {index_id, raster, parentPath, driver} | ... 
| """.stripMargin override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { - val sizeExpr = if (children.length < 4) new Literal(-1, IntegerType) else children(3) - val pathExpr = if (children.length < 3) new Literal(null, StringType) else children(2) - RST_FromContent(children(0), children(1), pathExpr, sizeExpr, expressionConfig) + val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) + val pathExpr = if (children.length < 4) new Literal(null, StringType) else children(3) + RST_FromContent(children(0), children(1), sizeExpr, pathExpr, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 55145e843..bb273e4ef 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,13 +705,17 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(null).cast("string").expr, lit(-1).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column, parentPath:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, lit(-1).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column, parentPath:Column, sizeInMB: Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, parentPath.expr, sizeInMB.expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, parentPath:Column, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, 
lit(parentPath).expr, lit(sizeInMB).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast("string").expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast("string").expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast("string").expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast("string").expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast("string").expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index 25185f23a..8a83b6b4e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -40,7 +40,7 @@ trait RST_FromContentBehaviors extends QueryTest { val gridTilesSQL = spark .sql(""" |with 
subquery as ( - | select rst_fromcontent(content) as tile from source + | select rst_fromcontent(content, "GTiff") as tile from source |) |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result |from subquery @@ -55,7 +55,7 @@ trait RST_FromContentBehaviors extends QueryTest { .sql( """ |with subquery as ( - | select rst_fromcontent(content, 4) as tile from source + | select rst_fromcontent(content, "GTiff", 4) as tile from source |) |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result |from subquery From d407888ce961db82171f5ce587441fe4794b6399 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:06:25 -0500 Subject: [PATCH 073/118] adjust expression for optional params. --- .../labs/mosaic/functions/MosaicContext.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index bb273e4ef..592c31be5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,18 +704,20 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast("string").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - 
ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast("string").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast("string").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast("string").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast("string").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) + def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) From 4fe998956da9120c864423a98984cd5a341718f1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:20:01 -0500 Subject: [PATCH 074/118] lit(null) change. 
--- .../labs/mosaic/functions/MosaicContext.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 592c31be5..b9561518c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,20 +704,18 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, 
lit(sizeInMB).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) - def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) From 5659fec96f72b6f0bc08daf9ede939c0b3304f48 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:25:52 -0500 Subject: [PATCH 075/118] removing extra parens. --- .../mosaic/expressions/raster/RST_FromContentBehaviors.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index 8a83b6b4e..bc3348aa5 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -22,7 +22,7 @@ trait RST_FromContentBehaviors extends QueryTest { .load("src/test/resources/modis") val gridTiles = rastersInMemory - .withColumn("tile", rst_fromcontent($"content"), lit("GTiff")) + .withColumn("tile", rst_fromcontent($"content", lit("GTiff")) .withColumn("bbox", rst_boundingbox($"tile")) .withColumn("cent", st_centroid($"bbox")) .withColumn("clip_region", st_buffer($"cent", 0.1)) From 3142a878597a6f96173138d11c6226c7c5120da8 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:29:52 -0500 Subject: [PATCH 076/118] Adding missing parens. 
--- .../mosaic/expressions/raster/RST_FromContentBehaviors.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index bc3348aa5..1cb2367ed 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -22,7 +22,7 @@ trait RST_FromContentBehaviors extends QueryTest { .load("src/test/resources/modis") val gridTiles = rastersInMemory - .withColumn("tile", rst_fromcontent($"content", lit("GTiff")) + .withColumn("tile", rst_fromcontent($"content", lit("GTiff"))) .withColumn("bbox", rst_boundingbox($"tile")) .withColumn("cent", st_centroid($"bbox")) .withColumn("clip_region", st_buffer($"cent", 0.1)) From 3d53bd7b7a240fbb85956bee4ccc0cf1875cc7d7 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:35:46 -0500 Subject: [PATCH 077/118] added missing import. 
--- .../mosaic/expressions/raster/RST_FromContentBehaviors.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index 1cb2367ed..de1dbad9e 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -14,8 +14,10 @@ trait RST_FromContentBehaviors extends QueryTest { val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark + import mc.functions._ import sc.implicits._ + import org.apache.spark.sql.functions._ val rastersInMemory = spark.read .format("binaryFile") From 7d003cbd14c87d0b2294b1cc1c0320169073ce97 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 20:46:21 -0500 Subject: [PATCH 078/118] cast lit null to string. 
--- .../labs/mosaic/functions/MosaicContext.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index b9561518c..17eb240a9 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): 
Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) From 0ddc0561cafd401baa15fa5354f30b26f35ac6b2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 21:00:24 -0500 Subject: [PATCH 079/118] parentPath defaults to empty string vs null. --- .../mosaic/expressions/raster/RST_FromContent.scala | 2 +- .../labs/mosaic/functions/MosaicContext.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 12ef1401c..026f98020 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -130,7 +130,7 @@ object RST_FromContent extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) - val pathExpr = if (children.length < 4) new Literal(null, StringType) else children(3) + val pathExpr = if (children.length < 4) new Literal("", StringType) else children(3) RST_FromContent(children(0), children(1), sizeExpr, pathExpr, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala 
b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 17eb240a9..2eb89dd0e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, 
lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) From a7ea6db10164bc44f2d7dc3583d49bf40d274db5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 21:16:04 -0500 Subject: [PATCH 080/118] adjust string literal handling. --- .../mosaic/expressions/raster/RST_FromContent.scala | 2 +- .../labs/mosaic/functions/MosaicContext.scala | 10 +++++----- .../expressions/raster/RST_FromContentBehaviors.scala | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 026f98020..aca6e019f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -130,7 +130,7 @@ object RST_FromContent extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) - val pathExpr = if (children.length < 4) new Literal("", StringType) else children(3) + val pathExpr = if (children.length < 4) new Literal("", StringType) else children(3) RST_FromContent(children(0), children(1), sizeExpr, pathExpr, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 2eb89dd0e..23fd97f96 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala 
+++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def 
rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index de1dbad9e..e36062e1b 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -14,7 +14,7 @@ trait RST_FromContentBehaviors extends QueryTest { val mc = MosaicContext.build(indexSystem, geometryAPI) mc.register() val sc = spark - + import mc.functions._ import sc.implicits._ import org.apache.spark.sql.functions._ @@ -24,7 +24,7 @@ trait RST_FromContentBehaviors extends QueryTest { .load("src/test/resources/modis") val gridTiles = rastersInMemory - .withColumn("tile", rst_fromcontent($"content", lit("GTiff"))) + .withColumn("tile", rst_fromcontent($"content", "GTiff")) .withColumn("bbox", rst_boundingbox($"tile")) .withColumn("cent", st_centroid($"bbox")) .withColumn("clip_region", st_buffer($"cent", 0.1)) From 1d8f786248166b76ca5eb91d740621df0b18ea15 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 21:30:37 -0500 Subject: [PATCH 081/118] adjusting quotes in testing. 
--- .../expressions/raster/RST_FromContentBehaviors.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index e36062e1b..a84462a40 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -24,7 +24,7 @@ trait RST_FromContentBehaviors extends QueryTest { .load("src/test/resources/modis") val gridTiles = rastersInMemory - .withColumn("tile", rst_fromcontent($"content", "GTiff")) + .withColumn("tile", rst_fromcontent($"content", lit("GTiff"))) .withColumn("bbox", rst_boundingbox($"tile")) .withColumn("cent", st_centroid($"bbox")) .withColumn("clip_region", st_buffer($"cent", 0.1)) @@ -42,7 +42,7 @@ trait RST_FromContentBehaviors extends QueryTest { val gridTilesSQL = spark .sql(""" |with subquery as ( - | select rst_fromcontent(content, "GTiff") as tile from source + | select rst_fromcontent(content, 'GTiff') as tile from source |) |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result |from subquery @@ -57,7 +57,7 @@ trait RST_FromContentBehaviors extends QueryTest { .sql( """ |with subquery as ( - | select rst_fromcontent(content, "GTiff", 4) as tile from source + | select rst_fromcontent(content, 'GTiff', 4) as tile from source |) |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result |from subquery From 8d25fd75eb51575bc45d640079815ec09d4539d5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 22:18:23 -0500 Subject: [PATCH 082/118] Stripping back tests for string literal. 
--- .../expressions/raster/RST_FromContent.scala | 2 +- .../raster/RST_FromContentBehaviors.scala | 50 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index aca6e019f..12ef1401c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -130,7 +130,7 @@ object RST_FromContent extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) - val pathExpr = if (children.length < 4) new Literal("", StringType) else children(3) + val pathExpr = if (children.length < 4) new Literal(null, StringType) else children(3) RST_FromContent(children(0), children(1), sizeExpr, pathExpr, expressionConfig) } } diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index a84462a40..ccdaee0f4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -24,7 +24,7 @@ trait RST_FromContentBehaviors extends QueryTest { .load("src/test/resources/modis") val gridTiles = rastersInMemory - .withColumn("tile", rst_fromcontent($"content", lit("GTiff"))) + .withColumn("tile", rst_fromcontent($"content", "GTiff")) .withColumn("bbox", rst_boundingbox($"tile")) .withColumn("cent", st_centroid($"bbox")) .withColumn("clip_region", st_buffer($"cent", 0.1)) @@ -37,35 +37,35 @@ trait RST_FromContentBehaviors extends QueryTest 
{ gridTiles.forall(identity) should be(true) - rastersInMemory.createOrReplaceTempView("source") + // rastersInMemory.createOrReplaceTempView("source") - val gridTilesSQL = spark - .sql(""" - |with subquery as ( - | select rst_fromcontent(content, 'GTiff') as tile from source - |) - |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result - |from subquery - |""".stripMargin) - .as[Boolean] - .collect() + // val gridTilesSQL = spark + // .sql(""" + // |with subquery as ( + // | select rst_fromcontent(content, 'GTiff') as tile from source + // |) + // |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + // |from subquery + // |""".stripMargin) + // .as[Boolean] + // .collect() - gridTilesSQL.forall(identity) should be(true) + // gridTilesSQL.forall(identity) should be(true) - val gridTilesSQL2 = spark - .sql( - """ - |with subquery as ( - | select rst_fromcontent(content, 'GTiff', 4) as tile from source - |) - |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result - |from subquery - |""".stripMargin) - .as[Boolean] - .collect() + // val gridTilesSQL2 = spark + // .sql( + // """ + // |with subquery as ( + // | select rst_fromcontent(content, 'GTiff', 4) as tile from source + // |) + // |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + // |from subquery + // |""".stripMargin) + // .as[Boolean] + // .collect() - gridTilesSQL2.forall(identity) should be(true) + // gridTilesSQL2.forall(identity) should be(true) } From 383aef07ed78f30ce98ef877043d97e7072292b6 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 22:30:41 -0500 Subject: [PATCH 083/118] add back R tests and string literal. 
--- .github/workflows/build_main.yml | 4 +- .../labs/mosaic/functions/MosaicContext.scala | 10 ++-- .../raster/RST_FromContentBehaviors.scala | 48 +++++++++---------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 419c05bad..aa8da5158 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - # - name: build R - # uses: ./.github/actions/r_build + - name: build R + uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 23fd97f96..b9561518c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, 
sizeInMB.expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala index ccdaee0f4..5f269044f 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContentBehaviors.scala @@ -37,35 +37,35 @@ trait RST_FromContentBehaviors extends QueryTest { gridTiles.forall(identity) should be(true) - // rastersInMemory.createOrReplaceTempView("source") + rastersInMemory.createOrReplaceTempView("source") - // val gridTilesSQL = spark - // .sql(""" - // |with subquery as ( - // | select rst_fromcontent(content, 'GTiff') as tile from source - // |) - // |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, 
st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result - // |from subquery - // |""".stripMargin) - // .as[Boolean] - // .collect() + val gridTilesSQL = spark + .sql(""" + |with subquery as ( + | select rst_fromcontent(content, 'GTiff') as tile from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() - // gridTilesSQL.forall(identity) should be(true) + gridTilesSQL.forall(identity) should be(true) - // val gridTilesSQL2 = spark - // .sql( - // """ - // |with subquery as ( - // | select rst_fromcontent(content, 'GTiff', 4) as tile from source - // |) - // |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result - // |from subquery - // |""".stripMargin) - // .as[Boolean] - // .collect() + val gridTilesSQL2 = spark + .sql( + """ + |with subquery as ( + | select rst_fromcontent(content, 'GTiff', 4) as tile from source + |) + |select st_area(rst_boundingbox(tile)) != st_area(rst_boundingbox(rst_clip(tile, st_buffer(st_centroid(rst_boundingbox(tile)), 0.1)))) as result + |from subquery + |""".stripMargin) + .as[Boolean] + .collect() - // gridTilesSQL2.forall(identity) should be(true) + gridTilesSQL2.forall(identity) should be(true) } From 9f27423e6b6e48c6ee83710ccd1a8dfe783a5196 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 22:52:10 -0500 Subject: [PATCH 084/118] literal string vs null. 
--- .../labs/mosaic/functions/MosaicContext.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index b9561518c..2eb89dd0e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, 
lit(sizeInMB).expr, lit(null).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) From bc97d1a18642708bea5aeba1465772316bc382a2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 22:59:20 -0500 Subject: [PATCH 085/118] lit(null).cast(StringType) testing. --- .../labs/mosaic/functions/MosaicContext.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 2eb89dd0e..17eb240a9 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,15 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) def 
rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit("").expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) From fa369e7f3a425500a02558fba3da3846323104af Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 23:11:58 -0500 Subject: [PATCH 086/118] remove parentPath arg from rst_fromcontent. 
--- .../expressions/raster/RST_FromContent.scala | 24 ++++++------------- .../labs/mosaic/functions/MosaicContext.scala | 12 ++++------ 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 12ef1401c..b2b78643f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -28,7 +28,6 @@ case class RST_FromContent( rasterExpr: Expression, driverExpr: Expression, sizeInMB: Expression, - parentPathExpr: Expression, expressionConfig: MosaicExpressionConfig ) extends CollectionGenerator with Serializable @@ -47,7 +46,7 @@ case class RST_FromContent( override def inline: Boolean = false - override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, sizeInMB, parentPathExpr) + override def children: Seq[Expression] = Seq(rasterExpr, driverExpr, sizeInMB) override def elementSchema: StructType = StructType(Array(StructField("tile", dataType))) @@ -60,21 +59,13 @@ case class RST_FromContent( */ override def eval(input: InternalRow): TraversableOnce[InternalRow] = { GDAL.enable(expressionConfig) - //parentPath may be null (it is not used here as content may be different) - val parentPath = { - try { - parentPathExpr.eval(input).asInstanceOf[UTF8String].toString - } catch { - case _: Any => null - } - } val driver = driverExpr.eval(input).asInstanceOf[UTF8String].toString val ext = GDAL.getExtension(driver) var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] if (targetSize <= 0 && rasterArr.length <= Integer.MAX_VALUE) { - var raster = MosaicRasterGDAL.readRaster(rasterArr, parentPath, driver) - var tile = MosaicRasterTile(null, raster, parentPath, driver) + var raster = 
MosaicRasterGDAL.readRaster(rasterArr, null, driver) + var tile = MosaicRasterTile(null, raster, null, driver) val row = tile.formatCellId(indexSystem).serialize() RasterCleaner.dispose(raster) RasterCleaner.dispose(tile) @@ -92,7 +83,7 @@ case class RST_FromContent( // We split to tiles of size 64MB val size = if (targetSize <= 0) 64 else targetSize - var tiles = ReTileOnRead.localSubdivide(rasterPath, parentPath, size) + var tiles = ReTileOnRead.localSubdivide(rasterPath, null, size) val rows = tiles.map(_.formatCellId(indexSystem).serialize()) tiles.foreach(RasterCleaner.dispose(_)) Files.deleteIfExists(Paths.get(rasterPath)) @@ -116,13 +107,13 @@ object RST_FromContent extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3, expr4) - Returns raster tiles from binary content within threshold in MBs. + |_FUNC_(expr1, expr2, expr3) - Returns raster tiles from binary content within threshold in MBs. |""".stripMargin override def example: String = """ | Examples: - | > SELECT _FUNC_(raster, driver, sizeInMB, parentPath); + | > SELECT _FUNC_(raster, driver, sizeInMB); | {index_id, raster, parentPath, driver} | ... 
| """.stripMargin @@ -130,8 +121,7 @@ object RST_FromContent extends WithExpressionInfo { override def builder(expressionConfig: MosaicExpressionConfig): FunctionBuilder = { (children: Seq[Expression]) => { val sizeExpr = if (children.length < 3) new Literal(-1, IntegerType) else children(2) - val pathExpr = if (children.length < 4) new Literal(null, StringType) else children(3) - RST_FromContent(children(0), children(1), sizeExpr, pathExpr, expressionConfig) + RST_FromContent(children(0), children(1), sizeExpr, expressionConfig) } } diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 17eb240a9..fbb0bb922 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,17 +705,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) def 
rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, lit(null).cast(StringType).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int, parentPath:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, parentPath.expr, expressionConfig)) + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) From 98d90602e494dce02b31fbdf1e31681b26676b5b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Thu, 4 Jan 2024 23:34:47 -0500 Subject: [PATCH 087/118] add path null check for cleanup --- .../mosaic/core/raster/gdal/MosaicRasterGDAL.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 4f51749dc..106d25a99 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -344,14 +344,16 @@ case class MosaicRasterGDAL( * bytes. 
*/ def cleanUp(): Unit = { - val isSubdataset = PathUtils.isSubdataset(path) - val filePath = if (isSubdataset) PathUtils.fromSubdatasetPath(path) else path - val pamFilePath = s"$filePath.aux.xml" - if (path != PathUtils.getCleanPath(parentPath)) { + if (path != null){ + val isSubdataset = PathUtils.isSubdataset(path) + val filePath = if (isSubdataset) PathUtils.fromSubdatasetPath(path) else path + val pamFilePath = s"$filePath.aux.xml" + if (path != PathUtils.getCleanPath(parentPath)) { Try(gdal.GetDriverByName(driverShortName).Delete(path)) Try(Files.deleteIfExists(Paths.get(path))) Try(Files.deleteIfExists(Paths.get(filePath))) Try(Files.deleteIfExists(Paths.get(pamFilePath))) + } } } From 384d81ca7d3b807189202ba8baa2d07712714df2 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 09:12:43 -0500 Subject: [PATCH 088/118] safer cleanup for null or SecurityException paths. --- .../core/raster/gdal/MosaicRasterGDAL.scala | 47 +++++++++++++++---- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 106d25a99..b98ef1eaa 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -344,17 +344,46 @@ case class MosaicRasterGDAL( * bytes. 
*/ def cleanUp(): Unit = { - if (path != null){ - val isSubdataset = PathUtils.isSubdataset(path) - val filePath = if (isSubdataset) PathUtils.fromSubdatasetPath(path) else path - val pamFilePath = s"$filePath.aux.xml" - if (path != PathUtils.getCleanPath(parentPath)) { - Try(gdal.GetDriverByName(driverShortName).Delete(path)) - Try(Files.deleteIfExists(Paths.get(path))) - Try(Files.deleteIfExists(Paths.get(filePath))) - Try(Files.deleteIfExists(Paths.get(pamFilePath))) + if (path != null){ + val cleanPath = PathUtils.getCleanPath(path) + val cleanParent = { + Try { + PathUtils.getCleanPath(parentPath) + } catch { + case _: Any => null } } + val hasParent = cleanParent != null + val isParent = hasParent && cleanPath != cleanParent + //need this for SecurityException on volume access blocked + val isAccessible = { + Try { + Files.exists(Paths.get(cleanPath)) + } catch { + case _: Any => false + } + } + if (!isParent && isAccessible) { + Try(gdal.GetDriverByName(driverShortName).Delete(cleanPath)) + Try(Files.deleteIfExists(Paths.get(cleanPath))) + Try(Files.deleteIfExists(Paths.get(s"$cleanPath.aux.xml"))) + } + if (!isParent && PathUtils.isSubdataset(path)) { + val filePath = PathUtils.fromSubdatasetPath(path) + //need this for SecurityException on volume access blocked + val isFileAccessible = { + Try { + Files.exists(filePath) + } catch { + case _: Any => false + } + if (isFileAccessible) { + Try(Files.deleteIfExists(Paths.get(filePath))) + Try(Files.deleteIfExists(Paths.get(s"$filePath.aux.xml"))) + } + } + } + } } /** From 1d8ad690b6a99606c33f6821572586b9dcbaaaae Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 09:36:56 -0500 Subject: [PATCH 089/118] SecurityException handling. 
--- .../core/raster/gdal/MosaicRasterGDAL.scala | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index b98ef1eaa..11c95b21e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -346,41 +346,40 @@ case class MosaicRasterGDAL( def cleanUp(): Unit = { if (path != null){ val cleanPath = PathUtils.getCleanPath(path) - val cleanParent = { - Try { - PathUtils.getCleanPath(parentPath) - } catch { - case _: Any => null - } + var cleanParent = null + Try { + cleanParent = PathUtils.getCleanPath(parentPath) + } catch { + case _: Any => () } val hasParent = cleanParent != null val isParent = hasParent && cleanPath != cleanParent + //need this for SecurityException on volume access blocked - val isAccessible = { - Try { - Files.exists(Paths.get(cleanPath)) - } catch { - case _: Any => false - } + var isAccessible = false + Try { + isAccessible = Files.exists(Paths.get(cleanPath)) + } catch { + case _: Any => () } if (!isParent && isAccessible) { Try(gdal.GetDriverByName(driverShortName).Delete(cleanPath)) Try(Files.deleteIfExists(Paths.get(cleanPath))) Try(Files.deleteIfExists(Paths.get(s"$cleanPath.aux.xml"))) } + if (!isParent && PathUtils.isSubdataset(path)) { val filePath = PathUtils.fromSubdatasetPath(path) //need this for SecurityException on volume access blocked - val isFileAccessible = { - Try { - Files.exists(filePath) - } catch { - case _: Any => false - } - if (isFileAccessible) { - Try(Files.deleteIfExists(Paths.get(filePath))) - Try(Files.deleteIfExists(Paths.get(s"$filePath.aux.xml"))) - } + var isFileAccessible = false + Try { + isFileAccessible = Files.exists(filePath) + } catch { + case _: Any => () + } + if (isFileAccessible) { 
+ Try(Files.deleteIfExists(Paths.get(filePath))) + Try(Files.deleteIfExists(Paths.get(s"$filePath.aux.xml"))) } } } From 21f8ae3c942e3b38e7907d16e2c746c00cec9903 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 09:47:37 -0500 Subject: [PATCH 090/118] try catch fix. --- .../core/raster/gdal/MosaicRasterGDAL.scala | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 11c95b21e..679f96cbd 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -346,21 +346,23 @@ case class MosaicRasterGDAL( def cleanUp(): Unit = { if (path != null){ val cleanPath = PathUtils.getCleanPath(path) - var cleanParent = null - Try { - cleanParent = PathUtils.getCleanPath(parentPath) - } catch { - case _: Any => () + val cleanParent = { + try { + PathUtils.getCleanPath(parentPath) + } catch { + case _: Any => null + } } val hasParent = cleanParent != null val isParent = hasParent && cleanPath != cleanParent - + //need this for SecurityException on volume access blocked - var isAccessible = false - Try { - isAccessible = Files.exists(Paths.get(cleanPath)) - } catch { - case _: Any => () + val isAccessible = { + try { + Files.exists(Paths.get(cleanPath)) + } catch { + case _: Any => false + } } if (!isParent && isAccessible) { Try(gdal.GetDriverByName(driverShortName).Delete(cleanPath)) @@ -371,11 +373,12 @@ case class MosaicRasterGDAL( if (!isParent && PathUtils.isSubdataset(path)) { val filePath = PathUtils.fromSubdatasetPath(path) //need this for SecurityException on volume access blocked - var isFileAccessible = false - Try { - isFileAccessible = Files.exists(filePath) - } catch { - case _: Any => () + val isFileAccessible = { + try { + 
Files.exists(filePath) + } catch { + case _: Any => false + } } if (isFileAccessible) { Try(Files.deleteIfExists(Paths.get(filePath))) From cf473e55a17ed8395942e2ea801fa903acfbbaa1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 09:52:04 -0500 Subject: [PATCH 091/118] fix Files.exists. --- .../labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 679f96cbd..b51c0d6c4 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -375,7 +375,7 @@ case class MosaicRasterGDAL( //need this for SecurityException on volume access blocked val isFileAccessible = { try { - Files.exists(filePath) + Files.exists(Paths.get(filePath)) } catch { case _: Any => false } From 3879809ef05f26911cfbb907417e37d45599451f Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 10:10:36 -0500 Subject: [PATCH 092/118] relaxed subdataset cleanup criteria. --- .../labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index b51c0d6c4..7006330c8 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -344,7 +344,7 @@ case class MosaicRasterGDAL( * bytes. 
*/ def cleanUp(): Unit = { - if (path != null){ + if (path != null) { val cleanPath = PathUtils.getCleanPath(path) val cleanParent = { try { @@ -370,7 +370,7 @@ case class MosaicRasterGDAL( Try(Files.deleteIfExists(Paths.get(s"$cleanPath.aux.xml"))) } - if (!isParent && PathUtils.isSubdataset(path)) { + if (PathUtils.isSubdataset(path)) { val filePath = PathUtils.fromSubdatasetPath(path) //need this for SecurityException on volume access blocked val isFileAccessible = { From a4c7c565bead4bb683ae3cce2de737e15db3c61b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 10:18:57 -0500 Subject: [PATCH 093/118] revert cleanup, wrap in try catch. --- .../core/raster/gdal/MosaicRasterGDAL.scala | 49 ++++--------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 7006330c8..39a21f6ae 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -344,47 +344,18 @@ case class MosaicRasterGDAL( * bytes. 
*/ def cleanUp(): Unit = { - if (path != null) { - val cleanPath = PathUtils.getCleanPath(path) - val cleanParent = { - try { - PathUtils.getCleanPath(parentPath) - } catch { - case _: Any => null - } - } - val hasParent = cleanParent != null - val isParent = hasParent && cleanPath != cleanParent - - //need this for SecurityException on volume access blocked - val isAccessible = { - try { - Files.exists(Paths.get(cleanPath)) - } catch { - case _: Any => false - } - } - if (!isParent && isAccessible) { - Try(gdal.GetDriverByName(driverShortName).Delete(cleanPath)) - Try(Files.deleteIfExists(Paths.get(cleanPath))) - Try(Files.deleteIfExists(Paths.get(s"$cleanPath.aux.xml"))) - } - - if (PathUtils.isSubdataset(path)) { - val filePath = PathUtils.fromSubdatasetPath(path) - //need this for SecurityException on volume access blocked - val isFileAccessible = { - try { - Files.exists(Paths.get(filePath)) - } catch { - case _: Any => false - } - } - if (isFileAccessible) { + try { + val isSubdataset = PathUtils.isSubdataset(path) + val filePath = if (isSubdataset) PathUtils.fromSubdatasetPath(path) else path + val pamFilePath = s"$filePath.aux.xml" + if (path != PathUtils.getCleanPath(parentPath)) { + Try(gdal.GetDriverByName(driverShortName).Delete(path)) + Try(Files.deleteIfExists(Paths.get(path))) Try(Files.deleteIfExists(Paths.get(filePath))) - Try(Files.deleteIfExists(Paths.get(s"$filePath.aux.xml"))) - } + Try(Files.deleteIfExists(Paths.get(pamFilePath))) } + } catch { + case _: Any => () } } From b4472467947c386bd95aa930a10a9aa937283c18 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 10:36:42 -0500 Subject: [PATCH 094/118] Added PathUtils.NO_PATH_STRING --- .../labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala | 4 ---- .../labs/mosaic/expressions/raster/RST_FromContent.scala | 6 +++--- .../scala/com/databricks/labs/mosaic/utils/PathUtils.scala | 2 ++ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git 
a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala index 39a21f6ae..4f51749dc 100644 --- a/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala +++ b/src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala @@ -344,7 +344,6 @@ case class MosaicRasterGDAL( * bytes. */ def cleanUp(): Unit = { - try { val isSubdataset = PathUtils.isSubdataset(path) val filePath = if (isSubdataset) PathUtils.fromSubdatasetPath(path) else path val pamFilePath = s"$filePath.aux.xml" @@ -354,9 +353,6 @@ case class MosaicRasterGDAL( Try(Files.deleteIfExists(Paths.get(filePath))) Try(Files.deleteIfExists(Paths.get(pamFilePath))) } - } catch { - case _: Any => () - } } /** diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index b2b78643f..6d8f4543c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -64,8 +64,8 @@ case class RST_FromContent( var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] if (targetSize <= 0 && rasterArr.length <= Integer.MAX_VALUE) { - var raster = MosaicRasterGDAL.readRaster(rasterArr, null, driver) - var tile = MosaicRasterTile(null, raster, null, driver) + var raster = MosaicRasterGDAL.readRaster(rasterArr, PathUtils.NO_PATH_STRING, driver) + var tile = MosaicRasterTile(null, raster, PathUtils.NO_PATH_STRING, driver) val row = tile.formatCellId(indexSystem).serialize() RasterCleaner.dispose(raster) RasterCleaner.dispose(tile) @@ -83,7 +83,7 @@ case class RST_FromContent( // We split to tiles of size 64MB val size = if (targetSize <= 0) 64 else targetSize - var tiles = 
ReTileOnRead.localSubdivide(rasterPath, null, size) + var tiles = ReTileOnRead.localSubdivide(rasterPath, PathUtils.NO_PATH_STRING, size) val rows = tiles.map(_.formatCellId(indexSystem).serialize()) tiles.foreach(RasterCleaner.dispose(_)) Files.deleteIfExists(Paths.get(rasterPath)) diff --git a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala index 1a902260d..d48c03bfd 100644 --- a/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala +++ b/src/main/scala/com/databricks/labs/mosaic/utils/PathUtils.scala @@ -8,6 +8,8 @@ import java.nio.file.{Files, Paths} object PathUtils { + val NO_PATH_STRING = "no_path" + def getCleanPath(path: String): String = { val cleanPath = path .replace("file:/", "/") From f493a509cc96dc597358260eb3bcd3440d67f336 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 11:47:45 -0500 Subject: [PATCH 095/118] numpy to 1.22.4 (avoid scipy conflict). R off. --- .github/actions/python_build/action.yml | 2 +- .github/actions/scala_build/action.yml | 2 +- .github/workflows/build_main.yml | 6 +++--- .github/workflows/build_python.yml | 2 +- .github/workflows/build_r.yml | 2 +- .github/workflows/build_scala.yml | 2 +- .github/workflows/pypi-release.yml | 2 +- scripts/mosaic-gdal-init.sh | 7 ++----- .../mosaic/expressions/raster/RST_MergeAggBehaviors.scala | 1 + .../mosaic/expressions/raster/RST_MergeBehaviors.scala | 1 + 10 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 72306a29b..3b7f84880 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -12,7 +12,7 @@ runs: run: | cd python pip install build wheel pyspark==${{ matrix.spark }} - pip install numpy~=${{ matrix.numpy }} + pip install numpy==${{ matrix.numpy }} pip install --no-cache-dir --force-reinstall GDAL[numpy]==${{ matrix.gdal }} pip 
install . - name: Test and build python package diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index e94c5c7db..30936ba7e 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,7 +27,7 @@ runs: sudo apt-get update -y # - install numpy first pip install --upgrade pip - pip install numpy~=${{ matrix.numpy }} + pip install numpy==${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index aa8da5158..4f0c676cd 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.3 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - - name: build R - uses: ./.github/actions/r_build + # - name: build R + # uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index 9da376e0b..30d62cb3c 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.3 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_r.yml b/.github/workflows/build_r.yml index ee25dacf5..986ca744d 100644 --- a/.github/workflows/build_r.yml +++ b/.github/workflows/build_r.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.3 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/build_scala.yml b/.github/workflows/build_scala.yml index 
e8da8373e..0269130d8 100644 --- a/.github/workflows/build_scala.yml +++ b/.github/workflows/build_scala.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.3 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index c5a11c4f0..251ead879 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: python: [ 3.10.12 ] - numpy: [ 1.26.2 ] + numpy: [ 1.22.4 ] gdal: [ 3.4.1 ] spark: [ 3.4.0 ] R: [ 4.2.2 ] diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index b97524545..190faa412 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -11,7 +11,7 @@ # - setup_gdal(...) # [4] this script has conditional logic based on variables # Author: Michael Johns | mjohns@databricks.com -# Last Modified: 04 JAN, 2024 +# Last Modified: 05 JAN, 2024 # TEMPLATE-BASED REPLACEMENT # - can also be manually specified @@ -28,8 +28,7 @@ WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS # - may be changed by conditional logic # - https://docs.scipy.org/doc/scipy/dev/toolchain.html#toolchain-roadmap -NUMPY_VERSION='<1.27,>=1.26' # <- for GDAL -SCIPY_VERSION='<1.12,>=1.11' # <- adjusted for numpy +NUMPY_VERSION='<1.23,>=1.22' # <- for GDAL + Mosaic GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 # - optional: install Mosaic @@ -54,10 +53,8 @@ then sudo apt-get update -y # - install specific numpy version - # - install scipy version (dep conflict) pip install --upgrade pip pip install --no-cache-dir --force-reinstall "numpy$NUMPY_VERSION" - pip install --no-cache-dir --force-reinstall "scipy$SCIPY_VERSION" # - install natives sudo apt-get install -y gdal-bin libgdal-dev diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala 
b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala index 38d5d3ed9..8698b46af 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAggBehaviors.scala @@ -20,6 +20,7 @@ trait RST_MergeAggBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val gridTiles = rastersInMemory diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala index fae8e5913..893d6bdf4 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeBehaviors.scala @@ -21,6 +21,7 @@ trait RST_MergeBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val gridTiles = rastersInMemory From 19dfdac730f4660bf6803e46144d9e5731424b8c Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 12:27:36 -0500 Subject: [PATCH 096/118] numpy to 1.22.4 (removed gdal[numpy]). 
--- .github/actions/python_build/action.yml | 2 +- .github/actions/scala_build/action.yml | 2 +- scripts/mosaic-gdal-init.sh | 11 ++++++----- .../expressions/raster/RST_DerivedBandBehaviors.scala | 1 + 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 3b7f84880..1ff5c6f2b 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -13,7 +13,7 @@ runs: cd python pip install build wheel pyspark==${{ matrix.spark }} pip install numpy==${{ matrix.numpy }} - pip install --no-cache-dir --force-reinstall GDAL[numpy]==${{ matrix.gdal }} + pip install gdal==${{ matrix.gdal }} pip install . - name: Test and build python package shell: bash diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 30936ba7e..d6bf2463a 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -32,7 +32,7 @@ runs: sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev # - install gdal with numpy - pip install GDAL[numpy]==${{ matrix.gdal }} + pip install gdal==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 190faa412..92a008978 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -28,8 +28,8 @@ WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS # - may be changed by conditional logic # - https://docs.scipy.org/doc/scipy/dev/toolchain.html#toolchain-roadmap -NUMPY_VERSION='<1.23,>=1.22' # <- for GDAL + Mosaic -GDAL_VERSION=3.4.1 # <- 
ubuntugis is 3.4.3 +NUMPY_VERSION='==1.22.4' # <- for GDAL + Mosaic +GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 # - optional: install Mosaic if [ $WITH_MOSAIC == 1 ] @@ -53,14 +53,15 @@ then sudo apt-get update -y # - install specific numpy version + # - install scipy version (dep conflict) pip install --upgrade pip - pip install --no-cache-dir --force-reinstall "numpy$NUMPY_VERSION" + pip install "numpy$NUMPY_VERSION" # - install natives sudo apt-get install -y gdal-bin libgdal-dev - # - install gdal with numpy - pip install GDAL[numpy]==$GDAL_VERSION + # - install gdal + pip install gdal==$GDAL_VERSION # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala index 753392b01..ef6466a88 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandBehaviors.scala @@ -21,6 +21,7 @@ trait RST_DerivedBandBehaviors extends QueryTest { val rastersInMemory = spark.read .format("gdal") .option("raster_storage", "in-memory") + .option("pathGlobFilter", "*_B01.TIF") .load("src/test/resources/modis") val funcName = "multiply" From 9930f47cd4b76bf55d19940a5b744a4306707741 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 13:57:57 -0500 Subject: [PATCH 097/118] numpy rationalization. 
--- .github/actions/python_build/action.yml | 5 +++-- .github/actions/scala_build/action.yml | 5 ----- scripts/mosaic-gdal-init.sh | 27 ++++++++----------------- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 1ff5c6f2b..066123242 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -12,8 +12,9 @@ runs: run: | cd python pip install build wheel pyspark==${{ matrix.spark }} - pip install numpy==${{ matrix.numpy }} - pip install gdal==${{ matrix.gdal }} + pip install gdal[numpy]==${{ matrix.gdal }} + # https://github.com/OSGeo/gdal/issues/5844 fix + pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} pip install . - name: Test and build python package shell: bash diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index d6bf2463a..7f97c1a6b 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -25,14 +25,9 @@ runs: sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-security main multiverse restricted universe" sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main multiverse restricted universe" sudo apt-get update -y - # - install numpy first - pip install --upgrade pip - pip install numpy==${{ matrix.numpy }} # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev - # - install gdal with numpy - pip install gdal==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/scripts/mosaic-gdal-init.sh 
b/scripts/mosaic-gdal-init.sh index 92a008978..e5e051db9 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -26,17 +26,8 @@ WITH_UBUNTUGIS=0 # <- use ubuntugis ppa? WITH_FUSE_SO=0 # <- use fuse dir shared objects (vs wget) # SPECIFIED VERSIONS -# - may be changed by conditional logic -# - https://docs.scipy.org/doc/scipy/dev/toolchain.html#toolchain-roadmap -NUMPY_VERSION='==1.22.4' # <- for GDAL + Mosaic GDAL_VERSION=3.4.1 # <- ubuntugis is 3.4.3 -# - optional: install Mosaic -if [ $WITH_MOSAIC == 1 ] -then - pip install "databricks-mosaic$MOSAIC_PIP_VERSION" -fi - # - optional: install GDAL if [ $WITH_GDAL == 1 ] then @@ -52,16 +43,8 @@ then fi sudo apt-get update -y - # - install specific numpy version - # - install scipy version (dep conflict) - pip install --upgrade pip - pip install "numpy$NUMPY_VERSION" - # - install natives - sudo apt-get install -y gdal-bin libgdal-dev - - # - install gdal - pip install gdal==$GDAL_VERSION + sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] @@ -74,9 +57,15 @@ then # copy from github # TODO: in v0.4.1, include $GITHUB_VERSION GITHUB_REPO_PATH=databrickslabs/mosaic/main/resources/gdal/jammy - sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30 sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/$GITHUB_REPO_PATH/libgdalalljni.so.30.0.3 fi fi + +# - optional: install Mosaic +if [ $WITH_MOSAIC == 1 ] +then + pip install --upgrade pip + pip install "databricks-mosaic$MOSAIC_PIP_VERSION" +fi From c95f8781a8e18e14209c53c38b9b8d31bfec19b9 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 14:27:29 -0500 Subject: [PATCH 098/118] install native for python gdal. 
--- .github/actions/python_build/action.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 066123242..df26f9577 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -10,11 +10,11 @@ runs: - name: Install python dependencies shell: bash run: | + # - install natives for gdal python + sudo apt-get install -y python3-numpy python3-gdal + # - install pyspark cd python pip install build wheel pyspark==${{ matrix.spark }} - pip install gdal[numpy]==${{ matrix.gdal }} - # https://github.com/OSGeo/gdal/issues/5844 fix - pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} pip install . - name: Test and build python package shell: bash From 47cef69f2331529b43748800307c2e57490d82df Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 15:06:27 -0500 Subject: [PATCH 099/118] standardize install setups. --- .github/actions/python_build/action.yml | 15 ++++++++++++--- .github/actions/scala_build/action.yml | 5 ++++- scripts/mosaic-gdal-init.sh | 4 ++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index df26f9577..b1b48d58e 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -10,10 +10,19 @@ runs: - name: Install python dependencies shell: bash run: | - # - install natives for gdal python - sudo apt-get install -y python3-numpy python3-gdal - # - install pyspark + # - update apt + sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-backports main universe multiverse restricted" + sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-updates main universe multiverse restricted" + sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-security main multiverse 
restricted universe" + sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main multiverse restricted universe" + sudo apt-get update -y + # - install natives + sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 + sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal + # - install pip libs cd python + pip install --upgrade pip + pip install gdal==${{ matrix.gdal }} pip install build wheel pyspark==${{ matrix.spark }} pip install . - name: Test and build python package diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 7f97c1a6b..b33c1b453 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -27,7 +27,10 @@ runs: sudo apt-get update -y # - install natives sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev + sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal + # - install pip libs + pip install --upgrade pip + pip install gdal==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index e5e051db9..84e217bb4 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -46,6 +46,10 @@ then # - install natives sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal + # - pip install gdal + pip install --upgrade pip + pip install gdal==$GDAL_VERSION + # - add pre-build JNI shared object to the path if [ $WITH_FUSE_SO == 1 ] then From 1f0980904623acd6ef6758813f11a037b57da5d7 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 15:31:19 -0500 Subject: [PATCH 
100/118] streamline python deps for testing. --- .github/actions/python_build/action.yml | 11 ----------- scripts/mosaic-gdal-init.sh | 1 + 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index b1b48d58e..f9df1561e 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -10,19 +10,8 @@ runs: - name: Install python dependencies shell: bash run: | - # - update apt - sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-backports main universe multiverse restricted" - sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-updates main universe multiverse restricted" - sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc)-security main multiverse restricted universe" - sudo apt-add-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main multiverse restricted universe" - sudo apt-get update -y - # - install natives - sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 - sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal # - install pip libs cd python - pip install --upgrade pip - pip install gdal==${{ matrix.gdal }} pip install build wheel pyspark==${{ matrix.spark }} pip install . 
- name: Test and build python package diff --git a/scripts/mosaic-gdal-init.sh b/scripts/mosaic-gdal-init.sh index 84e217bb4..950b0ffe9 100644 --- a/scripts/mosaic-gdal-init.sh +++ b/scripts/mosaic-gdal-init.sh @@ -44,6 +44,7 @@ then sudo apt-get update -y # - install natives + sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal # - pip install gdal From 5910c82b9ce11606729c6aa84de369539ed76ad4 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 16:13:32 -0500 Subject: [PATCH 101/118] address python build failure. --- .github/actions/python_build/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index f9df1561e..99aeae9f4 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -13,6 +13,8 @@ runs: # - install pip libs cd python pip install build wheel pyspark==${{ matrix.spark }} + pip install --no-cache-dir --force-reinstall gdal[numpy]==${{ matrix.gdal }} + pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} pip install . - name: Test and build python package shell: bash From 2f6d0fcc736a41b54cbefaa6f6985bdd5a78e5c1 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Fri, 5 Jan 2024 16:41:45 -0500 Subject: [PATCH 102/118] Adding `--no-build-isolation` for gdal. 
--- .github/actions/python_build/action.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 99aeae9f4..8fbfa8c81 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -12,9 +12,8 @@ runs: run: | # - install pip libs cd python - pip install build wheel pyspark==${{ matrix.spark }} - pip install --no-cache-dir --force-reinstall gdal[numpy]==${{ matrix.gdal }} - pip install --no-cache-dir --force-reinstall numpy==${{ matrix.numpy }} + pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }} + pip install --no-build-isolation --no-cache-dir --force-reinstall gdal==${{ matrix.gdal }} pip install . - name: Test and build python package shell: bash From 93276cbbd2efd7c473091fe0552a1ec63626982d Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 8 Jan 2024 10:20:22 -0500 Subject: [PATCH 103/118] gdal `--no-build-isolation`; binary limit logic. --- .github/actions/python_build/action.yml | 2 +- .github/actions/scala_build/action.yml | 4 ++-- .../mosaic/expressions/raster/RST_FromContent.scala | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 8fbfa8c81..4bb3e9b7b 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -13,7 +13,7 @@ runs: # - install pip libs cd python pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }} - pip install --no-build-isolation --no-cache-dir --force-reinstall gdal==${{ matrix.gdal }} + pip install --no-build-isolation gdal==${{ matrix.gdal }} pip install . 
- name: Test and build python package shell: bash diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index b33c1b453..3c96c6589 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -29,8 +29,8 @@ runs: sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal # - install pip libs - pip install --upgrade pip - pip install gdal==${{ matrix.gdal }} + pip install --no-build-isolation --upgrade pip + pip install --no-build-isolation gdal==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index 6d8f4543c..bd2926bcb 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -63,7 +63,8 @@ case class RST_FromContent( val ext = GDAL.getExtension(driver) var rasterArr = rasterExpr.eval(input).asInstanceOf[Array[Byte]] val targetSize = sizeInMB.eval(input).asInstanceOf[Int] - if (targetSize <= 0 && rasterArr.length <= Integer.MAX_VALUE) { + if (targetSize <= 0 || rasterArr.length <= targetSize) { + // - no split required var raster = MosaicRasterGDAL.readRaster(rasterArr, PathUtils.NO_PATH_STRING, driver) var tile = MosaicRasterTile(null, raster, PathUtils.NO_PATH_STRING, driver) val row = tile.formatCellId(indexSystem).serialize() @@ -74,16 +75,15 @@ case class RST_FromContent( tile = null Seq(InternalRow.fromSeq(Seq(row))) } else { - // If target 
size is <0 and we are here that means the file is too big to fit in memory + // target size is > 0 and raster size > target size // - write the initial raster to file (unsplit) - // - repeating the createDirectories for context isolation + // - createDirectories in case of context isolation val rasterPath = PathUtils.createTmpFilePath(ext) Files.createDirectories(Paths.get(rasterPath).getParent) Files.write(Paths.get(rasterPath), rasterArr) - // We split to tiles of size 64MB - val size = if (targetSize <= 0) 64 else targetSize - var tiles = ReTileOnRead.localSubdivide(rasterPath, PathUtils.NO_PATH_STRING, size) + // split to tiles up to specified threshold + var tiles = ReTileOnRead.localSubdivide(rasterPath, PathUtils.NO_PATH_STRING, targetSize) val rows = tiles.map(_.formatCellId(indexSystem).serialize()) tiles.foreach(RasterCleaner.dispose(_)) Files.deleteIfExists(Paths.get(rasterPath)) From 125e088ab811e66e396cd5809be154b7d876ada5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 8 Jan 2024 10:28:31 -0500 Subject: [PATCH 104/118] `--no-build-isolation` only for python build. 
--- .github/actions/scala_build/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 3c96c6589..b33c1b453 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -29,8 +29,8 @@ runs: sudo apt-get install -y unixodbc libcurl3-gnutls libsnappy-dev libopenjp2-7 sudo apt-get install -y gdal-bin libgdal-dev python3-numpy python3-gdal # - install pip libs - pip install --no-build-isolation --upgrade pip - pip install --no-build-isolation gdal==${{ matrix.gdal }} + pip install --upgrade pip + pip install gdal==${{ matrix.gdal }} # - add the so files sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so sudo wget -nv -P /usr/lib -nc https://raw.githubusercontent.com/databrickslabs/mosaic/main/resources/gdal/jammy/libgdalalljni.so.30 From da5b5fd5b805479f59c59d1b86383e6a6acd1f11 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 8 Jan 2024 10:47:23 -0500 Subject: [PATCH 105/118] python build requires args for gdal. --- .github/actions/python_build/action.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 4bb3e9b7b..17e0c53f6 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -11,9 +11,10 @@ runs: shell: bash run: | # - install pip libs + # note: gdal requires the extra args cd python pip install build wheel pyspark==${{ matrix.spark }} numpy==${{ matrix.numpy }} - pip install --no-build-isolation gdal==${{ matrix.gdal }} + pip install --no-build-isolation --no-cache-dir --force-reinstall gdal==${{ matrix.gdal }} pip install . 
- name: Test and build python package shell: bash From b5792391a199ca1c34516fae2b0e6d20b86b33a9 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 8 Jan 2024 11:00:15 -0500 Subject: [PATCH 106/118] update rst_fromcontent signature. --- python/mosaic/api/raster.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index bbe31526e..d27f669bc 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -917,19 +917,18 @@ def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column: ) -def rst_fromcontent(raster: ColumnOrName, driver: ColumnOrName, parentPath: ColumnOrName, sizeInMB: ColumnOrName) -> Column: +def rst_fromcontent(raster: ColumnOrName, driver: ColumnOrName, sizeInMB: ColumnOrName) -> Column: """ Tiles the raster binary into tiles of the given size. :param raster: :param driver: - :param parentPath: :param sizeInMB: :return: """ return config.mosaic_context.invoke_function( "rst_fromcontent", pyspark_to_java_column(raster), pyspark_to_java_column(driver), - pyspark_to_java_column(parentPath), pyspark_to_java_column(sizeInMB) + pyspark_to_java_column(sizeInMB) ) From a3257790c80f9ea0dde0c02d0039d50d62ecf0b4 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 14:17:22 -0500 Subject: [PATCH 107/118] R build back on, adjust script URL. 
--- .github/workflows/build_main.yml | 4 ++-- CHANGELOG.md | 1 + python/mosaic/api/fuse.py | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 4f0c676cd..f1088a5b6 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - # - name: build R - # uses: ./.github/actions/r_build + - name: build R + uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts diff --git a/CHANGELOG.md b/CHANGELOG.md index b41fbce18..dbaa72653 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Removed OSS ESRI Geometry API for 0.4 series, JTS now the only vector provider. - MosaicAnalyzer functions now accept Spark DataFrames instead of MosaicFrame, which has been removed. - Docs for 0.3.x have been archived and linked from current docs; notebooks for 0.3.x have been separated from current notebooks. +- This release targets Assigned (vs Shared Access) clusters and offers python and scala language bindings; SQL expressions will not register in this release within Unity Catalog. ## v0.3.14 [DBR < 13] - Fixes for Warning and Error messages on mosaic_enable call. 
diff --git a/python/mosaic/api/fuse.py b/python/mosaic/api/fuse.py index cc9513b00..8f0cb3372 100644 --- a/python/mosaic/api/fuse.py +++ b/python/mosaic/api/fuse.py @@ -73,9 +73,7 @@ def configure(self) -> bool: script_out_path = f'{self.to_fuse_dir}/{self.script_out_name}' if with_script: # - start with the unconfigured script - # TODO: MODIFY AFTER PR MERGE - # script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' - script_url = f'https://raw.githubusercontent.com/mjohns-databricks/mosaic/gdal-jammy-3/scripts/{self.script_in_name}' + script_url = f'{GITHUB_CONTENT_TAG_URL}/scripts/{self.script_in_name}' script = None with requests.Session() as s: script = s.get(script_url, allow_redirects=True).text From cfbbfd6089cb17d5a055c8d90c93cd8533ce530b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 17:51:37 -0500 Subject: [PATCH 108/118] Updated README, NA handling for R gen. --- R/generate_R_bindings.R | 5 +++- README.md | 58 +++++++++++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 15 deletions(-) diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index 093d68e95..5fc4655af 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -54,7 +54,10 @@ build_method<-function(input){ arg_names <- lapply(input$args, function(x){c(x[1])}) #this handles converting non-Column arguments to their R equivalents argument_parser <- function(x){ - if(x[2] == 'Int'){ + if (is.na(x[2])) { + x[2] <- NA + } + else if(x[2] == 'Int'){ x[2] <- "numeric" } else if(x[2] == 'String'){ diff --git a/README.md b/README.md index 698c4d4b4..7551bb864 100644 --- a/README.md +++ b/README.md @@ -32,26 +32,53 @@ The supported languages are Scala, Python, R, and SQL. ## How does it work? -The Mosaic library is written in Scala to guarantee maximum performance with Spark and when possible, it uses code generation to give an extra performance boost. 
- -The other supported languages (Python, R and SQL) are thin wrappers around the Scala code. +The Mosaic library is written in Scala (JVM) to guarantee maximum performance with Spark and when possible, it uses code generation to give an extra performance boost. +__The other supported languages (Python, R and SQL) are thin wrappers around the Scala (JVM) code.__ ![mosaic-logical-design](src/main/resources/MosaicLogicalDesign.png) Image1: Mosaic logical design. ## Getting started -We recommend using Databricks Runtime versions 11.3 LTS or 12.2 LTS with Photon enabled; this will leverage the -Databricks H3 expressions when using H3 grid system. +### Mosaic 0.4.x Series [Latest] + +We recommend using Databricks Runtime versions 13.3 LTS with Photon enabled. + +:warning: **Mosaic 0.4.x series only supports DBR 13**. If running on a different DBR with throw an exception: + +> DEPRECATION ERROR: Mosaic v0.4.x series only supports Databricks Runtime 13. You can specify `%pip install 'databricks-mosaic<0.4,>=0.3'` for DBR < 13. + +As of the 0.4.0 release, Mosaic issues the following ERROR when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: + +> DEPRECATION ERROR: Please use a Databricks Photon-enabled Runtime for performance benefits or Runtime ML for spatial AI benefits; Mosaic 0.4.x series restricts executing this cluster. + +__Language Bindings__ + +As of Mosaic 0.4.0 (subject to change in follow-on releases)... -:warning: **Mosaic 0.3 series does not support DBR 13** (coming soon); also, DBR 10 is no longer supported in Mosaic. 
+* _No Mosaic SQL expressions cannot yet be registered with [Unity Catalog](https://www.databricks.com/product/unity-catalog) due to API changes affecting DBRs >= 13._ +* [Assigned Clusters](https://docs.databricks.com/en/compute/configure.html#access-modes): Mosaic Python, R, and Scala APIs. +* [Shared Access Clusters](https://docs.databricks.com/en/compute/configure.html#access-modes): Mosaic Scala API (JVM) with Admin [allowlisting](https://docs.databricks.com/en/data-governance/unity-catalog/manage-privileges/allowlist.html); _Python bindings to Mosaic Scala APIs are blocked by Py4J Security on Shared Access Clusters._ -As of the 0.3.11 release, Mosaic issues the following warning when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: +__Additional Notes:__ -> DEPRECATION WARNING: Mosaic is not supported on the selected Databricks Runtime. Mosaic will stop working on this cluster after v0.3.x. Please use a Databricks Photon-enabled Runtime (for performance benefits) or Runtime ML (for spatial AI benefits). +As of Mosaic 0.4.0 (subject to change in follow-on releases)... -If you are receiving this warning in v0.3.11+, you will want to begin to plan for a supported runtime. The reason we are making this change is that we are streamlining Mosaic internals to be more aligned with future product APIs which are powered by Photon. Along this direction of change, Mosaic will be standardizing to JTS as its default and supported Vector Geometry Provider. +1. [Unity Catalog](https://www.databricks.com/product/unity-catalog): Enforces process isolation which is difficult to accomplish with custom JVM libraries; as such only built-in (aka platform provided) JVM APIs can be invoked from other supported languages in Shared Access Clusters. +2. 
[Volumes](https://docs.databricks.com/en/connect/unity-catalog/volumes.html): Along the same principle of isolation, clusters (both assigned and shared access) can read Volumes via relevant built-in readers and writers or via custom python calls which do not involve any custom JVM code. + +### Mosaic 0.3.x Series + +We recommend using Databricks Runtime versions 12.2 LTS with Photon enabled. + +:warning: **Mosaic 0.3.x series does not support DBR 13**. + +As of the 0.3.11 release, Mosaic issues the following WARNING when initialized on a cluster that is neither Photon Runtime nor Databricks Runtime ML [[ADB](https://learn.microsoft.com/en-us/azure/databricks/runtime/) | [AWS](https://docs.databricks.com/runtime/index.html) | [GCP](https://docs.gcp.databricks.com/runtime/index.html)]: + +> DEPRECATION WARNING: Please use a Databricks Photon-enabled Runtime for performance benefits or Runtime ML for spatial AI benefits; Mosaic will stop working on this cluster after v0.3.x. + +If you are receiving this warning in v0.3.11+, you will want to begin to plan for a supported runtime. The reason we are making this change is that we are streamlining Mosaic internals to be more aligned with future product APIs which are powered by Photon. Along this direction of change, Mosaic has standardized to JTS as its default and supported Vector Geometry Provider. 
### Documentation @@ -114,21 +141,24 @@ import com.databricks.labs.mosaic.JTS val mosaicContext = MosaicContext.build(H3, JTS) mosaicContext.register(spark) ``` - +__Note: Mosaic 0.4.x SQL bindings for DBR 13 not yet available in Unity Catalog due to API changes.__ ## Examples +Here are some example notebooks, check the language links for latest [[Python](/notebooks/examples/python/) | [Scala](/notebooks/examples/scala/) | [SQL](/notebooks/examples/sql/) | [R](/notebooks/examples/R/)]: + | Example | Description | Links | | --- | --- | --- | -| __Quick Start__ | Example of performing spatial point-in-polygon joins on the NYC Taxi dataset | [python](/notebooks/examples/python/QuickstartNotebook.py), [scala](notebooks/examples/scala/QuickstartNotebook.scala), [R](notebooks/examples/R/QuickstartNotebook.r), [SQL](notebooks/examples/sql/QuickstartNotebook.sql) | +| __Quick Start__ | Example of performing spatial point-in-polygon joins on the NYC Taxi dataset | [python](/notebooks/examples/python/QuickstartNotebook.ipynb), [scala](notebooks/examples/scala/QuickstartNotebook.ipynb), [R](notebooks/examples/R/QuickstartNotebook.r), [SQL](notebooks/examples/sql/QuickstartNotebook.ipynb) | +| Shapefiles | Examples of reading multiple shapefiles | [python](notebooks/examples/python/Shapefiles/) | | Spatial KNN | Runnable notebook-based example using Mosaic [SpatialKNN](https://databrickslabs.github.io/mosaic/models/spatial-knn.html) model | [python](notebooks/examples/python/SpatialKNN) | -| Open Street Maps | Ingesting and processing with Delta Live Tables the Open Street Maps dataset to extract buildings polygons and calculate aggregation statistics over H3 indexes | [python](notebooks/examples/python/OpenStreetMaps) | +| NetCDF | Read multiple NetCDFs, process through various data engineering steps before analyzing and rendering | [python](notebooks/examples/python/NetCDF/) | | STS Transfers | Detecting Ship-to-Ship transfers at scale by leveraging Mosaic to process AIS 
data. | [python](notebooks/examples/python/Ship2ShipTransfers), [blog](https://medium.com/@timo.roest/ship-to-ship-transfer-detection-b370dd9d43e8) | -You can import those examples in Databricks workspace using [these instructions](https://docs.databricks.com/notebooks/notebooks-manage.html#import-a-notebook). +You can import those examples in Databricks workspace using [these instructions](https://docs.databricks.com/en/notebooks/index.html). ## Ecosystem -Mosaic is intended to augment the existing system and unlock the potential by integrating spark, delta and 3rd party frameworks into the Lakehouse architecture. +Mosaic is intended to augment the existing system and unlock the potential by integrating [Spark](https://spark.apache.org/), [Delta Lake](https://delta.io/) and 3rd party frameworks into the Lakehouse architecture. ![mosaic-logo](src/main/resources/MosaicEcosystem.png) Image2: Mosaic ecosystem - Lakehouse integration. From 85a3dd4da187efeb63718d00c5fa255280d743c4 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 18:38:23 -0500 Subject: [PATCH 109/118] R handle NA as string (character). 
--- R/generate_R_bindings.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index 5fc4655af..d9877c315 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -52,10 +52,11 @@ build_column_specifiers <- function(input){ build_method<-function(input){ function_name <- input$function_name arg_names <- lapply(input$args, function(x){c(x[1])}) - #this handles converting non-Column arguments to their R equivalents + # this handles converting non-Column arguments to their R equivalents + # - NA handled as string argument_parser <- function(x){ if (is.na(x[2])) { - x[2] <- NA + x[2] <- "character" } else if(x[2] == 'Int'){ x[2] <- "numeric" From 586ac8219bfe2fe03ca79dd390690c3975de4c63 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 19:13:17 -0500 Subject: [PATCH 110/118] adjusted function overload for rst_fromcontent. --- R/generate_R_bindings.R | 5 +---- .../databricks/labs/mosaic/functions/MosaicContext.scala | 8 +++----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index d9877c315..a2e10d8e4 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -55,10 +55,7 @@ build_method<-function(input){ # this handles converting non-Column arguments to their R equivalents # - NA handled as string argument_parser <- function(x){ - if (is.na(x[2])) { - x[2] <- "character" - } - else if(x[2] == 'Int'){ + if(x[2] == 'Int'){ x[2] <- "numeric" } else if(x[2] == 'String'){ diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index fbb0bb922..cd76993d5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,17 +704,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: 
GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) + def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, driver, lit(-1)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) - def rst_fromfile(raster: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) + def rst_fromfile(raster: Column): Column = rst_fromfile(raster, lit(-1)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Int): Column = From 60017bdd575985b31f36f23aea15a1a72847a3fb Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 20:51:48 -0500 Subject: [PATCH 111/118] streamlined rst_fromcontent. 
--- .../databricks/labs/mosaic/functions/MosaicContext.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index cd76993d5..d546246b4 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,13 +705,11 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) - def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, driver, lit(-1)) + def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1)) + def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver lit(sizeInMB)) + def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = rst_fromcontent(raster, lit(driver), lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(sizeInMB).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = rst_fromfile(raster, lit(-1)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, 
expressionConfig)) From e70560c9eeb673d836333777f76cb9c087ca1312 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 21:05:24 -0500 Subject: [PATCH 112/118] missing comma. --- .../databricks/labs/mosaic/functions/MosaicContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index d546246b4..40cc86a1a 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,9 +704,9 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) - def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1)) - def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver lit(sizeInMB)) + def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1).Column) + def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1).Column) + def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver, lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = rst_fromcontent(raster, lit(driver), lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) From 
ca34f6da9905f5a5cbde8d5241ee059309535f5b Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 9 Jan 2024 21:06:42 -0500 Subject: [PATCH 113/118] Removed extra cast. --- .../com/databricks/labs/mosaic/functions/MosaicContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 40cc86a1a..a8b99a256 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,8 +704,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1).Column) - def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1).Column) + def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) + def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver, lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = rst_fromcontent(raster, lit(driver), lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = From 90477fa268e8acb68b3acdfcb0ffd2c04074e105 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 10 Jan 2024 08:17:45 -0500 Subject: [PATCH 114/118] NA as string (again). 
--- R/generate_R_bindings.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index a2e10d8e4..161568c9a 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -53,9 +53,12 @@ build_method<-function(input){ function_name <- input$function_name arg_names <- lapply(input$args, function(x){c(x[1])}) # this handles converting non-Column arguments to their R equivalents - # - NA handled as string + # - NA tested argument_parser <- function(x){ - if(x[2] == 'Int'){ + if (is.na(x[2])) { + x[2] <- "character" + } + else if(x[2] == 'Int'){ x[2] <- "numeric" } else if(x[2] == 'String'){ From e07f947c0d572094aba07def1a6fc977f3aee03e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 10 Jan 2024 09:23:12 -0500 Subject: [PATCH 115/118] further simplify rst_fromcontent sql. --- R/generate_R_bindings.R | 6 +----- .../databricks/labs/mosaic/functions/MosaicContext.scala | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/R/generate_R_bindings.R b/R/generate_R_bindings.R index 161568c9a..f60199efb 100644 --- a/R/generate_R_bindings.R +++ b/R/generate_R_bindings.R @@ -53,12 +53,8 @@ build_method<-function(input){ function_name <- input$function_name arg_names <- lapply(input$args, function(x){c(x[1])}) # this handles converting non-Column arguments to their R equivalents - # - NA tested argument_parser <- function(x){ - if (is.na(x[2])) { - x[2] <- "character" - } - else if(x[2] == 'Int'){ + if(x[2] == 'Int'){ x[2] <- "numeric" } else if(x[2] == 'String'){ diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index a8b99a256..4ccb4a2ab 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -705,9 +705,7 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: 
GeometryAPI) extends def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) - def rst_fromcontent(raster: Column, driver:String): Column = rst_fromcontent(raster, lit(driver), lit(-1)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver, lit(sizeInMB)) - def rst_fromcontent(raster: Column, driver:String, sizeInMB: Int): Column = rst_fromcontent(raster, lit(driver), lit(sizeInMB)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) def rst_fromfile(raster: Column): Column = rst_fromfile(raster, lit(-1)) From b555881ac7a1b6f2d3295c86fd42cbee17ec8098 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 10 Jan 2024 09:44:20 -0500 Subject: [PATCH 116/118] rst_fromcontent arg standardization. 
--- .../labs/mosaic/functions/MosaicContext.scala | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 4ccb4a2ab..9cae50204 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -704,11 +704,16 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, resolution.expr, expressionConfig)) def rst_tessellate(raster: Column, resolution: Int): Column = ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:Column): Column = rst_fromcontent(raster, driver, lit(-1)) - def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Int): Column = rst_fromcontent(raster, driver, lit(sizeInMB)) + def rst_fromcontent(raster: Column, driver:Column): Column = + ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) - def rst_fromfile(raster: Column): Column = rst_fromfile(raster, lit(-1)) + def rst_fromcontent(raster: Column, driver:String, sizeInMB:Int): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) + def rst_fromfile(raster: Column): Column = + ColumnAdapter(RST_FromFile(raster.expr, lit(-1).expr, expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Column): Column = ColumnAdapter(RST_FromFile(raster.expr, sizeInMB.expr, 
expressionConfig)) def rst_fromfile(raster: Column, sizeInMB: Int): Column = From 034c97869b50d37259be07ba9baa5fe1159c71c5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 10 Jan 2024 09:48:20 -0500 Subject: [PATCH 117/118] standardized ordering of rst_fromcontent arg types --- .../com/databricks/labs/mosaic/functions/MosaicContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index 9cae50204..dc061d597 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -706,10 +706,10 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_Tessellate(raster.expr, lit(resolution).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, lit(-1).expr, expressionConfig)) - def rst_fromcontent(raster: Column, driver:String): Column = - ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:Column, sizeInMB:Column): Column = ColumnAdapter(RST_FromContent(raster.expr, driver.expr, sizeInMB.expr, expressionConfig)) + def rst_fromcontent(raster: Column, driver:String): Column = + ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(-1).expr, expressionConfig)) def rst_fromcontent(raster: Column, driver:String, sizeInMB:Int): Column = ColumnAdapter(RST_FromContent(raster.expr, lit(driver).expr, lit(sizeInMB).expr, expressionConfig)) def rst_fromfile(raster: Column): Column = From 381da4092b2727a141180077610c27a22f6631e0 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Wed, 10 Jan 2024 10:19:54 -0500 Subject: [PATCH 118/118] Turning off R build temporarily. 
--- .github/workflows/build_main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index f1088a5b6..4f0c676cd 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -28,7 +28,7 @@ jobs: uses: ./.github/actions/scala_build - name: build python uses: ./.github/actions/python_build - - name: build R - uses: ./.github/actions/r_build + # - name: build R + # uses: ./.github/actions/r_build - name: upload artefacts uses: ./.github/actions/upload_artefacts