diff --git a/python/mosaic/api/functions.py b/python/mosaic/api/functions.py index 4e7d450db..d1f61fae8 100644 --- a/python/mosaic/api/functions.py +++ b/python/mosaic/api/functions.py @@ -1,7 +1,8 @@ from typing import Any from pyspark.sql import Column -from pyspark.sql.functions import _to_java_column as pyspark_to_java_column + +from pyspark.sql.column import _to_java_column as pyspark_to_java_column from pyspark.sql.functions import lit from mosaic.config import config diff --git a/python/mosaic/api/gdal.py b/python/mosaic/api/gdal.py index 9ef06c0b4..3ef739873 100644 --- a/python/mosaic/api/gdal.py +++ b/python/mosaic/api/gdal.py @@ -8,7 +8,6 @@ from .fuse import SetupMgr __all__ = [ - "setup_gdal", "enable_gdal", "update_checkpoint_path", "set_checkpoint_on", @@ -21,48 +20,6 @@ ] -def setup_gdal( - to_fuse_dir: str = "/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2", - script_out_name: str = "mosaic-gdal-init.sh", - jni_so_copy: bool = False, - test_mode: bool = False, -) -> bool: - """ - Prepare GDAL init script and shared objects required for GDAL to run on spark. - This function will generate the init script that will install GDAL on each worker node. - After the setup_gdal is run, the init script must be added to the cluster; also, - a cluster restart is required. - - Notes: - (a) This is close in behavior to Mosaic < 0.4 series (prior to DBR 13), - now using jammy default (3.4.1) - (b) `to_fuse_dir` can be one of `/Volumes/..`, `/Workspace/..`, `/dbfs/..`; - however, you should use `setup_fuse_install()` for Volume based installs - - Parameters - ---------- - to_fuse_dir : str - Path to write out the init script for GDAL installation; - default is '/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2'. - script_out_name : str - name of the script to be written; - default is 'mosaic-gdal-init.sh'. - jni_so_copy : bool - if True, copy shared object to fuse dir and config script to use; - default is False - test_mode : bool - Only for unit tests. - - Returns - ------- - True unless resources fail to download. - """ - setup_mgr = SetupMgr( - to_fuse_dir, script_out_name=script_out_name, jni_so_copy=jni_so_copy - ) - return setup_mgr.configure(test_mode=test_mode) - - def enable_gdal(spark: SparkSession, with_checkpoint_path: str = None) -> None: """ Enable GDAL at runtime on a cluster with GDAL installed using init script, @@ -103,12 +60,6 @@ def enable_gdal(spark: SparkSession, with_checkpoint_path: str = None) -> None: print( "GDAL not enabled. Mosaic with GDAL requires that GDAL be installed on the cluster.\n" ) - print( - "You can run `setup_gdal()` or `setup_fuse_install()` to generate the init script for GDAL install.\n" - ) - print( - "After the init script is generated, you need to add it to your cluster and restart.\n" - ) print("Error: " + str(e)) diff --git a/python/mosaic/gdal/gdal_3.10.0-1_amd64.deb b/python/mosaic/gdal/gdal_3.10.0-1_amd64.deb index b3f24276c..ce016f0fe 100644 Binary files a/python/mosaic/gdal/gdal_3.10.0-1_amd64.deb and b/python/mosaic/gdal/gdal_3.10.0-1_amd64.deb differ diff --git a/python/setup.cfg b/python/setup.cfg index 2e0b94f06..46176fc4c 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -12,11 +12,11 @@ classifiers = Topic :: Scientific/Engineering :: GIS Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 [options] packages = find: -python_requires = <3.11,>=3.10 +python_requires = <3.12,>=3.11 install_requires = geopandas<0.14.4,>=0.14 h3<4.0,>=3.7 diff --git a/python/setup.py b/python/setup.py index 8f6800925..508c156b9 100644 --- a/python/setup.py +++ b/python/setup.py @@ -8,15 +8,66 @@ from distutils.core import setup from setuptools.command.install import install + class CustomInstallCommand(install): """Custom install command to install .deb file.""" + required_os_packages = [ + "gpsbabel", + "libavif-dev", + "libblosc-dev", + "libboost-dev", + "libcairo2-dev", + "libcfitsio-dev", + "libcrypto++-dev", + "libcurl4-gnutls-dev", + "libexpat-dev", + "libfcgi-dev", + "libfyba-dev", + "libfreexl-dev", + "libgeos-dev", + "libgeotiff-dev", + "libgif-dev", + "libhdf4-alt-dev", + "libhdf5-serial-dev", + "libjpeg-dev", + "libkml-dev", + "liblcms2-2", + "liblz4-dev", + "liblzma-dev", + "libmysqlclient-dev", + "libnetcdf-dev", + "libogdi-dev", + "libopenexr-dev", + "libopenjp2-7-dev", + "libpcre3-dev", + "libpng-dev", + "libpoppler-dev", + "libpoppler-private-dev", + "libpq-dev", + "libproj-dev", + "librasterlite2-dev", + "libspatialite-dev", + "libssl-dev", + "libwebp-dev", + "libxerces-c-dev", + "libxml2-dev", + "libxslt-dev", + "libzstd-dev", + "locales", + "mysql-client-core-8.0", + "netcdf-bin", + ] + def run(self): - # Run the standard installation process - install.run(self) + # Install base dependencies + subprocess.check_call(["apt-get", "update"]) + subprocess.check_call(["apt-get", "install", "-y", *self.required_os_packages]) # Install the .deb file - deb_file = os.path.join(os.path.dirname(__file__), 'mosaic', 'gdal', 'gdal_3.10.0-1_amd64.deb') + deb_file = os.path.join( + os.path.dirname(__file__), "mosaic", "gdal", "gdal_3.10.0-1_amd64.deb" + ) if os.path.exists(deb_file): try: @@ -28,19 +79,20 @@ def run(self): # Run dpkg to install the .deb file try: - subprocess.check_call(['dpkg', '-i', deb_file]) + subprocess.check_call(["dpkg", "-i", deb_file]) except subprocess.CalledProcessError as e: - subprocess.check_call(['apt-get', 'install', '-f', '-y']) # Fix dependencies if needed - subprocess.check_call(['dpkg', '-i', deb_file]) + subprocess.check_call( + ["apt-get", "install", "-f", "-y"] + ) # Fix dependencies if needed + subprocess.check_call(["dpkg", "-i", deb_file]) except subprocess.CalledProcessError as e: print(f"Error installing .deb package: {e}") sys.exit(1) else: print(f"Error: {deb_file} not found.") sys.exit(1) + # Run the standard installation process + install.run(self) + -setup( - cmdclass={ - "install": CustomInstallCommand - } -) +setup(cmdclass={"install": CustomInstallCommand}) diff --git a/python/test/test_fuse_install.py b/python/test/test_fuse_install.py deleted file mode 100644 index ed1581fd9..000000000 --- a/python/test/test_fuse_install.py +++ /dev/null @@ -1,26 +0,0 @@ -from .utils import FuseInstaller, SparkTestCase - - -class TestFuseInstall(SparkTestCase): - def setUp(self) -> None: - return super().setUp() - - def test_setup_script_only(self): - installer = FuseInstaller(jar_copy=False, jni_so_copy=False) - try: - self.assertTrue(installer.do_op()) - except Exception: - self.fail("Executing `setup_fuse_install()` raised an exception.") - - self.assertEqual(len(installer.list_files()), 1) # <- script generated - - def test_setup_all(self): - installer = FuseInstaller(jar_copy=True, jni_so_copy=True) - try: - self.assertTrue(installer.do_op()) - except Exception: - self.fail("Executing `setup_fuse_install()` raised an exception.") - - self.assertEqual( - len(installer.list_files()), 5 - ) # <- init script jar, and so files diff --git a/python/test/test_gdal_install.py b/python/test/test_gdal_install.py deleted file mode 100644 index d20511ffa..000000000 --- a/python/test/test_gdal_install.py +++ /dev/null @@ -1,15 +0,0 @@ -from .utils import GDALInstaller, SparkTestCase - - -class TestGDALInstall(SparkTestCase): - def setUp(self) -> None: - return super().setUp() - - def test_setup_gdal(self): - installer = GDALInstaller() - try: - self.assertTrue(installer.do_op()) - except Exception: - self.fail("Copying objects with `setup_gdal()` raised an exception.") - - self.assertEqual(len(installer.list_files()), 1) # <- init script diff --git a/python/test/test_raster_functions.py b/python/test/test_raster_functions.py index 21cc7dd2b..9fe3d36a1 100644 --- a/python/test/test_raster_functions.py +++ b/python/test/test_raster_functions.py @@ -225,9 +225,7 @@ def test_netcdf_load_tessellate_clip_merge(self): .repartition(self.spark.sparkContext.defaultParallelism) .withColumn( "timestep", - element_at( - api.rst_metadata("tile"), "NC_GLOBAL#GDAL_MOSAIC_BAND_INDEX" - ), + element_at(col("tile.metadata"), "bandIndex"), ) .withColumn("tile", api.rst_setsrid("tile", lit(4326))) .where(col("timestep") == 21) diff --git a/python/test/utils/__init__.py b/python/test/utils/__init__.py index a8eb0e81d..96a703ab1 100644 --- a/python/test/utils/__init__.py +++ b/python/test/utils/__init__.py @@ -1,4 +1,2 @@ from .mosaic_test_case import * from .mosaic_test_case_with_gdal import * -from .setup_fuse import FuseInstaller -from .setup_gdal import GDALInstaller diff --git a/python/test/utils/setup_fuse.py b/python/test/utils/setup_fuse.py deleted file mode 100644 index 09071deb8..000000000 --- a/python/test/utils/setup_fuse.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -import shutil -import subprocess -import tempfile -from test.context import api - -from pkg_resources import Requirement, working_set - - -class FuseInstaller: - def __init__(self, jar_copy=False, jni_so_copy=False): - self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.mkdtemp() - self.jar_copy = jar_copy - self.jni_so_copy = jni_so_copy - self.FUSE_INIT_SCRIPT_FILENAME = "mosaic-fuse-init.sh" - - def __del__(self): - shutil.rmtree(self._temp_dir) - - def do_op(self) -> bool: - return api.setup_fuse_install( - self._temp_dir, - jar_copy=self.jar_copy, - jni_so_copy=self.jni_so_copy, - script_out_name=self.FUSE_INIT_SCRIPT_FILENAME, - test_mode=True, - ) - - def run_init_script(self) -> int: - fuse_install_script_target = os.path.join( - self._temp_dir, self.FUSE_INIT_SCRIPT_FILENAME - ) - os.chmod(fuse_install_script_target, mode=0x744) - result = subprocess.run( - [fuse_install_script_target], - stdout=subprocess.DEVNULL, - env=dict(os.environ, DATABRICKS_ROOT_VIRTUALENV_ENV=self._site_packages), - ) - return result.returncode - - def list_files(self) -> list[str]: - return os.listdir(self._temp_dir) diff --git a/python/test/utils/setup_gdal.py b/python/test/utils/setup_gdal.py deleted file mode 100644 index 31e8b89e1..000000000 --- a/python/test/utils/setup_gdal.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -import shutil -import tempfile -from test.context import api - -from pkg_resources import Requirement, working_set - - -class GDALInstaller: - def __init__(self): - self._site_packages = working_set.find(Requirement("keplergl")).location - self._temp_dir = tempfile.mkdtemp() - self.GDAL_INIT_SCRIPT_FILENAME = "mosaic-gdal-init.sh" - - def __del__(self): - shutil.rmtree(self._temp_dir) - - def do_op(self) -> bool: - return api.setup_gdal( - to_fuse_dir=self._temp_dir, - script_out_name=self.GDAL_INIT_SCRIPT_FILENAME, - jni_so_copy=False, - test_mode=True, - ) - - def list_files(self) -> list[str]: - return os.listdir(self._temp_dir)