Skip to content

Commit

Permalink
updated python bindings, install from .deb
Browse files Browse the repository at this point in the history
  • Loading branch information
sllynn committed Nov 28, 2024
1 parent e62c017 commit 5a25f8e
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 179 deletions.
3 changes: 2 additions & 1 deletion python/mosaic/api/functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Any

from pyspark.sql import Column
from pyspark.sql.functions import _to_java_column as pyspark_to_java_column

from pyspark.sql.column import _to_java_column as pyspark_to_java_column
from pyspark.sql.functions import lit

from mosaic.config import config
Expand Down
49 changes: 0 additions & 49 deletions python/mosaic/api/gdal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from .fuse import SetupMgr

__all__ = [
"setup_gdal",
"enable_gdal",
"update_checkpoint_path",
"set_checkpoint_on",
Expand All @@ -21,48 +20,6 @@
]


def setup_gdal(
    to_fuse_dir: str = "/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2",
    script_out_name: str = "mosaic-gdal-init.sh",
    jni_so_copy: bool = False,
    test_mode: bool = False,
) -> bool:
    """
    Generate the GDAL init script (and, optionally, shared objects) needed to
    run GDAL on spark.

    The work is delegated to a `SetupMgr` built from the arguments below; its
    `configure()` result is returned unchanged. The generated init script
    installs GDAL on each worker node, so once this function has run the
    script must be attached to the cluster and the cluster restarted.

    Notes:
      (a) Behaviour is close to the Mosaic < 0.4 series (prior to DBR 13),
          now using the jammy default (3.4.1).
      (b) `to_fuse_dir` can be one of `/Volumes/..`, `/Workspace/..`,
          `/dbfs/..`; for Volume based installs prefer `setup_fuse_install()`.

    Parameters
    ----------
    to_fuse_dir : str
        Directory the GDAL installation init script is written to;
        default is '/Workspace/Shared/geospatial/mosaic/gdal/jammy/0.4.2'.
    script_out_name : str
        File name of the init script to write;
        default is 'mosaic-gdal-init.sh'.
    jni_so_copy : bool
        When True, copy the shared object to the fuse dir and configure the
        script to use it; default is False.
    test_mode : bool
        Only for unit tests; forwarded to `SetupMgr.configure`.

    Returns
    -------
    bool
        True unless resources fail to download.
    """
    # Build the manager and run it in one expression; no state is kept here.
    return SetupMgr(
        to_fuse_dir,
        script_out_name=script_out_name,
        jni_so_copy=jni_so_copy,
    ).configure(test_mode=test_mode)


def enable_gdal(spark: SparkSession, with_checkpoint_path: str = None) -> None:
"""
Enable GDAL at runtime on a cluster with GDAL installed using init script,
Expand Down Expand Up @@ -103,12 +60,6 @@ def enable_gdal(spark: SparkSession, with_checkpoint_path: str = None) -> None:
print(
"GDAL not enabled. Mosaic with GDAL requires that GDAL be installed on the cluster.\n"
)
print(
"You can run `setup_gdal()` or `setup_fuse_install()` to generate the init script for GDAL install.\n"
)
print(
"After the init script is generated, you need to add it to your cluster and restart.\n"
)
print("Error: " + str(e))


Expand Down
Binary file modified python/mosaic/gdal/gdal_3.10.0-1_amd64.deb
Binary file not shown.
4 changes: 2 additions & 2 deletions python/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ classifiers =
Topic :: Scientific/Engineering :: GIS
Programming Language :: Python
Programming Language :: Python :: 3
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11

[options]
packages = find:
python_requires = <3.11,>=3.10
python_requires = <3.12,>=3.11
install_requires =
geopandas<0.14.4,>=0.14
h3<4.0,>=3.7
Expand Down
74 changes: 63 additions & 11 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,66 @@
from distutils.core import setup
from setuptools.command.install import install


class CustomInstallCommand(install):
"""Custom install command to install .deb file."""

required_os_packages = [
"gpsbabel",
"libavif-dev",
"libblosc-dev",
"libboost-dev",
"libcairo2-dev",
"libcfitsio-dev",
"libcrypto++-dev",
"libcurl4-gnutls-dev",
"libexpat-dev",
"libfcgi-dev",
"libfyba-dev",
"libfreexl-dev",
"libgeos-dev",
"libgeotiff-dev",
"libgif-dev",
"libhdf4-alt-dev",
"libhdf5-serial-dev",
"libjpeg-dev",
"libkml-dev",
"liblcms2-2",
"liblz4-dev",
"liblzma-dev",
"libmysqlclient-dev",
"libnetcdf-dev",
"libogdi-dev",
"libopenexr-dev",
"libopenjp2-7-dev",
"libpcre3-dev",
"libpng-dev",
"libpoppler-dev",
"libpoppler-private-dev",
"libpq-dev",
"libproj-dev",
"librasterlite2-dev",
"libspatialite-dev",
"libssl-dev",
"libwebp-dev",
"libxerces-c-dev",
"libxml2-dev",
"libxslt-dev",
"libzstd-dev",
"locales",
"mysql-client-core-8.0",
"netcdf-bin",
]

def run(self):
# Run the standard installation process
install.run(self)
# Install base dependencies
subprocess.check_call(["apt-get", "update"])
subprocess.check_call(["apt-get", "install", "-y", *self.required_os_packages])

# Install the .deb file
deb_file = os.path.join(os.path.dirname(__file__), 'mosaic', 'gdal', 'gdal_3.10.0-1_amd64.deb')
deb_file = os.path.join(
os.path.dirname(__file__), "mosaic", "gdal", "gdal_3.10.0-1_amd64.deb"
)

if os.path.exists(deb_file):
try:
Expand All @@ -28,19 +79,20 @@ def run(self):

# Run dpkg to install the .deb file
try:
subprocess.check_call(['dpkg', '-i', deb_file])
subprocess.check_call(["dpkg", "-i", deb_file])
except subprocess.CalledProcessError as e:
subprocess.check_call(['apt-get', 'install', '-f', '-y']) # Fix dependencies if needed
subprocess.check_call(['dpkg', '-i', deb_file])
subprocess.check_call(
["apt-get", "install", "-f", "-y"]
) # Fix dependencies if needed
subprocess.check_call(["dpkg", "-i", deb_file])
except subprocess.CalledProcessError as e:
print(f"Error installing .deb package: {e}")
sys.exit(1)
else:
print(f"Error: {deb_file} not found.")
sys.exit(1)
# Run the standard installation process
install.run(self)


setup(
cmdclass={
"install": CustomInstallCommand
}
)
setup(cmdclass={"install": CustomInstallCommand})
26 changes: 0 additions & 26 deletions python/test/test_fuse_install.py

This file was deleted.

15 changes: 0 additions & 15 deletions python/test/test_gdal_install.py

This file was deleted.

4 changes: 1 addition & 3 deletions python/test/test_raster_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,7 @@ def test_netcdf_load_tessellate_clip_merge(self):
.repartition(self.spark.sparkContext.defaultParallelism)
.withColumn(
"timestep",
element_at(
api.rst_metadata("tile"), "NC_GLOBAL#GDAL_MOSAIC_BAND_INDEX"
),
element_at(col("tile.metadata"), "bandIndex"),
)
.withColumn("tile", api.rst_setsrid("tile", lit(4326)))
.where(col("timestep") == 21)
Expand Down
2 changes: 0 additions & 2 deletions python/test/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
from .mosaic_test_case import *
from .mosaic_test_case_with_gdal import *
from .setup_fuse import FuseInstaller
from .setup_gdal import GDALInstaller
43 changes: 0 additions & 43 deletions python/test/utils/setup_fuse.py

This file was deleted.

27 changes: 0 additions & 27 deletions python/test/utils/setup_gdal.py

This file was deleted.

0 comments on commit 5a25f8e

Please sign in to comment.