From f50e8e948c283351fb5a35839b710e0fdee360ad Mon Sep 17 00:00:00 2001 From: Nathan Franklin Date: Mon, 28 Oct 2024 20:46:10 +0100 Subject: [PATCH] task/WG-285: update potree converter (#218) * Rework worker Dockerfile and bump PotreeConverter * Fix docker compose commands * Update and improve custom html template * Activate conda when starting bash on running container * Add an additional test * Improve nginx.conf to allow range requests for potree bin files * Fix adding of nsf_logo.png * Fix unit test * Add vim package * Do not need to install laszip * Clean up dockerfile * Update deployed nginx.conf to allow range requests for potree bin files * Simplify entrypoint script * Refactor order of Dockerfile * Fix nginx.conf so range requests work on Firefox * Remove unused background image in template Also, fixes nsf logo snippet application. * Improve comment * Fix PYTHONPATH * Fix .bin settings * Unify how some settings are set * Lower max body size This was high as we used to support direct file upload instead of using TAPIS * Improve error handling to log memory issues --- devops/Dockerfile | 3 +- devops/Dockerfile.worker | 100 +++++++++++++----- devops/geoapi-services/nginx.conf | 23 ++-- devops/local_conf/nginx.conf | 27 +++-- .../potree/page_template/nsf_logo_snippet.txt | 3 + .../potree/page_template/viewer_template.html | 67 ------------ geoapi/models/feature.py | 1 + geoapi/tasks/external_data.py | 30 ++++-- geoapi/tasks/lidar.py | 57 +++++++--- .../external_data_tests/test_external_data.py | 34 +++++- 10 files changed, 206 insertions(+), 139 deletions(-) create mode 100644 devops/misc/potree/page_template/nsf_logo_snippet.txt delete mode 100644 devops/misc/potree/page_template/viewer_template.html diff --git a/devops/Dockerfile b/devops/Dockerfile index 4e512bba..23a67739 100644 --- a/devops/Dockerfile +++ b/devops/Dockerfile @@ -19,5 +19,6 @@ RUN poetry install RUN mkdir /app COPY geoapi /app/geoapi -ENV PYTHONPATH "${PYTHONPATH}:/app" +ENV PYTHONPATH=/app 
+ WORKDIR /app/geoapi diff --git a/devops/Dockerfile.worker b/devops/Dockerfile.worker index 5035dbb8..36465af8 100644 --- a/devops/Dockerfile.worker +++ b/devops/Dockerfile.worker @@ -1,46 +1,92 @@ -FROM pdal/pdal:2.4.2 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ -libtiff-dev libgeotiff-dev libgdal-dev \ -libboost-system-dev libboost-thread-dev libboost-filesystem-dev \ -libboost-program-options-dev libboost-regex-dev libboost-iostreams-dev \ -git cmake build-essential python3.9 python3-pip python3-dev ffmpeg \ -unzip git wget libc6-dev gcc-multilib + vim \ + libtiff-dev \ + libgeotiff-dev \ + libgdal-dev \ + libboost-system-dev \ + libboost-thread-dev \ + libboost-filesystem-dev \ + libboost-program-options-dev \ + libboost-regex-dev \ + libboost-iostreams-dev \ + git \ + cmake \ + build-essential \ + python3.9 \ + python3-pip \ + python3-dev \ + ffmpeg \ + unzip \ + wget \ + libc6-dev \ + libtbb-dev\ + libcgal-dev WORKDIR /opt -RUN git clone --depth 1 https://github.com/m-schuetz/LAStools.git && cd LAStools/LASzip && mkdir build && cd build && \ -cmake -DCMAKE_BUILD_TYPE=Release .. && make && make install && ldconfig - -RUN git clone -b develop https://github.com/potree/PotreeConverter.git && cd PotreeConverter && git checkout 685ef56a7864ea2a9781b2ab61580f11f0983d29 && \ +# Install PotreeConverter +# c2328c4 is v2.1.1 and some additional fixes +RUN git clone -b develop https://github.com/potree/PotreeConverter.git && cd PotreeConverter && git checkout c2328c4 && \ mkdir build && cd build && \ -cmake -DCMAKE_BUILD_TYPE=Release -DLASZIP_INCLUDE_DIRS=/opt/LAStools/LASzip/dll/ -DLASZIP_LIBRARY=/usr/local/lib/liblaszip.so .. && \ -make && make install && cp -r /opt/PotreeConverter/PotreeConverter/resources /resources -ADD devops/misc/potree/page_template /resources/page_template +cmake .. 
-DCMAKE_BUILD_TYPE=Release && \ +make -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 2 +# Setup our page template for PotreeConverter +ADD devops/misc/potree/page_template/nsf_logo.png /opt/PotreeConverter/build/resources/page_template/ +ADD devops/misc/potree/page_template/nsf_logo_snippet.txt /tmp/ +# - add nsf logo +RUN sed -i '//r /tmp/nsf_logo_snippet.txt' /opt/PotreeConverter/build/resources/page_template/viewer_template.html +# - remove reference to background image +RUN sed -i 's/style="[^"]*background-image:[^"]*"//' /opt/PotreeConverter/build/resources/page_template/viewer_template.html -RUN pip3 install --upgrade pip -ENV POETRY_VERSION=1.8.3 -ENV POETRY_HOME=/opt/poetry -ENV PATH="$POETRY_HOME/bin:$PATH" -RUN curl -sSL https://install.python-poetry.org | python3 - -RUN poetry config virtualenvs.create false -COPY devops/pyproject.toml devops/poetry.lock ./ +# Install Miniforge for our Python environment (provides easier PDAL installation) +RUN wget -q -O miniforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh && \ + sh miniforge.sh -b -p /opt/conda && \ + rm miniforge.sh +ENV PATH="/opt/conda/bin:${PATH}" -WORKDIR /opt +# Create a conda environment with Python 3.9 and activate it +RUN conda create -n py39env python=3.9 -y +SHELL ["conda", "run", "-n", "py39env", "/bin/bash", "-c"] -# install geos into condo the base pdal image is using -RUN conda install setuptools geos -y -n base +# Install PDAL using conda +RUN conda install -c conda-forge pdal -y +# Install needed python packages using poetry +RUN pip install poetry==1.8.3 +RUN poetry config virtualenvs.create false +COPY devops/pyproject.toml devops/poetry.lock ./ RUN poetry install -ENV PYTHONPATH "${PYTHONPATH}:/app" - -WORKDIR / +# Populate image with geoapi and set PYTHONPATH RUN mkdir app COPY geoapi /app/geoapi WORKDIR /app/geoapi +ENV PYTHONPATH=/app + +# Create an entrypoint script that activates our conda 
environment +RUN echo '#!/bin/bash' > /usr/local/bin/entrypoint.sh && \ + echo 'set -e' >> /usr/local/bin/entrypoint.sh && \ + echo '' >> /usr/local/bin/entrypoint.sh && \ + echo '# Activate conda and the specific environment' >> /usr/local/bin/entrypoint.sh && \ + echo '. /opt/conda/etc/profile.d/conda.sh' >> /usr/local/bin/entrypoint.sh && \ + echo 'conda activate py39env' >> /usr/local/bin/entrypoint.sh && \ + echo '' >> /usr/local/bin/entrypoint.sh && \ + echo '# Execute the passed command' >> /usr/local/bin/entrypoint.sh && \ + echo 'exec "$@"' >> /usr/local/bin/entrypoint.sh && \ + chmod +x /usr/local/bin/entrypoint.sh + +# Set the entrypoint +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] + +# activate conda (to handle when user starts a bash via docker exec) +RUN echo '. /opt/conda/etc/profile.d/conda.sh' >> /root/.bashrc && \ + echo 'conda activate py39env' >> /root/.bashrc + +# Set a default command (can be overridden by docker-compose) +CMD ["bash"] diff --git a/devops/geoapi-services/nginx.conf b/devops/geoapi-services/nginx.conf index 77823823..71454a39 100644 --- a/devops/geoapi-services/nginx.conf +++ b/devops/geoapi-services/nginx.conf @@ -61,21 +61,28 @@ http { } location /assets { - max_ranges 0; + alias /assets/; expires 30d; - add_header "Access-Control-Allow-Origin" *; + add_header "Access-Control-Allow-Origin" * always; + add_header "Access-Control-Allow-Headers" * always; + + # Allow range requests for .bin files for potree point clouds + # Also, disable gzip for .bin files as it causes some browsers + # to send entire compressed file + location ~ \.bin$ { + add_header Accept-Ranges bytes; + gzip off; + } # Preflighted requests if ($request_method = OPTIONS ) { - add_header "Access-Control-Allow-Origin" *; - add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD, PUT, DELETE"; - add_header "Access-Control-Allow-Headers" "*"; - add_header 'Access-Control-Max-Age' 86400; - add_header 'Content-Length' 0; + add_header 
"Access-Control-Allow-Origin" * always; + add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD, PUT, DELETE" always; + add_header "Access-Control-Max-Age" 86400 always; + add_header "Content-Length" "0" always; return 204; } - alias /assets/; } } } diff --git a/devops/local_conf/nginx.conf b/devops/local_conf/nginx.conf index 0507dda3..4cc248fa 100644 --- a/devops/local_conf/nginx.conf +++ b/devops/local_conf/nginx.conf @@ -12,7 +12,7 @@ http { server { include /etc/nginx/mime.types; - client_max_body_size 10g; + client_max_body_size 1g; location / { add_header 'Access-Control-Allow-Origin' '*' always; @@ -39,20 +39,27 @@ http { } location /assets { - max_ranges 0; + alias /assets/; expires 30d; - add_header 'Access-Control-Allow-Origin' '*'; + add_header "Access-Control-Allow-Origin" * always; + add_header "Access-Control-Allow-Headers" * always; + + # Allow range requests for .bin files for potree point clouds + # Also, disable gzip for .bin files as it causes some browsers + # to send entire compressed file + location ~ \.bin$ { + add_header Accept-Ranges bytes; + gzip off; + } # Preflighted requests - if ($request_method = OPTIONS ) { - add_header 'Access-Control-Allow-Origin' '*'; - add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS, HEAD, PUT, DELETE'; - add_header 'Access-Control-Allow-Headers' '*'; - add_header 'Access-Control-Max-Age' 86400; - add_header 'Content-Length' 0; + if ($request_method = OPTIONS) { + add_header "Access-Control-Allow-Origin" "*" always; + add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD, PUT, DELETE" always; + add_header "Access-Control-Max-Age" "86400" always; + add_header "Content-Length" "0" always; return 204; } - alias /assets/; } } } diff --git a/devops/misc/potree/page_template/nsf_logo_snippet.txt b/devops/misc/potree/page_template/nsf_logo_snippet.txt new file mode 100644 index 00000000..f9a8b775 --- /dev/null +++ b/devops/misc/potree/page_template/nsf_logo_snippet.txt @@ -0,0 
+1,3 @@ + diff --git a/devops/misc/potree/page_template/viewer_template.html b/devops/misc/potree/page_template/viewer_template.html deleted file mode 100644 index c3953660..00000000 --- a/devops/misc/potree/page_template/viewer_template.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - Potree Viewer - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-
-
- - - - - diff --git a/geoapi/models/feature.py b/geoapi/models/feature.py index 30550dc1..69333b77 100644 --- a/geoapi/models/feature.py +++ b/geoapi/models/feature.py @@ -57,6 +57,7 @@ class FeatureAsset(Base): id = Column(Integer, primary_key=True) feature_id = Column(ForeignKey('features.id', ondelete="CASCADE", onupdate="CASCADE"), index=True) uuid = Column(UUID(as_uuid=True), default=uuid.uuid4, nullable=False) + # system or project id or both path = Column(String(), nullable=False) original_name = Column(String(), nullable=True) original_path = Column(String(), nullable=True, index=True) diff --git a/geoapi/tasks/external_data.py b/geoapi/tasks/external_data.py index debcbf27..1a044b52 100644 --- a/geoapi/tasks/external_data.py +++ b/geoapi/tasks/external_data.py @@ -20,7 +20,7 @@ from geoapi.services.imports import ImportsService from geoapi.services.vectors import SHAPEFILE_FILE_ADDITIONAL_FILES import geoapi.services.point_cloud as pointcloud -from geoapi.tasks.lidar import convert_to_potree, check_point_cloud, get_point_cloud_info +from geoapi.tasks.lidar import convert_to_potree, check_point_cloud, get_point_cloud_info, PointCloudConversionException from geoapi.db import create_task_session from geoapi.services.notifications import NotificationsService from geoapi.services.users import UserService @@ -154,6 +154,19 @@ def _update_point_cloud_task(database_session, pointCloudId: int, description: s database_session.commit() +def _handle_point_cloud_conversion_error(pointCloudId, userId, files, error_description): + with create_task_session() as session: + user = session.query(User).get(userId) + logger.exception( + f"point cloud:{pointCloudId} conversion failed for user:{user.username} and files:{files}. 
" + f"error: {error_description}") + _update_point_cloud_task(session, pointCloudId, description=error_description, status="FAILED") + NotificationsService.create(session, + user, + "error", + f"Processing failed for point cloud ({pointCloudId})!") + + @app.task(rate_limit="1/s") def import_point_clouds_from_agave(userId: int, files, pointCloudId: int): with create_task_session() as session: @@ -220,13 +233,12 @@ def import_point_clouds_from_agave(userId: int, files, pointCloudId: int): NotificationsService.create(session, user, "error", failed_message) return - _update_point_cloud_task(session, pointCloudId, description="Running potree converter", status="RUNNING") - point_cloud.files_info = get_point_cloud_info(session, pointCloudId) session.add(point_cloud) session.commit() + _update_point_cloud_task(session, pointCloudId, description="Running potree converter", status="RUNNING") NotificationsService.create(session, user, "success", @@ -243,12 +255,12 @@ def import_point_clouds_from_agave(userId: int, files, pointCloudId: int): user, "success", "Completed potree converter (for point cloud {}).".format(pointCloudId)) - except: # noqa: E722 - with create_task_session() as session: - user = session.query(User).get(userId) - logger.exception(f"point cloud:{pointCloudId} conversion failed for user:{user.username} and files:{files}") - _update_point_cloud_task(session, pointCloudId, description="", status="FAILED") - NotificationsService.create(session, user, "error", "Processing failed for point cloud ({})!".format(pointCloudId)) + except PointCloudConversionException as e: + error_description = e.message + _handle_point_cloud_conversion_error(pointCloudId, userId, files, error_description) + except Exception: + error_description = "Unknown error occurred" + _handle_point_cloud_conversion_error(pointCloudId, userId, files, error_description) @app.task(rate_limit="5/s") diff --git a/geoapi/tasks/lidar.py b/geoapi/tasks/lidar.py index da3dc545..10d5fd03 100644 --- 
a/geoapi/tasks/lidar.py +++ b/geoapi/tasks/lidar.py @@ -67,6 +67,34 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): session.commit() +class PointCloudConversionException(Exception): + def __init__(self, message="Unknown error"): + self.message = message + super().__init__(self.message) + + +def run_potree_converter(pointCloudId, + path_to_original_point_clouds, + path_temp_processed_point_cloud_path, + conversion_parameters=None): + """ Run potree converter as external process """ + command = [ + "/opt/PotreeConverter/build/PotreeConverter", + "--verbose", + "-i", + path_to_original_point_clouds, + "-o", + path_temp_processed_point_cloud_path, + "--overwrite", + "--generate-page", + "index" + ] + if conversion_parameters: + command.extend(conversion_parameters.split()) + logger.info("Processing point cloud (#{}). command:{}".format(pointCloudId, " ".join(command))) + subprocess.run(command, check=True, capture_output=True, text=True) + + @app.task(bind=True, base=PointCloudProcessingTask) def convert_to_potree(self, pointCloudId: int) -> None: """ @@ -74,8 +102,12 @@ def convert_to_potree(self, pointCloudId: int) -> None: Note: this operation is memory-intensive and time-consuming. Large LAS files (>8 Gb) can use >50gb of memory. + if process killed (e.g. 
due to memory constraints), PointCloudTaskException is raised + :param pointCloudId: int + :param userId: int :return: None + :raises PointCloudTaskException: if conversion fails """ from geoapi.models import Feature, FeatureAsset from geoapi.services.point_cloud import PointCloudService @@ -92,22 +124,15 @@ def convert_to_potree(self, pointCloudId: int) -> None: outline = get_bounding_box_2d(input_files) - command = [ - "PotreeConverter", - "--verbose", - "-i", - path_to_original_point_clouds, - "-o", - path_temp_processed_point_cloud_path, - "--overwrite", - "--generate-page", - "index" - ] - if conversion_parameters: - command.extend(conversion_parameters.split()) - logger.info("Processing point cloud (#{}): {}".format(pointCloudId, " ".join(command))) - - subprocess.run(command, check=True, capture_output=True, text=True) + try: + run_potree_converter(pointCloudId, path_to_original_point_clouds, path_temp_processed_point_cloud_path, conversion_parameters) + except subprocess.CalledProcessError as e: + error_description = "Point cloud conversion failed" + if e.returncode == -9: # SIGKILL; most likely ran out of memory + error_description += "; process killed due to insufficient memory" + logger.exception(f"Processing point cloud failed (point_cloud:{pointCloudId} " + f"path_to_original_point_clouds:{path_to_original_point_clouds} ).") + raise PointCloudConversionException(error_description) with create_task_session() as session: point_cloud = PointCloudService.get(session, pointCloudId) diff --git a/geoapi/tests/external_data_tests/test_external_data.py b/geoapi/tests/external_data_tests/test_external_data.py index aed315cf..a7f59b90 100644 --- a/geoapi/tests/external_data_tests/test_external_data.py +++ b/geoapi/tests/external_data_tests/test_external_data.py @@ -2,6 +2,7 @@ import pytest import os import re +import subprocess from geoapi.models import Feature, ImportedFile from geoapi.db import db_session, create_task_session @@ -260,6 +261,10 @@ def 
test_import_point_clouds_from_agave(MockAgaveUtils, assert len(os.listdir( get_asset_path(point_cloud.feature.assets[0].path))) == 5 # index.html, preview.html, pointclouds, libs, logo assert len(os.listdir(get_asset_path(point_cloud_fixture.path, PointCloudService.ORIGINAL_FILES_DIR))) == 1 + assert os.path.isfile(os.path.join(get_asset_path(point_cloud.feature.assets[0].path), "index.html")) + with open(os.path.join(get_asset_path(point_cloud.feature.assets[0].path), "index.html"), 'r+') as f: + index = f.read() + assert "nsf_logo" in index assert os.path.isfile(os.path.join(get_asset_path(point_cloud.feature.assets[0].path), "preview.html")) with open(os.path.join(get_asset_path(point_cloud.feature.assets[0].path), "preview.html"), 'r+') as f: preview = f.read() @@ -329,7 +334,34 @@ def test_import_point_clouds_from_agave_conversion_error(MockAgaveUtils, db_session.refresh(point_cloud_fixture) point_cloud = point_cloud_fixture assert point_cloud.task.status == "FAILED" - assert point_cloud.task.description == "" + assert point_cloud.task.description == "Unknown error occurred" + assert len(os.listdir(get_asset_path(point_cloud_fixture.path, PointCloudService.ORIGINAL_FILES_DIR))) == 1 + + +@pytest.mark.worker +@patch("geoapi.tasks.external_data.AgaveUtils") +@patch("geoapi.tasks.lidar.run_potree_converter") +def test_import_point_clouds_from_agave_conversion_error_due_to_memory_sigterm(mock_run_potree_converter, + MockAgaveUtils, + user1, + projects_fixture, + point_cloud_fixture, + lidar_las1pt2_file_fixture): + MockAgaveUtils().getFile.return_value = lidar_las1pt2_file_fixture + + # Mock subprocess.run to raise CalledProcessError with returncode -9 (SIGKILL) + mock_run_potree_converter.side_effect = subprocess.CalledProcessError( + returncode=-9, + cmd=["/opt/PotreeConverter/build/PotreeConverter", "--verbose", "-i", "/path/to/input", "-o", "/path/to/output"] + ) + + files = [{"system": "designsafe.storage.default", "path": "file1.las"}] + 
import_point_clouds_from_agave(user1.id, files, point_cloud_fixture.id) + + db_session.refresh(point_cloud_fixture) + point_cloud = point_cloud_fixture + assert point_cloud.task.status == "FAILED" + assert point_cloud.task.description == "Point cloud conversion failed; process killed due to insufficient memory" assert len(os.listdir(get_asset_path(point_cloud_fixture.path, PointCloudService.ORIGINAL_FILES_DIR))) == 1