From fec8cbb35e18a730d9d99c0ef4fe22c6426d9bca Mon Sep 17 00:00:00 2001 From: Benjamin Kiah Stroud <32469930+bkiahstroud@users.noreply.github.com> Date: Mon, 1 Apr 2024 18:09:24 -0700 Subject: [PATCH] WIP: update Docker image to use different base lambci does not have a ruby3.2 image because Amazon started making their own --- layers/process_documents/Dockerfile | 295 +++++++++++++++------------- 1 file changed, 154 insertions(+), 141 deletions(-) diff --git a/layers/process_documents/Dockerfile b/layers/process_documents/Dockerfile index 0ef6c8a..00f010f 100644 --- a/layers/process_documents/Dockerfile +++ b/layers/process_documents/Dockerfile @@ -7,8 +7,8 @@ # Core ############################################################################### -# Use AWS Lambda ruby2.7 build environment -FROM lambci/lambda:build-ruby2.7 +# Use AWS Lambda ruby3.2 build environment +FROM amazon/aws-lambda-ruby:3.2 # Update all existing packages RUN yum update -y @@ -25,13 +25,26 @@ ENV CXXFLAGS $CFLAGS WORKDIR /root # Leptonica image-reading dependencies -RUN yum install -y libjpeg-devel libpng-devel libtiff-devel - -RUN curl -O http://www.leptonica.org/source/leptonica-1.79.0.tar.gz +RUN yum install -y \ + libjpeg-devel \ + libpng-devel \ + libtiff-devel \ + tar \ + gzip \ + gcc-c++ \ + make \ + libtool + +# Clean up YUM caches to keep the image size down +RUN yum clean all + +RUN curl -LO https://github.com/DanBloomberg/leptonica/releases/download/1.79.0/leptonica-1.79.0.tar.gz RUN tar zxvf leptonica-1.79.0.tar.gz -WORKDIR leptonica-1.79.0 -RUN ./configure --prefix=/opt +WORKDIR /root/leptonica-1.79.0 +# FIXME: figure out how to run without --disable-dependency-tracking flag +# RUN ./configure --prefix=/opt +RUN ./configure --prefix=/opt --disable-dependency-tracking RUN make install ############################################################################### @@ -49,7 +62,7 @@ RUN curl -Lo tesseract-4.1.1.tar.gz \ https://github.com/tesseract-ocr/tesseract/archive/4.1.1.tar.gz RUN tar zxvf tesseract-4.1.1.tar.gz -WORKDIR tesseract-4.1.1 +WORKDIR /root/tesseract-4.1.1 RUN ./autogen.sh --prefix=/opt # These ENV vars have to be set or it will not build ENV LEPTONICA_CFLAGS -I/opt/include/leptonica @@ -57,138 +70,138 @@ ENV LEPTONICA_LIBS -L/opt/lib -lleptonica RUN ./configure --prefix=/opt RUN make install -# English training data -WORKDIR /opt/share/tessdata -RUN curl -LO https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata - -############################################################################### -# GhostScript -############################################################################### - -WORKDIR /root - -RUN curl -LO \ - https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs952/ghostscript-9.52.tar.gz -RUN tar zxvf ghostscript-9.52.tar.gz - -WORKDIR ghostscript-9.52 -RUN ./configure --prefix=/opt -RUN make install - -############################################################################### -# libvips -############################################################################### - +# # English training data +# WORKDIR /opt/share/tessdata +# RUN curl -LO https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata +# +# ############################################################################### +# # GhostScript +# ############################################################################### +# # WORKDIR /root - -# RUN yum install -y gtk-doc gobject-introspection-devel expat-devel - -# RUN curl -o libvips-8.9.1.tar.gz \ -# https://codeload.github.com/libvips/libvips/tar.gz/v8.9.1 -# RUN tar zxvf libvips-8.9.1.tar.gz - -# WORKDIR libvips-8.9.1 -# RUN ./autogen.sh --prefix=/opt/libvips -# RUN ./configure --prefix=/opt/libvips +# +# RUN curl -LO \ +# https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs952/ghostscript-9.52.tar.gz +# RUN tar zxvf ghostscript-9.52.tar.gz +# +# WORKDIR ghostscript-9.52 +# RUN ./configure --prefix=/opt # RUN make install - -# WORKDIR /opt/libvips -# RUN zip -r /root/zips/libvips.zip * - -############################################################################### -# ImageMagick -############################################################################### - -WORKDIR /root - -RUN yum install -y ghostscript-devel openjpeg2 opentjpeg2-devel openjpeg2-tools - -RUN curl -Lo ImageMagick-7.0.10-6.tar.gz \ - https://github.com/ImageMagick/ImageMagick/archive/7.0.10-6.tar.gz -RUN tar zxvf ImageMagick-7.0.10-6.tar.gz - -WORKDIR ImageMagick-7.0.10-6 -RUN ./configure --prefix=/opt -RUN make install - -############################################################################### -# Gems -############################################################################### - -WORKDIR /root - -# Phashion dependencies -# Can skip this step because they are already installed above for Leptonica -#RUN yum install -y libjpeg-devel libpng-devel - -# Copy Gemfile from host into container's current directory -COPY Gemfile . - -RUN bundle config set path vendor/bundle -RUN bundle - -# Modify directory structure for Lambda load path -WORKDIR vendor/bundle -RUN mkdir ruby/gems -RUN mv ruby/2.* ruby/gems -RUN mv ruby /opt -WORKDIR /root - -############################################################################### -# RPM dependencies -############################################################################### - -WORKDIR /root - -# Install yumdownloader and rpmdev-extract -RUN yum install -y yum-utils rpmdevtools - -RUN mkdir rpms -WORKDIR rpms - -# Download dependency RPMs -RUN yumdownloader libjpeg-turbo.x86_64 libpng.x86_64 libtiff.x86_64 \ - libgomp.x86_64 libwebp.x86_64 jbigkit-libs.x86_64 openjpeg2.x86_64 -# GhostScript and dependencies -# To reduce dependencies, we are compiling GhostScript from source instead -# RUN yumdownloader ghostscript.x86_64 cups-libs.x86_64 fontconfig.x86_64 \ -# fontpackages-filesystem freetype.x86_64 ghostscript-fonts jasper-libs.x86_64 \ -# lcms2.x86_64 libICE.x86_64 libSM.x86_64 libX11.x86_64 libX11-common \ -# libXau.x86_64 libXext.x86_64 libXt.x86_64 libfontenc.x86_64 libxcb.x86_64 \ -# poppler-data stix-fonts urw-fonts xorg-x11-font-utils.x86_64 avahi-libs.x86_64 \ -# acl.x86_64 audit-libs.x86_64 cracklib.x86_64 cracklib-dicts.x86_64 cryptsetup-libs.x86_64 \ -# dbus.x86_64 dbus-libs.x86_64 device-mapper.x86_64 device-mapper-libs.x86_64 \ -# elfutils-default-yama-scope elfutils-libs.x86_64 gzip.x86_64 kmod.x86_64 kmod-libs.x86_64 \ -# libcap-ng.x86_64 libfdisk.x86_64 libpwquality.x86_64 libsemanage.x86_64 \ -# libsmartcols.x86_64 libutempter.x86_64 lz4.x86_64 pam.x86_64 qrencode-libs.x86_64 \ -# shadow-utils.x86_64 systemd.x86_64 systemd-libs.x86_64 ustr.x86_64 util-linux.x86_64 \ -# expat.x86_64 xz-libs.x86_64 libgcrypt.x86_64 libgpg-error.x86_64 elfutils-libelf.x86_64 \ -# bzip2-libs.x86_64 - -# Extract RPMs -RUN rpmdev-extract *.rpm -RUN rm *.rpm - -# Copy all package files into /opt/rpms -RUN cp -vR */usr/* /opt - -# The x86_64 packages extract as lib64, we need to move these files to lib -RUN yum install -y rsync -RUN rsync -av /opt/lib64/ /opt/lib/ -RUN rm -r /opt/lib64 - -WORKDIR /opt/share/tessdata/configs -RUN mkdir -p /opt/share/tessdata/configs && curl -LO https://raw.githubusercontent.com/tesseract-ocr/tessconfigs/3decf1c8252ba6dbeef0bf908f4b0aab7f18d113/configs/hocr - -############################################################################### -# Zip all dependencies -############################################################################### - -WORKDIR /opt -RUN zip -r /root/ProcessDocumentLayer.zip * - -############################################################################### -# Entrypoint: Copy zip file to host -############################################################################### - -ENTRYPOINT ["/bin/cp", "/root/ProcessDocumentLayer.zip", "/output"] \ No newline at end of file +# +# ############################################################################### +# # libvips +# ############################################################################### +# +# # WORKDIR /root +# +# # RUN yum install -y gtk-doc gobject-introspection-devel expat-devel +# +# # RUN curl -o libvips-8.9.1.tar.gz \ +# # https://codeload.github.com/libvips/libvips/tar.gz/v8.9.1 +# # RUN tar zxvf libvips-8.9.1.tar.gz +# +# # WORKDIR libvips-8.9.1 +# # RUN ./autogen.sh --prefix=/opt/libvips +# # RUN ./configure --prefix=/opt/libvips +# # RUN make install +# +# # WORKDIR /opt/libvips +# # RUN zip -r /root/zips/libvips.zip * +# +# ############################################################################### +# # ImageMagick +# ############################################################################### +# +# WORKDIR /root +# +# RUN yum install -y ghostscript-devel openjpeg2 opentjpeg2-devel openjpeg2-tools +# +# RUN curl -Lo ImageMagick-7.0.10-6.tar.gz \ +# https://github.com/ImageMagick/ImageMagick/archive/7.0.10-6.tar.gz +# RUN tar zxvf ImageMagick-7.0.10-6.tar.gz +# +# WORKDIR ImageMagick-7.0.10-6 +# RUN ./configure --prefix=/opt +# RUN make install +# +# ############################################################################### +# # Gems +# ############################################################################### +# +# WORKDIR /root +# +# # Phashion dependencies +# # Can skip this step because they are already installed above for Leptonica +# #RUN yum install -y libjpeg-devel libpng-devel +# +# # Copy Gemfile from host into container's current directory +# COPY Gemfile . +# +# RUN bundle config set path vendor/bundle +# RUN bundle +# +# # Modify directory structure for Lambda load path +# WORKDIR vendor/bundle +# RUN mkdir ruby/gems +# RUN mv ruby/3.* ruby/gems +# RUN mv ruby /opt +# WORKDIR /root +# +# ############################################################################### +# # RPM dependencies +# ############################################################################### +# +# WORKDIR /root +# +# # Install yumdownloader and rpmdev-extract +# RUN yum install -y yum-utils rpmdevtools +# +# RUN mkdir rpms +# WORKDIR rpms +# +# # Download dependency RPMs +# RUN yumdownloader libjpeg-turbo.x86_64 libpng.x86_64 libtiff.x86_64 \ +# libgomp.x86_64 libwebp.x86_64 jbigkit-libs.x86_64 openjpeg2.x86_64 +# # GhostScript and dependencies +# # To reduce dependencies, we are compiling GhostScript from source instead +# # RUN yumdownloader ghostscript.x86_64 cups-libs.x86_64 fontconfig.x86_64 \ +# # fontpackages-filesystem freetype.x86_64 ghostscript-fonts jasper-libs.x86_64 \ +# # lcms2.x86_64 libICE.x86_64 libSM.x86_64 libX11.x86_64 libX11-common \ +# # libXau.x86_64 libXext.x86_64 libXt.x86_64 libfontenc.x86_64 libxcb.x86_64 \ +# # poppler-data stix-fonts urw-fonts xorg-x11-font-utils.x86_64 avahi-libs.x86_64 \ +# # acl.x86_64 audit-libs.x86_64 cracklib.x86_64 cracklib-dicts.x86_64 cryptsetup-libs.x86_64 \ +# # dbus.x86_64 dbus-libs.x86_64 device-mapper.x86_64 device-mapper-libs.x86_64 \ +# # elfutils-default-yama-scope elfutils-libs.x86_64 gzip.x86_64 kmod.x86_64 kmod-libs.x86_64 \ +# # libcap-ng.x86_64 libfdisk.x86_64 libpwquality.x86_64 libsemanage.x86_64 \ +# # libsmartcols.x86_64 libutempter.x86_64 lz4.x86_64 pam.x86_64 qrencode-libs.x86_64 \ +# # shadow-utils.x86_64 systemd.x86_64 systemd-libs.x86_64 ustr.x86_64 util-linux.x86_64 \ +# # expat.x86_64 xz-libs.x86_64 libgcrypt.x86_64 libgpg-error.x86_64 elfutils-libelf.x86_64 \ +# # bzip2-libs.x86_64 +# +# # Extract RPMs +# RUN rpmdev-extract *.rpm +# RUN rm *.rpm +# +# # Copy all package files into /opt/rpms +# RUN cp -vR */usr/* /opt +# +# # The x86_64 packages extract as lib64, we need to move these files to lib +# RUN yum install -y rsync +# RUN rsync -av /opt/lib64/ /opt/lib/ +# RUN rm -r /opt/lib64 +# +# WORKDIR /opt/share/tessdata/configs +# RUN mkdir -p /opt/share/tessdata/configs && curl -LO https://raw.githubusercontent.com/tesseract-ocr/tessconfigs/3decf1c8252ba6dbeef0bf908f4b0aab7f18d113/configs/hocr +# +# ############################################################################### +# # Zip all dependencies +# ############################################################################### +# +# WORKDIR /opt +# RUN zip -r /root/ProcessDocumentLayer.zip * +# +# ############################################################################### +# # Entrypoint: Copy zip file to host +# ############################################################################### +# +# ENTRYPOINT ["/bin/cp", "/root/ProcessDocumentLayer.zip", "/output"]