Skip to content

Commit

Permalink
upgrading to Apache Spark 3.5.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
sdaberdaku committed Jun 8, 2024
1 parent 879ef64 commit fdadad3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 10 deletions.
12 changes: 5 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# I am using the image defined in https://github.com/sebastiandaberdaku/spark-with-glue-builder/releases/tag/spark-v3.5.0
# I am using the image defined in https://github.com/sebastiandaberdaku/spark-with-glue-builder/releases/tag/spark-v3.5.1
FROM sdaberdaku/spark-with-glue-builder:v3.5.1 AS builder

# Starting with a clean image
@@ -17,8 +17,9 @@ RUN apt-get update; \
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
ENV SPARK_HOME=/opt/spark
ENV HADOOP_HOME=/opt/hadoop

ENV PATH=${PATH}:/home/spark/.local/bin:${JAVA_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${HADOOP_HOME}/bin
ENV HADOOP_COMMON_LIB_NATIVE_DIR="${HADOOP_HOME}/lib/native"
ENV HADOOP_OPTS="${HADOOP_OPTS} -Djava.library.path=${HADOOP_HOME}/lib/native"
ENV PATH="${PATH}:/home/spark/.local/bin:${JAVA_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${HADOOP_HOME}/bin"

COPY --from=builder /opt/spark/dist/ ${SPARK_HOME}/
COPY --from=builder /opt/hadoop/ ${HADOOP_HOME}/
@@ -35,10 +36,7 @@ RUN cp ${SPARK_HOME}/kubernetes/dockerfiles/spark/entrypoint.sh /opt/entrypoint.
USER spark
WORKDIR /home/spark

# first install pyspark from local dist folder
RUN pip install --no-cache-dir --trusted-host pypi.python.org --editable ${SPARK_HOME}/python
# then, install the other dependencies
COPY ./requirements.txt .
RUN pip install --no-cache-dir --trusted-host pypi.python.org -r requirements.txt
RUN pip install --no-cache-dir --trusted-host pypi.python.org --editable ${SPARK_HOME}/python -r requirements.txt

ENTRYPOINT ["/opt/entrypoint.sh"]
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pandas>=1.0.5
pyarrow>=4.0.0
numpy>=1.15
grpcio>=1.48,<1.57
grpcio-status>=1.48,<1.57
googleapis-common-protos==1.56.4
grpcio>=1.56.0
grpcio-status>=1.56.0
googleapis-common-protos>=1.56.4
delta-spark==3.2.0

0 comments on commit fdadad3

Please sign in to comment.