Skip to content

Commit

Permalink
Connect to spark independent of the host name
Browse files Browse the repository at this point in the history
  • Loading branch information
hfxbse committed Nov 19, 2024
1 parent 1105e5e commit 7087c5e
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions airflow.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai
# Move the exercise's plugins, dags and python directories, one after another,
# to /home/airflow/airflow. The build stops at the first failing move.
RUN exercise_dir=/tmp/upstream/exercises/winter_semester_2024-2025/05_airflow; \
    for component in plugins dags python; do \
        mv "$exercise_dir/$component" /home/airflow/airflow || exit 1; \
    done

# Take the Spark master host from the HADOOP_HOST environment variable instead
# of the hard-coded name "hadoop". The single quotes keep the build shell from
# expanding the variable, so the literal expression is written to spark-env.sh
# and resolved whenever that script is evaluated. If HADOOP_HOST is unset or
# empty, the previous host name "hadoop" is used. Spark's local IP is switched
# from "airflow" to "localhost" in the same pass.
RUN sed -i \
    -e 's/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=${HADOOP_HOST:-hadoop}/' \
    -e 's/SPARK_LOCAL_IP="airflow"/SPARK_LOCAL_IP="localhost"/' \
    /home/airflow/spark/conf/spark-env.sh
# PyArrow fails to interpret a placeholder in the Hadoop configuration, so
# substitute the real host name instead: insert a sed call before line 34 of
# /startup.sh for each configuration file.
# The outer single quotes are required: with double quotes the build shell would
# expand HADOOP_HOST during `docker build` and bake that (possibly empty) value
# into /startup.sh. The inner double quotes let /startup.sh expand the variable
# when it runs, again falling back to "hadoop" when it is unset or empty.
RUN sed -i '34 i sed -i "s/hadoop:/${HADOOP_HOST:-hadoop}:/g" /home/airflow/hadoop/etc/hadoop/core-site.xml' /startup.sh
RUN sed -i '34 i sed -i "s/hadoop:/${HADOOP_HOST:-hadoop}:/g" /home/airflow/hadoop/etc/hadoop/yarn-site.xml' /startup.sh
Expand Down

0 comments on commit 7087c5e

Please sign in to comment.