Connect to spark independent of the host name
hfxbse committed Nov 19, 2024
1 parent d28b012 commit 3f5505f
Showing 2 changed files with 8 additions and 2 deletions.
airflow.Dockerfile: 7 changes (5 additions, 2 deletions)
@@ -11,9 +11,12 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.4.jar -P /home/ai
RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/plugins /home/airflow/airflow
RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/dags /home/airflow/airflow
RUN mv /tmp/upstream/exercises/winter_semester_2024-2025/05_airflow/python /home/airflow/airflow

RUN sed -i '34 i sed -i "s/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=\$HADOOP_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh
RUN sed -i '34 i sed -i "s/SPARK_LOCAL_IP=\\"airflow\\"/SPARK_LOCAL_IP=\$SPARK_HOST/" /home/airflow/spark/conf/spark-env.sh' /startup.sh
# Pyarrow fails to interpret placeholder, substitute instead
RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/core-site.xml" /startup.sh
RUN sed -i "34 i sed -i 's/hadoop:/$HADOOP_HOST:/g' /home/airflow/hadoop/etc/hadoop/yarn-site.xml" /startup.sh
RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/core-site.xml' /startup.sh
RUN sed -i '34 i sed -i "s/hadoop:/\$HADOOP_HOST:/g" /home/airflow/hadoop/etc/hadoop/yarn-site.xml' /startup.sh
# Setting AIRFLOW__WEBSERVER__BASE_URL did not get applied for unknown reasons, update the config file instead
RUN sed -i "34 i sed -i 's#base_url = http://localhost:8080#base_url = http://localhost:8080/airflow#' /home/airflow/airflow/airflow.cfg" /startup.sh

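For readability, here is a rough sketch of what the two Spark-related RUN lines above are intended to leave behind at line 34 of /startup.sh. It is reconstructed from the RUN commands, not copied from the generated file, and the exact quoting depends on how sed's "i" command processes the escapes:

# Inserted into /startup.sh at image build time; $HADOOP_HOST and $SPARK_HOST are
# deliberately left unexpanded here, so they resolve when the container starts
# with the environment that docker-compose provides.
sed -i "s/SPARK_MASTER_IP=hadoop/SPARK_MASTER_IP=$HADOOP_HOST/" /home/airflow/spark/conf/spark-env.sh
sed -i "s/SPARK_LOCAL_IP=\"airflow\"/SPARK_LOCAL_IP=$SPARK_HOST/" /home/airflow/spark/conf/spark-env.sh

The core-site.xml, yarn-site.xml, and airflow.cfg lines follow the same pattern: rewrite the generated configuration once at container start instead of baking fixed values into the image.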
docker-compose.yml: 3 changes (3 additions, 0 deletions)
@@ -42,3 +42,6 @@ services:
condition: service_started
hadoop:
condition: service_healthy
environment:
- HADOOP_HOST=hadoop
- SPARK_HOST=airflow # Define the address under which spark is reachable externally
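As a quick runtime check, the substituted values can be inspected inside the running container; this is a sketch that assumes the Airflow service is named "airflow" in docker-compose.yml and that core-site.xml carries the usual hdfs:// fs.defaultFS entry:

# Confirm the host names were rewritten at container start
docker compose exec airflow grep -E 'SPARK_(MASTER|LOCAL)_IP' /home/airflow/spark/conf/spark-env.sh
docker compose exec airflow grep 'hdfs://' /home/airflow/hadoop/etc/hadoop/core-site.xml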
