This repository has been archived by the owner on Nov 22, 2024. It is now read-only.

Commit 66341bb

update flink to 1.17.2, spark 3.3.4 (#171)
* update to flink 1.17.2 and cloudflow 2.3.2
* update sbt-native-packager to resolve dependency error
* resolve scala-xml conflict with sbt uplift
* upgrade spark to 3.3.4 so sbt scripted succeeds
* support new flink-connector-kafka versioning
fieldera-da authored Jan 20, 2024
1 parent 48c8edd commit 66341bb
Showing 8 changed files with 31 additions and 30 deletions.
16 changes: 8 additions & 8 deletions cloudflow-it/Makefile
@@ -29,15 +29,15 @@ prepare-swiss-knife:
 prepare-clis:
 	@echo '****** Prepare the runtimes Clis'
 	rm -rf spark
-	wget https://downloads.apache.org/spark/spark-3.2.1/spark-3.2.1-bin-hadoop2.7.tgz
-	tar -xf spark-3.2.1-bin-hadoop2.7.tgz
-	mv spark-3.2.1-bin-hadoop2.7 spark
-	rm spark-3.2.1-bin-hadoop2.7.tgz
+	wget https://downloads.apache.org/spark/spark-3.3.4/spark-3.3.4-bin-hadoop2.tgz
+	tar -xf spark-3.3.4-bin-hadoop2.tgz
+	mv spark-3.3.4-bin-hadoop2 spark
+	rm spark-3.3.4-bin-hadoop2.tgz
 	rm -rf flink
-	wget https://archive.apache.org/dist/flink/flink-1.14.3/flink-1.14.3-bin-scala_2.12.tgz
-	tar -xf flink-1.14.3-bin-scala_2.12.tgz
-	mv flink-1.14.3 flink
-	rm flink-1.14.3-bin-scala_2.12.tgz
+	wget https://archive.apache.org/dist/flink/flink-1.17.2/flink-1.17.2-bin-scala_2.12.tgz
+	tar -xf flink-1.17.2-bin-scala_2.12.tgz
+	mv flink-1.17.2 flink
+	rm flink-1.17.2-bin-scala_2.12.tgz
 
 .PHONY: prepare-cluster
 prepare-cluster:
2 changes: 1 addition & 1 deletion CloudflowNativeFlinkPlugin.scala

@@ -29,7 +29,7 @@ import cloudflow.sbt.CloudflowBasePlugin._
 
 object CloudflowNativeFlinkPlugin extends AutoPlugin {
   val FlinkHome = "/opt/flink"
-  val FlinkVersion = "1.14.4"
+  val FlinkVersion = "1.17.2"
   val FlinkUsrLib = s"$FlinkHome/usrlib"
 
   val AppJarsDir: String = "app-jars"
22 changes: 11 additions & 11 deletions CloudflowNativeSparkPlugin.scala

@@ -96,10 +96,10 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
       IO.write(sparkEntrypointSh, sparkEntrypointShContent)
 
       val scalaVersion = (ThisProject / scalaBinaryVersion).value
-      val sparkVersion = "3.2.4"
+      val sparkVersion = "3.3.4"
       val sparkHome = "/opt/spark"
 
-      val sparkTgz = s"spark-${sparkVersion}-bin-hadoop2.7.tgz"
+      val sparkTgz = s"spark-${sparkVersion}-bin-hadoop2.tgz"
       val sparkTgzUrl = s"https://downloads.apache.org/spark/spark-${sparkVersion}/${sparkTgz}"
 
       val tiniVersion = "v0.18.0"

@@ -118,21 +118,21 @@ object CloudflowNativeSparkPlugin extends AutoPlugin {
           Seq("wget", sparkTgzUrl),
           Seq("tar", "-xvzf", sparkTgz),
           Seq("mkdir", "-p", sparkHome),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2.7/jars", s"${sparkHome}/jars"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2.7/bin", s"${sparkHome}/bin"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2.7/sbin", s"${sparkHome}/sbin"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2.7/examples", s"${sparkHome}/examples"),
-          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2.7/data", s"${sparkHome}/data"),
-          Seq("cp", s"spark-${sparkVersion}-bin-hadoop2.7/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/jars", s"${sparkHome}/jars"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/bin", s"${sparkHome}/bin"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/sbin", s"${sparkHome}/sbin"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/examples", s"${sparkHome}/examples"),
+          Seq("cp", "-r", s"spark-${sparkVersion}-bin-hadoop2/data", s"${sparkHome}/data"),
+          Seq("cp", s"spark-${sparkVersion}-bin-hadoop2/kubernetes/dockerfiles/spark/entrypoint.sh", "/opt/"),
           Seq("mkdir", "-p", s"${sparkHome}/conf"),
           Seq("cp", "/tmp/log4j.properties", s"${sparkHome}/conf/log4j.properties"),
           Seq("rm", sparkTgz),
           // logback configuration, based on:
           // https://stackoverflow.com/a/45479379
           // logback is provided by the streamlet
-          Seq("rm", s"${sparkHome}/jars/slf4j-log4j12-1.7.30.jar"),
-          Seq("rm", s"${sparkHome}/jars/log4j-1.2.17.jar"),
-          Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop2.7"),
+          //Seq("rm", s"${sparkHome}/jars/slf4j-log4j12-1.7.30.jar"),
+          //Seq("rm", s"${sparkHome}/jars/log4j-1.2.17.jar"),
+          Seq("rm", "-rf", s"spark-${sparkVersion}-bin-hadoop2"),
           Seq("chmod", "a+x", "/opt/spark-entrypoint.sh"),
           Seq("ln", "-s", "/lib", "/lib64"),
           Seq("apk", "add", "bash", "curl"),
@@ -57,7 +57,7 @@ The following scripts are expected to be present on the `PATH`:
 - bash
 - jq (minimum version required is > 1.5, because of https://github.com/stedolan/jq/issues/1408[this] issue)
 - kubectl
-- flink cli (version 1.14.3)
+- flink cli (version 1.17.2)
 
 In the `flink` sub-folder, the first script available is `setup-example-rbac.sh`. This first step needs to be performed once on any cluster to which you want to deploy Flink streamlets; refer to https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/resource-providers/native_kubernetes/#rbac[the upstream documentation] for further details. The script sets up a service account and cluster role binding. Take note of the service account name it prints to stdout - you will need it to run subsequent scripts.

4 changes: 2 additions & 2 deletions docs/docs-source/docs/modules/get-started/pages/index.adoc
@@ -16,7 +16,7 @@ A full example installation command will look as follows:
 [source,shell,subs="attributes,+quotes"]
 ----
 helm upgrade -i cloudflow cloudflow-helm-charts/cloudflow \
-  --version "2.1.0" \
+  --version "2.3.2" \
   --set cloudflow_operator.jvm.opts="*-Dcloudflow.platform.flink-enabled=false -Dcloudflow.platform.spark-enabled=false* -XX:MaxRAMPercentage=90.0 -XX:+UseContainerSupport" \
   --set kafkaClusters.default.bootstrapServers=cloudflow-strimzi-kafka-bootstrap.cloudflow:9092 \
   --namespace cloudflow
@@ -26,7 +26,7 @@ helm upgrade -i cloudflow cloudflow-helm-charts/cloudflow \
 
 **In any Cloudflow application using Spark or Flink**, the Kubernetes cluster will need to have a storage class of the `ReadWriteMany` type installed.
 
-NOTE: The NFS Server Provisioner is an excellent and easy-to-set-up storage option for the development environment; for production, use the suggested and supported cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.2.0/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
+NOTE: The NFS Server Provisioner is an excellent and easy-to-set-up storage option for the development environment; for production, use the suggested and supported cloud integrations for https://ci.apache.org/projects/flink/flink-docs-master/docs/deployment/filesystems/overview/#pluggable-file-systems[Flink] and for https://spark.apache.org/docs/3.3.4/cloud-integration.html#important-cloud-object-stores-are-not-real-filesystems[Spark]
 
 For testing purposes, we suggest using the NFS Server Provisioner, which can be found here: https://github.com/helm/charts/tree/master/stable/nfs-server-provisioner[NFS Server Provisioner Helm chart]

11 changes: 6 additions & 5 deletions project/Dependencies.scala
@@ -7,10 +7,11 @@ object Dependencies {
   val Scala213 = "2.13.8"
 
   object Versions {
-    val cloudflowVersion = "2.3.1-RC1"
+    val cloudflowVersion = "2.3.2"
 
-    val flinkVersion = "1.14.4"
-    val sparkVersion = "3.2.1"
+    val flinkVersion = "1.17.2"
+    val flinkKafkaVersion = "3.0.2-1.17"
+    val sparkVersion = "3.3.4"
     val akka = "2.6.19"
     val jackson = "2.12.6"
     val fabric8 = "5.0.0"
@@ -29,8 +30,8 @@ object Dependencies {
   val flink = "org.apache.flink" %% "flink-scala" % Versions.flinkVersion
   val flinkStreaming = "org.apache.flink" %% "flink-streaming-scala" % Versions.flinkVersion
   val flinkAvro = "org.apache.flink" % "flink-avro" % Versions.flinkVersion
-  val flinkKafka = "org.apache.flink" %% "flink-connector-kafka" % Versions.flinkVersion
-  val flinkWeb = "org.apache.flink" %% "flink-runtime-web" % Versions.flinkVersion
+  val flinkKafka = "org.apache.flink" % "flink-connector-kafka" % Versions.flinkKafkaVersion
+  val flinkWeb = "org.apache.flink" % "flink-runtime-web" % Versions.flinkVersion
 
   val spark = "org.apache.spark" %% "spark-core" % Versions.sparkVersion
   val sparkMllib = "org.apache.spark" %% "spark-mllib" % Versions.sparkVersion
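
Why the connector coordinates changed: starting with Flink 1.17, the Kafka connector is released separately from Flink core, versioned as <connector release>-<targeted Flink release> (here 3.0.2-1.17), and published as a plain Java artifact without a Scala binary suffix. A minimal build.sbt sketch of the resulting declarations (a standalone sketch, not this repository's build):

    // build.sbt -- minimal sketch of the new Flink/Kafka coordinates
    val flinkVersion = "1.17.2"
    val flinkKafkaVersion = "3.0.2-1.17" // <connector release>-<Flink release>

    libraryDependencies ++= Seq(
      // core Flink modules still track flinkVersion; the Scala API keeps its %% suffix
      "org.apache.flink" %% "flink-streaming-scala" % flinkVersion,
      // the externalized Kafka connector has its own version and no Scala suffix, hence %
      "org.apache.flink" % "flink-connector-kafka" % flinkKafkaVersion)
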
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
-sbt.version=1.5.8
+sbt.version=1.9.8
2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -1,4 +1,4 @@
-addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.9")
+addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.16")
 addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1")
 addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
 addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0")
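
The "resolve scala-xml conflict with sbt uplift" bullet refers to the eviction error newer sbt releases raise when plugins still pull in scala-xml 1.x while sbt 1.9.x ships scala-xml 2.x. The hunk with the actual fix is not among those shown; a common way to resolve it, sketched here as an assumption rather than as this commit's exact change, is to relax the version scheme in the build:

    // build.sbt -- assumed workaround, not necessarily what this commit did:
    // accept any scala-xml version instead of failing the 1.x -> 2.x eviction check
    ThisBuild / libraryDependencySchemes +=
      "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always
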
