-
Notifications
You must be signed in to change notification settings - Fork 52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PR1] refactoring :apps build and adding spark3.5/iceberg1.5 artifact #255
base: main
Are you sure you want to change the base?
Changes from 2 commits
3ce4f22
7cee1ed
415deb8
339cffd
08146aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
plugins { | ||
// dependency in apps-spark-conventions | ||
id 'com.github.johnrengelman.shadow' version '7.1.2' | ||
id 'openhouse.apps-spark-conventions' | ||
} | ||
|
||
ext { | ||
log4jVersion = "2.20.0" | ||
|
||
sparkVersion = '3.5.2' | ||
icebergVersion = '1.5.2' | ||
sparkVersionSuffix = "3.5" | ||
openhouseSparkRuntimeModule = ":integrations:spark:spark-${sparkVersionSuffix}:openhouse-spark-3.5-runtime_2.12" | ||
icebergSparkRuntimeModule = "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:${icebergVersion}" | ||
tablesTestFixturesModule = ":tables-test-fixtures:tables-test-fixtures-iceberg-1.5_2.12" | ||
} | ||
|
||
dependencies { | ||
compileOnly (project(path: openhouseSparkRuntimeModule)) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
|
||
implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4jVersion}" | ||
implementation(project(':libs:datalayout')) { | ||
exclude group: 'org.apache.iceberg', module: 'iceberg-spark-runtime-3.1_2.12' | ||
} | ||
implementation("org.apache.iceberg:iceberg-bundled-guava:${icebergVersion}") | ||
implementation("org.apache.iceberg:iceberg-data:${icebergVersion}") | ||
implementation("org.apache.iceberg:iceberg-core:${icebergVersion}") | ||
implementation("org.apache.iceberg:iceberg-common:${icebergVersion}") | ||
implementation ('org.apache.spark:spark-core_2.12:' + sparkVersion) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
implementation ('org.apache.spark:spark-sql_2.12:' + sparkVersion) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
implementation (icebergSparkRuntimeModule) { | ||
exclude group: 'io.netty' | ||
} | ||
|
||
testImplementation (project(path: openhouseSparkRuntimeModule, configuration: 'shadow')) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
testImplementation(project(tablesTestFixturesModule)) { | ||
exclude group: "io.netty" | ||
} | ||
} | ||
|
||
sourceSets { | ||
main { | ||
java { | ||
srcDirs = ['src/main/java', project(':apps:openhouse-spark-apps_2.12').sourceSets.main.java.srcDirs] | ||
} | ||
} | ||
test { | ||
java { | ||
srcDirs = ['src/test/java', project(':apps:openhouse-spark-apps_2.12').sourceSets.test.java.srcDirs] | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,49 @@ | ||
plugins { | ||
id 'openhouse.java-conventions' | ||
id 'openhouse.hadoop-conventions' | ||
id 'openhouse.iceberg-conventions-1.2' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line should not be removed. |
||
id 'openhouse.maven-publish' | ||
// dependency in apps-spark-conventions | ||
id 'com.github.johnrengelman.shadow' version '7.1.2' | ||
} | ||
|
||
configurations { | ||
// Excluding these libraries avoids competing implementations for LoggerFactory | ||
// Standardizing on slf4j + log4j2 as implementation. | ||
all*.exclude module : 'spring-boot-starter-logging' | ||
all*.exclude module : 'logback-classic' | ||
shadow.extendsFrom implementation | ||
id 'openhouse.apps-spark-conventions' | ||
} | ||
|
||
ext { | ||
log4jVersion = "2.18.0" | ||
|
||
sparkVersion = '3.1.1' | ||
icebergVersion = '1.2.0' | ||
sparkVersionSuffix = "3.1" | ||
openhouseSparkRuntimeModule = ":integrations:spark:spark-${sparkVersionSuffix}:openhouse-spark-runtime_2.12" | ||
icebergSparkRuntimeModule = "org.apache.iceberg:iceberg-spark-runtime-3.1_2.12:${icebergVersion}" | ||
tablesTestFixturesModule = ":tables-test-fixtures:tables-test-fixtures_2.12" | ||
} | ||
|
||
dependencies { | ||
implementation project(':iceberg:openhouse:internalcatalog') | ||
implementation project(':client:hts') | ||
implementation project(':client:jobsclient') | ||
implementation project(':client:tableclient') | ||
implementation project(':client:secureclient') | ||
implementation project(':services:common') | ||
implementation project(':cluster:storage') | ||
compileOnly (project(path: ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12')) { | ||
compileOnly (project(path: openhouseSparkRuntimeModule)) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
|
||
implementation "org.apache.logging.log4j:log4j-slf4j-impl:${log4jVersion}" | ||
implementation project(':libs:datalayout') | ||
implementation("org.apache.iceberg:iceberg-bundled-guava") { | ||
version { | ||
strictly("${icebergVersion}") | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are these really helpful ? In practice I never find There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. strictly here overrides any dependencies of compaction that are defining a higher version of 1.5.2, such as services without strictly, these dependencies will bump up iceberg in compactino unintentionally. an alternative to forcing the version to be LOWER than what is defined in transitive dependencies, is:
|
||
} | ||
implementation("org.apache.iceberg:iceberg-data") { | ||
version { | ||
strictly("${icebergVersion}") | ||
} | ||
} | ||
implementation("org.apache.iceberg:iceberg-core") { | ||
version { | ||
strictly("${icebergVersion}") | ||
} | ||
} | ||
implementation("org.apache.iceberg:iceberg-common") { | ||
version { | ||
strictly("${icebergVersion}") | ||
} | ||
} | ||
implementation ('org.apache.spark:spark-core_2.12:' + sparkVersion) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
|
@@ -43,116 +54,16 @@ dependencies { | |
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
implementation ('org.apache.hadoop:hadoop-common:2.10.0') { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.curator', module: 'curator-client' | ||
exclude group: 'org.apache.commons', module: 'commons-lang3' | ||
|
||
} | ||
implementation ('org.apache.iceberg:iceberg-spark-runtime-3.1_2.12:' + icebergVersion) { | ||
implementation (icebergSparkRuntimeModule) { | ||
exclude group: 'io.netty' | ||
} | ||
implementation 'commons-cli:commons-cli:1.5.0' | ||
implementation 'org.reflections:reflections:0.10.2' | ||
implementation 'org.springframework.boot:spring-boot-starter-webflux:2.7.8' | ||
implementation 'io.netty:netty-resolver-dns-native-macos:4.1.75.Final:osx-x86_64' | ||
implementation 'org.springframework.retry:spring-retry:1.3.3' | ||
implementation 'org.apache.logging.log4j:log4j-core:2.18.0' | ||
implementation 'org.apache.logging.log4j:log4j-slf4j-impl:2.18.0' | ||
implementation 'org.apache.logging.log4j:log4j-api:2.18.0' | ||
implementation 'com.fasterxml.jackson.core:jackson-core:2.13.3' | ||
implementation 'com.fasterxml.jackson.core:jackson-annotations:2.13.3' | ||
implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3' | ||
implementation 'com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.13.3' | ||
implementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.13.3' | ||
implementation 'com.fasterxml.woodstox:woodstox-core:6.2.7' | ||
|
||
// OpenTelemetry-related classes. Latest OkHttp version is 4.10.0; pinning to 4.9.3 to avoid dependency issues | ||
implementation 'com.squareup.okhttp3:okhttp:' + ok_http3_version | ||
implementation 'com.squareup.okhttp:okhttp:2.7.5' | ||
implementation 'com.squareup.okio:okio:3.2.0' | ||
implementation 'com.squareup.okio:okio-jvm:3.2.0' | ||
implementation 'org.jetbrains.kotlin:kotlin-stdlib:2.0.20' | ||
implementation 'org.jetbrains.kotlin:kotlin-stdlib-jdk7:2.0.20' | ||
implementation 'org.jetbrains.kotlin:kotlin-stdlib-jdk8:2.0.20' | ||
implementation 'io.opentelemetry:opentelemetry-api:1.18.0' | ||
implementation 'io.opentelemetry:opentelemetry-exporter-otlp:1.18.0' | ||
implementation 'io.opentelemetry:opentelemetry-sdk:1.18.0' | ||
implementation 'io.opentelemetry:opentelemetry-sdk-extension-autoconfigure:1.14.0-alpha' | ||
implementation 'io.opentelemetry:opentelemetry-semconv:1.14.0-alpha' | ||
implementation 'org.apache.commons:commons-lang3:3.12.0' | ||
|
||
testImplementation (project(path: ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12', configuration: 'shadow')) { | ||
testImplementation (project(path: openhouseSparkRuntimeModule, configuration: 'shadow')) { | ||
exclude group: 'io.netty' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-common' | ||
exclude group: 'org.apache.hadoop', module: 'hadoop-client' | ||
} | ||
// Otherwise throws the error: Scala module 2.10.0 requires Jackson Databind version >= 2.10.0 and < 2.11.0 | ||
testImplementation 'com.fasterxml.jackson.module:jackson-module-scala_2.12:2.13.1' | ||
testImplementation 'org.mockito:mockito-inline:4.11.0' | ||
testImplementation 'org.powermock:powermock-module-junit4:2.0.9' | ||
testImplementation 'org.powermock:powermock-api-mockito2:2.0.9' | ||
testImplementation(project(':tables-test-fixtures:tables-test-fixtures_2.12')) { | ||
testImplementation(project(tablesTestFixturesModule)) { | ||
exclude group: "io.netty" | ||
} | ||
testRuntimeOnly("org.eclipse.jetty:jetty-server:11.0.2") | ||
|
||
} | ||
|
||
// Need spark runtime to be built before this test for this project to run successfully because compileOnly and | ||
// testImplementation dependencies are not triggering it. | ||
test.dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:build' | ||
|
||
shadowJar { | ||
zip64 = true | ||
archiveClassifier.set('uber') | ||
mergeServiceFiles() // merge META-INF/services configuration files to allow FileSystem to be discovered | ||
dependencies { | ||
// unnecessary dependencies from iceberg-spark3-runtime | ||
exclude(dependency('org.apache.iceberg::')) | ||
// this dependency will be provided at runtime | ||
exclude(dependency('org.apache.iceberg:iceberg-spark3-runtime::')) | ||
|
||
relocate('io', 'openhouse.relocated.io') { | ||
exclude 'io.netty.resolver.dns.macos.**' // dynamically loaded classes | ||
} | ||
relocate('com', 'openhouse.relocated.com') { | ||
exclude 'com.linkedin.openhouse.**' // don't want our classes to be shaded | ||
exclude 'com.ctc.wstx.**' // dynamically loaded classes | ||
exclude 'com.squareup.**' | ||
exclude '%regex[com.sun.security.*]' | ||
} | ||
relocate 'okhttp3', 'openhouse.relocated.okhttp3' | ||
relocate 'okio', 'openhouse.relocated.okio' | ||
relocate 'reactor', 'openhouse.relocated.reactor' | ||
relocate('org','openhouse.relocated.org') { | ||
exclude 'org.apache.iceberg.**' // these are runtime classes, we shouldn't relocate them unless we shade them | ||
exclude '%regex[org.apache.hadoop.*]' // these are runtime classes too, use regex to exclude string literals | ||
exclude 'org.apache.commons.**' // these are part of method signatures reused in sub-classes | ||
exclude 'org.apache.avro.**' // these are runtime classes too | ||
exclude 'org.apache.spark.**' // these are runtime classes too | ||
exclude 'org.springframework.**' // otherwise fails with ClassNotFoundException: org.springframework.http.codec.ClientCodecConfigurer | ||
exclude 'org.log4j.**' | ||
exclude 'org.slf4j.**' | ||
exclude 'org.apache.log4j.**' | ||
exclude 'org.apache.logging.**' // otherwise fails with add log4j-core to the classpath | ||
exclude 'org.xml.sax.**' // otherwise fails with NoClassDefFoundError: org/xml/sax/ContentHandler | ||
exclude '%regex[org.w3c.*]' | ||
exclude '%regex[org.ietf.*]' | ||
} | ||
} | ||
} | ||
|
||
// https://github.com/johnrengelman/shadow/issues/335 | ||
// By default shadow doesn't configure the build task to depend on the shadowJar task. | ||
tasks.build.dependsOn tasks.shadowJar | ||
|
||
test { | ||
if (JavaVersion.current() >= JavaVersion.VERSION_1_9) { | ||
jvmArgs \ | ||
'--add-opens=java.base/java.nio=ALL-UNNAMED', | ||
'--add-exports=java.base/sun.nio.ch=ALL-UNNAMED', | ||
'--add-opens=java.base/sun.util.calendar=ALL-UNNAMED', | ||
'--add-exports=java.base/sun.util.calendar=ALL-UNNAMED' | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,8 +60,6 @@ allprojects { | |
if (it.path != ':integrations:spark:spark-3.5:openhouse-spark-3.5-itest') { | ||
configurations.all { | ||
resolutionStrategy { | ||
force 'com.fasterxml.jackson:jackson-bom:2.13.4' | ||
force 'com.fasterxml.jackson.core:jackson-databind:2.13.4' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are these changes relevant ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this may actually be dangerous to remove, and I will fix it, since idk what is currently relying on this. but this strict version constraint breaks :apps:spark-3.5 because that also needs jackson 2.15 |
||
force 'org.apache.orc:orc-core:1.8.3' | ||
force 'com.google.guava:guava:31.1-jre' | ||
} | ||
|
@@ -128,5 +126,4 @@ tasks.register('CopyGitHooksTask', Copy) { | |
println 'Make the git hook available in .git/hooks directory.' | ||
from file('scripts/git-hooks') | ||
into file('.git/hooks/') | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add `iceberg-conventions-1.5.2.gradle` to remove them.