Skip to content

Commit

Permalink
GEOMESA-3324 Kafka - add install script for Parquet dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
elahrvivaz committed Jan 19, 2024
1 parent 2feeeb4 commit 59c14b1
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 41 deletions.
2 changes: 1 addition & 1 deletion docs/user/kafka/confluent.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ regular Kafka store.

Note that Confluent requires the Confluent client JARs, which are not bundled with GeoMesa. If the GeoMesa Kafka
binary distribution has been :ref:`installed <setting_up_kafka_commandline>`, then the script
``bin/install-confluent-dependencies.sh`` can be used to download them.
``bin/install-confluent-support.sh`` can be used to download them.

Supported Avro Schema Fields
----------------------------
Expand Down
6 changes: 6 additions & 0 deletions docs/user/kafka/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ Do this with the following command:
$ ./bin/install-shapefile-support.sh
If working with Parquet files, install the required dependencies with the following command:

.. code-block:: bash
$ ./bin/install-parquet-support.sh
Test the command that invokes the GeoMesa Tools:

.. code-block:: bash
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ function dependencies() {
fi

# add hadoop 3+ jars if needed
local hadoop_maj_ver
hadoop_maj_ver="$([[ "$hadoop_version" =~ ([0-9][0-9]*)\. ]] && echo "${BASH_REMATCH[1]}")"
if [[ "$hadoop_maj_ver" -ge 3 ]]; then
if version_ge "${hadoop_version}" 3.0.0; then
gavs+=(
"org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
"org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,5 @@ trait RunnerWithAccumuloEnvironment extends Runner {
}
}

override def environmentErrorInfo(): Option[String] = {
if (!sys.env.contains("ACCUMULO_HOME") || !sys.env.contains("HADOOP_HOME")) {
Option("\nWarning: ACCUMULO_HOME and/or HADOOP_HOME are not set as environment variables." +
"\nGeoMesa tools will not run without the appropriate Accumulo and Hadoop jars in the tools classpath." +
"\nPlease ensure that those jars are present in the classpath by running 'geomesa classpath'." +
"\nTo take corrective action, please place the necessary jar files in the lib directory of geomesa-tools.")
} else { None }
}
override protected def classpathEnvironments: Seq[String] = Seq("ACCUMULO_HOME", "HADOOP_HOME")
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,5 @@ object CassandraRunner extends Runner {
)
}

override def environmentErrorInfo(): Option[String] = {
if (!sys.env.contains("CASSANDRA_HOME")) {
Option("Warning: you have not set the CASSANDRA_HOME environment variable." +
"\nGeoMesa tools will not run without the appropriate Cassandra jars on the classpath.")
} else { None }
}
override protected def classpathEnvironments: Seq[String] = Seq("CASSANDRA_HOME")
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,5 @@ object HBaseRunner extends Runner {
)
}

override def environmentErrorInfo(): Option[String] = {
if (!sys.env.contains("HBASE_HOME") || !sys.env.contains("HADOOP_HOME")) {
Option("Warning: you have not set HBASE_HOME and/or HADOOP_HOME as environment variables." +
"\nGeoMesa tools will not run without the appropriate HBase and Hadoop jars in the tools classpath." +
"\nPlease ensure that those jars are present in the classpath by running 'geomesa-hbase classpath'." +
"\nTo take corrective action, please place the necessary jar files in the lib directory of geomesa-tools.")
} else { None }
}
override protected def classpathEnvironments: Seq[String] = Seq("HBASE_HOME", "HADOOP_HOME")
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@
#

cd "$(dirname "$0")" || exit
export GEOMESA_DEPENDENCIES="confluent-dependencies.sh"
export GEOMESA_MAVEN_URL="${GEOMESA_MAVEN_URL:-https://packages.confluent.io/maven/}"
./install-dependencies.sh "$@"
echo >&2 "WARNING: this script is deprecated, please use 'install-confluent-support.sh' instead"
./install-confluent-support.sh "$@"
13 changes: 13 additions & 0 deletions geomesa-kafka/geomesa-kafka-tools/bin/install-confluent-support.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#! /usr/bin/env bash
#
# Copyright (c) 2013-%%copyright.year%% Commonwealth Computer Research, Inc.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0 which
# accompanies this distribution and is available at
# http://www.opensource.org/licenses/apache2.0.php.
#

# Installs the Confluent client JARs by delegating to install-dependencies.sh.
# All command-line arguments are passed through unchanged.

# run from this script's own directory, as install-dependencies.sh is invoked by relative path
script_dir="$(dirname "$0")"
cd "$script_dir" || exit

# dependency listing to install - presumably read by install-dependencies.sh (confirm there)
GEOMESA_DEPENDENCIES="confluent-dependencies.sh"
# keep any maven URL already set (and non-empty) in the environment, else use the Confluent repository
GEOMESA_MAVEN_URL="${GEOMESA_MAVEN_URL:-https://packages.confluent.io/maven/}"
export GEOMESA_DEPENDENCIES GEOMESA_MAVEN_URL

./install-dependencies.sh "$@"
12 changes: 12 additions & 0 deletions geomesa-kafka/geomesa-kafka-tools/bin/install-parquet-support.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#! /usr/bin/env bash
#
# Copyright (c) 2013-%%copyright.year%% Commonwealth Computer Research, Inc.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0 which
# accompanies this distribution and is available at
# http://www.opensource.org/licenses/apache2.0.php.
#

# Installs the dependencies needed for Parquet support by delegating to install-dependencies.sh.
# All command-line arguments are passed through unchanged.

# run from this script's own directory, as install-dependencies.sh is invoked by relative path
script_dir="$(dirname "$0")"
cd "$script_dir" || exit

# dependency listing to install - presumably read by install-dependencies.sh (confirm there)
GEOMESA_DEPENDENCIES="parquet-dependencies.sh"
export GEOMESA_DEPENDENCIES

./install-dependencies.sh "$@"
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,5 @@ object KafkaRunner extends Runner {
)
}

override def environmentErrorInfo(): Option[String] = {
if (!sys.env.contains("KAFKA_HOME")) {
Option("\nWarning: KAFKA_HOME is not set as an environment variable." +
"\nGeoMesa tools will not run without the appropriate Kafka and Zookeeper jars in the tools classpath." +
"\nPlease ensure that those jars are present in the classpath by running 'geomesa-kafka classpath'." +
"\nTo take corrective action, please place the necessary jar files in the lib directory of geomesa-tools.")
} else { None }
}
override protected def classpathEnvironments: Seq[String] = Seq("KAFKA_HOME")
}
4 changes: 2 additions & 2 deletions geomesa-tools/bin/install-dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ function download_dependencies() {
includes="$(dependencies)"
local classpath=""
if [[ -d "$install_dir" ]]; then
classpath="$(ls "$install_dir")"
classpath="$(ls "$install_dir" | tr '\n' ':')"
fi
local gavs=()
for gav in $includes; do
group="${gav%%:*}"
artifact="${gav#$group:}"
artifact="${artifact%%:*}"
if [[ $classpath != *"$artifact"* ]]; then
if [[ ! $classpath =~ (^|:|/)${artifact}(-[^:]*)*\.jar ]]; then
gavs+=("$gav")
fi
done
Expand Down
56 changes: 56 additions & 0 deletions geomesa-tools/conf-filtered/parquet-dependencies.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#! /usr/bin/env bash
#
# Copyright (c) 2013-%%copyright.year%% Commonwealth Computer Research, Inc.
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0 which
# accompanies this distribution and is available at
# http://www.opensource.org/licenses/apache2.0.php.
#

# This file lists the dependencies required for using geomesa-convert-parquet.
# Update the versions as required to match the target environment.

# default hadoop version used by dependencies() below; when a classpath is supplied, the value is
# replaced by the result of get_classpath_version
# NOTE(review): the %%...%% token is presumably substituted when this file is filtered by the build - confirm
hadoop_install_version="%%hadoop.version.recommended%%"

# Tests whether the first version is the highest of all the versions given, according to
# a version sort - i.e. with two arguments, whether $1 >= $2
# args:
#   $1 - version to check
#   $2... - version(s) to compare against
# returns:
#   0 if $1 sorts highest (or ties for highest), non-zero otherwise
function version_ge() {
  local highest
  # put each argument on its own line, then take the first entry of a descending version sort
  highest="$(echo "$@" | tr " " "\n" | sort -rV | head -n 1)"
  [[ "$highest" == "$1" ]]
}

# Prints the maven coordinates (group:artifact:version:type) required by this module
# args:
#   $1 - current classpath (may be empty) - when non-empty, it is passed to
#        get_classpath_version to look up the hadoop-common version
# output:
#   the coordinates in sorted order on stdout, each one followed by a single space
function dependencies() {
  local classpath="$1"

  # start from the configured version, and defer to the classpath lookup when a classpath is given
  local hadoop_version="$hadoop_install_version"
  [[ -z "$classpath" ]] || hadoop_version="$(get_classpath_version hadoop-common "$classpath" "$hadoop_version")"

  # jars that differ between hadoop 3+ and earlier versions
  local -a versioned
  if version_ge "${hadoop_version}" 3.0.0; then
    versioned=(
      "org.apache.hadoop:hadoop-client-api:${hadoop_version}:jar"
      "org.apache.hadoop:hadoop-client-runtime:${hadoop_version}:jar"
    )
  else
    versioned=(
      "commons-configuration:commons-configuration:1.6:jar"
    )
  fi

  # the order here is irrelevant, as the list is sorted before being printed
  local -a coordinates=(
    "org.apache.hadoop:hadoop-common:${hadoop_version}:jar"
    "com.fasterxml.woodstox:woodstox-core:5.3.0:jar"
    "org.codehaus.woodstox:stax2-api:4.2.1:jar"
    "org.apache.commons:commons-configuration2:2.8.0:jar"
    "commons-collections:commons-collections:3.2.2:jar"
    "commons-logging:commons-logging:1.2:jar"
    "${versioned[@]}"
  )

  # one coordinate per line for sorting, then back to a single space-delimited line
  echo "${coordinates[@]}" | tr ' ' '\n' | sort | tr '\n' ' '
}

# Prints the dependencies to exclude for this module - there are none, so only an empty line is printed
# args:
#   $1 - current classpath (unused)
function exclude_dependencies() {
  printf '\n'
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,24 @@ import org.locationtech.geomesa.utils.stats.MethodProfiling

import java.io.File
import java.nio.charset.StandardCharsets
import java.util.Locale
import scala.collection.JavaConverters._
import scala.util.control.NonFatal

trait Runner extends MethodProfiling with LazyLogging {

def name: String
/**
 * Optional custom message to display to the user after a missing-dependency warning. When defined,
 * it is used in place of the message generated from `classpathEnvironments`.
 *
 * @return custom message, if any - `None` by default
 */
@deprecated("Use classpathEnvironments instead")
def environmentErrorInfo(): Option[String] = None

/**
 * A list of environment variables used to load the classpath (e.g. `KAFKA_HOME`), used for error
 * messages after ClassNotFoundExceptions. If any of the variables is not set, the generated
 * message names them and points the user to the dependency install scripts.
 *
 * @return names of the environment variables, empty by default
 */
protected def classpathEnvironments: Seq[String] = Seq.empty

def main(args: Array[String]): Unit = execute(new MainExecutor(args))

def nailMain(context: NGContext): Unit = execute(new NailgunExecutor(context))
Expand All @@ -40,7 +50,7 @@ trait Runner extends MethodProfiling with LazyLogging {
// log the underlying exception to the log file, but don't show it to the user
val msg = s"Warning: Missing dependency for command execution: ${e.getMessage}"
logger.error(msg, e)
CommandResult(1, Seq(Left(msg)) ++ environmentErrorInfo().map(Left.apply))
CommandResult(1, Seq(Left(msg)) ++ getEnvironmentErrors.map(Left.apply[String, Throwable]))
case e: ParameterException => CommandResult(1, Seq(Left(e.getMessage)))
case e: CommandException => CommandResult(1, Seq(Left(e.getMessage)))
case NonFatal(e) => CommandResult(1, Seq(Right(e)))
Expand Down Expand Up @@ -97,7 +107,7 @@ trait Runner extends MethodProfiling with LazyLogging {
def usage(jc: JCommander): String = {
val out = new StringBuilder()
out.append(s"Usage: $name [command] [command options]\n")
val commands = jc.getCommands.asScala.map(_._1).toSeq.sorted
val commands = jc.getCommands.asScala.keys.toSeq.sorted
out.append(" Commands:\n")
val maxLen = commands.map(_.length).max + 4
commands.foreach { name =>
Expand Down Expand Up @@ -192,6 +202,24 @@ trait Runner extends MethodProfiling with LazyLogging {

protected def resolveEnvironment(command: Command): Unit = {}

/**
 * Builds the hint shown to the user when a command fails due to a missing dependency.
 *
 * A message from the deprecated `environmentErrorInfo` hook takes precedence; otherwise a warning
 * is generated if any of the `classpathEnvironments` variables is not set.
 *
 * @return the hint, or `None` if there is no custom message and all the variables are set
 */
private def getEnvironmentErrors: Option[String] = {
  // noinspection ScalaDeprecation
  environmentErrorInfo().orElse {
    val variables = classpathEnvironments
    if (variables.exists(v => !sys.env.contains(v))) {
      val notSet =
        if (variables.size == 1) { "is not set as an environment variable" } else { "are not set as environment variables" }
      // derive a display name from each variable, e.g. HADOOP_HOME -> Hadoop
      val products = variables.map { v =>
        val (initial, rest) = v.splitAt(1)
        initial + rest.toLowerCase(Locale.US).replace("_home", "")
      }
      val lines = Seq(
        s"Warning: ${variables.mkString(" and/or ")} $notSet.",
        s"GeoMesa tools will not run without the appropriate ${products.mkString(" and ")} JARs in the tools classpath.",
        s"Please ensure that those JARs are present in the classpath by running '$name classpath'.",
        "To take corrective action, copy the necessary JAR files in the GeoMesa tools lib directory " +
            "using the provided 'install-dependencies.sh' and 'install-*-support.sh' scripts."
      )
      // every line, including the first, is preceded by a line break
      Some(lines.mkString("\n", "\n", ""))
    } else {
      None
    }
  }
}

class DefaultCommand(jc: JCommander) extends Command {
override def execute(): Unit = Command.user.info(usage(jc))
override val name: String = ""
Expand Down

0 comments on commit 59c14b1

Please sign in to comment.