-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored BaseTransformation to make schema based conversions easier…
- Loading branch information
1 parent
0f46bdb
commit a2b19cd
Showing
9 changed files
with
450 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
141 changes: 141 additions & 0 deletions
141
src/main/java/com/github/jcustenborder/kafka/connect/transform/common/BytesToString.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/** | ||
* Copyright © 2017 Jeremy Custenborder ([email protected]) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.github.jcustenborder.kafka.connect.transform.common; | ||
|
||
import com.github.jcustenborder.kafka.connect.utils.config.Description; | ||
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip; | ||
import com.github.jcustenborder.kafka.connect.utils.config.Title; | ||
import com.google.common.base.Strings; | ||
import org.apache.kafka.common.config.ConfigDef; | ||
import org.apache.kafka.connect.connector.ConnectRecord; | ||
import org.apache.kafka.connect.data.Field; | ||
import org.apache.kafka.connect.data.Schema; | ||
import org.apache.kafka.connect.data.SchemaAndValue; | ||
import org.apache.kafka.connect.data.SchemaBuilder; | ||
import org.apache.kafka.connect.data.Struct; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
public abstract class BytesToString<R extends ConnectRecord<R>> extends BaseTransformation<R> { | ||
private static final Logger log = LoggerFactory.getLogger(BytesToString.class); | ||
|
||
@Override | ||
public ConfigDef config() { | ||
return BytesToStringConfig.config(); | ||
} | ||
|
||
BytesToStringConfig config; | ||
|
||
@Override | ||
public void configure(Map<String, ?> settings) { | ||
this.config = new BytesToStringConfig(settings); | ||
} | ||
|
||
@Override | ||
public void close() { | ||
|
||
} | ||
|
||
@Override | ||
protected SchemaAndValue processBytes(R record, Schema inputSchema, byte[] input) { | ||
final Schema outputSchema = inputSchema.isOptional() ? Schema.OPTIONAL_STRING_SCHEMA : Schema.STRING_SCHEMA; | ||
final String output = new String(input, this.config.charset); | ||
return new SchemaAndValue(outputSchema, output); | ||
} | ||
|
||
Map<Schema, Schema> schemaCache = new HashMap<>(); | ||
|
||
@Override | ||
protected SchemaAndValue processStruct(R record, Schema inputSchema, Struct input) { | ||
final Schema schema = this.schemaCache.computeIfAbsent(inputSchema, s -> { | ||
final SchemaBuilder builder = SchemaBuilder.struct(); | ||
if (!Strings.isNullOrEmpty(inputSchema.name())) { | ||
builder.name(inputSchema.name()); | ||
} | ||
if (inputSchema.isOptional()) { | ||
builder.optional(); | ||
} | ||
|
||
for (Field field : inputSchema.fields()) { | ||
log.trace("processStruct() - processing '{}'", field.name()); | ||
final Schema fieldSchema; | ||
if (this.config.fields.contains(field.name())) { | ||
fieldSchema = field.schema().isOptional() ? | ||
Schema.OPTIONAL_STRING_SCHEMA : | ||
Schema.STRING_SCHEMA; | ||
} else { | ||
fieldSchema = field.schema(); | ||
} | ||
builder.field(field.name(), fieldSchema); | ||
} | ||
return builder.build(); | ||
}); | ||
|
||
Struct struct = new Struct(schema); | ||
for (Field field : schema.fields()) { | ||
if (this.config.fields.contains(field.name())) { | ||
byte[] buffer = input.getBytes(field.name()); | ||
struct.put(field.name(), new String(buffer, this.config.charset)); | ||
} else { | ||
struct.put(field.name(), input.get(field.name())); | ||
} | ||
} | ||
return new SchemaAndValue(schema, struct); | ||
} | ||
|
||
@Title("BytesToString(Key)") | ||
@Description("This transformation is used to convert a byte array to a string.") | ||
@DocumentationTip("This transformation is used to manipulate fields in the Key of the record.") | ||
public static class Key<R extends ConnectRecord<R>> extends BytesToString<R> { | ||
|
||
@Override | ||
public R apply(R r) { | ||
final SchemaAndValue transformed = process(r, r.keySchema(), r.key()); | ||
|
||
return r.newRecord( | ||
r.topic(), | ||
r.kafkaPartition(), | ||
transformed.schema(), | ||
transformed.value(), | ||
r.valueSchema(), | ||
r.value(), | ||
r.timestamp() | ||
); | ||
} | ||
} | ||
|
||
@Title("BytesToString(Value)") | ||
@Description("This transformation is used to convert a byte array to a string.") | ||
public static class Value<R extends ConnectRecord<R>> extends BytesToString<R> { | ||
@Override | ||
public R apply(R r) { | ||
final SchemaAndValue transformed = process(r, r.valueSchema(), r.value()); | ||
|
||
return r.newRecord( | ||
r.topic(), | ||
r.kafkaPartition(), | ||
r.keySchema(), | ||
r.key(), | ||
transformed.schema(), | ||
transformed.value(), | ||
r.timestamp() | ||
); | ||
} | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
...ain/java/com/github/jcustenborder/kafka/connect/transform/common/BytesToStringConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/** | ||
* Copyright © 2017 Jeremy Custenborder ([email protected]) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.github.jcustenborder.kafka.connect.transform.common; | ||
|
||
import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder; | ||
import org.apache.kafka.common.config.AbstractConfig; | ||
import org.apache.kafka.common.config.ConfigDef; | ||
|
||
import java.nio.charset.Charset; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
public class BytesToStringConfig extends AbstractConfig { | ||
public final Charset charset; | ||
public final Set<String> fields; | ||
|
||
public static final String CHARSET_CONFIG = "charset"; | ||
public static final String CHARSET_DOC = "The charset to use when creating the output string."; | ||
|
||
public static final String FIELD_CONFIG = "fields"; | ||
public static final String FIELD_DOC = "The fields to transform."; | ||
|
||
|
||
public BytesToStringConfig(Map<String, ?> settings) { | ||
super(config(), settings); | ||
String charset = getString(CHARSET_CONFIG); | ||
this.charset = Charset.forName(charset); | ||
List<String> fields = getList(FIELD_CONFIG); | ||
this.fields = new HashSet<>(fields); | ||
} | ||
|
||
public static ConfigDef config() { | ||
return new ConfigDef() | ||
.define( | ||
ConfigKeyBuilder.of(CHARSET_CONFIG, ConfigDef.Type.STRING) | ||
.documentation(CHARSET_DOC) | ||
.defaultValue("UTF-8") | ||
.importance(ConfigDef.Importance.HIGH) | ||
.build() | ||
).define( | ||
ConfigKeyBuilder.of(FIELD_CONFIG, ConfigDef.Type.LIST) | ||
.documentation(FIELD_DOC) | ||
.defaultValue(Collections.emptyList()) | ||
.importance(ConfigDef.Importance.HIGH) | ||
.build() | ||
); | ||
} | ||
|
||
} |
Oops, something went wrong.