Skip to content

Commit

Permalink
[INLONG-10826][SDK] Transform support TRIM(), REPLICATE() function (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
emptyOVO authored Aug 21, 2024
1 parent 9ac4e5a commit ee7f3d9
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.util.List;
/**
 * ReplicateFunction
 * description: replicate(string, numeric)--Repeat the string numeric times and return a new string.
 * <p>
 * The repeat count is truncated toward zero before use, so {@code replicate('ab', 2.9)} yields
 * {@code "abab"}. A count that truncates to zero or less yields an empty string.
 */
public class ReplicateFunction implements ValueParser {

    /** Parser for the first argument: the string to repeat. */
    private final ValueParser stringParser;

    /** Parser for the second argument: the number of repetitions. */
    private final ValueParser countParser;

    /**
     * Builds the parsers for the two arguments of the function call.
     *
     * @param expr the parsed function expression; its first parameter is the string and its
     *             second parameter is the repeat count
     */
    public ReplicateFunction(Function expr) {
        List<Expression> expressions = expr.getParameters().getExpressions();
        stringParser = OperatorTools.buildParser(expressions.get(0));
        countParser = OperatorTools.buildParser(expressions.get(1));
    }

    /**
     * Evaluates both arguments against the given row and returns the repeated string.
     *
     * @param sourceData the decoded source record
     * @param rowIndex the index of the row being processed
     * @param context the transform context
     * @return the string repeated {@code count} times
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        Object stringObj = stringParser.parse(sourceData, rowIndex, context);
        Object countObj = countParser.parse(sourceData, rowIndex, context);
        String str = OperatorTools.parseString(stringObj);
        // NOTE(review): behaviour for a null count depends on OperatorTools.parseBigDecimal,
        // which is not visible here -- confirm against that helper.
        double count = OperatorTools.parseBigDecimal(countObj).doubleValue();
        return repeat(str, count);
    }

    /**
     * Repeats {@code str} by binary doubling: the working chunk doubles on every step and is
     * appended to the result whenever the corresponding bit of the count is set.
     *
     * @param str the string to repeat; may be null
     * @param count the requested number of repetitions; any fractional part is discarded
     * @return an empty string when the truncated count is not positive, {@code str} itself when
     *         it is null, empty or repeated exactly once, otherwise the concatenation
     */
    private String repeat(String str, double count) {
        // Work on an integer: with a double, "count % 2 != 0" is also true for fractional
        // values, which made e.g. 2.5 produce three copies and 0.5 produce one.
        long remaining = (long) count;
        if (remaining <= 0) {
            return "";
        }
        // A null input previously failed inside new StringBuilder(null) for counts >= 2.
        if (str == null || str.isEmpty() || remaining == 1) {
            return str;
        }
        StringBuilder repeatedStr = new StringBuilder();
        StringBuilder chunk = new StringBuilder(str);
        while (remaining > 0) {
            if ((remaining & 1L) == 1L) {
                repeatedStr.append(chunk);
            }
            remaining >>= 1;
            // Only double the chunk while another bit is still pending; the final doubling
            // would allocate a buffer that is never read.
            if (remaining > 0) {
                chunk.append(chunk);
            }
        }
        return repeatedStr.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Function;

/**
 * TrimFunction
 * description: trim(string)--Remove the whitespace before and after the string.
 */
public class TrimFunction implements ValueParser {

    /** Parser for the single argument: the string to trim. */
    private final ValueParser stringParser;

    /**
     * Builds the parser for the first (and only used) argument of the function call.
     *
     * @param expr the parsed function expression
     */
    public TrimFunction(Function expr) {
        this.stringParser = OperatorTools.buildParser(
                expr.getParameters().getExpressions().get(0));
    }

    /**
     * Evaluates the argument against the given row and returns it with leading and trailing
     * whitespace removed, as defined by {@link String#trim()}.
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        final Object rawValue = stringParser.parse(sourceData, rowIndex, context);
        final String text = OperatorTools.parseString(rawValue);
        return text.trim();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.inlong.sdk.transform.process.function.LogFunction;
import org.apache.inlong.sdk.transform.process.function.NowFunction;
import org.apache.inlong.sdk.transform.process.function.PowerFunction;
import org.apache.inlong.sdk.transform.process.function.ReplicateFunction;
import org.apache.inlong.sdk.transform.process.function.RoundFunction;
import org.apache.inlong.sdk.transform.process.function.SinFunction;
import org.apache.inlong.sdk.transform.process.function.SinhFunction;
Expand All @@ -42,6 +43,7 @@
import org.apache.inlong.sdk.transform.process.function.TimestampExtractFunction;
import org.apache.inlong.sdk.transform.process.function.ToDateFunction;
import org.apache.inlong.sdk.transform.process.function.ToTimestampFunction;
import org.apache.inlong.sdk.transform.process.function.TrimFunction;
import org.apache.inlong.sdk.transform.process.function.UnixTimestampFunction;
import org.apache.inlong.sdk.transform.process.parser.AdditionParser;
import org.apache.inlong.sdk.transform.process.parser.ColumnParser;
Expand Down Expand Up @@ -108,6 +110,8 @@ public class OperatorTools {
functionMap.put("log", LogFunction::new);
functionMap.put("exp", ExpFunction::new);
functionMap.put("substring", SubstringFunction::new);
functionMap.put("trim", TrimFunction::new);
functionMap.put("replicate", ReplicateFunction::new);
functionMap.put("locate", LocateFunction::new);
functionMap.put("to_date", ToDateFunction::new);
functionMap.put("date_format", DateFormatFunction::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,70 @@ public void testLocateFunction() throws Exception {
Assert.assertEquals(1, output5.size());
Assert.assertEquals(output5.get(0), "result=null");
}
/**
 * Verifies replicate(string, numeric) for repeat counts of 2, 3, 1 and 0.
 */
@Test
public void testReplicateFunction() throws Exception {
    String transformSql1 = "select replicate(string1, numeric1) from source";
    TransformConfig config1 = new TransformConfig(transformSql1);
    TransformProcessor<String, String> processor1 = TransformProcessor
            .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case1: replicate('apple', 2)
    List<String> output1 = processor1.transform("apple|banana|cloud|2|1|3", new HashMap<>());
    Assert.assertEquals(1, output1.size());
    Assert.assertEquals("result=appleapple", output1.get(0));

    String transformSql2 = "select replicate(string2, numeric2) from source";
    TransformConfig config2 = new TransformConfig(transformSql2);
    TransformProcessor<String, String> processor2 = TransformProcessor
            .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case2: replicate('banana', 3)
    List<String> output2 = processor2.transform("apple|banana|cloud|1|3|3", new HashMap<>());
    Assert.assertEquals(1, output2.size());
    Assert.assertEquals("result=bananabananabanana", output2.get(0));
    // case3: replicate('banana', 1)
    List<String> output3 = processor2.transform("apple|banana|cloud|1|1|3", new HashMap<>());
    // Check the size of this case's own output, not the previous one's.
    Assert.assertEquals(1, output3.size());
    Assert.assertEquals("result=banana", output3.get(0));

    // case4: replicate('cloud', 0)
    String transformSql3 = "select replicate(string3, numeric3) from source";
    TransformConfig config3 = new TransformConfig(transformSql3);
    TransformProcessor<String, String> processor3 = TransformProcessor
            .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    List<String> output4 = processor3.transform("apple|banana|cloud|2|1|0", new HashMap<>());
    Assert.assertEquals(1, output4.size());
    Assert.assertEquals("result=", output4.get(0));
}

/**
 * Verifies trim(string) on values with leading, trailing, and both-sided spaces.
 */
@Test
public void testTrimFunction() throws Exception {
    String transformSql1 = "select trim(string1) from source";
    TransformConfig config1 = new TransformConfig(transformSql1);
    TransformProcessor<String, String> processor1 = TransformProcessor
            .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case1: trim(' in long')
    List<String> output1 = processor1.transform(" in long|in long | in long ", new HashMap<>());
    Assert.assertEquals(1, output1.size());
    Assert.assertEquals("result=in long", output1.get(0));

    String transformSql2 = "select trim(string2) from source";
    TransformConfig config2 = new TransformConfig(transformSql2);
    TransformProcessor<String, String> processor2 = TransformProcessor
            .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case2: trim('in long ')
    List<String> output2 = processor2.transform(" in long|in long | in long ", new HashMap<>());
    Assert.assertEquals(1, output2.size());
    Assert.assertEquals("result=in long", output2.get(0));

    // The third case must read string3 through its own config and processor; reusing the
    // string2 SQL/config would only repeat case2.
    String transformSql3 = "select trim(string3) from source";
    TransformConfig config3 = new TransformConfig(transformSql3);
    TransformProcessor<String, String> processor3 = TransformProcessor
            .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case3: trim(' in long ')
    List<String> output3 = processor3.transform(" in long|in long | in long ", new HashMap<>());
    Assert.assertEquals(1, output3.size());
    Assert.assertEquals("result=in long", output3.get(0));
}

}

0 comments on commit ee7f3d9

Please sign in to comment.