From b1a81926c487add3b77a932ecd5c62d40910c2fc Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Wed, 28 Aug 2024 15:56:46 -0400 Subject: [PATCH 1/3] Implement overwrite function in extract-json feature. --- .../java/com/datastax/cdm/feature/ExtractJson.java | 7 +++++++ .../java/com/datastax/cdm/job/DiffJobSession.java | 11 +++++++++-- .../com/datastax/cdm/properties/KnownProperties.java | 4 +++- src/resources/cdm-detailed.properties | 6 ++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/datastax/cdm/feature/ExtractJson.java b/src/main/java/com/datastax/cdm/feature/ExtractJson.java index ae2de7c8..6dcc5d12 100644 --- a/src/main/java/com/datastax/cdm/feature/ExtractJson.java +++ b/src/main/java/com/datastax/cdm/feature/ExtractJson.java @@ -40,6 +40,7 @@ public class ExtractJson extends AbstractFeature { private String targetColumnName = ""; private Integer targetColumnIndex = -1; + private boolean overwriteTarget = false; @Override public boolean loadProperties(IPropertyHelper helper) { @@ -49,6 +50,8 @@ public boolean loadProperties(IPropertyHelper helper) { originColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_ORIGIN_COLUMN_NAME); targetColumnName = getColumnName(helper, KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING); + overwriteTarget = helper.getBoolean(KnownProperties.EXTRACT_JSON_TARGET_OVERWRITE); + // Convert columnToFieldMapping to targetColumnName and originJsonFieldName if (!targetColumnName.isBlank()) { String[] parts = targetColumnName.split("\\:"); @@ -146,6 +149,10 @@ public String getTargetColumnName() { return isEnabled ? targetColumnName : ""; } + public boolean overwriteTarget() { + return overwriteTarget; + } + private String getColumnName(IPropertyHelper helper, String colName) { String columnName = CqlTable.unFormatName(helper.getString(colName)); return (null == columnName) ? "" : columnName; diff --git a/src/main/java/com/datastax/cdm/job/DiffJobSession.java b/src/main/java/com/datastax/cdm/job/DiffJobSession.java index 9951e408..643ff691 100644 --- a/src/main/java/com/datastax/cdm/job/DiffJobSession.java +++ b/src/main/java/com/datastax/cdm/job/DiffJobSession.java @@ -66,6 +66,7 @@ public class DiffJobSession extends CopyJobSession { boolean logDebug = logger.isDebugEnabled(); boolean logTrace = logger.isTraceEnabled(); private ExtractJson extractJsonFeature; + private boolean overwriteTarget; public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkConf sc) { super(originSession, targetSession, sc); @@ -111,6 +112,7 @@ public DiffJobSession(CqlSession originSession, CqlSession targetSession, SparkC } extractJsonFeature = (ExtractJson) this.targetSession.getCqlTable().getFeature(Featureset.EXTRACT_JSON); + overwriteTarget = extractJsonFeature.isEnabled() && extractJsonFeature.overwriteTarget(); logger.info("CQL -- origin select: {}", this.originSession.getOriginSelectByPartitionRangeStatement().getCQL()); logger.info("CQL -- target select: {}", this.targetSession.getTargetSelectByPKStatement().getCQL()); @@ -270,7 +272,13 @@ private String isDifferent(Record record) { logger.trace("PK {}, targetIndex {} skipping constant column {}", pk, targetIndex, targetColumnNames.get(targetIndex)); return; // nothing to compare in origin - } else if (targetIndex == extractJsonFeature.getTargetColumnIndex()) { + } + + targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow); + if (targetIndex == extractJsonFeature.getTargetColumnIndex()) { + if (!overwriteTarget && null != targetAsOriginType) { + return; // skip validation when target has data + } originIndex = extractJsonFeature.getOriginColumnIndex(); origin = extractJsonFeature.extract(originRow.getString(originIndex)); } else { @@ -301,7 +309,6 @@ private String isDifferent(Record record) { + explodeMapKeyIndex + ", valueIndex:" + explodeMapValueIndex + ")"); } } - targetAsOriginType = targetSession.getCqlTable().getAndConvertData(targetIndex, targetRow); if (logDebug) logger.debug( diff --git a/src/main/java/com/datastax/cdm/properties/KnownProperties.java b/src/main/java/com/datastax/cdm/properties/KnownProperties.java index fc8c10be..5741cf80 100644 --- a/src/main/java/com/datastax/cdm/properties/KnownProperties.java +++ b/src/main/java/com/datastax/cdm/properties/KnownProperties.java @@ -243,13 +243,15 @@ public enum PropertyType { public static final String EXTRACT_JSON_EXCLUSIVE = "spark.cdm.feature.extractJson.exclusive"; public static final String EXTRACT_JSON_ORIGIN_COLUMN_NAME = "spark.cdm.feature.extractJson.originColumn"; public static final String EXTRACT_JSON_TARGET_COLUMN_MAPPING = "spark.cdm.feature.extractJson.propertyMapping"; + public static final String EXTRACT_JSON_TARGET_OVERWRITE = "spark.cdm.feature.extractJson.overwrite"; static { types.put(EXTRACT_JSON_EXCLUSIVE, PropertyType.BOOLEAN); defaults.put(EXTRACT_JSON_EXCLUSIVE, "false"); types.put(EXTRACT_JSON_ORIGIN_COLUMN_NAME, PropertyType.STRING); types.put(EXTRACT_JSON_TARGET_COLUMN_MAPPING, PropertyType.STRING); - } + types.put(EXTRACT_JSON_TARGET_OVERWRITE, PropertyType.BOOLEAN); + defaults.put(EXTRACT_JSON_TARGET_OVERWRITE, "false"); } // ========================================================================== // Guardrail Feature diff --git a/src/resources/cdm-detailed.properties b/src/resources/cdm-detailed.properties index b5e3a30d..77b98f9b 100644 --- a/src/resources/cdm-detailed.properties +++ b/src/resources/cdm-detailed.properties @@ -388,10 +388,16 @@ spark.cdm.perfops.ratelimit.target 20000 # - If the specified JSON property does not exist in the JSON content, the Target column # will be set to null. # Note: This feature currently supports extraction of only one JSON property. +# +# .overwrite Default is false. This property only applies to Validation run (NA for Migration) +# When set to true, the extracted JSON value will overwrite any existing value in the +# Target column during Validation. False will skip validation if the Target column has +# any non-null value. #----------------------------------------------------------------------------------------------------------- #spark.cdm.feature.extractJson.exclusive false #spark.cdm.feature.extractJson.originColumn origin_columnname_with_json_content #spark.cdm.feature.extractJson.propertyMapping origin_json_propertyname:target_columnname +#spark.cdm.feature.extractJson.overwrite false #=========================================================================================================== # Guardrail feature manages records that exceed guardrail checks. The Guardrail job will generate a From bd1099d9baf1aec64765de1062bf75b360e03519 Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Thu, 29 Aug 2024 11:36:57 -0400 Subject: [PATCH 2/3] Added test to verify mapped properties --- .../datastax/cdm/feature/ExtractJsonTest.java | 14 ++++++++++++++ .../com/datastax/cdm/feature/TrackRunTest.java | 18 ++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 src/test/java/com/datastax/cdm/feature/TrackRunTest.java diff --git a/src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java b/src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java index 3f3fcc53..e7cfe218 100644 --- a/src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java +++ b/src/test/java/com/datastax/cdm/feature/ExtractJsonTest.java @@ -59,6 +59,7 @@ public class ExtractJsonTest { String standardOriginName = "content"; String standardTargetName = "age"; + String mappedTargetName = "personAge:person_age"; @BeforeEach public void setup() { @@ -98,9 +99,22 @@ public void loadProperties() { assertAll( () -> assertTrue(loaded, "properties are loaded and valid"), () -> assertTrue(feature.isEnabled()), + () -> assertFalse(feature.overwriteTarget()), () -> assertEquals(standardTargetName, feature.getTargetColumnName()) ); } + + @Test + public void loadPropertiesWithMapping() { + when(propertyHelper.getString(KnownProperties.EXTRACT_JSON_TARGET_COLUMN_MAPPING)).thenReturn(mappedTargetName); + boolean loaded = feature.loadProperties(propertyHelper); + + assertAll( + () -> assertTrue(loaded, "properties are loaded and valid"), + () -> assertTrue(feature.isEnabled()), + () -> assertEquals("person_age", feature.getTargetColumnName()) + ); + } @Test public void loadPropertiesException() { diff --git a/src/test/java/com/datastax/cdm/feature/TrackRunTest.java b/src/test/java/com/datastax/cdm/feature/TrackRunTest.java new file mode 100644 index 00000000..7db9f8e1 --- /dev/null +++ b/src/test/java/com/datastax/cdm/feature/TrackRunTest.java @@ -0,0 +1,18 @@ +package com.datastax.cdm.feature; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +class TrackRunTest { + + @Test + void test() { + assertEquals("MIGRATE", TrackRun.RUN_TYPE.MIGRATE.name()); + assertEquals("DIFF_DATA", TrackRun.RUN_TYPE.DIFF_DATA.name()); + + assertEquals(2, TrackRun.RUN_TYPE.values().length); + assertEquals(5, TrackRun.RUN_STATUS.values().length); + } + +} From d608690c9e620fbe47ac8366c2d209b85b2058ab Mon Sep 17 00:00:00 2001 From: Pravin Bhat Date: Thu, 29 Aug 2024 15:47:25 -0400 Subject: [PATCH 3/3] Added copyright --- .../com/datastax/cdm/feature/TrackRunTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/java/com/datastax/cdm/feature/TrackRunTest.java b/src/test/java/com/datastax/cdm/feature/TrackRunTest.java index 7db9f8e1..5bfa547e 100644 --- a/src/test/java/com/datastax/cdm/feature/TrackRunTest.java +++ b/src/test/java/com/datastax/cdm/feature/TrackRunTest.java @@ -1,3 +1,18 @@ +/* + * Copyright DataStax, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package com.datastax.cdm.feature; import static org.junit.jupiter.api.Assertions.assertEquals;