Skip to content

Commit

Permalink
Merge pull request #16 from databrickslabs/feature-repeating-fields
Browse files Browse the repository at this point in the history
Feature repeating fields
  • Loading branch information
zavoraad authored Mar 15, 2024
2 parents db2fcce + dbd45b7 commit 1f91d24
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
11 changes: 11 additions & 0 deletions src/main/scala/com/databricks/labs/smolder/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ import org.apache.spark.sql.functions._

object functions {

/**
 * Extracts the value at a specific index in a repeating field.
 *
 * The field is split on every literal occurrence of `delim`, and the piece at
 * position `repIndex` is selected.
 *
 * @param col A column containing the repeated field from a message segment
 * @param repIndex The zero-based index of the repeated field value that must be extracted
 * @param delim The separator between repetitions, matched literally; defaults to "~"
 * @return Yields a new column containing the selected repetition of the field.
 */
def repeating_field(col: Column, repIndex: Int, delim: String = "~"): Column = {
  // Spark's split() interprets its second argument as a Java regular expression, so the
  // delimiter is quoted to stop characters such as "|" or "^" from acting as regex operators.
  split(col, java.util.regex.Pattern.quote(delim)).getItem(repIndex)
}

/**
* Parses a textual, pipe-delimited HL7v2 message.
*
Expand Down
13 changes: 13 additions & 0 deletions src/test/scala/com/databricks/labs/smolder/functionsSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ class functionsSuite extends SmolderBaseTest {
assert(evnType.first().getString(0) === "A03")
}

test("Test repeating segment function"){
  // Importing implicits requires a stable identifier, hence the local val.
  val session = spark
  import session.implicits._

  val raw = Seq("MSH\rTST|1234567890^^^HOSPITALONE^MRN~4646464646^^^HOSPITALTWO^MRN~9431675613^^^HOSPITALTHRE^MRN").toDF("text")
  val parsed = raw.select(parse_hl7_message(raw("text")).alias("hl7"))

  // Field 0 of the TST segment, which holds three "~"-separated repetitions.
  val firstField = parsed.select(segment_field("TST", 0, col("hl7.segments")).alias("TST_0"))

  val expectedRepetitions = Seq(
    "1234567890^^^HOSPITALONE^MRN",
    "4646464646^^^HOSPITALTWO^MRN",
    "9431675613^^^HOSPITALTHRE^MRN"
  )

  // Each repetition index must yield the matching identifier.
  expectedRepetitions.zipWithIndex.foreach { case (expected, idx) =>
    val actual = firstField.select(repeating_field(col("TST_0"), idx, "~")).first().getString(0)
    assert(actual === expected)
  }
}

test("use the segment field and subfield functions to extract the patient's first name") {

val file = testFile("single_record.hl7")
Expand Down
2 changes: 1 addition & 1 deletion version.sbt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version in ThisBuild := "0.0.1-SNAPSHOT"
version in ThisBuild := "0.0.3-SNAPSHOT"

0 comments on commit 1f91d24

Please sign in to comment.