From cce9321cb0776f77c15f74d7a535d9172ddd228e Mon Sep 17 00:00:00 2001 From: "Meti A. Bayissa" Date: Wed, 21 Aug 2024 05:20:18 +0300 Subject: [PATCH 01/12] Added Ethiopian (Geez) number to Arabic Numeral converter --- .../extraction/util/GeezNumberUtils.scala | 89 ++++++++++++++++++ .../extraction/util/GeezNumberUtilsTest.scala | 91 +++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala create mode 100644 core/src/test/scala/org/dbpedia/extraction/util/GeezNumberUtilsTest.scala diff --git a/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala new file mode 100644 index 000000000..1f0c1328f --- /dev/null +++ b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala @@ -0,0 +1,89 @@ +package org.dbpedia.extraction.util + +import java.util.logging.{Logger, Level} +import scala.util.control.Breaks._ + +class GeezNumberUtils { + + private val geezNumberMap: Map[Char, Int] = Map( + '፩' -> 1, + '፪' -> 2, + '፫' -> 3, + '፬' -> 4, + '፭' -> 5, + '፮' -> 6, + '፯' -> 7, + '፰' -> 8, + '፱' -> 9, + '፲' -> 10, + '፳' -> 20, + '፴' -> 30, + '፵' -> 40, + '፶' -> 50, + '፷' -> 60, + '፸' -> 70, + '፹' -> 80, + '፺' -> 90, + '፻' -> 100, + '፼' -> 10000 + ) + + // Preprocesses and converts geez scripts to arabic numeral. + def preprocess(geezStr: String): List[Int] = { + val strippedGeezStr = geezStr.trim + val stack = scala.collection.mutable.ListBuffer[Int]() + + for (char <- strippedGeezStr) { + geezNumberMap.get(char) match { + case Some(value) => stack.append(value) + case None => + throw new Exception(s"Unknown Geez number character: $char") + } + } + + stack.toList + } + + // Recursively calculates and performs the calcualtion to convert geez number to arabic numerals. + def calculate(start: Int, end: Int, arr: List[Int]): Option[Int] = { + if (start > end) return None + if (start == end) { + return Some(arr(start)) + } + var idxs: List[Int] = List() + + breakable { + for (multiplier <- List(10000, 100)) { + if (arr.slice(start, end + 1).contains(multiplier)) { + idxs = (end to start by -1).filter(i => arr(i) == multiplier).toList + if (idxs.nonEmpty) { + break + } + } + } + } + + for (node <- idxs) { + val leftSubTreeValue = calculate(start, node - 1, arr).getOrElse(1) + val rightSubTreeValue = calculate(node + 1, end, arr).getOrElse(0) + return Some((arr(node) * leftSubTreeValue) + rightSubTreeValue) + } + + if (idxs.isEmpty) { + Some(arr.slice(start, end + 1).sum) + } else { + None + } + + } + + def convertGeezToArabicNumeral(geezStr: String): Option[Int] = { + val stack = preprocess(geezStr) + val convertedNumber = calculate(0, stack.length - 1, stack) + convertedNumber match { + case Some(number) => return Some(number) + case None => return None + } + } + +} diff --git a/core/src/test/scala/org/dbpedia/extraction/util/GeezNumberUtilsTest.scala b/core/src/test/scala/org/dbpedia/extraction/util/GeezNumberUtilsTest.scala new file mode 100644 index 000000000..f319d3c9f --- /dev/null +++ b/core/src/test/scala/org/dbpedia/extraction/util/GeezNumberUtilsTest.scala @@ -0,0 +1,91 @@ +package org.dbpedia.extraction.util +import org.scalatest.Matchers +import org.scalatest.FlatSpec +import org.scalatest.junit.JUnitRunner +import org.junit.runner.RunWith + +@RunWith(classOf[JUnitRunner]) +class GeezNumberUtilsTest extends FlatSpec with Matchers { + + "ConvertGeezToArabicNumeral" should "return 2" in { + convertGeezToArabicNumeral("፪") should equal( + Some(2) + ) + } + + "ConvertGeezToArabicNumeral" should "return 10" in { + convertGeezToArabicNumeral("፲") should equal( + Some(10) + ) + } + + "ConvertGeezToArabicNumeral" should "return 107" in { + convertGeezToArabicNumeral("፻፯") should equal( + Some(107) + ) + } + + "ConvertGeezToArabicNumeral" should "return 113" in { + convertGeezToArabicNumeral("፻፲፫") should equal( + Some(113) + ) + } + + "ConvertGeezToArabicNumeral" should "return 898" in { + convertGeezToArabicNumeral("፰፻፺፰") should equal( + Some(898) + ) + } + + "ConvertGeezToArabicNumeral" should "return 1111111111" in { + convertGeezToArabicNumeral("፲፩፼፲፩፻፲፩፼፲፩፻፲፩") should equal( + Some(1111111111) + ) + } + + "ConvertGeezToArabicNumeral" should "return 1234" in { + convertGeezToArabicNumeral("፲፪፻፴፬") should equal( + Some(1234) + ) + } + "ConvertGeezToArabicNumeral" should "return 12345" in { + convertGeezToArabicNumeral("፼፳፫፻፵፭") should equal( + Some(12345) + ) + } + "ConvertGeezToArabicNumeral" should "return 7654321" in { + convertGeezToArabicNumeral("፯፻፷፭፼፵፫፻፳፩") should equal( + Some(7654321) + ) + } + "ConvertGeezToArabicNumeral" should "return 17654321" in { + convertGeezToArabicNumeral("፲፯፻፷፭፼፵፫፻፳፩") should equal( + Some(17654321) + ) + } + "ConvertGeezToArabicNumeral" should "return 51615131" in { + convertGeezToArabicNumeral("፶፩፻፷፩፼፶፩፻፴፩") should equal( + Some(51615131) + ) + } + + "ConvertGeezToArabicNumeral" should "return 3030" in { + convertGeezToArabicNumeral("፴፻፴") should equal( + Some(3030) + ) + } + + "ConvertGeezToArabicNumeral" should "return 333333333" in { + convertGeezToArabicNumeral("፫፼፴፫፻፴፫፼፴፫፻፴፫") should equal( + Some(333333333) + ) + } + + private val geezNumberParser = new GeezNumberUtils() + + private def convertGeezToArabicNumeral(input: String): Option[Int] = { + + geezNumberParser.convertGeezToArabicNumeral(input) + + } +} From b5c97754481c5b3733530a72da6c312a8e05db73 Mon Sep 17 00:00:00 2001 From: "Meti A. Bayissa" Date: Wed, 21 Aug 2024 05:21:39 +0300 Subject: [PATCH 02/12] Added date configs for Amharic --- .../extraction/config/dataparser/DateTimeParserConfig.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala index e1f89ac10..ca3c849f7 100644 --- a/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala +++ b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DateTimeParserConfig.scala @@ -6,6 +6,8 @@ object DateTimeParserConfig val monthsMap = Map( // For "ar" configuration, right-to-left rendering may seem like a bug, but it's not. // Don't change this unless you know how it is done. + "am" -> Map("january"->1,"february"->2,"march"->3,"april"->4,"may"->5,"june"->6,"july"->7,"august"->8,"september"->9,"october"->10,"november"->11,"december"->12, + "ጃንዩወሪ" -> 1, "ፌብሩወሪ" -> 2,"ማርች" -> 3,"ኤፕሪል" -> 4,"ሜይ" -> 5,"ጁን" -> 6,"ጁላይ" -> 7,"ኦገስት" -> 8,"ሴፕተምበር" -> 9,"ኦክቶበር" -> 10,"ኖቬምበር" -> 11,"ዲሴምበር" -> 12), "ar" -> Map("جانفي"->1,"فيفري"->2,"مارس"->3,"أفريل"->4,"ماي"->5,"جوان"->6,"جويلية"->7,"أوت"->8,"سبتمبر"->9,"أكتوبر"->10,"نوفمبر"->11,"ديسمبر"->12, "يناير"->1,"فبراير"->2,"أبريل"->4,"مايو"->5,"يونيو"->6,"يوليو"->7,"يوليوز"->7,"أغسطس"->8,"غشت"->8,"شتنبر"->9,"نونبر"->11,"دجنبر"->12), "bg" -> Map("януари"->1,"февруари"->2,"март"->3,"април"->4,"май"->5,"юни"->6,"юли"->7,"август"->8,"септември"->9,"октомври"->10,"ноември"->11,"декември"->12), From decffa0f9a23dd740f0f2bc54cf386a545e28a82 Mon Sep 17 00:00:00 2001 From: "Meti A. Bayissa" Date: Wed, 21 Aug 2024 05:24:53 +0300 Subject: [PATCH 03/12] Added Ethiopian calendar to gregorian converter --- .../EthiopianDateParserConfig.scala | 53 +++++ .../dataparser/DateTimeParser.scala | 12 +- .../dataparser/EthiopianDateParser.scala | 218 ++++++++++++++++++ 3 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 core/src/main/scala/org/dbpedia/extraction/config/dataparser/EthiopianDateParserConfig.scala create mode 100644 core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala diff --git a/core/src/main/scala/org/dbpedia/extraction/config/dataparser/EthiopianDateParserConfig.scala b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/EthiopianDateParserConfig.scala new file mode 100644 index 000000000..e55ab1279 --- /dev/null +++ b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/EthiopianDateParserConfig.scala @@ -0,0 +1,53 @@ +package org.dbpedia.extraction.config.dataparser + +object EthiopianDateParserConfig { + val geezNumberDateMap = Map( + 1 -> "፩", + 2 -> "፪", + 3 -> "፫", + 4 -> "፬", + 5 -> "፭", + 6 -> "፮", + 7 -> "፯", + 8 -> "፰", + 9 -> "፱", + 10 -> "፲", + 11 -> "፲፩", + 12 -> "፲፪", + 13 -> "፲፫", + 14 -> "፲፬", + 15 -> "፲፭", + 16 -> "፲፮", + 17 -> "፲፯", + 18 -> "፲፰", + 19 -> "፲፱", + 20 -> "፳", + 21 -> "፳፩", + 22 -> "፳፪", + 23 -> "፳፫", + 24 -> "፳፬", + 25 -> "፳፭", + 26 -> "፳፮", + 27 -> "፳፯", + 28 -> "፳፰", + 29 -> "፳፱", + 30 -> "፴" + ) + + val monthsMap = Map( + "መስከረም" -> 1, + "ጥቅምት" -> 2, + "ኅዳር" -> 3, + "ታኅሳስ" -> 4, + "ጥር" -> 5, + "የካቲት" -> 6, + "መጋቢት" -> 7, + "ሚያዝያ" -> 8, + "ግንቦት" -> 9, + "ሰኔ" -> 10, + "ሐምሌ" -> 11, + "ነሐሴ" -> 12, + "ጳጉሜ" -> 13 + ) + +} diff --git a/core/src/main/scala/org/dbpedia/extraction/dataparser/DateTimeParser.scala b/core/src/main/scala/org/dbpedia/extraction/dataparser/DateTimeParser.scala index 792d105f9..454246bcf 100644 --- a/core/src/main/scala/org/dbpedia/extraction/dataparser/DateTimeParser.scala +++ b/core/src/main/scala/org/dbpedia/extraction/dataparser/DateTimeParser.scala @@ -24,7 +24,6 @@ class DateTimeParser ( context : @transient private val logger = Logger.getLogger(getClass.getName) // language-specific configurations - private val language = if(DateTimeParserConfig.supportedLanguages.contains(context.language.wikiCode)) context.language.wikiCode else "en" private val months = DateTimeParserConfig.monthsMap.getOrElse(language, DateTimeParserConfig.monthsMap("en")) @@ -32,6 +31,7 @@ class DateTimeParser ( context : private val cardinalityRegex = DateTimeParserConfig.cardinalityRegexMap.getOrElse(language, DateTimeParserConfig.cardinalityRegexMap("en")) private val templates = DateTimeParserConfig.templateDateMap.getOrElse(language, Map()) + private val ethiopianDateParser = new EthiopianDateParser(datatype:Datatype, strict:Boolean); // parse logic configurations override val splitPropertyNodeRegex: String = if (DataParserConfig.splitPropertyNodeRegexDateTime.contains(language)) @@ -189,7 +189,17 @@ class DateTimeParser ( context : } private def findDate(input: String) : Option[Date] = + { + + // scan for Ethiopian (geez) calendar dates + if(language == "am"){ + for(date <- ethiopianDateParser.findGeezDate(input)) + { + return Some(date) + } + } + for(date <- catchDate(input)) { return Some(date) diff --git a/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala new file mode 100644 index 000000000..a2eb321c2 --- /dev/null +++ b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala @@ -0,0 +1,218 @@ +package org.dbpedia.extraction.dataparser +import java.util.logging.{Logger, Level} +import scala.util.matching.Regex +import org.dbpedia.extraction.config.dataparser.{ + EthiopianDateParserConfig, + DateTimeParserConfig +} +import org.dbpedia.extraction.util.{Language, Date} +import org.dbpedia.extraction.util.{GeezNumberUtils} +import org.dbpedia.extraction.ontology.datatypes.Datatype + +class EthiopianDateParser(datatype: Datatype, val strict: Boolean = false) { + require(datatype != null, "datatype != null") + @transient private val logger = Logger.getLogger(getClass.getName) + + val geezNumberParser = new GeezNumberUtils() + private val monthsMap = EthiopianDateParserConfig.monthsMap + private val monthsName = monthsMap.keys.mkString("|") + private val geezNumberDate = + EthiopianDateParserConfig.geezNumberDateMap.values.mkString("|") + + private val gregorianDateIndicator = s""".*(እ.ኤ.አ).*""".r + private val prefix = if (strict) """\s*""" else """.*?""" + private val postfix = if (strict) """\s*""" else ".*" + + // catches dd-mm-yyyy including a 13th month 21 13 2013, 21-13-2013, 21/13/2013, 21-13-2013, 21/13/2013 + private val dateRegex1: Regex = + s"""$prefix\\b(0?[1-9]|[12][0-9]|3[01])\\b[-/\\s]\\b(0?[1-9]|1[0-2]|13)\\b[-/\\s](\\d{4}|[\\u1369-\\u137C]+)$postfix""".r + + // Regex for dates containing geez characters + // catches dates like ጥቅምት-21-2013 or ጥቅምት/21/2013 or ጥቅምት 21 2013 + private val dateRegex2: Regex = + s"""$prefix($monthsName)[\\s/-](\\b(0?[1-9]|[12][0-9]|3[01])\\b)[\\s/-](\\d{4}|[\\u1369-\\u137C]+)$postfix""".r + + // catches dates dd-month-yyyy like 21-ጥቅምት-2013 or 21/ጥቅምት/2013 or 21 ጥቅምት 2013 + private val dateRegex3: Regex = + s"""$prefix(\\b(0?[1-9]|[12][0-9]|3[01])\\b)[\\s/-]($monthsName)[\\s/-](\\d{4}|[\\u1369-\\u137C]+)$postfix""".r + + // catches dates month-dd-yyyy ጥቅምት ፳፩ ፳፻፲፫ or ጥቅምት/፳፩/፳፻፲፫ or ጥቅምት ፳፩ ፳፻፲፫ mmmm-dd-yyyy + private val dateRegex4: Regex = + s"""$prefix(\\b$monthsName)[\\s/-]($geezNumberDate|0?[1-9]|[12][0-9]|3[01])[\\s/-](\\d{4}|[\\u1369-\\u137C]+)$postfix""".r + + // catches dates like ፳፩ ጥቅምት ፳፻፲፫ or ፳፩/ጥቅምት/፳፻፲፫ or 21/ጥቅምት/2013 dd-mmmm-yyyy + private val dateRegex5: Regex = + s"""$prefix(\\b$geezNumberDate|0?[1-9]|[12][0-9]|3[01])[\\s/-]($monthsName)[\\s/-](\\d{4}|[\\u1369-\\u137C]+)$postfix""".r + + def catchGeezDate(dateString: String): Option[(String, String, String)] = { + + for (dateRegex1(day, month, year) <- List(dateString)) { + return Some((year, month, day)) + } + + // Amharic month names (month-day-year) + for (dateRegex2(month, day, year) <- List(dateString)) { + return Some((year, month, day)) + + } + + // Amharic month names (day-month-year) + for (dateRegex3(day, month, year) <- List(dateString)) { + return Some((year, month, day)) + } + + // dates that contain geez/Amharic numbers (month-day-year) + for (dateRegex4(month, day, year) <- List(dateString)) { + return Some((year, month, day)) + } + + // dates that contain geez/ Amharic numbers (day-month-year) + for (dateRegex5(day, month, year) <- List(dateString)) { + return Some((year, month, day)) + } + + None + } + + def isLeapYear(year: Int): Boolean = { + return (year % 4 == 3) + } + + def isValidEthiopianCalendarDate(year: Int, month: Int, day: Int): Boolean = { + // Validate year + if (year <= 0) { + logger.log(Level.FINE, "Year must be greater than 0.") + return false + } + + // Validate month + if (month < 1 || month > 13) { + logger.log( + Level.FINE, + s"Month must be between 1 and 13. Provided month: $month." + ) + return false + } + + // Validate day + if (day < 1 || day > 30) { + logger.log( + Level.FINE, + s"Day must be between 1 and 30. Provided day: $day." + ) + return false + } + + // Validate case for Pagume (month 13 in Ethiopian Calendar) + if (month == 13) { + if (day > 6) { + logger.log( + Level.FINE, + s"Day in Pagume cannot exceed 6. Provided day: $day." + ) + return false + } + if (!isLeapYear(year) && day > 5) { + logger.log( + Level.FINE, + s"Pagume only has 5 days in non-leap years. Provided day: $day." + ) + return false + } + } + + true + } + + private def ethiopianDateToJDN(year: Int, month: Int, day: Int): Double = { + val EPOCH: Long = 1723856 + val julianDayNumber: Double = + (EPOCH + 365) + 365 * (year - 1) + (year / 4).toInt + 30 * month + day - 31 + return julianDayNumber + } + + def geezToGregorianDateConverter( + year: Int, + month: Int, + day: Int, + datatype: Datatype + ): Option[Date] = { + val JDN: Double = ethiopianDateToJDN(year, month, day) + val Q: Double = JDN + 0.5 + val Z: Long = Q.toLong + val W: Long = ((Z - 1867216.25) / 36524.25).toLong + val X: Long = (W / 4).toLong + val A: Long = Z + 1 + W - X + val B: Long = A + 1524 + val C: Long = ((B - 122.1) / 365.25).toLong + val D: Long = (365.25 * C).toLong + val E: Long = ((B - D) / 30.6001).toLong + val F: Long = (30.6001 * E).toLong + val gregorianDay: Int = (B - D - F + (Q - Z)).toInt + val gregorianMonth: Long = if (E - 1 <= 12) E - 1 else E - 13 + val gregorianYear: Long = if (month <= 2) C - 4715 else C - 4716 + + Some( + new Date( + Some(gregorianYear.toInt), + Some(gregorianMonth.toInt), + Some(gregorianDay.toInt), + datatype + ) + ) + } + + def isArabicNumeral(str: String): Boolean = { + str.forall(c => c.isDigit) + } + + def formatDate( + dateString: Option[(String, String, String)] + ): Option[(Int, Int, Int)] = { + + dateString.flatMap { case (year, month, day) => + val yearNum = + if (isArabicNumeral(year)) year.toInt + else geezNumberParser.convertGeezToArabicNumeral(year).getOrElse(0) + val monthNum = + if (isArabicNumeral(month)) month.toInt + else { + EthiopianDateParserConfig.monthsMap.getOrElse( + month, + geezNumberParser.convertGeezToArabicNumeral(month).getOrElse(0) + ) + } + val dayNum = + if (isArabicNumeral(day)) day.toInt + else geezNumberParser.convertGeezToArabicNumeral(day).getOrElse(0) + + return Some((yearNum, monthNum, dayNum)) + + } + } + + def findGeezDate(input: String): Option[Date] = { + val isGregorianDate = (gregorianDateIndicator.findFirstIn(input)).isDefined + + if (isGregorianDate) { + return None + } + + val dateString: Option[(String, String, String)] = catchGeezDate(input) + val (yearNum, monthNum, dayNum) = + formatDate(dateString).getOrElse((0, 0, 0)) + + if (!isValidEthiopianCalendarDate(yearNum, monthNum, dayNum)) { + return None + } + + for ( + date <- geezToGregorianDateConverter(yearNum, monthNum, dayNum, datatype) + ) { + + return Some(date) + } + None + + } +} From ab670c1ce7b71807616f032e7f8445ae31ce757c Mon Sep 17 00:00:00 2001 From: "Meti A. Bayissa" Date: Wed, 21 Aug 2024 05:27:55 +0300 Subject: [PATCH 04/12] Added tests for Amharic dates --- .../dataparser/DateTimeParserTest.scala | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala index b2dd49795..a48ad1b1e 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala @@ -13,7 +13,7 @@ import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class DateTimeParserTest extends FlatSpec with Matchers { - //gYear positive tests - Input is inside equivalence class + // gYear positive tests - Input is inside equivalence class "DateTimeParser" should "return gYear (2008)" in { @@ -677,6 +677,64 @@ class DateTimeParserTest extends FlatSpec with Matchers parse("ja", "xsd:gYear", "西暦2012年") should equal (Some("2012")) } +// Amharic date Test: If date is already in gregorian calendar conversion shouldnt be performed. + +"DateTimeParser" should "parse Gregorian Date 21-ጁላይ-2013 and return date 2013-07-21" in + { + parse("am", "xsd:date", "21-ጁላይ-2013 እ.ኤ.አ.") should equal (Some("2013-07-21")) + } +"DateTimeParser" should "parse Gregorian Date september 23 2000 እ.ኤ.አ. and return date 2000-09-23" in + { + parse("am", "xsd:date", "september 23 2000 እ.ኤ.አ.") should equal (Some("2000-09-23")) + } +"DateTimeParser" should "parse Gregorian Date 23 ሴፕተምበር 2013 እ.ኤ.አ. and return date 2013-09-23" in + { + parse("am", "xsd:date", "23 ሴፕተምበር 2013 እ.ኤ.አ.") should equal (Some("2013-09-23")) + } + "DataParser" should "parse Gregorian Date 23/07/2013 እ.ኤ.አ. and return date 2013-09-23" in + { + parse("am", "xsd:date", "23/07/2013 እ.ኤ.አ.") should equal (Some("2013-07-23")) + } + +// Amharic date Test: Invalid dates test +"DateTimeParser" should "not parse invalid Ethiopian Date 31/01/2024 and return None" in + { + parse("am", "xsd:date", "31 ጥቅምት 2024") should equal (None) + } +"DateTimeParser" should "not parse invalid Ethiopian Date 06/ጳጉሜ/2000 and return None" in + { + parse("am", "xsd:date", "06/ጳጉሜ/2000") should equal (None) + } +"DateTimeParser" should "not parse invalid Ethiopian Date 07/13/2003 and return None" in + { + parse("am", "xsd:date", "07/13/2003") should equal (None) + + } +// Amharic date Test: Geez calender dates should be converted to gregorian +"DateTimeParser" should "parse Ethiopian Date 21-11-2013 and return date 2020-10-31" in + { + parse("am", "xsd:date", "21-11-2013") should equal (Some("2021-07-28")) + } +"DateTimeParser" should "parse Ethiopian Date 21/ሚያዝያ/2013 and return date 2021-04-29" in + { + parse("am", "xsd:date", "21/ሚያዝያ/2013") should equal (Some("2021-04-29")) + } +"DateTimeParser" should "parse Ethiopian Date ሚያዝያ ፳፩ ፳፻ and return date 2008-04-29" in + { + parse("am", "xsd:date", "ሚያዝያ ፳፩ ፳፻") should equal (Some("2008-04-29")) + } +"DateTimeParser" should "parse Ethiopian Date ግንቦት ፳፩ 2010 and return date 2018-05-29" in + { + parse("am", "xsd:date", "ግንቦት ፳፩ 2010") should equal (Some("2018-05-29")) + } +"DateTimeParser" should "parse Ethiopian Date ፳፩ ሚያዝያ ፳፻ and return date 2008-04-29" in + { + parse("am", "xsd:date", "፳፩ ሚያዝያ ፳፻") should equal (Some("2008-04-29")) + } +"DateTimeParser" should "parse Ethiopian Date ጳጉሜ 5 ፳፻ and return date 2008-09-10" in + { + parse("am", "xsd:date", "ጳጉሜ 5 ፳፻") should equal (Some("2008-09-10")) + } private val wikiParser = WikiParser.getInstance() From 285a3815704f77d1375af3af8fe87f8e6edfa5b6 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:42:22 +0300 Subject: [PATCH 05/12] Update core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala Co-authored-by: Ted Thibodeau Jr --- .../scala/org/dbpedia/extraction/util/GeezNumberUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala index 1f0c1328f..2c8672bca 100644 --- a/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala +++ b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala @@ -44,7 +44,7 @@ class GeezNumberUtils { stack.toList } - // Recursively calculates and performs the calcualtion to convert geez number to arabic numerals. + // Recursively calculates and performs the calculation to convert geez numbers to Arabic numerals. def calculate(start: Int, end: Int, arr: List[Int]): Option[Int] = { if (start > end) return None if (start == end) { From 9f2be609ace2ac72d511bab15609d1349837d5c7 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:42:39 +0300 Subject: [PATCH 06/12] Update core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala Co-authored-by: Ted Thibodeau Jr --- .../scala/org/dbpedia/extraction/util/GeezNumberUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala index 2c8672bca..3b41b2673 100644 --- a/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala +++ b/core/src/main/scala/org/dbpedia/extraction/util/GeezNumberUtils.scala @@ -28,7 +28,7 @@ class GeezNumberUtils { '፼' -> 10000 ) - // Preprocesses and converts geez scripts to arabic numeral. + // Preprocesses and converts geez scripts to Arabic numerals. def preprocess(geezStr: String): List[Int] = { val strippedGeezStr = geezStr.trim val stack = scala.collection.mutable.ListBuffer[Int]() From 0ff33af8c5b094eaf2f7b5396786efa47e62d811 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:42:50 +0300 Subject: [PATCH 07/12] Update core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala Co-authored-by: Ted Thibodeau Jr --- .../org/dbpedia/extraction/dataparser/DateTimeParserTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala index a48ad1b1e..ab95484e5 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala @@ -710,7 +710,7 @@ class DateTimeParserTest extends FlatSpec with Matchers parse("am", "xsd:date", "07/13/2003") should equal (None) } -// Amharic date Test: Geez calender dates should be converted to gregorian +// Amharic date Test: Geez calendar dates should be converted to Gregorian "DateTimeParser" should "parse Ethiopian Date 21-11-2013 and return date 2020-10-31" in { parse("am", "xsd:date", "21-11-2013") should equal (Some("2021-07-28")) From d1870a6c2afefea69aa7fb639797fb41803fc3d6 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:42:58 +0300 Subject: [PATCH 08/12] Update core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala Co-authored-by: Ted Thibodeau Jr --- .../org/dbpedia/extraction/dataparser/DateTimeParserTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala index ab95484e5..63bfc6f8b 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala @@ -677,7 +677,7 @@ class DateTimeParserTest extends FlatSpec with Matchers parse("ja", "xsd:gYear", "西暦2012年") should equal (Some("2012")) } -// Amharic date Test: If date is already in gregorian calendar conversion shouldnt be performed. +// Amharic date Test: If date is already in Gregorian calendar, conversion shouldn't be performed. "DateTimeParser" should "parse Gregorian Date 21-ጁላይ-2013 and return date 2013-07-21" in { From 56a7c883f493b9cf9e69212fa9eb57162f0154bd Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:43:13 +0300 Subject: [PATCH 09/12] Update core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala Co-authored-by: Ted Thibodeau Jr --- .../dbpedia/extraction/dataparser/DateTimeParserTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala index 63bfc6f8b..557dac716 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala @@ -683,9 +683,9 @@ class DateTimeParserTest extends FlatSpec with Matchers { parse("am", "xsd:date", "21-ጁላይ-2013 እ.ኤ.አ.") should equal (Some("2013-07-21")) } -"DateTimeParser" should "parse Gregorian Date september 23 2000 እ.ኤ.አ. and return date 2000-09-23" in +"DateTimeParser" should "parse Gregorian date September 23 2000 እ.ኤ.አ. and return date 2000-09-23" in { - parse("am", "xsd:date", "september 23 2000 እ.ኤ.አ.") should equal (Some("2000-09-23")) + parse("am", "xsd:date", "September 23 2000 እ.ኤ.አ.") should equal (Some("2000-09-23")) } "DateTimeParser" should "parse Gregorian Date 23 ሴፕተምበር 2013 እ.ኤ.አ. and return date 2013-09-23" in { From 891b41f578648e2975a85762328169e0492565ef Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:45:01 +0300 Subject: [PATCH 10/12] Update core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala Co-authored-by: Ted Thibodeau Jr --- .../org/dbpedia/extraction/dataparser/EthiopianDateParser.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala index a2eb321c2..a1a8964b7 100644 --- a/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala +++ b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala @@ -53,7 +53,6 @@ class EthiopianDateParser(datatype: Datatype, val strict: Boolean = false) { // Amharic month names (month-day-year) for (dateRegex2(month, day, year) <- List(dateString)) { return Some((year, month, day)) - } // Amharic month names (day-month-year) From 9eb47eeab7c5db0c48a68d702a6c6eeb4a884baf Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:45:23 +0300 Subject: [PATCH 11/12] Update core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala Co-authored-by: Ted Thibodeau Jr --- .../org/dbpedia/extraction/dataparser/EthiopianDateParser.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala index a1a8964b7..9539289ab 100644 --- a/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala +++ b/core/src/main/scala/org/dbpedia/extraction/dataparser/EthiopianDateParser.scala @@ -65,7 +65,7 @@ class EthiopianDateParser(datatype: Datatype, val strict: Boolean = false) { return Some((year, month, day)) } - // dates that contain geez/ Amharic numbers (day-month-year) + // dates that contain geez/Amharic numbers (day-month-year) for (dateRegex5(day, month, year) <- List(dateString)) { return Some((year, month, day)) } From ac87b85d3edcb590cfdbef6764813e955c0ff397 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:47:50 +0300 Subject: [PATCH 12/12] Update core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala Co-authored-by: Ted Thibodeau Jr --- .../dbpedia/extraction/dataparser/DateTimeParserTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala index 557dac716..301eca57c 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DateTimeParserTest.scala @@ -687,11 +687,11 @@ class DateTimeParserTest extends FlatSpec with Matchers { parse("am", "xsd:date", "September 23 2000 እ.ኤ.አ.") should equal (Some("2000-09-23")) } -"DateTimeParser" should "parse Gregorian Date 23 ሴፕተምበር 2013 እ.ኤ.አ. and return date 2013-09-23" in +"DateTimeParser" should "parse Gregorian date 23 ሴፕተምበር 2013 እ.ኤ.አ. and return date 2013-09-23" in { parse("am", "xsd:date", "23 ሴፕተምበር 2013 እ.ኤ.አ.") should equal (Some("2013-09-23")) } - "DataParser" should "parse Gregorian Date 23/07/2013 እ.ኤ.አ. and return date 2013-09-23" in + "DataParser" should "parse Gregorian date 23/07/2013 እ.ኤ.አ. and return date 2013-09-23" in { parse("am", "xsd:date", "23/07/2013 እ.ኤ.አ.") should equal (Some("2013-07-23")) }