From cf0e88c9d1ab6c90d0067ac74eb260f3bcfc6ff3 Mon Sep 17 00:00:00 2001 From: Thanh Lam Date: Thu, 4 Mar 2021 16:02:00 -0500 Subject: [PATCH 1/3] CSM BDS fixes related to date-time format issues #992 #995 --- csm_big_data/logstash/config/logstash.conf | 12 ++++++++++++ csm_big_data/python/findJobKeys.py | 2 +- csm_big_data/python/findJobMetrics.py | 2 +- csm_big_data/python/findJobTimeRange.py | 4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/csm_big_data/logstash/config/logstash.conf b/csm_big_data/logstash/config/logstash.conf index 3b4a7984e..9f18e90d6 100644 --- a/csm_big_data/logstash/config/logstash.conf +++ b/csm_big_data/logstash/config/logstash.conf @@ -94,6 +94,18 @@ else if "ras" in [tags] and "csm" in [tags] { add_field => { "type" => "log-ras" } } +### Use the gsub option of the Logstash mutate filter to normalize input data before it is sent to the Elasticsearch index. +### In the two fields data.begin_time and data.history.end_time, the blank between date and time is replaced +### with the letter T. Note: these fields have type "date", which requires a valid format. +### Also note: this processing makes no change to csm_transaction.log. + + mutate { + gsub => [ + "[data][begin_time]", " ", "T", + "[data][history][end_time]", " ", "T" + ] + } + date { match => ["time_stamp", "ISO8601","YYYY-MM-dd HH:mm:ss.SSS" ] target => "time_stamp" diff --git a/csm_big_data/python/findJobKeys.py b/csm_big_data/python/findJobKeys.py index 76d4379e5..b69694600 100755 --- a/csm_big_data/python/findJobKeys.py +++ b/csm_big_data/python/findJobKeys.py @@ -125,7 +125,7 @@ def main(args): # --------------------------------------------------------------------------------------------- # TODO Add a utility script to manage this. 
- date_format= '%Y-%m-%d %H:%M:%S.%f' + date_format= '%Y-%m-%dT%H:%M:%S.%f' print_format='%Y-%m-%d %H:%M:%S:%f' search_format='epoch_millis' diff --git a/csm_big_data/python/findJobMetrics.py b/csm_big_data/python/findJobMetrics.py index e20e2bed0..724c3c8c4 100755 --- a/csm_big_data/python/findJobMetrics.py +++ b/csm_big_data/python/findJobMetrics.py @@ -120,7 +120,7 @@ def main(args): } # --------------------------------------------------------------------------------------------- - date_format= '%Y-%m-%d %H:%M:%S.%f' + date_format= '%Y-%m-%dT%H:%M:%S.%f' print_format='%Y-%m-%d %H:%M:%S:%f' search_format='epoch_millis' diff --git a/csm_big_data/python/findJobTimeRange.py b/csm_big_data/python/findJobTimeRange.py index 6666228ed..2970dc64a 100755 --- a/csm_big_data/python/findJobTimeRange.py +++ b/csm_big_data/python/findJobTimeRange.py @@ -114,11 +114,11 @@ def main(args): if len(hits) > 0 : tr_data = cast.deep_get( hits[0], "_source", "data") - date_format= '%Y-%m-%d %H:%M:%S.%f' + date_format= '%Y-%m-%dT%H:%M:%S.%f' print_format='%Y-%m-%d.%H:%M:%S:%f' search_format='"yyyy-MM-dd HH:mm:ss:SSS"' - start_time=datetime.strptime(tr_data["begin_time"], '%Y-%m-%d %H:%M:%S.%f') + start_time=datetime.strptime(tr_data["begin_time"], '%Y-%m-%dT%H:%M:%S.%f') start_time='{0}'.format(start_time.strftime(print_format)[:-3]) # If a history is present end_time is end_time, otherwise it's now. 
From 88e19b5cd3a408989e71c626541cb3ab6154ac62 Mon Sep 17 00:00:00 2001 From: Thanh Lam Date: Tue, 9 Mar 2021 17:17:27 -0500 Subject: [PATCH 2/3] Adding a fix for time range query --- csm_big_data/python/cast_helper.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/csm_big_data/python/cast_helper.py b/csm_big_data/python/cast_helper.py index 4c1403f7e..3e7aae0dc 100755 --- a/csm_big_data/python/cast_helper.py +++ b/csm_big_data/python/cast_helper.py @@ -48,7 +48,8 @@ DATE_FORMAT = '(\d{4})-(\d{1,2})-(\d{1,2})[ \.T]*(\d{0,2}):{0,1}(\d{0,2}):{0,1}(\d{0,2})' DATE_FORMAT_PRINT = '%Y-%m-%d %H:%M:%S' #TIME_SEARCH_FORMAT = 'yyyy-MM-dd HH:mm:ss' -TIME_SEARCH_FORMAT = "epoch_millis" +#TIME_SEARCH_FORMAT = "epoch_millis" +TIME_SEARCH_FORMAT = "strict_date_optional_time_nanos" USER_JOB_FIELDS=["data.primary_job_id","data.secondary_job_id", "data.allocation_id", "data.user_name", "data.begin_time", "data.history.end_time", "data.state"] @@ -156,8 +157,11 @@ def build_time_range(start_time, end_time, for a record to be considered "in range". ''' # Build the time range - start_time = convert_timestamp(start_time) - end_time = convert_timestamp(end_time) + # UPDATE: convert_timestamp() returns the time in epoch_millis format, which is no longer needed + # now that TIME_SEARCH_FORMAT is strict_date_optional_time_nanos. + # The following two lines are therefore commented out. + ##start_time = convert_timestamp(start_time) + ##end_time = convert_timestamp(end_time) if start_time and end_time: # Build the time range. 
From b936eeb078c83e524ba48b72ee83db4fde62d62b Mon Sep 17 00:00:00 2001 From: Thanh Lam Date: Tue, 23 Mar 2021 16:40:18 -0400 Subject: [PATCH 3/3] Commented out convert_timestamp --- csm_big_data/python/cast_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csm_big_data/python/cast_helper.py b/csm_big_data/python/cast_helper.py index 3e7aae0dc..a0911556b 100755 --- a/csm_big_data/python/cast_helper.py +++ b/csm_big_data/python/cast_helper.py @@ -218,8 +218,8 @@ def build_timestamp_range( start_time, end_time, field="@timestamp"): ''' # Build the time range - start_time = convert_timestamp(start_time) - end_time = convert_timestamp(end_time) + #start_time = convert_timestamp(start_time) + #end_time = convert_timestamp(end_time) # Build the time range. target=[]