From fe100b055856610d35c01d1be63949ed69667c7e Mon Sep 17 00:00:00 2001
From: Abdul Mannan
Date: Thu, 31 May 2018 18:58:36 +0500
Subject: [PATCH] exclude paths which do not exist

---
 edx/analytics/tasks/common/spark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/edx/analytics/tasks/common/spark.py b/edx/analytics/tasks/common/spark.py
index 06ba4febab..84817683b0 100644
--- a/edx/analytics/tasks/common/spark.py
+++ b/edx/analytics/tasks/common/spark.py
@@ -184,7 +184,7 @@ def get_event_log_dataframe(self, spark, *args, **kwargs):
             pattern=self.pattern,
             date_pattern=self.date_pattern,
         ).output()
-        self.path_targets = [task.path for task in path_targets]
+        self.path_targets = [task.path for task in path_targets if task.exists()]
         dataframe = spark.read.format('json').load(self.path_targets, schema=self.get_log_schema())
         dataframe = dataframe.filter(dataframe['time'].isNotNull()) \
             .withColumn('event_date', date_format(to_date(dataframe['time']), 'yyyy-MM-dd'))