From 18e0de06e051224966d8591dba850f35024e52eb Mon Sep 17 00:00:00 2001 From: Laszlo Bodor Date: Thu, 7 Nov 2024 18:18:13 +0100 Subject: [PATCH 1/2] HIVE-28626: Display MoveTask/StatsTask duration on the query summary Change-Id: Ib37772938db5fdecf6507cdd43717a89f1e6250a --- .../org/apache/hadoop/hive/ql/log/PerfLogger.java | 2 ++ .../org/apache/hadoop/hive/ql/exec/MoveTask.java | 13 +++++++------ .../org/apache/hadoop/hive/ql/exec/StatsTask.java | 7 +++++++ .../org/apache/hadoop/hive/ql/metadata/Hive.java | 2 +- .../apache/hadoop/hive/ql/stats/BasicStatsTask.java | 1 - 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java index 530a114b9503..d979766e0a62 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java +++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java @@ -101,6 +101,8 @@ public class PerfLogger { public static final String LOAD_PARTITION = "LoadPartition"; public static final String LOAD_DYNAMIC_PARTITIONS = "LoadDynamicPartitions"; + public static final String STATS_TASK = "StatsTask"; + public static final String HIVE_GET_TABLE = "getTablesByType"; public static final String HIVE_GET_DATABASE = "getDatabase"; public static final String HIVE_GET_DATABASE_2 = "getDatabase2"; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 2721977d6f9b..e88b83505b46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -104,6 +104,7 @@ public class MoveTask extends Task implements Serializable { private static final long serialVersionUID = 1L; private static transient final Logger LOG = LoggerFactory.getLogger(MoveTask.class); + private final PerfLogger perfLogger = SessionState.getPerfLogger(); public MoveTask() { super(); @@ -169,7 +170,6 @@ public void flattenUnionSubdirectories(Path sourcePath) throws HiveException { private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) throws HiveException { try { - PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.perfLogBegin("MoveTask", PerfLogger.FILE_MOVES); String mesg = "Moving data to " + (isDfsDir ? "" : "local ") + "directory " @@ -569,7 +569,8 @@ public int execute() { } releaseLocks(tbd); } - + long moveFilesDuration = perfLogger.getDuration(PerfLogger.FILE_MOVES); + console.printInfo(String.format("Time taken to move files:\t %d ms", moveFilesDuration)); return 0; } catch (HiveException he) { return processHiveException(he); @@ -710,8 +711,8 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, pushFeed(FeedType.DYNAMIC_PARTITIONS, dp.values()); } - String loadTime = "\t Time taken to load dynamic partitions: " + - (System.currentTimeMillis() - startTime)/1000.0 + " seconds"; + String loadTime = String.format("Time taken to load dynamic partitions:\t %.3f seconds", + (System.currentTimeMillis() - startTime) / 1000.0); console.printInfo(loadTime); LOG.info(loadTime); @@ -758,8 +759,8 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, } LOG.info("Loading partition " + entry.getKey()); } - console.printInfo("\t Time taken for adding to write entity : " + - (System.currentTimeMillis() - startTime)/1000.0 + " seconds"); + console.printInfo(String.format("Time taken for adding to write entity:\t %.3f seconds", + (System.currentTimeMillis() - startTime) / 1000.0)); dc = null; // reset data container to prevent it being added again. return dc; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index 9e0583468080..f322e1a0ce4d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.TaskQueue; @@ -55,6 +57,7 @@ public class StatsTask extends Task implements Serializable { private static final long serialVersionUID = 1L; private static transient final Logger LOG = LoggerFactory.getLogger(StatsTask.class); + private final PerfLogger perfLogger = SessionState.getPerfLogger(); public StatsTask() { super(); @@ -94,6 +97,7 @@ public int execute() { } int ret = 0; try { + perfLogger.perfLogBegin("StatsTask", PerfLogger.STATS_TASK); if (work.isFooterScan()) { work.getBasicStatsNoJobWork().setPartitions(work.getPartitions()); @@ -113,6 +117,9 @@ public int execute() { LOG.error("Failed to run stats task", e); setException(e); return 1; + } finally { + perfLogger.perfLogEnd("StatsTask", PerfLogger.STATS_TASK); + console.printInfo(String.format("StatsTask took %d", perfLogger.getDuration(PerfLogger.STATS_TASK))); } return 0; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index f447aacdf7d7..9b51956ce23d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -5047,7 +5047,7 @@ private static void deleteAndRename(FileSystem destFs, Path destFile, FileStatus try { // rename cannot overwrite non empty destination directory, so deleting the destination before renaming. destFs.delete(destFile); - LOG.info("Deleted destination file" + destFile.toUri()); + LOG.info("Deleted destination file: " + destFile.toUri()); } catch (FileNotFoundException e) { // no worries } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index 4abb77641fe7..e1927ce570c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -102,7 +102,6 @@ public BasicStatsTask(HiveConf conf, BasicStatsWork work) { @Override public int process(Hive db, Table tbl) throws Exception { - LOG.info("Executing stats task"); table = tbl; return aggregateStats(db, tbl); From 692a5c33a79e7c7e69d8d0b2c28e252a4ba1729c Mon Sep 17 00:00:00 2001 From: Laszlo Bodor Date: Sun, 24 Nov 2024 19:20:58 +0100 Subject: [PATCH 2/2] PR comments --- ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java | 9 +++++---- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index e88b83505b46..8d5385496d25 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -78,6 +78,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.util.DirectionUtils; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -686,7 +687,7 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, Map dps = Utilities.getFullDPSpecs(conf, dpCtx, dynamicPartitionSpecs); console.printInfo(System.getProperty("line.separator")); - long startTime = System.currentTimeMillis(); + long startTime = Time.monotonicNow(); // load the list of DP partitions and return the list of partition specs // TODO: In a follow-up to HIVE-1361, we should refactor loadDynamicPartitions // to use Utilities.getFullDPSpecs() to get the list of full partSpecs. @@ -712,7 +713,7 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, } String loadTime = String.format("Time taken to load dynamic partitions:\t %.3f seconds", - (System.currentTimeMillis() - startTime) / 1000.0); + (Time.monotonicNow() - startTime) / 1000.0); console.printInfo(loadTime); LOG.info(loadTime); @@ -721,7 +722,7 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, " To turn off this error, set hive.error.on.empty.partition=false."); } - startTime = System.currentTimeMillis(); + startTime = Time.monotonicNow(); // for each partition spec, get the partition // and put it to WriteEntity for post-exec hook for(Map.Entry, Partition> entry : dp.entrySet()) { @@ -760,7 +761,7 @@ private DataContainer handleDynParts(Hive db, Table table, LoadTableDesc tbd, LOG.info("Loading partition " + entry.getKey()); } console.printInfo(String.format("Time taken for adding to write entity:\t %.3f seconds", - (System.currentTimeMillis() - startTime) / 1000.0)); + (Time.monotonicNow() - startTime) / 1000.0)); dc = null; // reset data container to prevent it being added again. return dc; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 9b51956ce23d..5a52a6e5703f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -5047,7 +5047,7 @@ private static void deleteAndRename(FileSystem destFs, Path destFile, FileStatus try { // rename cannot overwrite non empty destination directory, so deleting the destination before renaming. destFs.delete(destFile); - LOG.info("Deleted destination file: " + destFile.toUri()); + LOG.info("Deleted destination file: {}", destFile.toUri()); } catch (FileNotFoundException e) { // no worries }