Skip to content

Commit

Permalink
HIVE-28637: Fix the issue of datasize becoming negative due to overflow during addition (yijiuqi, reviewed by Seonggon Namgung, Shohei Okumiya)
Browse files Browse the repository at this point in the history
  • Loading branch information
yiqijiu authored Dec 11, 2024
1 parent 3483bc3 commit 4218877
Show file tree
Hide file tree
Showing 3 changed files with 416 additions and 4 deletions.
8 changes: 4 additions & 4 deletions ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,14 @@ public Statistics clone() {
}

/**
 * Folds the basic statistics of {@code stats} into this object: the data size, the row
 * count, and the basic-stats state (combined through {@code inferColumnStatsState}).
 *
 * <p>NOTE(review): this is a diff hunk whose +/- markers were lost. The page header reports
 * 4 additions and 4 deletions for this file, so the two {@code +=} lines are the removed
 * pre-change code and the two {@code StatsUtils.safeAdd} lines are their replacements;
 * only one pair exists in either version of the real file.
 *
 * @param stats the statistics whose basic values are added to this object
 */
public void addBasicStats(Statistics stats) {
// Pre-change lines (removed): plain long addition.
dataSize += stats.dataSize;
numRows += stats.numRows;
// Post-change lines (added): addition routed through StatsUtils.safeAdd.
// Presumably this avoids the wrap-around to a negative value named in the commit
// title -- confirm against StatsUtils.
dataSize = StatsUtils.safeAdd(dataSize, stats.dataSize);
numRows = StatsUtils.safeAdd(numRows, stats.numRows);
basicStatsState = inferColumnStatsState(basicStatsState, stats.basicStatsState);
}

/**
 * Adds {@code rds} to the tracked data size. Marked {@code @Deprecated} in the source.
 *
 * <p>NOTE(review): this is a diff hunk whose +/- markers were lost. The {@code +=} line is
 * the removed pre-change statement and the {@code StatsUtils.safeAdd} line is its
 * replacement; only one of them exists in either version of the real file.
 *
 * @param rds the amount to add to the data size
 */
@Deprecated
public void addToDataSize(long rds) {
// Pre-change line (removed): plain long addition.
dataSize += rds;
// Post-change line (added): addition routed through StatsUtils.safeAdd.
// Presumably overflow-safe -- confirm against StatsUtils.
dataSize = StatsUtils.safeAdd(dataSize, rds);
}

public void setColumnStats(Map<String, ColStatistics> colStats) {
Expand Down Expand Up @@ -255,7 +255,7 @@ public void addToColumnStats(List<ColStatistics> colStats) {
if (columnStats.containsKey(key) && columnStats.get(key) != null) {
updatedCS = columnStats.get(key);
updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen()));
updatedCS.setNumNulls(updatedCS.getNumNulls() + cs.getNumNulls());
updatedCS.setNumNulls(StatsUtils.safeAdd(updatedCS.getNumNulls(), cs.getNumNulls()));
updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint()));
columnStats.put(key, updatedCS);
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- Test script added by HIVE-28637. It builds two tables with hand-set, near-maximal
-- statistics and then runs EXPLAIN on a window-function query over their UNION ALL.

-- First input table: three string columns.
create table explain_multiple_ptf_big_table
(
key1 string,
value_str1 string,
key3 string
);
-- Second input table: same shape, different column names.
create table explain_multiple_ptf_big_table2
(
key21 string,
value_str21 string,
key23 string
);
-- Override the table statistics by hand; no rows are inserted by this script.
-- Each numRows is 4611686036854775807, so the two together exceed
-- 9223372036854775807, the largest signed 64-bit value.
alter table explain_multiple_ptf_big_table
update statistics set('numRows' = '4611686036854775807',
'rawDataSize' = '922337203685477500');
-- The rawDataSize below is 7 less than the largest signed 64-bit value.
-- NOTE(review): the first table's rawDataSize has one digit fewer than this one;
-- confirm that the difference is intentional.
alter table explain_multiple_ptf_big_table2
update statistics set('numRows' = '4611686036854775807',
'rawDataSize' = '9223372036854775800');
-- Only the plan is requested (EXPLAIN). The query applies nine window functions,
-- partitioned by key or key2, to the UNION ALL of both tables.
-- Presumably the plan's combined statistics are what the overflow fix is checked
-- against; verify against the expected output file.
explain
select *,
row_number() over (partition by key order by key2 desc) rn,
row_number() over (partition by key2 order by key desc) rn2,
max(value_str) over (partition by key2 order by key desc) max1,
max(value_str) over (partition by key order by key2 desc) max3,
min(value_str) over (partition by key2 order by key desc) min1,
min(value_str) over (partition by key order by key2 desc) min3,
last_value(value_str) over (partition by key) lv,
first_value(value_str) over (partition by key2) fv,
max(value_str) over (partition by key) fv21
from (select key1 key, value_str1 value_str, key3 key2
from explain_multiple_ptf_big_table
union all
select key21 key, value_str21 value_str, key23 key2
from explain_multiple_ptf_big_table2) a1;

Loading

0 comments on commit 4218877

Please sign in to comment.