Skip to content

Commit

Permalink
[Rust]fix hash value to spark_murmur3 (#385)
Browse files Browse the repository at this point in the history
* fix hash value to spark_murmur3

Signed-off-by: zenghua <[email protected]>

* fix clippy

Signed-off-by: zenghua <[email protected]>

---------

Signed-off-by: zenghua <[email protected]>
Co-authored-by: zenghua <[email protected]>
  • Loading branch information
Ceng23333 and zenghua authored Jan 8, 2024
1 parent 99cd804 commit c97f58d
Show file tree
Hide file tree
Showing 10 changed files with 739 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
package org.apache.spark.sql.lakesoul.commands

import com.dmetasoul.lakesoul.tables.LakeSoulTable
import org.apache.spark.sql.catalyst.expressions.Murmur3HashFunction
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.lakesoul.catalog.LakeSoulCatalog
import org.apache.spark.sql.lakesoul.sources.LakeSoulSQLConf
import org.apache.spark.sql.lakesoul.test.{LakeSoulTestBeforeAndAfterEach, LakeSoulTestSparkSession, LakeSoulTestUtils}
import org.apache.spark.sql.test.{SharedSparkSession, TestSparkSession}
import org.apache.spark.sql.types.DataTypes.{BooleanType, DoubleType, FloatType, IntegerType, ShortType, StringType, createArrayType}
import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SparkSession}
import org.apache.spark.unsafe.types.UTF8String
import org.junit.runner.RunWith
import org.scalatestplus.junit.JUnitRunner

Expand Down Expand Up @@ -770,4 +773,42 @@ class UpsertSuiteBase extends QueryTest
}
}

// Diagnostic test: prints the values produced by Spark's SQL `hash` function and by direct
// calls to Murmur3HashFunction.hash (seed 42) for a range of input types. It contains no
// assertions, so it only fails if one of the statements throws; the printed numbers are meant
// to be read by a human.
// NOTE(review): presumably these printed values are the reference numbers for the native
// (Rust) hash implementation — confirm against its tests.
test("hash value") {
// Integer literals.
sql("select hash(1), hash(2), hash(3), hash(4)").show()

// The same values cast to long.
sql("select hash(cast(1 as long)), hash(cast(2 as long)), hash(cast(3 as long)), hash(cast(4 as long))").show()
// Raw IEEE-754 bit patterns of 1.0 as a float and as a double.
println(java.lang.Float.floatToIntBits(1.0f))
println(java.lang.Double.doubleToLongBits(1.0d))

// NOTE(review): an unsuffixed 1.0 literal is presumably not typed as DOUBLE by Spark SQL,
// which would explain why an explicit cast-to-double query follows — confirm.
sql("select hash(1.0), hash(2.0), hash(3.0), hash(4.0)").show()

sql("select hash(cast(1.0 as double)), hash(cast(2.0 as double)), hash(cast(3.0 as double)), hash(cast(4.0 as double))").show()

// Single-character strings.
sql("select hash('1'), hash('2'), hash('3'), hash('4')").show()


// One-element byte array; 49 is the ASCII code of '1'.
sql("select hash(array(cast(49 as BYTE)))").show()
// Multi-argument hashes.
// NOTE(review): the original marker here was "0" — presumably the bucket id these key
// pairs are expected to land in; confirm.
sql("select hash('2','22'), hash('3', '32'), hash('1', '1')").show()

// NOTE(review): the original marker here was "1" — presumably the expected bucket id; confirm.
sql("select hash('1','12'), hash('4', '42'), hash('2', '2')").show()

// Direct calls into Murmur3HashFunction, every top-level call seeded with 42.
println("bool false: " + Murmur3HashFunction.hash(false, BooleanType, 42))
println("string 321: " + Murmur3HashFunction.hash(UTF8String.fromString("321"), StringType, 42))
// '1'.toByte is 49. NOTE(review): the value is an Array[Byte] but the declared type is an
// array of ShortType — confirm this mismatch is intentional.
println("bytes [1]: " + Murmur3HashFunction.hash(Array('1'.toByte), createArrayType(ShortType), 42))
// NOTE(review): the label says "byte" while the declared type is ShortType — confirm.
println("byte 1: " + Murmur3HashFunction.hash('1'.toByte, ShortType, 42))
println("float 1.0: " + Murmur3HashFunction.hash(1.0f, FloatType, 42))
// Negative and positive zero, for both float and double, printed side by side for comparison.
println("float -0.0 :" + Murmur3HashFunction.hash(-0.0f, FloatType, 42))
println("float 0.0 :" + Murmur3HashFunction.hash(0.0f, FloatType, 42))
println("double -0.0 :" + Murmur3HashFunction.hash(-0.0, DoubleType, 42))
println("double 0.0 :" + Murmur3HashFunction.hash(0.0, DoubleType, 42))
// 1065353216 is the value of floatToIntBits(1.0f) printed above.
println("integer 1065353216: " + Murmur3HashFunction.hash(1065353216, IntegerType, 42))
// Chained hashing: the hash of the first string (seed 42) is used as the seed for the second.
// NOTE(review): presumably this mirrors how the multi-argument SQL hash(a, b) combines its
// arguments — confirm.
println("string 321,321: " + Murmur3HashFunction.hash(UTF8String.fromString("321"), StringType,
Murmur3HashFunction.hash(UTF8String.fromString("321"), StringType, 42)))
println("string 1,12: " + Murmur3HashFunction.hash(UTF8String.fromString("12"), StringType,
Murmur3HashFunction.hash(UTF8String.fromString("1"), StringType, 42)))
println("string 2,22: " + Murmur3HashFunction.hash(UTF8String.fromString("22"), StringType,
Murmur3HashFunction.hash(UTF8String.fromString("2"), StringType, 42)))
}
}
2 changes: 1 addition & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ uuid = { version = "1.4.0", features = ["v4", "fast-rng", "macro-diagnostics"]}
serde = { version = "1.0", features = ["derive", "std", "rc"]}
rand = "^0.8"
bytes = "1.4.0"
half = "^2.1"
57 changes: 57 additions & 0 deletions rust/lakesoul-datafusion/src/test/hash_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// SPDX-FileCopyrightText: 2023 LakeSoul Contributors
//
// SPDX-License-Identifier: Apache-2.0

mod hash_tests {
    use lakesoul_io::hash_utils::{HashValue, HASH_SEED};

    /// Pins `HashValue::hash_one` to fixed reference values for a range of input types.
    ///
    /// Every result is reinterpreted as a signed 32-bit integer (`as i32`) before comparison.
    /// NOTE(review): the expected numbers presumably come from Spark's Murmur3 `hash`
    /// function — confirm against the Spark-side output.
    #[test]
    fn hash_value_test() {
        // `check!(value => expected)` hashes `value` with `HASH_SEED`.
        // `check!(value, after prefix => expected)` first hashes `prefix` with `HASH_SEED`
        // and then uses that result as the seed for hashing `value`.
        macro_rules! check {
            ($value:expr => $expected:expr) => {
                assert_eq!($value.hash_one(HASH_SEED) as i32, $expected)
            };
            ($value:expr, after $prefix:expr => $expected:expr) => {
                assert_eq!($value.hash_one($prefix.hash_one(HASH_SEED)) as i32, $expected)
            };
        }

        // Unsuffixed integer literals.
        check!(1 => -559580957);
        check!(2 => 1765031574);
        check!(3 => -1823081949);
        check!(4 => -397064898);

        // 64-bit unsigned integers.
        check!(1u64 => -1712319331);
        check!(2u64 => -797927272);
        check!(3u64 => 519220707);
        check!(4u64 => 1344313940);

        // Single-precision floats.
        check!(1.0f32 => -466301895);
        check!(2.0f32 => 1199227445);
        check!(3.0f32 => 1710391653);
        check!(4.0f32 => -1959694433);

        // Unsuffixed float literals.
        check!(1.0 => -460888942);
        check!(2.0 => -2030303457);
        check!(3.0 => 1075969934);
        check!(4.0 => 1290556682);

        // Single-character strings.
        check!("1" => 1625004744);
        check!("2" => 870267989);
        check!("3" => -1756013582);
        check!("4" => -2142269034);

        // Chained hashing: the hash of the first string seeds the hash of the second.
        check!("321", after "321" => -218318595);
        check!("12", after "1" => 891492135);
        check!("22", after "2" => 1475972200);

        // Positive and negative zero are expected to hash to the same value.
        check!(0.0f32 => 933211791);
        check!(-0.0f32 => 933211791);
        check!(0.0 => -1670924195);
        check!(-0.0 => -1670924195);

        // A `u8` and an unsuffixed integer with the same numeric value share one expected hash.
        check!(49u8 => 766678906);
        check!(49 => 766678906);

        // `false` shares its expected hash with `0.0f32` above.
        check!(false => 933211791);

        // 1065353216 (0x3F80_0000) is the bit pattern of `1.0f32`, and the expected hash
        // equals the one asserted for `1.0f32` above.
        check!(1065353216 => -466301895);
    }
}
1 change: 1 addition & 0 deletions rust/lakesoul-datafusion/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use lakesoul_metadata::MetaDataClient;

mod insert_tests;
mod upsert_tests;
mod hash_tests;
// mod compaction_tests;
// mod streaming_tests;

Expand Down
3 changes: 2 additions & 1 deletion rust/lakesoul-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ serde_json = { workspace = true }
log = { workspace = true }
proto = { path = "../proto" }
parking_lot = "0.12.1"
ahash = "0.8.6"

half = { workspace = true }


[features]
Expand Down
Loading

0 comments on commit c97f58d

Please sign in to comment.