diff --git a/flink-cyber/flink-stellar/pom.xml b/flink-cyber/flink-stellar/pom.xml index b153e0a34..fbaa37447 100644 --- a/flink-cyber/flink-stellar/pom.xml +++ b/flink-cyber/flink-stellar/pom.xml @@ -77,11 +77,6 @@ org.adrianwalker multiline-string - - com.trendmicro - tlsh - 3.7.1 - org.apache.commons commons-math3 @@ -252,6 +247,12 @@ ${global_hamcrest_version} test + + org.junit.jupiter + junit-jupiter + ${jupiter.junit.version} + test + org.junit.jupiter junit-jupiter-api diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/BitPairsTable.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/BitPairsTable.java new file mode 100644 index 000000000..cd24d1553 --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/BitPairsTable.java @@ -0,0 +1,47 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + +class BitPairsTable { + + private static final int BIT_PAIRS_DIFF_TABLE_SIZE = 256; + + private final int[][] table; + + BitPairsTable() { + this.table = generateDefaultBitPairsDiffTable(); + } + + private static int[][] generateDefaultBitPairsDiffTable() { + int[][] result = new int[BIT_PAIRS_DIFF_TABLE_SIZE][BIT_PAIRS_DIFF_TABLE_SIZE]; + + for (int i = 0; i < BIT_PAIRS_DIFF_TABLE_SIZE; i++) { + for (int j = 0; j < BIT_PAIRS_DIFF_TABLE_SIZE; j++) { + int x = i; + int y = j; + int diff = 0; + + for (int z = 0; z < 4; z++) { + int d = Math.abs(x % 4 - y % 4); + + if (d == 3) { + diff += d * 2; + } else { + diff += d; + } + + if (z < 3) { + x /= 4; + y /= 4; + } + } + + result[i][j] = diff; + } + } + + return result; + } + + public int getValue(int row, int column) { + return table[row][column]; + } +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/SlidingWindow.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/SlidingWindow.java new file mode 100644 index 000000000..bdf7786e8 --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/SlidingWindow.java @@ -0,0 +1,39 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + + +import java.util.function.IntUnaryOperator; +import java.util.stream.IntStream; + +public class SlidingWindow { + public static final int DEFAULT_SIZE = 5; + private final byte[] window; + private int byteCount = 0; + + SlidingWindow() { + this.window = new byte[DEFAULT_SIZE]; + } + + public void put(final byte value) { + int cursor = byteCount % window.length; + window[cursor] = value; + byteCount++; + } + + public int[] getWindow() { + final int startPosition = (byteCount - 1) % window.length; + final IntUnaryOperator reverseIterate = i -> i == 0 ? window.length - 1 : i - 1; + final IntUnaryOperator mapper = i -> window[i] & 0xFF; + return IntStream.iterate(startPosition, reverseIterate) + .limit(window.length) + .map(mapper) + .toArray(); + } + + public int getByteCount() { + return byteCount; + } + + public int getWindowSize() { + return window.length; + } +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSH.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSH.java index 8c6d17a51..4d6aa6455 100644 --- a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSH.java +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSH.java @@ -17,42 +17,87 @@ */ package org.apache.metron.stellar.common.utils.hashing.tlsh; -import com.trendmicro.tlsh.BucketOption; -import com.trendmicro.tlsh.ChecksumOption; -import com.trendmicro.tlsh.Tlsh; -import com.trendmicro.tlsh.TlshCreator; +import java.nio.ByteBuffer; -import java.util.Optional; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.swapNibble; /** * The abstraction around interacting with TLSH. */ public class TLSH { - private TlshCreator creator; - public TLSH(BucketOption bucketOption, ChecksumOption checksumOption) { - creator = new TlshCreator(bucketOption, checksumOption); - } - public String apply(byte[] data, boolean force) { - try { - creator.update(data); - return creator.getHash(force).getEncoded(); - } finally { - creator.reset(); + /** + * The checksum bytes. + */ + private final int[] checksum; + /** + * The buckets bytes. + */ + private final int[] codes; + /** + * The encoded length value. + */ + private final int lValue; + /** + * The q1 ratio. + */ + private final int q1Ratio; + /** + * The q2 ratio. + */ + private final int q2Ratio; + + + public TLSH(int[] checksum, int[] codes, int lValue, int q1, int q2) { + this.checksum = checksum; + this.codes = codes; + this.lValue = lValue; + this.q1Ratio = q1; + this.q2Ratio = q2; + } + + + public String getHash() { + return TLSHUtil.bytesToHex(getHexBytes()); + } + + public int[] getChecksum() { + return checksum; + } + + public int[] getCodes() { + return codes; } - } - public static int distance(String hash1, String hash2, Optional includeLength) { - if (hash1 == null || hash2 == null) { - return -1; + public int getlValue() { + return lValue; } - if (hash1.equals(hash2)) { - return 0; + public int getQ1Ratio() { + return q1Ratio; } - Tlsh t1 = Tlsh.fromTlshStr(hash1); - Tlsh t2 = Tlsh.fromTlshStr(hash2); - return t1.totalDiff(t2, includeLength.orElse(false)); - } + public int getQ2Ratio() { + return q2Ratio; + } + + public byte[] getHexBytes() { + final ByteBuffer buf = ByteBuffer.allocate(checksum.length + 2 + codes.length); + for (final int c : checksum) { + buf.put((byte) swapNibble(c)); + } + buf.put((byte) swapNibble(lValue)); + buf.put((byte) (q1Ratio << 4 | q2Ratio)); + for (int i = codes.length - 1; i >= 0; i--) { + buf.put((byte) codes[i]); + } + buf.flip(); + if (buf.hasArray() && 0 == buf.arrayOffset()) { + return buf.array(); + } else { + final byte[] hash = new byte[buf.remaining()]; + buf.get(hash); + return hash; + } + } } diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHBuilder.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHBuilder.java new file mode 100644 index 000000000..f9f88a85b --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHBuilder.java @@ -0,0 +1,208 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + + +import java.nio.ByteBuffer; +import java.util.Arrays; + +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T0; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T11; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T13; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T2; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T3; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T5; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.T7; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.bucketMapping; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.fastBucketMapping; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.hexToBytes; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.lCapturing; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.swapNibble; + +public class TLSHBuilder { + private SlidingWindow slidingWindow; + private final CHECKSUM_OPTION checksumOption; + private final BUCKET_OPTION bucketOption; + private final int[] checksum; + private final int[] aBucket; + + + public TLSHBuilder() { + this(CHECKSUM_OPTION.CHECKSUM_1, BUCKET_OPTION.BUCKET_128); + } + + public TLSHBuilder(CHECKSUM_OPTION checksumOption, BUCKET_OPTION bucketOption) { + this.slidingWindow = new SlidingWindow(); + this.checksumOption = checksumOption; + this.bucketOption = bucketOption; + this.checksum = new int[checksumOption.getChecksumSize()]; + this.aBucket = new int[256]; + } + + public TLSH getTLSH(final byte[] bytes) { + return getTLSH(ByteBuffer.wrap(bytes)); + } + + public TLSH getTLSH(final ByteBuffer buffer) { + fill(buffer); + final int[] sortedCopy = Arrays.copyOf(aBucket, bucketOption.getBucketSize()); + Arrays.sort(sortedCopy); + + final int quartile = bucketOption.getBucketSize() / 4; + final int p1 = quartile - 1; + final int q1 = sortedCopy[p1]; + final int q2 = sortedCopy[p1 + quartile]; + final int q3 = sortedCopy[p1 + 2 * quartile]; + final int lValue = lCapturing(slidingWindow.getByteCount()); + final int q1ratio = (int) (q1 * 100.0f / q3) & 0x0F; + final int q2ratio = (int) (q2 * 100.0f / q3) & 0x0F; + + final int[] compressedBucket = compress(aBucket, q3, q2, q1); + + return new TLSH(checksum.clone(), compressedBucket, lValue, q1ratio, q2ratio); + } + + public TLSH fromHex(String hashHex) { + return fromHex(hexToBytes(hashHex)); + } + + public TLSH fromHex(byte[] hash) { + final int bucketCount; + final int checksumLength; + switch (hash.length) { + case 35: + bucketCount = 128; + checksumLength = 1; + break; + case 37: + bucketCount = 128; + checksumLength = 3; + break; + case 67: + bucketCount = 256; + checksumLength = 1; + break; + case 69: + bucketCount = 256; + checksumLength = 3; + break; + default: + throw new IllegalArgumentException( + String.format("Illegal hash buffer length: %d, must be one of 35,37,67,69", hash.length)); + } + final ByteBuffer buf = ByteBuffer.wrap(hash); + final int[] checksum = new int[checksumLength]; + for (int i = 0; i < checksum.length; i++) { + checksum[i] = swapNibble(buf.get() & 0xFF); + } + final int lValue = swapNibble(buf.get() & 0xFF); + final int qRatio = buf.get() & 0xFF; + final int q1Ratio = qRatio >> 4; + final int q2Ratio = qRatio & 0x0F; + final int[] codes = new int[bucketCount / 8 * 2]; + for (int i = 0; i < codes.length; i++) { + codes[codes.length - 1 - i] = buf.get() & 0xFF; + } + return new TLSH(checksum, codes, lValue, q1Ratio, q2Ratio); + } + + private void fill(ByteBuffer buffer) { + if (slidingWindow.getByteCount() == 0) { + while (slidingWindow.getByteCount() < slidingWindow.getWindowSize() - 1 && buffer.hasRemaining()) { + slidingWindow.put(buffer.get()); + if (!buffer.hasRemaining()) { + return; + } + } + } + while (buffer.hasRemaining()) { + slidingWindow.put(buffer.get()); + int[] window = slidingWindow.getWindow(); + processChecksum(window); + processBuckets(window); + } + } + + private int[] compress(int[] buckets, long q3, long q2, long q1) { + final int codeSize = bucketOption.getBucketSize() / 4; + final int[] result = new int[codeSize]; + for (int i = 0; i < codeSize; i++) { + int h = 0; + for (int j = 0; j < 4; j++) { + final long k = buckets[4 * i + j]; + if (q3 < k) { + h += 3 << j * 2; + } else if (q2 < k) { + h += 2 << j * 2; + } else if (q1 < k) { + h += 1 << j * 2; + } + } + result[i] = h; + } + return result; + } + + private void processBuckets(int[] window) { + aBucket[fastBucketMapping(T2, window[0], window[1], window[2])]++; + aBucket[fastBucketMapping(T3, window[0], window[1], window[3])]++; + aBucket[fastBucketMapping(T5, window[0], window[2], window[3])]++; + + aBucket[fastBucketMapping(T7, window[0], window[2], window[4])]++; + aBucket[fastBucketMapping(T11, window[0], window[1], window[4])]++; + aBucket[fastBucketMapping(T13, window[0], window[3], window[4])]++; + } + + private void processChecksum(int[] window) { + if (checksumOption == CHECKSUM_OPTION.CHECKSUM_1) { + checksum[0] = fastBucketMapping(T0, window[0], window[1], checksum[0]); + } else { + checksum[0] = fastBucketMapping(T0, window[0], window[1], checksum[0]); + checksum[1] = bucketMapping(checksum[0], window[0], window[1], checksum[1]); + checksum[2] = bucketMapping(checksum[1], window[0], window[1], checksum[2]); + } + } + + public void clean() { + slidingWindow = new SlidingWindow(); + Arrays.fill(checksum, 0); + Arrays.fill(aBucket, 0); + } + + + public enum CHECKSUM_OPTION { + CHECKSUM_1(1), CHECKSUM_3(3); + + private final int option; + + CHECKSUM_OPTION(int option) { + this.option = option; + } + + public int getChecksumSize() { + return option; + } + + public static CHECKSUM_OPTION fromVal(int val) { + return Arrays.stream(values()).filter(op -> op.option == val).findFirst().orElse(CHECKSUM_1); + } + } + + public enum BUCKET_OPTION { + BUCKET_128(128), BUCKET_256(256); + + private final int option; + + BUCKET_OPTION(int option) { + this.option = option; + } + + public int getBucketSize() { + return this.option; + } + + public static BUCKET_OPTION fromVal(int val) { + return Arrays.stream(values()).filter(op -> op.option == val).findFirst().orElse(BUCKET_128); + } + } + + +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHCache.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHCache.java deleted file mode 100644 index 10d106f20..000000000 --- a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHCache.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.metron.stellar.common.utils.hashing.tlsh; - -import com.trendmicro.tlsh.BucketOption; -import com.trendmicro.tlsh.ChecksumOption; - -import java.util.AbstractMap; -import java.util.HashMap; -import java.util.Map; - -/** - * Create a threadlocal cache of TLSH handlers. - */ -public class TLSHCache { - public static ThreadLocal INSTANCE = ThreadLocal.withInitial(() -> new TLSHCache()); - private Map, TLSH> cache = new HashMap<>(); - private TLSHCache() {} - - public TLSH getTLSH(BucketOption bo, ChecksumOption co) { - return cache.computeIfAbsent( new AbstractMap.SimpleEntry<>(bo, co) - , kv -> new TLSH(kv.getKey(), kv.getValue()) - ); - } -} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHConstants.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHConstants.java new file mode 100644 index 000000000..0957ab17e --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHConstants.java @@ -0,0 +1,135 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + +import java.nio.charset.StandardCharsets; + +public final class TLSHConstants { + + /** + * Pearson's table. + */ + public static final int[] PEARSON_TABLE = {1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163, 14, 197, 213, + 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200, 110, 177, 104, 103, 141, 253, 255, 50, 77, 101, + 81, 18, 45, 96, 31, 222, 25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235, 97, 234, + 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248, 174, 169, 211, 58, 66, 154, 106, 195, 245, + 171, 17, 187, 182, 179, 0, 243, 132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219, + 119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10, 138, 30, 48, 183, 156, 35, 61, 26, + 143, 74, 251, 94, 129, 162, 63, 152, 170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131, + 125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123, 118, 73, 2, 157, 46, 116, 9, 145, + 134, 228, 207, 212, 202, 215, 69, 229, 27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, + 203, 233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76, 140, 36, 210, 172, 41, 54, + 159, 8, 185, 232, 113, 196, 231, 47, 146, 120, 51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, + 109, 184, 209}; + + /** + * Lookup table for the logs of the length value. The last entry saturates the + * logLength at 255. + * + *

+ * 7 -> 25L means 25 is the highest number for which the log is 7.
+ * Generally speaking for the closed interval [ TOPVAL(n-1)+1 .. TOPVAL(n) ] the + * logLength is n. + */ + static final long[] TOPVAL = {/* 0 */ 1, /* 1 */ 2, /* 2 */ 3, /* 3 */ 5, /* 4 */ 7, /* 5 */ 11, /* 6 */ 17, + /* 7 */ 25, /* 8 */ 38, /* 9 */ 57, /* 10 */ 86, /* 11 */ 129, /* 12 */ 194, /* 13 */ 291, /* 14 */ 437, + /* 15 */ 656, /* 16 */ 854, /* 17 */ 1_110, /* 18 */ 1443, /* 19 */ 1876, /* 20 */ 2439, /* 21 */ 3171, + /* 22 */ 3475, /* 23 */ 3823, /* 24 */ 4205, /* 25 */ 4626, /* 26 */ 5088, /* 27 */ 5597, /* 28 */ 6157, + /* 29 */ 6772, /* 30 */ 7450, /* 31 */ 8195, /* 32 */ 9014, /* 33 */ 9916, /* 34 */ 10_907, /* 35 */ 11_998, + /* 36 */ 13_198, /* 37 */ 14_518, /* 38 */ 15_970, /* 39 */ 17_567, /* 40 */ 19_323, /* 41 */ 21_256, + /* 42 */ 23_382, /* 43 */ 25_720, /* 44 */ 28_292, /* 45 */ 31_121, /* 46 */ 34_233, /* 47 */ 37_656, + /* 48 */ 41_422, /* 49 */ 45_564, /* 50 */ 50_121, /* 51 */ 55_133, /* 52 */ 60_646, /* 53 */ 66_711, + /* 54 */ 73_382, /* 55 */ 80_721, /* 56 */ 88_793, /* 57 */ 97_672, /* 58 */ 107_439, /* 59 */ 118_183, + /* 60 */ 130_002, /* 61 */ 143_002, /* 62 */ 157_302, /* 63 */ 173_032, /* 64 */ 190_335, /* 65 */ 209_369, + /* 66 */ 230_306, /* 67 */ 253_337, /* 68 */ 278_670, /* 69 */ 306_538, /* 70 */ 337_191, /* 71 */ 370_911, + /* 72 */ 408_002, /* 73 */ 448_802, /* 74 */ 493_682, /* 75 */ 543_050, /* 76 */ 597_356, /* 77 */ 657_091, + /* 78 */ 722_800, /* 79 */ 795_081, /* 80 */ 874_589, /* 81 */ 962_048, /* 82 */ 1_058_252, + /* 83 */ 1_164_078, /* 84 */ 1_280_486, /* 85 */ 1_408_534, /* 86 */ 1_549_388, /* 87 */ 1_704_327, + /* 88 */ 1_874_759, /* 89 */ 2_062_236, /* 90 */ 2_268_459, /* 91 */ 2_495_305, /* 92 */ 2_744_836, + /* 93 */ 3_019_320, /* 94 */ 3_321_252, /* 95 */ 3_653_374, /* 96 */ 4_018_711, /* 97 */ 4_420_582, + /* 98 */ 4_862_641, /* 99 */ 5_348_905, /* 100 */ 5_883_796, /* 101 */ 6_472_176, /* 102 */ 7_119_394, + /* 103 */ 7_831_333, /* 104 */ 8_614_467, /* 105 */ 9_475_909, /* 106 */ 10_423_501, /* 107 */ 11_465_851, + /* 108 */ 12_612_437, /* 109 */ 13_873_681, /* 110 */ 15_261_050, /* 111 */ 16_787_154, + /* 112 */ 18_465_870, /* 113 */ 20_312_458, /* 114 */ 22_343_706, /* 115 */ 24_578_077, + /* 116 */ 27_035_886, /* 117 */ 29_739_474, /* 118 */ 32_713_425, /* 119 */ 35_984_770, + /* 120 */ 39_583_245, /* 121 */ 43_541_573, /* 122 */ 47_895_730, /* 123 */ 52_685_306, + /* 124 */ 57_953_837, /* 125 */ 63_749_221, /* 126 */ 70_124_148, /* 127 */ 77_136_564, + /* 128 */ 84_850_228, /* 129 */ 93_335_252, /* 130 */ 102_668_779, /* 131 */ 112_935_659, + /* 132 */ 124_229_227, /* 133 */ 136_652_151, /* 134 */ 150_317_384, /* 135 */ 165_349_128, + /* 136 */ 181_884_040, /* 137 */ 200_072_456, /* 138 */ 220_079_703, /* 139 */ 242_087_671, + /* 140 */ 266_296_456, /* 141 */ 292_926_096, /* 142 */ 322_218_735, /* 143 */ 354_440_623, + /* 144 */ 389_884_688, /* 145 */ 428_873_168, /* 146 */ 471_760_495, /* 147 */ 518_936_559, + /* 148 */ 570_830_240, /* 149 */ 627_913_311, /* 150 */ 690_704_607, /* 151 */ 759_775_136, + /* 152 */ 835_752_671, /* 153 */ 919_327_967, /* 154 */ 1_011_260_767, /* 155 */ 1_112_386_880, + /* 156 */ 1_223_623_232, /* 157 */ 1_345_985_727, /* 158 */ 1_480_584_256, /* 159 */ 1_628_642_751, + /* 160 */ 1_791_507_135, /* 161 */ 1_970_657_856, /* 162 */ 2_167_723_648L, /* 163 */ 2_384_496_256L, + /* 164 */ 2_622_945_920L, /* 165 */ 2_885_240_448L, /* 166 */ 3_173_764_736L, /* 167 */ 3_491_141_248L, + /* 168 */ 3_840_255_616L, /* 169 */ 4_224_281_216L, /* 170 */ 4_646_709_504L, /* 171 */ 5_111_380_735L, + /* 172 */ 5_622_519_040L, /* 173 */ 6_184_770_816L, /* 174 */ 6_803_248_384L, /* 175 */ 7_483_572_991L, + /* 176 */ 8_231_930_623L, /* 177 */ 9_055_123_968L, /* 178 */ 9_960_636_928L, /* 179 */ 10_956_701_183L, + /* 180 */ 12_052_370_943L, /* 181 */ 13_257_608_703L, /* 182 */ 14_583_370_240L, /* 183 */ 16_041_708_032L, + /* 184 */ 17_645_878_271L, /* 185 */ 19_410_467_839L, /* 186 */ 21_351_515_136L, /* 187 */ 23_486_667_775L, + /* 188 */ 25_835_334_655L, /* 189 */ 28_418_870_271L, /* 190 */ 31_260_756_991L, /* 191 */ 34_386_835_455L, + /* 192 */ 37_825_517_567L, /* 193 */ 41_608_071_168L, /* 194 */ 45_768_882_175L, /* 195 */ 50_345_768_959L, + /* 196 */ 55_380_346_880L, /* 197 */ 60_918_384_640L, /* 198 */ 67_010_226_176L, /* 199 */ 73_711_251_455L, + /* 200 */ 81_082_380_287L, /* 201 */ 89_190_617_088L, /* 202 */ 98_109_681_663L, /* 203 */ 107_920_658_432L, + /* 204 */ 118_712_725_503L, /* 205 */ 130_584_006_656L, /* 206 */ 143_642_402_816L, + /* 207 */ 158_006_648_832L, /* 208 */ 173_807_329_279L, /* 209 */ 191_188_066_303L, + /* 210 */ 210_306_867_200L, /* 211 */ 231_337_566_208L, /* 212 */ 254_471_331_839L, + /* 213 */ 279_918_460_927L, /* 214 */ 307_910_328_319L, /* 215 */ 338_701_369_343L, + /* 216 */ 372_571_521_024L, /* 217 */ 409_827_917_823L, /* 218 */ 450_810_724_351L, + /* 219 */ 495_891_791_872L, /* 220 */ 545_481_015_295L, /* 221 */ 600_029_102_079L, + /* 222 */ 660_032_028_671L, /* 223 */ 726_035_300_351L, /* 224 */ 798_638_833_663L, + /* 225 */ 878_502_772_736L, /* 226 */ 966_353_059_839L, /* 227 */ 1_062_988_382_207L, + /* 228 */ 1_169_287_217_151L, /* 229 */ 1_286_216_024_063L, /* 230 */ 1_414_837_633_024L, + /* 231 */ 1_556_321_468_416L, /* 232 */ 1_711_953_739_776L, /* 233 */ 1_883_149_107_199L, + /* 234 */ 2_071_464_050_688L, /* 235 */ 2_278_610_567_167L, /* 236 */ 2_506_471_636_992L, + /* 237 */ 2_757_119_049_728L, /* 238 */ 3_032_831_098_880L, /* 239 */ 3_336_114_143_231L, + /* 240 */ 3_669_725_675_520L, /* 241 */ 4_036_698_439_680L, /* 242 */ 4_440_368_349_184L, + /* 243 */ 4_884_405_157_887L, /* 244 */ 5_372_846_014_464L, /* 245 */ 5_910_131_113_984L, + /* 246 */ 6_501_144_199_168L, /* 247 */ 7_151_258_697_727L, /* 248 */ 7_866_384_908_288L, + /* 249 */ 8_653_023_477_760L, /* 250 */ 9_518_326_480_895L, /* 251 */ 10_470_159_810_560L, + /* 252 */ 11_517_175_529_472L, /* 253 */ 12_668_893_659_136L, /* 254 */ 13_935_783_182_336L, + /* 255 */ /* 15329425519609L */ Long.MAX_VALUE}; + + /** The Pearson default hash of 0. */ + public static final int T0 = 1 /* T[0] */; + /** The Pearson default hash of 2. */ + public static final int T2 = 49 /* T[2] */; + /** The Pearson default hash of 3. */ + public static final int T3 = 12 /* T[3] */; + /** The Pearson default hash of 5. */ + public static final int T5 = 178 /* T[5] */; + /** The Pearson default hash of 7. */ + public static final int T7 = 166 /* T[7] */; + /** The Pearson default hash of 11. */ + public static final int T11 = 84 /* T[11] */; + /** The Pearson default hash of 13. */ + public static final int T13 = 230 /* T[13] */; + + /** + * The scaling multiplier for difference scoring. + */ + public static final int DIFF_SCALE = 12; + + + /** The length threshold for step 1. */ + public static final int LEN_STEP_1 = 656; + /** The log(1.5) constant used in CPP reference implementation for step 1. */ + public static final double LOG_1_5 = 0.405_465_100D; + + /** The length threshold for step 2. */ + public static final int LEN_STEP_2 = 3199; + /** The adjustment for step 2. */ + public static final double LEN_ADJ_2 = 8.727_770D; + /** The log(1.3) constant used in CPP reference implementation for step 2. */ + public static final double LOG_1_3 = 0.262_364_260D; + + /** The adjustment for step 3. */ + public static final double LEN_ADJ_3 = 62.547_200D; + /** The log(1.1) constant used in CPP reference implementation for step 3. */ + public static final double LOG_1_1 = 0.095_310_180D; + + public static final byte[] HEX_ARRAY = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII); + + private TLSHConstants(){ + } +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHHasher.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHHasher.java index f3f8e4fb3..144688567 100644 --- a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHHasher.java +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHHasher.java @@ -17,10 +17,7 @@ */ package org.apache.metron.stellar.common.utils.hashing.tlsh; -import com.trendmicro.tlsh.BucketOption; -import com.trendmicro.tlsh.ChecksumOption; import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.binary.Hex; import org.apache.metron.stellar.common.utils.ConversionUtils; import org.apache.metron.stellar.common.utils.SerDeUtils; @@ -28,152 +25,143 @@ import org.apache.metron.stellar.common.utils.hashing.Hasher; import java.nio.charset.StandardCharsets; -import java.security.NoSuchAlgorithmException; -import java.util.*; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.Set; public class TLSHHasher implements Hasher { - public static final String TLSH_KEY = "tlsh"; - public static final String TLSH_BIN_KEY = "tlsh_bin"; - - public enum Config implements EnumConfigurable { - BUCKET_SIZE("bucketSize"), - CHECKSUM("checksumBytes"), - HASHES("hashes"), - FORCE("force") - ; - final public String key; - Config(String key) { - this.key = key; + public static final String TLSH_KEY = "tlsh"; + public static final String TLSH_BIN_KEY = "tlsh_bin"; + + public enum Config implements EnumConfigurable { + BUCKET_SIZE("bucketSize"), + CHECKSUM("checksumBytes"), + HASHES("hashes"), + FORCE("force"); + final public String key; + + Config(String key) { + this.key = key; + } + + @Override + public String getKey() { + return key; + } } + Integer bucketOption = 128; + Integer checksumOption = 1; + Boolean force = true; + List hashes = new ArrayList<>(); + + /** + * Returns an encoded string representation of the hash value of the input. It is expected that + * this implementation does throw exceptions when the input is null. + * + * @param o The value to hash. + * @return A hash of {@code toHash} that has been encoded. + * + */ @Override - public String getKey() { - return key; + public Object getHash(Object o) { + TLSHBuilder builder = new TLSHBuilder(TLSHBuilder.CHECKSUM_OPTION.fromVal(checksumOption), TLSHBuilder.BUCKET_OPTION.fromVal(bucketOption)); + byte[] data; + if (o instanceof String) { + data = ((String) o).getBytes(StandardCharsets.UTF_8); + } else if (o instanceof byte[]) { + data = (byte[]) o; + } else { + data = SerDeUtils.toBytes(o); + } + try { + TLSH tlsh = builder.getTLSH(data); + builder.clean(); + String hash = tlsh.getHash(); + if (hashes != null && !hashes.isEmpty()) { + Map ret = new HashMap<>(); + ret.put(TLSH_KEY, hash); + ret.putAll(bin(hash)); + return ret; + } else { + return hash; + } + } catch (Exception e) { + return null; + } } - } - - BucketOption bucketOption = BucketOption.BUCKETS_128; - ChecksumOption checksumOption = ChecksumOption.CHECKSUM_1B; - Boolean force = true; - List hashes = new ArrayList<>(); - /** - * Returns an encoded string representation of the hash value of the input. It is expected that - * this implementation does throw exceptions when the input is null. - * - * @param o The value to hash. - * @return A hash of {@code toHash} that has been encoded. - * @throws EncoderException If unable to encode the hash then this exception occurs. - * @throws NoSuchAlgorithmException If the supplied algorithm is not known. - */ - @Override - public Object getHash(Object o) throws EncoderException, NoSuchAlgorithmException { - TLSH tlsh = TLSHCache.INSTANCE.get().getTLSH(bucketOption, checksumOption); - byte[] data = null; - if (o instanceof String) { - data = ((String)o).getBytes(StandardCharsets.UTF_8); - } else if (o instanceof byte[]) { - data = (byte[])o; - } else { - data = SerDeUtils.toBytes(o); - } - try { - String hash = tlsh.apply(data, force); - if (hashes != null && hashes.size() > 0) { - Map ret = new HashMap<>(); - ret.put(TLSH_KEY, hash); - ret.putAll(bin(hash)); + public Map bin(String hash) throws DecoderException { + Random r = new Random(0); + byte[] h = Hex.decodeHex(hash.substring(2 * checksumOption).toCharArray()); + BitSet vector = BitSet.valueOf(h); + int n = vector.length(); + Map ret = new HashMap<>(); + boolean singleHash = hashes.size() == 1; + for (int numHashes : hashes) { + BitSet projection = new BitSet(); + for (int i = 0; i < numHashes; ++i) { + int index = r.nextInt(n); + projection.set(i, vector.get(index)); + } + String outputHash = numHashes + Hex.encodeHexString(projection.toByteArray()); + if (singleHash) { + ret.put(TLSH_BIN_KEY, outputHash); + } else { + ret.put(TLSH_BIN_KEY + "_" + numHashes, outputHash); + } + } return ret; - } else { - return hash; - } - } catch (Exception e) { - return null; } - } - public Map bin(String hash) throws DecoderException { - Random r = new Random(0); - byte[] h = Hex.decodeHex(hash.substring(2 * checksumOption.getChecksumLength()).toCharArray()); - BitSet vector = BitSet.valueOf(h); - int n = vector.length(); - Map ret = new HashMap<>(); - boolean singleHash = hashes.size() == 1; - for (int numHashes : hashes) { - BitSet projection = new BitSet(); - for (int i = 0; i < numHashes; ++i) { - int index = r.nextInt(n); - projection.set(i, vector.get(index)); - } - String outputHash = numHashes + Hex.encodeHexString(projection.toByteArray()); - if (singleHash) { - ret.put(TLSH_BIN_KEY, outputHash); - } else { - ret.put(TLSH_BIN_KEY + "_" + numHashes, outputHash); - } + @Override + public void configure(Optional> config) { + if (config.isPresent() && !config.get().isEmpty()) { + bucketOption = Config.BUCKET_SIZE.get(config.get() + , o -> { + Integer bucketSize = ConversionUtils.convert(o, Integer.class); + return bucketSize.equals(256) ? 256 : 128; + } + ).orElse(bucketOption); + + checksumOption = Config.CHECKSUM.get(config.get() + , o -> { + Integer checksumBytes = ConversionUtils.convert(o, Integer.class); + return checksumBytes.equals(3) ? 3 : 1; + } + ).orElse(checksumOption); + + force = Config.FORCE.get(config.get() + , o -> ConversionUtils.convert(o, Boolean.class) + ).orElse(force); + + hashes = Config.HASHES.get(config.get() + , o -> { + List ret = new ArrayList<>(); + if (o instanceof List) { + List vals = (List) o; + for (Object oVal : vals) { + ret.add(ConversionUtils.convert(oVal, Integer.class)); + } + } else { + ret.add(ConversionUtils.convert(o, Integer.class)); + } + return ret; + } + ).orElse(hashes); + } } - return ret; - } - - @Override - public void configure(Optional> config) { - if (config.isPresent() && !config.get().isEmpty()) { - bucketOption = Config.BUCKET_SIZE.get(config.get() - , o -> { - Integer bucketSize = ConversionUtils.convert(o, Integer.class); - switch (bucketSize) { - case 128: - return BucketOption.BUCKETS_128; - case 256: - return BucketOption.BUCKETS_256; - default: - return null; - } - } - ).orElse(bucketOption); - - checksumOption = Config.CHECKSUM.get(config.get() - , o -> { - Integer checksumBytes= ConversionUtils.convert(o, Integer.class); - switch (checksumBytes) { - case 1: - return ChecksumOption.CHECKSUM_1B; - case 3: - return ChecksumOption.CHECKSUM_3B; - default: - return null; - } - - } - ).orElse(checksumOption); - - force = Config.FORCE.get(config.get() - , o -> ConversionUtils.convert(o, Boolean.class) - ).orElse(force); - - hashes = Config.HASHES.get(config.get() - , o -> { - List ret = new ArrayList<>(); - if(o instanceof List) { - List vals = (List)o; - for(Object oVal : vals) { - ret.add(ConversionUtils.convert(oVal, Integer.class)); - } - } - else { - ret.add(ConversionUtils.convert(o, Integer.class)); - } - return ret; - } - ).orElse(hashes); + public static Set supportedHashes() { + return new HashSet() {{ + add("TLSH"); + }}; } - } - - public static final Set supportedHashes() { - return new HashSet() {{ - add("TLSH"); - }}; - } } diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHScorer.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHScorer.java new file mode 100644 index 000000000..8d465e2ed --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHScorer.java @@ -0,0 +1,74 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + +import java.util.Arrays; + +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.DIFF_SCALE; + +public class TLSHScorer { + + private final BitPairsTable diffTable; + + public TLSHScorer() { + this.diffTable = new BitPairsTable(); + } + + public int score(TLSH tlsh1, TLSH tlsh2, boolean lenDiff) { + int score = 0; + + score += scoreChecksum(tlsh1.getChecksum(), tlsh2.getChecksum()); + if (lenDiff) { + score += scoreLValue(tlsh1.getlValue(), tlsh2.getlValue()); + } + score += scoreQ(tlsh1.getQ1Ratio(), tlsh2.getQ1Ratio()); + score += scoreQ(tlsh1.getQ2Ratio(), tlsh2.getQ2Ratio()); + score += scoreBuckets(tlsh1.getCodes(), tlsh2.getCodes()); + + return score; + } + + private int scoreBuckets(final int[] buckets1, final int[] buckets2) { + if (buckets1.length != buckets2.length) { + throw new IllegalArgumentException( + String.format("Number of body bytes differ %d != %d", buckets1.length, buckets2.length)); + } + + int diff = 0; + for (int i = 0; i < buckets1.length; i++) { + diff += this.diffTable.getValue(buckets1[i], buckets2[i]); + } + return diff; + } + + private int scoreChecksum(final int[] checksumA, final int[] checksumB) { + if (checksumA.length != checksumB.length) { + throw new IllegalArgumentException( + String.format("Number of checksum bytes differ %d != %d", checksumA.length, checksumB.length)); + } + return Arrays.equals(checksumA, checksumB) ? 0 : 1; + } + + private int scoreQ(final int q2, final int q3) { + final int q1diff = modDiff(q2, q3, 16); + + return q1diff <= 1 ? q1diff : (q1diff - 1) * DIFF_SCALE; + } + + private int scoreLValue(final int lValue2, final int lValue3) { + final int ldiff = modDiff(lValue2, lValue3, 256); + switch (ldiff) { + case 0: + return 0; + case 1: + return 1; + default: + return DIFF_SCALE * ldiff; + } + } + + private int modDiff(final int initialPosition, final int finalPosition, final int circularQueueSize) { + int internalDistance = Math.abs(finalPosition - initialPosition); + int externalDistance = circularQueueSize - internalDistance; + + return Math.min(internalDistance, externalDistance); + } +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHUtil.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHUtil.java new file mode 100644 index 000000000..56616202c --- /dev/null +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSHUtil.java @@ -0,0 +1,100 @@ +package org.apache.metron.stellar.common.utils.hashing.tlsh; + + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.stream.IntStream; + +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.HEX_ARRAY; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LEN_ADJ_2; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LEN_ADJ_3; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LEN_STEP_1; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LEN_STEP_2; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LOG_1_1; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LOG_1_3; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.LOG_1_5; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.PEARSON_TABLE; +import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHConstants.TOPVAL; +import java.io.ByteArrayOutputStream; + +public final class TLSHUtil { + + + private TLSHUtil() { + } + + public static int hash(final int salt, final int i, final int j, final int k) { + int res = 0; + res = PEARSON_TABLE[res ^ salt]; + res = PEARSON_TABLE[res ^ i]; + res = PEARSON_TABLE[res ^ j]; + res = PEARSON_TABLE[res ^ k]; + return res; + } + + public static int bucketMapping(final int salt, final int i, final int j, final int k) { + return PEARSON_TABLE[PEARSON_TABLE[PEARSON_TABLE[PEARSON_TABLE[salt] ^ i] ^ j] ^ k]; + } + + public static int fastBucketMapping(final int mod_salt, final int i, final int j, final int k) { + return PEARSON_TABLE[PEARSON_TABLE[PEARSON_TABLE[mod_salt ^ i] ^ j] ^ k]; + } + + /** + * Capture the log(length) in a single byte value. + * + * @param len the length + * @return the byte value + */ + public static int lCapturing(final int len) { + final int x = Arrays.binarySearch(TOPVAL, len); + return x >= 0 ? x : -x - 1; + } + + /** + * Capture the log(length) in a single byte value. + * + *

+ * Math.log based implementation. + * + * @param len the length + * @return the byte value + */ + public static int lCapturingLog(final int len) { + if (len <= 0) { + return 0; + } + double d = (float) Math.log((float) len); + if (len <= LEN_STEP_1) { + d = d / LOG_1_5; + } else if (len <= LEN_STEP_2) { + d = d / LOG_1_3 - LEN_ADJ_2; + } else { + d = d / LOG_1_1 - LEN_ADJ_3; + } + return Math.min((int) Math.floor(d), 255); + } + + public static String bytesToHex(byte[] bytes) { + byte[] hexChars = new byte[bytes.length * 2]; + for (int i = 0; i < bytes.length; i++) { + int unSignByte = bytes[i] & 0xFF; + hexChars[i * 2] = HEX_ARRAY[unSignByte >>> 4]; + hexChars[i * 2 + 1] = HEX_ARRAY[unSignByte & 0x0F]; + } + return new String(hexChars, StandardCharsets.UTF_8); + } + + public static byte[] hexToBytes(final CharSequence hex) { + final int len = hex.length(); + final byte[] data = new byte[len / 2]; + for (int i = 0; i < len; i += 2) { + data[i / 2] = (byte) ((Character.digit(hex.charAt(i), 16) << 4) + Character.digit(hex.charAt(i + 1), 16)); + } + return data; + } + + public static int swapNibble(final int x) { + return (x & 0x0F) << 4 | (x & 0xF0) >> 4; + } +} diff --git a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/dsl/functions/HashFunctions.java b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/dsl/functions/HashFunctions.java index cfa29e280..d8110c941 100644 --- a/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/dsl/functions/HashFunctions.java +++ b/flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/dsl/functions/HashFunctions.java @@ -21,7 +21,8 @@ import org.apache.metron.stellar.common.utils.ConversionUtils; import org.apache.metron.stellar.common.utils.hashing.HashStrategy; import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSH; -import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHHasher; +import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHBuilder; +import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHScorer; import org.apache.metron.stellar.dsl.BaseStellarFunction; import org.apache.metron.stellar.dsl.Stellar; @@ -33,114 +34,126 @@ public class HashFunctions { - @Stellar( - name = "GET_HASHES_AVAILABLE", - description = "Will return all available hashing algorithms available to 'HASH'.", - returns = "A list containing all supported hashing algorithms." - ) - public static class ListSupportedHashTypes extends BaseStellarFunction { + @Stellar( + name = "GET_HASHES_AVAILABLE", + description = "Will return all available hashing algorithms available to 'HASH'.", + returns = "A list containing all supported hashing algorithms." + ) + public static class ListSupportedHashTypes extends BaseStellarFunction { - @Override - public List apply(final List args) { - if (args == null || args.size() != 0) { - throw new IllegalArgumentException("Invalid call. This function does not expect any arguments."); - } + @Override + public List apply(final List args) { + if (args == null || args.size() != 0) { + throw new IllegalArgumentException("Invalid call. This function does not expect any arguments."); + } - List ret = new ArrayList<>(); - ret.addAll(HashStrategy.ALL_SUPPORTED_HASHES); - return ret; + List ret = new ArrayList<>(); + ret.addAll(HashStrategy.ALL_SUPPORTED_HASHES); + return ret; + } } - } - @Stellar( - name = "HASH", - description = "Hashes a given value using the given hashing algorithm and returns a hex encoded string.", - params = { - "toHash - value to hash.", - "hashType - A valid string representation of a hashing algorithm. See 'GET_HASHES_AVAILABLE'.", - "config? - Configuration for the hash function in the form of a String to object map.\n" - + " For forensic hash TLSH (see https://github.com/trendmicro/tlsh and Jonathan Oliver, Chun Cheng, and Yanggui Chen, TLSH - A Locality Sensitive Hash. 4th Cybercrime and Trustworthy Computing Workshop, Sydney, November 2013):\n" - + " - bucketSize : This defines the size of the hash created. Valid values are 128 (default) or 256 (the former results in a 70 character hash and latter results in 134 characters) \n" - + " - checksumBytes : This defines how many bytes are used to capture the checksum. Valid values are 1 (default) and 3\n" - + " - force : If true (the default) then a hash can be generated from as few as 50 bytes. If false, then at least 256 bytes are required. Insufficient variation or size in the bytes result in a null being returned.\n" - + " - hashes : You can compute a second hash for use in fuzzy clustering TLSH signatures. The number of hashes is the lever to adjust the size of those clusters and \"fuzzy\" the clusters are. If this is specified, then one or more bins are created based on the specified size and the function will return a Map containing the bins.\n" - + " For all other hashes:\n" - + " - charset : The character set to use (UTF8 is default). \n" - }, - returns = "A hex encoded string of a hashed value using the given algorithm. If 'hashType' is null " + - "then '00', padded to the necessary length, will be returned. If 'toHash' is not able to be hashed or " + - "'hashType' is null then null is returned." - ) - public static class Hash extends BaseStellarFunction { + @Stellar( + name = "HASH", + description = "Hashes a given value using the given hashing algorithm and returns a hex encoded string.", + params = { + "toHash - value to hash.", + "hashType - A valid string representation of a hashing algorithm. See 'GET_HASHES_AVAILABLE'.", + "config? - Configuration for the hash function in the form of a String to object map.\n" + + " For forensic hash TLSH (see https://github.com/trendmicro/tlsh and Jonathan Oliver, Chun Cheng, and Yanggui Chen, TLSH - A Locality Sensitive Hash. 4th Cybercrime and Trustworthy Computing Workshop, Sydney, November 2013):\n" + + " - bucketSize : This defines the size of the hash created. Valid values are 128 (default) or 256 (the former results in a 70 character hash and latter results in 134 characters) \n" + + " - checksumBytes : This defines how many bytes are used to capture the checksum. Valid values are 1 (default) and 3\n" + + " - force : If true (the default) then a hash can be generated from as few as 50 bytes. If false, then at least 256 bytes are required. Insufficient variation or size in the bytes result in a null being returned.\n" + + " - hashes : You can compute a second hash for use in fuzzy clustering TLSH signatures. The number of hashes is the lever to adjust the size of those clusters and \"fuzzy\" the clusters are. If this is specified, then one or more bins are created based on the specified size and the function will return a Map containing the bins.\n" + + " For all other hashes:\n" + + " - charset : The character set to use (UTF8 is default). \n" + }, + returns = "A hex encoded string of a hashed value using the given algorithm. If 'hashType' is null " + + "then '00', padded to the necessary length, will be returned. If 'toHash' is not able to be hashed or " + + "'hashType' is null then null is returned." + ) + public static class Hash extends BaseStellarFunction { - @Override - @SuppressWarnings("unchecked") - public Object apply(final List args) { - if (args == null || args.size() < 2) { - throw new IllegalArgumentException("Invalid number of arguments: " + (args == null ? 0 : args.size())); - } + @Override + @SuppressWarnings("unchecked") + public Object apply(final List args) { + if (args == null || args.size() < 2) { + throw new IllegalArgumentException("Invalid number of arguments: " + (args == null ? 0 : args.size())); + } - final Object toHash = args.get(0); - final Object hashType = args.get(1); - if (hashType == null) { - return null; - } + final Object toHash = args.get(0); + final Object hashType = args.get(1); + if (hashType == null) { + return null; + } - Map config = null; - if (args.size() > 2) { - Object configObj = args.get(2); - if (configObj instanceof Map && configObj != null) { - config = (Map)configObj; + Map config = null; + if (args.size() > 2) { + Object configObj = args.get(2); + if (configObj instanceof Map && configObj != null) { + config = (Map) configObj; + } + } + try { + return HashStrategy.getHasher(hashType.toString(), Optional.ofNullable(config)).getHash(toHash); + } catch (final EncoderException e) { + return null; + } catch (final NoSuchAlgorithmException e) { + throw new IllegalArgumentException("Invalid hash type: " + hashType.toString()); + } } - } - try { - return HashStrategy.getHasher(hashType.toString(), Optional.ofNullable(config)).getHash(toHash); - } catch (final EncoderException e) { - return null; - } catch (final NoSuchAlgorithmException e) { - throw new IllegalArgumentException("Invalid hash type: " + hashType.toString()); - } } - } - @Stellar( - name = "DIST", - namespace="TLSH", - params = { - "hash1 - The first TLSH hash", - "hash2 - The first TLSH hash", - "includeLength? - Include the length in the distance calculation or not?", - }, - description = "Will return the hamming distance between two TLSH hashes (note: must be computed with the same params). " + - "For more information, see https://github.com/trendmicro/tlsh and Jonathan Oliver, Chun Cheng, and Yanggui Chen, TLSH - A Locality Sensitive Hash. 4th Cybercrime and Trustworthy Computing Workshop, Sydney, November 2013. " + - "For a discussion of tradeoffs, see Table II on page 5 of https://github.com/trendmicro/tlsh/blob/master/TLSH_CTC_final.pdf", - returns = "An integer representing the distance between hash1 and hash2. The distance is roughly hamming distance, so 0 is very similar." - ) - public static class TlshDist extends BaseStellarFunction { + @Stellar( + name = "DIST", + namespace = "TLSH", + params = { + "hash1 - The first TLSH hash", + "hash2 - The first TLSH hash", + "includeLength? - Include the length in the distance calculation or not?", + }, + description = "Will return the hamming distance between two TLSH hashes (note: must be computed with the same params). " + + "For more information, see https://github.com/trendmicro/tlsh and Jonathan Oliver, Chun Cheng, and Yanggui Chen, TLSH - A Locality Sensitive Hash. 4th Cybercrime and Trustworthy Computing Workshop, Sydney, November 2013. " + + "For a discussion of tradeoffs, see Table II on page 5 of https://github.com/trendmicro/tlsh/blob/master/TLSH_CTC_final.pdf", + returns = "An integer representing the distance between hash1 and hash2. The distance is roughly hamming distance, so 0 is very similar." + ) + public static class TlshDist extends BaseStellarFunction { + + @Override + public Integer apply(final List args) { + if (args == null || args.size() < 2) { + throw new IllegalArgumentException("Invalid call. This function requires at least 2 arguments: the two TLSH hashes."); + } + Object h1Obj = args.get(0); + Object h2Obj = args.get(1); + if (h1Obj != null && !(h1Obj instanceof String)) { + throw new IllegalArgumentException(h1Obj + " must be strings"); + } + if (h2Obj != null && !(h2Obj instanceof String)) { + throw new IllegalArgumentException(h2Obj + " must be strings"); + } - @Override - public Integer apply(final List args) { - if (args == null || args.size() < 2) { - throw new IllegalArgumentException("Invalid call. This function requires at least 2 arguments: the two TLSH hashes."); - } - Object h1Obj = args.get(0); - Object h2Obj = args.get(1); - if(h1Obj != null && !(h1Obj instanceof String) ) { - throw new IllegalArgumentException(h1Obj + " must be strings"); - } - if(h2Obj != null && !(h2Obj instanceof String) ) { - throw new IllegalArgumentException(h2Obj + " must be strings"); - } + Optional includeLength = Optional.empty(); + if (args.size() > 2) { + Object includeLengthArg = args.get(2); + if (includeLengthArg != null) { + includeLength = Optional.ofNullable(ConversionUtils.convert(includeLengthArg, Boolean.class)); + } + } + if (h1Obj == null || h2Obj == null) { + return -1; + } - Optional includeLength = Optional.empty(); - if(args.size() > 2) { - Object includeLengthArg = args.get(2); - if(includeLengthArg != null) { - includeLength = Optional.ofNullable(ConversionUtils.convert(includeLengthArg, Boolean.class)); + if (h1Obj.equals(h2Obj)) { + return 0; + } + TLSHScorer scorer = new TLSHScorer(); + TLSHBuilder builder = new TLSHBuilder(); + TLSH tlsh1 = builder.fromHex(h1Obj.toString()); + builder.clean(); + TLSH tlsh2 = builder.fromHex(h2Obj.toString()); + return scorer.score(tlsh1, tlsh2, includeLength.orElse(false)); } - } - return TLSH.distance(h1Obj == null?null:h1Obj.toString(), h2Obj == null?null:h2Obj.toString(), includeLength); } - } } diff --git a/flink-cyber/flink-stellar/src/test/java/org/apache/metron/stellar/dsl/functions/HashFunctionsTest.java b/flink-cyber/flink-stellar/src/test/java/org/apache/metron/stellar/dsl/functions/HashFunctionsTest.java index b8212b823..4f3282da2 100644 --- a/flink-cyber/flink-stellar/src/test/java/org/apache/metron/stellar/dsl/functions/HashFunctionsTest.java +++ b/flink-cyber/flink-stellar/src/test/java/org/apache/metron/stellar/dsl/functions/HashFunctionsTest.java @@ -22,300 +22,427 @@ import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang.SerializationUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSH; +import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHBuilder; import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHHasher; +import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHScorer; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import java.io.File; +import java.io.IOException; import java.io.Serializable; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.Security; -import java.util.*; +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; import java.util.concurrent.ForkJoinPool; +import java.util.stream.Stream; import static org.apache.metron.stellar.common.utils.StellarProcessorUtils.run; import static org.junit.jupiter.api.Assertions.*; public class HashFunctionsTest { - static final Hex HEX = new Hex(StandardCharsets.UTF_8); - final HashFunctions.ListSupportedHashTypes listSupportedHashTypes = new HashFunctions.ListSupportedHashTypes(); - final HashFunctions.Hash hash = new HashFunctions.Hash(); - - @Test - public void nullArgumentsShouldFail() { - assertThrows(IllegalArgumentException.class, () -> listSupportedHashTypes.apply(null)); - } - - @Test - public void getSupportedHashAlgorithmsCalledWithParametersShouldFail() { - assertThrows(IllegalArgumentException.class, () -> listSupportedHashTypes.apply(Collections.singletonList("bogus"))); - } - - @Test - public void listSupportedHashTypesReturnsAtMinimumTheHashingAlgorithmsThatMustBeSupported() { - final List requiredAlgorithmsByJava = Arrays.asList("MD5", "SHA", "SHA-256"); // These are required for all Java platforms (see java.security.MessageDigest). Note: SHA is SHA-1 - final Collection supportedHashes = listSupportedHashTypes.apply(Collections.emptyList()); - requiredAlgorithmsByJava.forEach(a -> assertTrue(supportedHashes.contains(a))); - } - - @Test - public void nullArgumentListShouldThrowException() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(null)); - } - - @Test - public void emptyArgumentListShouldThrowException() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(Collections.emptyList())); - } - - @Test - public void singleArgumentListShouldThrowException() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(Collections.singletonList("some value."))); - } - - @Test - public void argumentListWithMoreThanTwoValuesShouldThrowException3() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("1", "2", "3"))); - } - - @Test - public void argumentListWithMoreThanTwoValuesShouldThrowException4() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("1", "2", "3", "4"))); - } - - @Test - public void invalidAlgorithmArgumentShouldThrowException() { - assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("value to hash", "invalidAlgorithm"))); - } - - @Test - public void invalidNullAlgorithmArgumentShouldReturnNull() { - assertNull(hash.apply(Arrays.asList("value to hash", null))); - } - - @Test - public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00() { - assertEquals(StringUtils.repeat('0', 32), hash.apply(Arrays.asList(null, "md5"))); - } - - @Test - public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00InDirectStellarCall() { - final String algorithm = "'md5'"; - final Map variables = new HashMap<>(); - variables.put("toHash", null); - - assertEquals(StringUtils.repeat('0', 32), run("HASH(toHash, " + algorithm + ")", variables)); - } - - @Test - public void allAlgorithmsForMessageDigestShouldBeAbleToHash() { - final String valueToHash = "My value to hash"; - final Set algorithms = Security.getAlgorithms("MessageDigest"); - - algorithms.forEach(algorithm -> { - try { + private static final Map fileCache = new HashMap<>(); + static final Hex HEX = new Hex(StandardCharsets.UTF_8); + final HashFunctions.ListSupportedHashTypes listSupportedHashTypes = new HashFunctions.ListSupportedHashTypes(); + final HashFunctions.Hash hash = new HashFunctions.Hash(); + + @Test + public void nullArgumentsShouldFail() { + assertThrows(IllegalArgumentException.class, () -> listSupportedHashTypes.apply(null)); + } + + @Test + public void getSupportedHashAlgorithmsCalledWithParametersShouldFail() { + assertThrows(IllegalArgumentException.class, () -> listSupportedHashTypes.apply(Collections.singletonList("bogus"))); + } + + @Test + public void listSupportedHashTypesReturnsAtMinimumTheHashingAlgorithmsThatMustBeSupported() { + final List requiredAlgorithmsByJava = Arrays.asList("MD5", "SHA-256"); // These are required for all Java platforms (see java.security.MessageDigest). Note: SHA is SHA-1 + final Collection supportedHashes = listSupportedHashTypes.apply(Collections.emptyList()); + requiredAlgorithmsByJava.forEach(a -> assertTrue(supportedHashes.contains(a))); + assertTrue(supportedHashes.contains("SHA") || supportedHashes.contains("SHA-1")); + } + + @Test + public void nullArgumentListShouldThrowException() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(null)); + } + + @Test + public void emptyArgumentListShouldThrowException() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(Collections.emptyList())); + } + + @Test + public void singleArgumentListShouldThrowException() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(Collections.singletonList("some value."))); + } + + @Test + public void argumentListWithMoreThanTwoValuesShouldThrowException3() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("1", "2", "3"))); + } + + @Test + public void argumentListWithMoreThanTwoValuesShouldThrowException4() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("1", "2", "3", "4"))); + } + + @Test + public void invalidAlgorithmArgumentShouldThrowException() { + assertThrows(IllegalArgumentException.class, () -> hash.apply(Arrays.asList("value to hash", "invalidAlgorithm"))); + } + + @Test + public void invalidNullAlgorithmArgumentShouldReturnNull() { + assertNull(hash.apply(Arrays.asList("value to hash", null))); + } + + @Test + public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00() { + assertEquals(StringUtils.repeat('0', 32), hash.apply(Arrays.asList(null, "md5"))); + } + + @Test + public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00InDirectStellarCall() { + final String algorithm = "'md5'"; + final Map variables = new HashMap<>(); + variables.put("toHash", null); + + assertEquals(StringUtils.repeat('0', 32), run("HASH(toHash, " + algorithm + ")", variables)); + } + + @Test + public void allAlgorithmsForMessageDigestShouldBeAbleToHash() { + final String valueToHash = "My value to hash"; + final Set algorithms = Security.getAlgorithms("MessageDigest"); + + algorithms.forEach(algorithm -> { + try { + final MessageDigest expected = MessageDigest.getInstance(algorithm); + expected.update(valueToHash.getBytes(StandardCharsets.UTF_8)); + + assertEquals(expectedHexString(expected), hash.apply(Arrays.asList(valueToHash, algorithm))); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void allAlgorithmsForMessageDigestShouldBeAbleToHashDirectStellarCall() { + final String valueToHash = "My value to hash"; + final Set algorithms = Security.getAlgorithms("MessageDigest"); + + algorithms.forEach(algorithm -> { + try { + final Object actual = run("HASH('" + valueToHash + "', '" + algorithm + "')", Collections.emptyMap()); + + final MessageDigest expected = MessageDigest.getInstance(algorithm); + expected.update(valueToHash.getBytes(StandardCharsets.UTF_8)); + + assertEquals(expectedHexString(expected), actual); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void nonStringValueThatIsSerializableHashesSuccessfully() throws Exception { + final String algorithm = "'md5'"; + final String valueToHash = "'My value to hash'"; + final Serializable input = (Serializable) Collections.singletonList(valueToHash); + + final MessageDigest expected = MessageDigest.getInstance(algorithm.replace("'", "")); + expected.update(SerializationUtils.serialize(input)); + + final Map variables = new HashMap<>(); + variables.put("toHash", input); + + assertEquals(expectedHexString(expected), run("HASH(toHash, " + algorithm + ")", variables)); + } + + @Test + public void callingHashFunctionsWithVariablesAsInputHashesSuccessfully() throws Exception { + final String algorithm = "md5"; + final String valueToHash = "'My value to hash'"; + final Serializable input = (Serializable) Collections.singletonList(valueToHash); + final MessageDigest expected = MessageDigest.getInstance(algorithm); - expected.update(valueToHash.getBytes(StandardCharsets.UTF_8)); + expected.update(SerializationUtils.serialize(input)); - assertEquals(expectedHexString(expected), hash.apply(Arrays.asList(valueToHash, algorithm))); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - }); - } + final Map variables = new HashMap<>(); + variables.put("toHash", input); + variables.put("hashType", algorithm); - @Test - public void allAlgorithmsForMessageDigestShouldBeAbleToHashDirectStellarCall() { - final String valueToHash = "My value to hash"; - final Set algorithms = Security.getAlgorithms("MessageDigest"); + assertEquals(expectedHexString(expected), run("HASH(toHash, hashType)", variables)); + } - algorithms.forEach(algorithm -> { - try { - final Object actual = run("HASH('" + valueToHash + "', '" + algorithm + "')", Collections.emptyMap()); + @Test + public void callingHashFunctionWhereOnlyHashTypeIsAVariableHashesSuccessfully() throws Exception { + final String algorithm = "md5"; + final String valueToHash = "'My value to hash'"; final MessageDigest expected = MessageDigest.getInstance(algorithm); - expected.update(valueToHash.getBytes(StandardCharsets.UTF_8)); - - assertEquals(expectedHexString(expected), actual); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - }); - } - - @Test - public void nonStringValueThatIsSerializableHashesSuccessfully() throws Exception { - final String algorithm = "'md5'"; - final String valueToHash = "'My value to hash'"; - final Serializable input = (Serializable) Collections.singletonList(valueToHash); - - final MessageDigest expected = MessageDigest.getInstance(algorithm.replace("'", "")); - expected.update(SerializationUtils.serialize(input)); - - final Map variables = new HashMap<>(); - variables.put("toHash", input); - - assertEquals(expectedHexString(expected), run("HASH(toHash, " + algorithm + ")", variables)); - } - - @Test - public void callingHashFunctionsWithVariablesAsInputHashesSuccessfully() throws Exception { - final String algorithm = "md5"; - final String valueToHash = "'My value to hash'"; - final Serializable input = (Serializable) Collections.singletonList(valueToHash); - - final MessageDigest expected = MessageDigest.getInstance(algorithm); - expected.update(SerializationUtils.serialize(input)); - - final Map variables = new HashMap<>(); - variables.put("toHash", input); - variables.put("hashType", algorithm); - - assertEquals(expectedHexString(expected), run("HASH(toHash, hashType)", variables)); - } - - @Test - public void callingHashFunctionWhereOnlyHashTypeIsAVariableHashesSuccessfully() throws Exception { - final String algorithm = "md5"; - final String valueToHash = "'My value to hash'"; - - final MessageDigest expected = MessageDigest.getInstance(algorithm); - expected.update(valueToHash.replace("'", "").getBytes(StandardCharsets.UTF_8)); - - final Map variables = new HashMap<>(); - variables.put("hashType", algorithm); - - assertEquals(expectedHexString(expected), run("HASH(" + valueToHash + ", hashType)", variables)); - } - - @Test - public void aNonNullNonSerializableObjectReturnsAValueOfNull() { - final Map variables = new HashMap<>(); - variables.put("toHash", new Object()); - assertNull(run("HASH(toHash, 'md5')", variables)); - } - - public static String TLSH_DATA = "The best documentation is the UNIX source. After all, this is what the " - + "system uses for documentation when it decides what to do next! The " - + "manuals paraphrase the source code, often having been written at " - + "different times and by different people than who wrote the code. " - + "Think of them as guidelines. Sometimes they are more like wishes... " - + "Nonetheless, it is all too common to turn to the source and find " - + "options and behaviors that are not documented in the manual. Sometimes " - + "you find options described in the manual that are unimplemented " - + "and ignored by the source."; - String TLSH_EXPECTED = "6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8"; - - @Test - public void tlsh_happyPath() { - final Map variables = new HashMap<>(); - - variables.put("toHash", TLSH_DATA); - variables.put("toHashBytes", TLSH_DATA.getBytes(StandardCharsets.UTF_8)); - //this value is pulled from a canonical example at https://github.com/idealista/tlsh#how-to-calculate-a-hash - assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'tlsh')", variables)); - assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'TLSH')", variables)); - assertEquals(TLSH_EXPECTED, run("HASH(toHashBytes, 'tlsh')", variables)); - } - - @Test - @SuppressWarnings("unchecked") - public void tlsh_multiBin() { - final Map variables = new HashMap<>(); - - variables.put("toHash", TLSH_DATA); - Map out = (Map)run("HASH(toHash, 'tlsh', { 'hashes' : [ 8, 16, 32 ]} )", variables); - - assertTrue(out.containsKey(TLSHHasher.TLSH_KEY)); - for(int h : ImmutableList.of(8, 16, 32)) { - assertTrue(out.containsKey(TLSHHasher.TLSH_BIN_KEY + "_" + h)); + expected.update(valueToHash.replace("'", "").getBytes(StandardCharsets.UTF_8)); + + final Map variables = new HashMap<>(); + variables.put("hashType", algorithm); + + assertEquals(expectedHexString(expected), run("HASH(" + valueToHash + ", hashType)", variables)); + } + + @Test + public void aNonNullNonSerializableObjectReturnsAValueOfNull() { + final Map variables = new HashMap<>(); + variables.put("toHash", new Object()); + assertNull(run("HASH(toHash, 'md5')", variables)); } - } - - - @Test - public void tlsh_multithread() { - //we want to ensure that everything is threadsafe, so we'll spin up some random data - //generate some hashes and then do it all in parallel and make sure it all matches. - Map>, String> hashes = new HashMap<>(); - Random r = new Random(0); - for(int i = 0;i < 20;++i) { - byte[] d = new byte[256]; - r.nextBytes(d); - Map config = new HashMap() - {{ - put(TLSHHasher.Config.BUCKET_SIZE.key, r.nextBoolean() ? 128 : 256); - put(TLSHHasher.Config.CHECKSUM.key, r.nextBoolean() ? 1 : 3); - }}; - String hash = (String)run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", d)); - assertNotNull(hash); - hashes.put(new AbstractMap.SimpleEntry<>(d, config), hash); + + public static String TLSH_DATA = "The best documentation is the UNIX source. After all, this is what the " + + "system uses for documentation when it decides what to do next! The " + + "manuals paraphrase the source code, often having been written at " + + "different times and by different people than who wrote the code. " + + "Think of them as guidelines. Sometimes they are more like wishes... " + + "Nonetheless, it is all too common to turn to the source and find " + + "options and behaviors that are not documented in the manual. Sometimes " + + "you find options described in the manual that are unimplemented " + + "and ignored by the source."; + String TLSH_EXPECTED = "6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8"; + + @Test + public void tlsh_happyPath() { + final Map variables = new HashMap<>(); + + variables.put("toHash", TLSH_DATA); + variables.put("toHashBytes", TLSH_DATA.getBytes(StandardCharsets.UTF_8)); + //this value is pulled from a canonical example at https://github.com/idealista/tlsh#how-to-calculate-a-hash + assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'tlsh')", variables)); + assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'TLSH')", variables)); + assertEquals(TLSH_EXPECTED, run("HASH(toHashBytes, 'tlsh')", variables)); + } + + @Test + @SuppressWarnings("unchecked") + public void tlsh_multiBin() { + final Map variables = new HashMap<>(); + + variables.put("toHash", TLSH_DATA); + Map out = (Map) run("HASH(toHash, 'tlsh', { 'hashes' : [ 8, 16, 32 ]} )", variables); + + assertTrue(out.containsKey(TLSHHasher.TLSH_KEY)); + for (int h : ImmutableList.of(8, 16, 32)) { + assertTrue(out.containsKey(TLSHHasher.TLSH_BIN_KEY + "_" + h)); + } + } + + + @Test + public void tlsh_multithread() { + //we want to ensure that everything is threadsafe, so we'll spin up some random data + //generate some hashes and then do it all in parallel and make sure it all matches. + Map>, String> hashes = new HashMap<>(); + Random r = new Random(0); + for (int i = 0; i < 20; ++i) { + byte[] d = new byte[256]; + r.nextBytes(d); + Map config = new HashMap() {{ + put(TLSHHasher.Config.BUCKET_SIZE.key, r.nextBoolean() ? 128 : 256); + put(TLSHHasher.Config.CHECKSUM.key, r.nextBoolean() ? 1 : 3); + }}; + String hash = (String) run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", d)); + assertNotNull(hash); + hashes.put(new AbstractMap.SimpleEntry<>(d, config), hash); + } + ForkJoinPool forkJoinPool = new ForkJoinPool(5); + + forkJoinPool.submit(() -> + hashes.entrySet().parallelStream().forEach( + kv -> { + Map config = kv.getKey().getValue(); + byte[] data = kv.getKey().getKey(); + String hash = (String) run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", data)); + assertEquals(hash, kv.getValue()); + } + ) + ); } - ForkJoinPool forkJoinPool = new ForkJoinPool(5); - - forkJoinPool.submit(() -> - hashes.entrySet().parallelStream().forEach( - kv -> { - Map config = kv.getKey().getValue(); - byte[] data = kv.getKey().getKey(); - String hash = (String)run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", data)); - assertEquals(hash, kv.getValue()); - } - ) - ); - } - - @Test - @SuppressWarnings("unchecked") - public void tlsh_similarity() { - for(Map.Entry kv : ImmutableMap.of("been", "ben", "document", "dokumant", "code", "cad").entrySet()) { - Map variables = ImmutableMap.of("toHash", TLSH_DATA, "toHashSimilar", TLSH_DATA.replace(kv.getKey(), kv.getValue())); - Map bin1 = (Map) run("HASH(toHashSimilar, 'tlsh', { 'hashes' : 4, 'bucketSize' : 128 })", variables); - Map bin2 = (Map) run("HASH(toHash, 'tlsh', { 'hashes' : [ 4 ], 'bucketSize' : 128 })", variables); - assertEquals(bin1.get("tlsh_bin"), bin2.get("tlsh_bin"), kv.getKey() + " != " + kv.getValue() + " because " + bin1.get("tlsh") + " != " + bin2.get("tlsh")); - assertNotEquals(bin1.get("tlsh"), bin2.get("tlsh")); - Map distVariables = ImmutableMap.of("hash1", bin1.get(TLSHHasher.TLSH_KEY), "hash2", bin2.get(TLSHHasher.TLSH_KEY)); - { - //ensure the diff is minimal - Integer diff = (Integer) run("TLSH_DIST( hash1, hash2)", distVariables); - Integer diffReflexive = (Integer) run("TLSH_DIST( hash2, hash1)", distVariables); - assertTrue(diff < 100, "diff == " + diff); - assertEquals(diff, diffReflexive); - } - - { - //ensure that d(x,x) == 0 - Integer diff = (Integer) run("TLSH_DIST( hash1, hash1)", distVariables); - assertEquals((int)0, (int)diff); - } + + @Test + @SuppressWarnings("unchecked") + public void tlsh_similarity() { + for (Map.Entry kv : ImmutableMap.of("been", "ben", "document", "dokumant", "code", "cad").entrySet()) { + Map variables = ImmutableMap.of("toHash", TLSH_DATA, "toHashSimilar", TLSH_DATA.replace(kv.getKey(), kv.getValue())); + Map bin1 = (Map) run("HASH(toHashSimilar, 'tlsh', { 'hashes' : 4, 'bucketSize' : 128 })", variables); + Map bin2 = (Map) run("HASH(toHash, 'tlsh', { 'hashes' : [ 4 ], 'bucketSize' : 128 })", variables); + assertEquals(bin1.get("tlsh_bin"), bin2.get("tlsh_bin"), kv.getKey() + " != " + kv.getValue() + " because " + bin1.get("tlsh") + " != " + bin2.get("tlsh")); + assertNotEquals(bin1.get("tlsh"), bin2.get("tlsh")); + Map distVariables = ImmutableMap.of("hash1", bin1.get(TLSHHasher.TLSH_KEY), "hash2", bin2.get(TLSHHasher.TLSH_KEY)); + { + //ensure the diff is minimal + Integer diff = (Integer) run("TLSH_DIST( hash1, hash2)", distVariables); + Integer diffReflexive = (Integer) run("TLSH_DIST( hash2, hash1)", distVariables); + assertTrue(diff < 100, "diff == " + diff); + assertEquals(diff, diffReflexive); + } + + { + //ensure that d(x,x) == 0 + Integer diff = (Integer) run("TLSH_DIST( hash1, hash1)", distVariables); + assertEquals((int) 0, (int) diff); + } + } + } + + + public static Stream tlshHashFromHexParams() { + return Stream.of( + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_1, + TLSHBuilder.BUCKET_OPTION.BUCKET_128, + "DD6000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F87000008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + 166, 165 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_1, + TLSHBuilder.BUCKET_OPTION.BUCKET_256, + "DD6000C300F000030003003FC00000000000C003000000CC000030033000C000030000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F87000200B0E0880008200A2800080C00000080000220222020080AC0280A0C0A2008A008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + 332, 331 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_3, + TLSHBuilder.BUCKET_OPTION.BUCKET_128, + "DDB56E6000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F861367000008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + 166, 165 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_3, + TLSHBuilder.BUCKET_OPTION.BUCKET_256, + "DDB56E6000C300F000030003003FC00000000000C003000000CC000030033000C000030000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F861367000200B0E0880008200A2800080C00000080000220222020080AC0280A0C0A2008A008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + 332, 331 + ) + ); + } + + @ParameterizedTest + @MethodSource("tlshHashFromHexParams") + public void testTLSHHexDistance(TLSHBuilder.CHECKSUM_OPTION checksumOption, TLSHBuilder.BUCKET_OPTION bucketOption, + String hashHex1, String hashHex2, int expectedScore1, int expectedScore2) { + TLSHBuilder builder = new TLSHBuilder(checksumOption, bucketOption); + TLSH tlsh1 = builder.fromHex(hashHex1); + TLSH tlsh2 = builder.fromHex(hashHex2); + TLSHScorer scorer = new TLSHScorer(); + + Assertions.assertEquals(0, scorer.score(tlsh1, tlsh1, true)); + Assertions.assertEquals(expectedScore1, scorer.score(tlsh1, tlsh2, true)); + Assertions.assertEquals(expectedScore2, scorer.score(tlsh1, tlsh2, false)); + } + + public static Stream tlshHashFromFileParams() { + return Stream.of( + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_1, + TLSHBuilder.BUCKET_OPTION.BUCKET_128, + "DD6000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F87000008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + "45D18407A78523B35A030267671FA2C2F725402973629B25545EB43C3356679477F7FC", + 165, 137 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_1, + TLSHBuilder.BUCKET_OPTION.BUCKET_256, + "DD6000C300F000030003003FC00000000000C003000000CC000030033000C000030000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F87000200B0E0880008200A2800080C00000080000220222020080AC0280A0C0A2008A008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + "45D1A40CE601EFD21E62648F2A9554F0E199E9B01B84213B6BE0DB5E2DA71FA898DFEB07A78123B35A030227671FA2C2F725402973629B25545EB43C3312679477F3FC", + 331, 206 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_3, + TLSHBuilder.BUCKET_OPTION.BUCKET_128, + "DDB56E6000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F861367000008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + "4513E4D18407A78523B35A030267671FA2C2F725402973629B25545EB43C3356679477F7FC", + 165, 137 + ), + Arguments.of(TLSHBuilder.CHECKSUM_OPTION.CHECKSUM_3, + TLSHBuilder.BUCKET_OPTION.BUCKET_256, + "DDB56E6000C300F000030003003FC00000000000C003000000CC000030033000C000030000030030000C000000000C300CC00000C000030000000000F00030F0C00300CCC0", + "F861367000200B0E0880008200A2800080C00000080000220222020080AC0280A0C0A2008A008008000822B80080002C82A000808002800C003020000B2830202008A83A22", + "4513E4D1A40CE601EFD21E62648F2A9554F0E199E9B01B84213B6BE0DB5E2DA71FA898DFEB07A78123B35A030227671FA2C2F725402973629B25545EB43C3312679477F3FC", + 331, 206 + ) + ); + } + + @ParameterizedTest + @MethodSource("tlshHashFromFileParams") + public void testTLSHHashFromFile(TLSHBuilder.CHECKSUM_OPTION checksumOption, TLSHBuilder.BUCKET_OPTION bucketOption, + String expectedHash1, String expectedHash2, String expectedFileHash, int expectedScore, int expectedScoreFile) { + byte[] fileBytes = getFileBytes(new File("src/test/resources/0Alice.txt")); + byte[] file2Bytes = getFileBytes(new File("src/test/resources/website_course_descriptors06-07.txt")); + TLSHBuilder builder = new TLSHBuilder(checksumOption, bucketOption); + TLSHScorer scorer = new TLSHScorer(); + + TLSH tlsh1 = builder.getTLSH("Hello world!".getBytes()); + builder.clean(); + TLSH tlsh2 = builder.getTLSH("Goodbye Cruel World".getBytes()); + builder.clean(); + TLSH tlsh3 = builder.getTLSH(fileBytes); + builder.clean(); + TLSH tlsh4 = builder.getTLSH(file2Bytes); + final int score = scorer.score(tlsh1, tlsh2, false); + final int scoreFile = scorer.score(tlsh3, tlsh4, true); + + assertEquals(expectedHash1, tlsh1.getHash()); + assertEquals(expectedHash2, tlsh2.getHash()); + assertEquals(expectedFileHash, tlsh3.getHash()); + assertEquals(expectedScore, score); + assertEquals(expectedScoreFile, scoreFile); + } + + @Test + public void tlshDist_invalidInput() { + final Map variables = new HashMap<>(); + variables.put("hash1", 1); + variables.put("hash2", TLSH_EXPECTED); + assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1)", variables)); + assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1, { 'checksumBytes' : 1, 'bucketSize' : 128 })", variables)); + assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1, { 'checksumBytes' : 1, 'bucketSize' : 256 })", variables)); + assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1, { 'checksumBytes' : 3, 'bucketSize' : 128 })", variables)); + assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1, { 'checksumBytes' : 3, 'bucketSize' : 256 })", variables)); + } + + private String expectedHexString(MessageDigest expected) { + return new String(HEX.encode(expected.digest()), StandardCharsets.UTF_8); + } + + public static byte[] getFileBytes(File exampleFile) { + // Would be nice to use Map.computeIfAbsent but that requires 1.8-level + // source compatibility + byte[] bytes = fileCache.get(exampleFile); + if (bytes == null) { + try { + bytes = Files.readAllBytes(exampleFile.toPath()); + } catch (IOException e) { + throw new RuntimeException("Cannot read file " + exampleFile, e); + } + fileCache.put(exampleFile, bytes); + + } + return bytes; } - } - - @Test - public void tlshDist_invalidInput() { - final Map variables = new HashMap<>(); - variables.put("hash1", 1); - variables.put("hash2", TLSH_EXPECTED); - assertThrows(Exception.class, () -> run("TLSH_DIST( hash1, hash1)", variables)); - } - - @Test - public void tlsh_insufficientComplexity() { - final Map variables = new HashMap<>(); - String data = "Metron is the best"; - variables.put("toHash", data); - assertNull(run("HASH(toHash, 'tlsh')", variables)); - } - - @Test - public void tlsh_nullInput() { - final Map variables = new HashMap<>(); - String data = null; - variables.put("toHash", data); - assertNull(run("HASH(toHash, 'tlsh')", variables)); - } - - private String expectedHexString(MessageDigest expected) { - return new String(HEX.encode(expected.digest()), StandardCharsets.UTF_8); - } } diff --git a/flink-cyber/flink-stellar/src/test/resources/0Alice.txt b/flink-cyber/flink-stellar/src/test/resources/0Alice.txt new file mode 100644 index 000000000..e704a2931 --- /dev/null +++ b/flink-cyber/flink-stellar/src/test/resources/0Alice.txt @@ -0,0 +1,86 @@ + + +London Children’s Film Festival 2006: Teachers’ Resource +Alice in Wonderland (Alice au pays des merveilles) (7+) + +Introduction + +Alice in Wonderland has been adapted many times for the big screen, and this version, made in the late 1940s is little known, largely as a result of having been eclipsed on its release by the now far better-known Disney version. While the colour may now have faded, this imaginative combination of real actors, 3D puppet animation and some avant-garde set design in Wonderland is still as strange and charming as ever. + +Closer to Carroll’s words and Tenniel’s original illustrations than any other version, characters in this adaptation also sing in several scenes. Also, like The Wizard of Oz, reality and dream overlap as some of the people in Alice’s world reappear as inhabitants of Wonderland. + +Synopsis + +In the gardens of an Oxford College in Victorian England, Alice and her two older sisters are playing fancy dress with one of the young tutors, Mr Dodgson. Soon afterwards, the college prepares for an important visitor, Queen Victoria. When she arrives she is very haughty but also very interested in Mr Dodgson, who she knows to write under the name ‘Lewis Carroll’. + +To get away from the grown ups, Mr Dodgson takes the girls on a boat trip down the river, and as he rows he starts to make up a story about a little girl called Alice. The story tells of how one day, bored as she sits while her older sister reads, she notices a rabbit, fully dressed, dashing past and worrying about being late. Curious, she follows him down a rabbit hole and finds herself in… Wonderland! + +Audiences + +Alice in Wonderland is suitable for KS2 pupils and contains links to Literacy, Art & Design and History. + +Curriculum Links and Suggested Classroom Activities + +Literacy +Before the screening +* Discuss in groups what is known of the original text, mapping the main events and discussing the main characters. What might be difficult about adapting the book for the screen? Think in particular about what is ‘real’ and what is ‘imaginary’, a central theme of the book. + +* Read a chapter of the original text. Investigate it for word play, riddles, rhymes and ‘nonsense’. In Wonderland, where language is turned upside down, anything is possible. Look at the text of the Mad Hatter’s Tea Party in Chapter 7. Write a ‘normal’ dialogue between two characters and turn it into nonsense, thinking carefully about how meaning changes with structure, vocabulary and intent. + +After the screening +* Compare the film adaptation to the original text, in story, characters and themes. Compare it to other film adaptations. + +* Take no more than two pages from the original text and adapt it into a screenplay to be acted out. In screenplays, the characters’ names are in capitals, the text is centred on the page, there are no speech marks and minimal direction for the actors. + +* Compose a nonsense poem or simple rhyme as a shape poem to fit an outline of one of the characters, either about them or from their point of view. + +* Did children spot any of the actors ‘reappearing’ as voices of the puppet characters in Wonderland? What did these fantasy characters have in common with their real life counterparts? Identify the common characteristics of animals and each child design an animal character to resemble themselves and write a description of them (clothing, habits, interests, favourite food). + +Art & Design +Model animation +* The inhabitants of Wonderland are all puppets in this film, filmed with stop-motion animation like the Wallace and Gromit films. The figures are photographed one frame at a time, with tiny movements made in between. When the film is played, the figures seem to move of their own accord. Compare Tenniels’ illustrations in the original publication with the puppets and actors in the film. What has been changed? Do children prefer one or the other, and why? Children can design their own 3D models of the characters. + +The special effects +* In Wonderland, by combining the actress who played Alice with oversize props, as with the ‘Drink Me’ bottle, the filmmakers created the illusion is that she had actually shrunk to a few centimetres tall. Then, putting her among tiny props, as in The White Rabbit’s house, she appears to grow to giant size. Pupils could design and create different size props (tiny chair, huge chair) and explore in-camera special effects, making a short animated film using stop-frame animation with a digital camera in which characters appear to grow and shrink like Alice. + +History +* Discuss the Victorian setting of the film. How do the costumes, horse-drawn carriages, behaviour of characters and other elements in the film compare with what is known of life in Victorian England? Find images of these examples and compare them with images from the film from the website listed in ‘Weblinks’, below. + +* The film was made in the late 1940s. Which parts of the set design seem ‘modern’, and which seem more traditionally Victorian? Why do you think the filmmakers decided to include such diverse looking sets – how do they help us to understand the story? Think about the room full of doors, the Duchess’s kitchen, the formal gardens, the tea party – how do they compare to what we know from images of Victorian England? + + + +Weblinks + +Alice in Wonderland: Film and TV productions over the years +A detailed overview of this film adaptation, including colour stills and information on the production. +http://www.alice-in-wonderland.fsnet.co.uk/film_tv_marsh.htm + +The Lewis Carroll Society +An academic site, useful for dates and details about the life of the author and his work. +www.lewiscarrollsociety.org.uk + +Film Street website with more information about animation +http://www.filmstreet.co.uk + +We suggest that teachers and parents check the suitability of recommended books, films and other media for the children in their care before use. London Children’s Film Festival cannot be responsible for the content of any recommended media, including websites. + +Film Details + +Country UK / France 1951 +Director Dallas Bower +Running time 83 min +Language English + +  + +  + + + + + + + + © Barbican Education 2006 + diff --git a/flink-cyber/flink-stellar/src/test/resources/website_course_descriptors06-07.txt b/flink-cyber/flink-stellar/src/test/resources/website_course_descriptors06-07.txt new file mode 100644 index 000000000..72b46925a --- /dev/null +++ b/flink-cyber/flink-stellar/src/test/resources/website_course_descriptors06-07.txt @@ -0,0 +1,130 @@ +Generic Courses +Courses running to July 2007 + +Good Practice and Child Protection (3 Hours) +Protect yourself, the young people you are coaching and your employer by understanding and following good coaching practice. Learn about child abuse and how to handle situations if you have concerns. +This workshop will help you, the coach to: +* Identify good coaching practice to promote a positive relationship with children +* Identify sport situations and coaching practice that might constitute either poor practice or possible abuse +* Identify ways of dealing with your own feelings about child abuse and state what constitutes neglect, physical, sexual and emotional abuse +* Recognise the signs and symptoms of abuse and appreciate why reporting it is often so difficult +* Identity appropriate action if a child discloses he/she has been abused +* Identity appropriate action if abuse is suspected and explain the role and responsibilities of other experts (e.g. police, social services) +* Describe appropriate practice that reduces the likelihood of abuse occurring + +Equity in Your Coaching (3 Hours) +Everyone should have access to sport, and as a coach, you have an important role to play in ensuring this happens. This workshop will help you to apply and extend your existing skills to meet the needs of present and potential participants. +This workshop will help you, the coach to: +* Explain what equity means and why it is important +* Identify barriers to participation +* Use appropriate language and terminology +* Identify and challenge inequitable behaviour +* Interpret the legal framework that affects coaching +* Identify how they can become more equitable +* Establish where to go for further information + +How to Coach Disabled People in Sport (2 Hours) +This workshop tackles all the frequently asked questions posed by sports teachers, coaches and participants about how to work with disabled sports people. This includes a whole spectrum of new ideas for inclusion, the workshop will introduce and offer guidance to any coach involved with disabled people in sport, the emphasis being to introduce coaches to the Inclusion Spectrum and effective practice. +At the end of the workshop, coaches will be able to: +* Determine how to include disabled people in sport +* Select appropriate coaching activities +* Create effective coaching environments + +Emergency Aid (3 Hours) +Having an up to date Emergency Aid certificate is a must for any coach. Therefore, if your certificate has just expired or you need to get some basic emergency aid training then book onto this course. +The tutor will run through and give you professional guidance on the following areas: +* Choking and heart attacks +* Cuts and bruises +* Sprains +* Head injuries +* C.P.R +This is a certificated course. Limited spaces available. + +Appointed Persons 1st Aid (8 Hours) +This in an extensive overview of first aid practices for sport. It covers the above, but in more detail. Limited spaces available. + +* Awareness of personal hygiene/use of gloves/looking after equipment +* Management of the first aid scene/ what to do in an emergency +* Calling the emergency services +* Casualty assessment - primary/ secondary +* Management of the unconscious casualty +* Managing a casualty who is not breathing +* Managing a casualty who has no pulse +* Managing a casualty who is wounded/bleeding/choking +* Managing a casualty who is having a heart attack +* Treating shock +* Miscellaneous injuries and conditions (depending on local conditions) +* Understanding the duties of the appointed person +* Know and be able to use the contents of a first aid kit +* Maintain simple factual records, & recognise the importance of personal hygiene +* Due to the short nature of the course some subjects may not be covered in any depth. + +Injury Prevention and Management (3 Hours) +Learn about why injuries occur and how to prevent them. Find out the best way to deal with them if they do happen and how to help players return to training quickly but safely. +This workshop will help you, the coach to: +* Use appropriate strategies to reduce the likelihood of injury +* Assess the severity of an injury or accident and respond appropriately +* Know how to deal with soft tissue injuries +* Assess if a player is ready to resume training or competition + +Coach Better Session 1 (3 Hours - Theory) +* Understand the difference between the science of coaching, and the art of coaching +* Explain what sets a ‘high quality’ coach apart from the rest +* Determine what type of coach you are +* Decide how YOU can best improve as a coach +* Create your own ‘learning environment’ around you +* Feel supported in your bid to become a better coach + +Coach Better Session 2 (3 Hours - Practical) +* Put Session 1 into practice +* Learn to review every session you deliver +* Save time when preparing and planning sessions +* Make the most of feedback you receive +* Learn lots of games and tricks from your coaching colleagues + Learn by experience, and have FUN… + + + + + +An Introduction to Long Term Athlete Development (3 hours) +This theory–based workshop is aimed at coaches to enable them to understand the key concepts of LTAD and what it means to them and their practice. + +This workshop can be tailored to suit the needs of the sports represented. +By the end of this workshop coaches will be able to +* Identify and recognise the reasons for adopting LTAD +* Identify and recognise the concepts and key principles of LTAD +* Recognise and respond to the implication for coaches and coaching +* Identify appropriate action to integrate LTAD into their coaching + +The FUNdamentals of Movement (3 hours - Practical) +This practical workshop explores the concepts of Agility, Balance, Co-ordination and speed. It compliments other coach education resources which may focus on the movement skills of running, hopping, skipping etc. This workshop assists coaches to observe, analyse and coach good movement patterns within their sessions whether multi skill or sport specific. + +A pre-requisite for this workshop is that coaches should have an understanding of the Long Term Athlete Development Model and associated player pathway within their sport. + +By the end of this workshop coaches should be familiar with the following concepts: +* Agility - dynamic stability, starting and stopping, momentum, acceleration, rhythm +* Balance - establishing a stable core, static stability, exploring centre gravity and base of support +* Coordination - related to disassociating body parts and the coordination of explosive actions including jumping, throwing, striking and kicking + +Speed Agility and Quickness (S.A.Q) Taster Session (2 hours) £15 + +Once thought to be genetic, we now know that the skills required to move laterally, linearly and vertically with speed and precision can be trained and nurtured. Agility is the ability to change direction without the loss of balance, strength, speed or body control, it is fundamental for improved performance in athletes of all standards. Often neglected in traditional training sessions, SAQ Programmes emphasise the importance of agility training to equip athletes with the best methods and techniques for greater quickness, speed, control and movement. Agility training has many other benefits for the athlete including a reduced risk of injury and improved body awareness. + + +Running Sports Courses +Funding Your Club (3 Hours) +Every sports club could use more money. If you need to know how to raise cash more effectively for your club, then this course will provide you with practical knowledge about generating funds from all kinds of sources. If you need to get more club members, generate more publicity to attract or keep sponsors or promote a specific event for spectators or participants then this is the perfect workshop for you. + + + + +A Club For All (3 Hours) +Do you want to make your club as equitable as possible? If so this workshop is perfect for introducing you to the best equitable practices in running a sports club. By the end of the workshop you will be able to define what sports equity means, list the benefits of sports equity for your club, identify equitable and inequitable practice, develop a basic action plan to address equity in your sports club and identify the organisations you can turn to for further advice and guidance on sports equity + + +For a more extensive list of the range of courses that can be offered as part of the generic programme please visit www.sportscoachuk.org + +For more information or to register your interest in a course not currently available, contact Sophie Barratt, Partnership Coaching Development Officer:- +Tel 01962845020 or email sophie.barratt@hants.gov.uk +