Skip to content

Commit

Permalink
feat: scrub substring values with their hash (#44)
Browse files Browse the repository at this point in the history
* feat: scrub mac address from objects

* chore: define the SQL scrubber

* chore: make scrubber generic

* chore: fix some sql scrubber thingies
  • Loading branch information
mrnagydavid authored Jul 19, 2024
1 parent c94b67d commit ae87afa
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 0 deletions.
50 changes: 50 additions & 0 deletions src/scrubber.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -430,3 +430,53 @@ test('getScrubberSql', () => {
expect(scrubber.getScrubberSql('pw')).toMatchInlineSnapshot(`"'notsecret'"`)
expect(scrubber.getScrubberSql('name')).toMatchInlineSnapshot(`"'Jane Doe'"`)
})

// End-to-end check of `saltedHashSubstringScrubber` when it is selected by name
// through the `scrub` configuration rather than called directly.
//
// What the snapshot below pins down:
// - both fields listed in the comma-separated key 'id,foo' are scrubbed;
// - only the two-digit match is replaced, surrounding characters ('ab', 'cd') stay;
// - the same matched digits yield the same hash wherever they occur
//   ('03' inside 'ab03cd' and the standalone '03' share one digest);
// - every match in a multi-line value is replaced, line breaks are kept.
test('saltedHashSubstringScrubber should scrub substring values', () => {
const data = {
Data: [
{ id: '01' },
{ id: 'ab02cd' },
{ id: 'ab03cd', foo: '03' },
{ id: 'ab04cd', foo: 'ab04cd' },
{ id: '01\n02\n03' },
],
}

const result = scrub(data, {
fields: {
'id,foo': {
scrubber: 'saltedHashSubstringScrubber',
params: {
// NOTE(review): `regex` is given as a one-element array here, while
// SaltedHashSubstringScrubberParams declares it as a string; presumably this
// only works because `new RegExp(...)` stringifies the array - confirm intent.
regex: ['\\d\\d'],
// Fixed salt, so the digests in the snapshot are reproducible.
initializationVector: 'initializationVector',
},
},
},
})

expect(result).toMatchInlineSnapshot(`
{
"Data": [
{
"id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9",
},
{
"id": "ab5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381acd",
},
{
"foo": "bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622",
"id": "abbb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622cd",
},
{
"foo": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd",
"id": "ab67fe825923d446fa7cd7711e66345232ab15a4bdc1cc9590b975353be70ad616cd",
},
{
"id": "42bb960e91b4abf82bd6bdcc8e49cb405678ba5655a1cdc0210a4089cf2980f9
5365d6a9320a362fe52dbd54a20bc58eaa775d548e20dccf58d761882201381a
bb722ef61aa727e4a61aab72132badd39388204e9c6d8653c90a313a581bd622",
},
],
}
`)
})
77 changes: 77 additions & 0 deletions src/scrubbers.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { _stringMapEntries } from '@naturalcycles/js-lib'
import { nanoid } from '@naturalcycles/nodejs-lib'
import {
bcryptStringScrubber,
Expand All @@ -7,6 +8,7 @@ import {
charsFromRightScrubberSQL,
isoDateStringScrubber,
isoDateStringScrubberSQL,
saltedHashSubstringScrubber,
preserveOriginalScrubber,
preserveOriginalScrubberSQL,
randomEmailInContentScrubber,
Expand All @@ -25,6 +27,8 @@ import {
undefinedScrubberSQL,
unixTimestampScrubber,
unixTimestampScrubberSQL,
defaultScrubbers,
defaultScrubbersSQL,
} from './scrubbers'

const bryptStr1 = '$2a$12$HYNzBb8XYOZZeRwZDiVux.orKNqkSVAoXBDc9Gw7nSxr8rcZupbRK'
Expand Down Expand Up @@ -434,3 +438,76 @@ test('bcryptStringScrubberSQL', () => {
bcryptStringScrubberSQL({ replacements: '$2a$10$:$2a$10$456,$2a$12$:$2a$12$123' }),
).toMatchSnapshot()
})

// Unit tests for calling `saltedHashSubstringScrubber` directly.
describe('saltedHashSubstringScrubber', () => {
  // Random salt per run: the assertions below check the shape of the output
  // (a 64-char hex digest in place of the match), never a concrete digest value.
  const initializationVector = nanoid()

  test('should scrub the matching substring with a hash', () => {
    const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
      regex: '00:00:00:00:00:00',
      initializationVector,
    })

    expect(result).toMatch(/foo\|.{64}\|bar/)
    expect(result).not.toContain('00:00:00:00:00:00')
  })

  test('should scrub the same value with the same hash', () => {
    const result1 = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
      regex: '00:00:00:00:00:00',
      initializationVector,
    })

    const result2 = saltedHashSubstringScrubber('bee|00:00:00:00:00:00|boo', {
      regex: '00:00:00:00:00:00',
      initializationVector,
    })

    // Both prefixes ('foo|' and 'bee|') are 4 characters long and a hex-encoded
    // SHA-256 digest is 64 characters, so the digest occupies [4, 68).
    // Comparing only up to index 64 would leave the last 4 hash characters unchecked.
    const hash1 = result1?.substring(4, 68)
    const hash2 = result2?.substring(4, 68)

    expect(hash1).toMatch(/^[0-9a-f]{64}$/)
    expect(hash1).toBe(hash2)
  })

  test('should scrub substring using regex', () => {
    const result = saltedHashSubstringScrubber('foo|00:00:00:00:00:00|bar', {
      regex:
        '[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}:[0-9a-zA-Z]{2}',
      initializationVector,
    })

    expect(result).toMatch(/foo\|.{64}\|bar/)
    expect(result).not.toContain('00:00:00:00:00:00')
  })

  test('should scrub multiple occurrences', () => {
    const result = saltedHashSubstringScrubber('foo|max|bar|max|boo', {
      regex: 'max',
      initializationVector,
    })

    expect(result).not.toContain('max')
    // Absence of 'max' alone would also hold if the surrounding text were lost,
    // so additionally require that each occurrence became a digest in place.
    expect(result).toMatch(/^foo\|[0-9a-f]{64}\|bar\|[0-9a-f]{64}\|boo$/)
  })

  test('should throw when the salt is missing', () => {
    expect(() => saltedHashSubstringScrubber('foo|max|bar', { regex: 'max' } as any)).toThrow(
      'Initialization vector is missing',
    )
  })

  test('should throw when the regex or substring is missing', () => {
    expect(() =>
      saltedHashSubstringScrubber('foo|max|bar', {
        initializationVector,
      } as any),
    ).toThrow('Substring or regex is missing')
  })
})

// Parity check: every scrubber registered in `defaultScrubbers` must also have an
// entry under the same name in `defaultScrubbersSQL`.
const scrubberNames = _stringMapEntries(defaultScrubbers).map(([k]) => k)
test.each(scrubberNames)('the %s should have its SQL scrubber counterpart', scrubberName => {
  expect(defaultScrubbersSQL[scrubberName]).toBeDefined()
})

// Reverse parity check: every entry of `defaultScrubbersSQL` must have an
// entry under the same name in `defaultScrubbers`.
const sqlScrubberNames = _stringMapEntries(defaultScrubbersSQL).map(([name]) => name)
test.each(sqlScrubberNames)('the %s should have its scrubber counterpart', sqlScrubberName => {
  const counterpart = defaultScrubbers[sqlScrubberName]
  expect(counterpart).toBeDefined()
})
38 changes: 38 additions & 0 deletions src/scrubbers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,42 @@ export const bcryptStringScrubberSQL: BcryptStringScrubberSQLFn = params => {
END`
}

/**
 * Function type of the in-memory substring-hashing scrubber:
 * `ScrubberFn` instantiated with `string | undefined` and the params below.
 */
export type SaltedHashSubstringScrubberFn = ScrubberFn<
string | undefined,
SaltedHashSubstringScrubberParams
>

/**
 * Function type of the SQL-generating counterpart; it is configured with the
 * same params as the in-memory scrubber.
 */
export type SaltedHashSubstringScrubberSQLFn = ScrubberSQLFn<SaltedHashSubstringScrubberParams>

/**
 * Parameters of `saltedHashSubstringScrubber` and `saltedHashSubstringScrubberSQL`.
 * Both scrubbers assert that each field is present and truthy.
 */
export interface SaltedHashSubstringScrubberParams {
/** Salt that is hashed together with the matched substring. */
initializationVector: string
/**
 * Regular-expression source selecting the substring(s) to replace.
 * The in-memory scrubber compiles it with `new RegExp(regex, 'g')`;
 * the SQL scrubber embeds it in the generated SQL as a string literal.
 */
regex: string
}

/**
 * Replaces every substring of `value` that matches `params.regex` with the
 * hex-encoded SHA-256 digest of that substring followed by
 * `params.initializationVector`.
 *
 * Each match is hashed on its own, so equal matches produce equal digests and
 * different matches produce different ones; text outside the matches is kept.
 *
 * @param value - text to scrub; falsy values (`undefined`, `''`) are returned as-is
 * @param params - `regex` (pattern source, applied globally) and
 *   `initializationVector` (salt appended to each match before hashing)
 * @returns the scrubbed text, or the original falsy `value`
 * @throws when `initializationVector` or `regex` is missing; this is checked
 *   before looking at `value`, so a misconfiguration fails even for empty input
 */
export const saltedHashSubstringScrubber: SaltedHashSubstringScrubberFn = (value, params) => {
  _assert(params?.initializationVector, 'Initialization vector is missing')
  _assert(params?.regex, 'Substring or regex is missing')

  if (!value) return value

  const { initializationVector, regex } = params

  // Digest of one matched substring, salted with the initialization vector.
  const hashMatch = (match: string): string => {
    const hasher = crypto.createHash('sha256')
    hasher.update(match)
    hasher.update(initializationVector)
    return hasher.digest('hex')
  }

  // The 'g' flag makes `replace` visit all matches, not only the first one.
  const pattern = new RegExp(regex, 'g')
  return value.replace(pattern, match => hashMatch(match))
}

/**
 * Builds the SQL expression counterpart of `saltedHashSubstringScrubber`.
 *
 * The generated expression has the shape
 * `REGEXP_REPLACE(<value>, '<regex>', SHA2(COALESCE(REGEXP_SUBSTR(<value>, '<regex>'), '') || '<iv>', 256))`
 * where `<value>` is the shared `sqlValueToReplace` placeholder.
 *
 * NOTE(review): the hash is computed once, from the result of REGEXP_SUBSTR, and
 * then used as the replacement for whatever REGEXP_REPLACE matches. Presumably
 * REGEXP_SUBSTR yields only the first match, in which case a value with several
 * different matches gets the same digest for all of them, unlike the in-memory
 * scrubber which hashes each match separately. Confirm against the target database.
 *
 * NOTE(review): backslashes in `regex` are passed through unchanged; whether the
 * target SQL dialect needs them doubled inside a string literal is not verified here.
 *
 * @param params - `regex` (pattern source) and `initializationVector` (salt)
 * @returns the SQL expression as a string
 * @throws when `initializationVector` or `regex` is missing
 */
export const saltedHashSubstringScrubberSQL: SaltedHashSubstringScrubberSQLFn = params => {
  _assert(params?.initializationVector, 'Initialization vector is missing')
  _assert(params?.regex, 'Substring or regex is missing')

  // Render a parameter as a single-quoted SQL string literal. Embedded single
  // quotes are doubled so that they cannot terminate the literal early (which
  // would yield broken or injectable SQL). `String()` keeps non-string values
  // working, as the template literal used to stringify them implicitly.
  // Parameters without a single quote render exactly as before.
  const toSqlLiteral = (raw: string): string => `'${String(raw).replace(/'/g, "''")}'`

  const pattern = toSqlLiteral(params.regex)
  const salt = toSqlLiteral(params.initializationVector)

  // COALESCE keeps the hash input non-NULL when the value has no match.
  const substringToReplace = `COALESCE(REGEXP_SUBSTR(${sqlValueToReplace}, ${pattern}), '')`
  const hashedValue = `SHA2(${substringToReplace} || ${salt}, 256)`
  const replacedValue = `REGEXP_REPLACE(${sqlValueToReplace}, ${pattern}, ${hashedValue})`

  return replacedValue
}

function nthChar(str: string, character: string, n: number): number | undefined {
let count = 0
let i = 0
Expand Down Expand Up @@ -489,6 +525,7 @@ export const defaultScrubbers: ScrubbersMap = {
saltedHashScrubber,
saltedHashEmailScrubber,
bcryptStringScrubber,
saltedHashSubstringScrubber,
}

export const defaultScrubbersSQL: ScrubbersSQLMap = {
Expand All @@ -504,4 +541,5 @@ export const defaultScrubbersSQL: ScrubbersSQLMap = {
saltedHashScrubber: saltedHashScrubberSQL,
saltedHashEmailScrubber: saltedHashEmailScrubberSQL,
bcryptStringScrubber: bcryptStringScrubberSQL,
saltedHashSubstringScrubber: saltedHashSubstringScrubberSQL,
}

0 comments on commit ae87afa

Please sign in to comment.