From 8beda88066b4eec14c56b492cc50f0b20482ef31 Mon Sep 17 00:00:00 2001 From: Andreas Misje Date: Wed, 6 Sep 2023 12:53:07 +0200 Subject: [PATCH 1/7] Add a hostsfile artifact for all three OSes --- .../definitions/Generic/System/HostsFile.yaml | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100755 artifacts/definitions/Generic/System/HostsFile.yaml diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml new file mode 100755 index 00000000000..769a21638a0 --- /dev/null +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -0,0 +1,88 @@ +name: Generic.System.HostsFile +description: | + The system hosts file maps hostnames to IP addresses. In some cases, + entries in this file take precedence and override the results from + the system DNS service. + + The file is a simple text file, with one line per IP address. Each + whitespace-separated word following the IP address is a hostname. + The Linux man page refers to the the first hostname as *canonical_hostname*, + and any following words as *aliases*. They are treated the same by this + artifact. + + The hosts file is typically present on all Linux-based systems (including macOS), + with entries for localhost. The same file format is also supported on Windows. + + The source *Hosts* returns each line in each hosts file that matches + the glob parameters for address and hostname. The hostname and aliases + are combined in a single column *Hostnames*. Columns returned: + + - OSPath + - Hostnames + - Comment + + Only comments that follows the hostname on the same line are captured in Comment. + Comments on their own lines are ignored. + + A second source *HostsFlattened* provides a flattened result, with each row + containing an IP address and a single hostname. + + This artifact also exports a function `parse_hostsfile()` that returns Hostname + and Aliases individually. 
+ +reference: + - https://manpages.debian.org/bookworm/manpages/hosts.5.en.html + +export: | + LET parse_hostsfile(OSPath) = SELECT Address, Hostname, + filter(list=split(sep='''\s+''', string=Aliases), + regex='.') AS Aliases, + /* Remove any whitespace between comment character and comment: */ + regex_replace(re='''^\s+''', source=Comment, replace='$1') AS Comment + FROM parse_records_with_regex(file=OSPath, + regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n]+)?(?:[\t ]*#(?P<Comment>[^\n]+))?$''') + + LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) + + LET HostsFiles = SELECT * + FROM foreach(row=Files, query={ + SELECT OSPath, Address, Hostname, Aliases, Comment + FROM parse_hostsfile(OSPath=OSPath) + }) + WHERE Address =~ AddressRegex + AND (Hostname =~ HostnameRegex OR Aliases =~ HostnameRegex) + +parameters: + - name: hostsFileGlobs + description: Globs to find hosts files + type: csv + default: | + HostsFileGlobs + C:\Windows\System32\drivers\etc\hosts + /etc/hosts + - name: HostnameRegex + description: Hostname or aliases to match + default: . + type: regex + - name: AddressRegex + description: IP addresses to match + default: . 
+ type: regex + +sources: + - name: Hosts + query: | + SELECT OSPath, Address, + join(array=array(a=Hostname, b=Aliases), sep=' ') AS Hostnames, + Comment + FROM HostsFiles + + - name: HostsFlattened + query: | + SELECT OSPath, Address, Hostnames, Comment + FROM flatten(query={ + SELECT OSPath, Address, + array(a=Hostname, b=Aliases) AS Hostnames, + Comment + FROM HostsFiles + }) From 3d1f1ace59d0274c6b43b3860c0aee82d58a360b Mon Sep 17 00:00:00 2001 From: Andreas Misje Date: Sat, 9 Sep 2023 11:19:17 +0200 Subject: [PATCH 2/7] Add full-line comments to test file to better test regex --- artifacts/testdata/files/hosts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/artifacts/testdata/files/hosts b/artifacts/testdata/files/hosts index e7a09f40d46..6582d41617b 100644 --- a/artifacts/testdata/files/hosts +++ b/artifacts/testdata/files/hosts @@ -19,7 +19,10 @@ # ::1 localhost 127.0.0.1 test.com +# Comment to ignore 127.0.0.2 test2.com +# Comment to ignore 127.0.3.3 standardcomment.com # testing standard comment +# Comment to ignore 127.0.3.4 second.com standardcomment2.com # testing standard comment 8.8.8.8 evil.com From b4701281b2f8aa1c0c395ffa2a6fd7445e6dd62a Mon Sep 17 00:00:00 2001 From: Andreas Misje Date: Sat, 9 Sep 2023 11:34:07 
+0200 Subject: [PATCH 3/7] Rename artifact name in test to new Generic.HostsFile --- artifacts/testdata/server/testcases/hostsfile.in.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/artifacts/testdata/server/testcases/hostsfile.in.yaml b/artifacts/testdata/server/testcases/hostsfile.in.yaml index 60e6c925afa..c1906561634 100644 --- a/artifacts/testdata/server/testcases/hostsfile.in.yaml +++ b/artifacts/testdata/server/testcases/hostsfile.in.yaml @@ -1,12 +1,12 @@ Queries: - - SELECT * FROM Artifact.Windows.System.HostsFile( + - SELECT * FROM Artifact.Generic.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', HostnameRegex = 'second.com') - - SELECT * FROM Artifact.Windows.System.HostsFile( + - SELECT * FROM Artifact.Generic.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', ResolutionRegex = '127.0.0') - - SELECT * FROM Artifact.Windows.System.HostsFile( + - SELECT * FROM Artifact.Generic.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', - ResolutionRegex = '127.0.3.3') + ResolutionRegex = '0.3.3$') From fb6735a3a7be6157a8d60cf7087e5a1639f2627a Mon Sep 17 00:00:00 2001 From: Mike Cohen Date: Fri, 8 Sep 2023 15:52:20 +1000 Subject: [PATCH 4/7] Added tests and refactor Also added a README to describe how to run the golden tests. 
--- .../definitions/Generic/System/HostsFile.yaml | 47 ++++++------ artifacts/testdata/server/testcases/README.md | 75 +++++++++++++++++++ .../server/testcases/hostsfile.in.yaml | 26 ++++++- .../server/testcases/hostsfile.out.yaml | 31 ++++++++ 4 files changed, 150 insertions(+), 29 deletions(-) create mode 100644 artifacts/testdata/server/testcases/README.md diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml index 769a21638a0..6d926e424e7 100755 --- a/artifacts/definitions/Generic/System/HostsFile.yaml +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -9,7 +9,7 @@ description: | The Linux man page refers to the the first hostname as *canonical_hostname*, and any following words as *aliases*. They are treated the same by this artifact. - + The hosts file is typically present on all Linux-based systems (including macOS), with entries for localhost. The same file format is also supported on Windows. @@ -20,13 +20,13 @@ description: | - OSPath - Hostnames - Comment - + Only comments that follows the hostname on the same line are captured in Comment. Comments on their own lines are ignored. - + A second source *HostsFlattened* provides a flattened result, with each row containing an IP address and a single hostname. - + This artifact also exports a function `parse_hostsfile()` that returns Hostname and Aliases individually. 
@@ -34,23 +34,20 @@ reference: - https://manpages.debian.org/bookworm/manpages/hosts.5.en.html export: | - LET parse_hostsfile(OSPath) = SELECT Address, Hostname, - filter(list=split(sep='''\s+''', string=Aliases), - regex='.') AS Aliases, + LET parse_hostsfile(OSPath) = SELECT Address, Hostname, + filter(list=split(sep='''\s+''', string=Aliases), regex='.') AS Aliases, + /* Remove any whitespace between comment character and comment: */ regex_replace(re='''^\s+''', source=Comment, replace='$1') AS Comment FROM parse_records_with_regex(file=OSPath, regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n]+)?(?:[\t ]*#(?P<Comment>[^\n]+))?$''') - + LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) - - LET HostsFiles = SELECT * - FROM foreach(row=Files, query={ + + LET HostsFiles = SELECT * FROM foreach(row=Files, query={ SELECT OSPath, Address, Hostname, Aliases, Comment - FROM parse_hostsfile(OSPath=OSPath) + FROM parse_hostsfile(OSPath=OSPath) }) - WHERE Address =~ AddressRegex - AND (Hostname =~ HostnameRegex OR Aliases =~ HostnameRegex) parameters: - name: hostsFileGlobs @@ -73,16 +70,16 @@ sources: - name: Hosts query: | SELECT OSPath, Address, - join(array=array(a=Hostname, b=Aliases), sep=' ') AS Hostnames, - Comment - FROM HostsFiles - + filter(list= (Hostname, ) + Aliases, regex=HostnameRegex) AS Hostname, + Comment + FROM HostsFiles + WHERE Hostname AND Address =~ AddressRegex + - name: HostsFlattened query: | - SELECT OSPath, Address, Hostnames, Comment - FROM flatten(query={ - SELECT OSPath, Address, - array(a=Hostname, b=Aliases) AS Hostnames, - Comment - FROM HostsFiles - }) + SELECT * FROM flatten(query={ + SELECT OSPath, Address, (Hostname, ) + Aliases AS Hostname, Comment + FROM HostsFiles + }) + WHERE Address =~ AddressRegex + AND Hostname =~ HostnameRegex diff --git a/artifacts/testdata/server/testcases/README.md b/artifacts/testdata/server/testcases/README.md new file mode 100644 index 
00000000000..b558caf458a --- /dev/null +++ b/artifacts/testdata/server/testcases/README.md @@ -0,0 +1,75 @@ +## Velociraptor Golden Tests + +The files in this directory are the golden test suite used by the CI +pipeline. + +What are Golden tests? Golden testing is a methodology to quickly and +efficiently write tests: + +1. First a test case is written with the VQL queries that should be + run. These queries are written in a file with a `.in.yaml` + extension. +2. The `golden` test runner can be run on the test files using `make + golden` at the top level of this repository. +3. 
If the output of the queries is different from the existing output + (stored in `.out.yaml` files) the test will fail. The golden runner + will then update the output file with the new data. +4. The user can compare the changes in the output file (e.g. using + `git diff`) and if the changes are OK then simply `git add` the new + output file. Running the golden tests again should produce no + change. + +By default the makefile rule runs the debug race detector binary (you +can build this using just `make` at the top level). This will produce a +debug build in `./output/velociraptor`. This binary includes the race +detector and so it is quite slow to run but worth it for tests. + +If you find you need to iterate quicker you can manually run the +production binary (built using `make linux`) by modifying the command +run by the `make golden` command. + +Additionally you can run the `dlv` debugger in the golden output by +running `make debug_golden` at the top level. + +To filter the test cases (so they don't have to all run) you can set +the `GOLDEN` environment variable. For example to only run the tests +in `pe.in.yaml`: + +``` +$ GOLDEN=pe make golden +./output/velociraptor -v --config artifacts/testdata/windows/test.config.yaml golden artifacts/testdata/server/testcases/ --env srcDir=`pwd` --filter=pe +``` + + +## NOTES + +Golden Testing requires the output to not change between subsequent +runs and when running between different environments. This means that +output that naturally changes should be avoided - for example output +that depends on: + +- Time +- File paths +- Operating systems + +You can use a combination of mocking plugin output and selecting +specific columns to format the output in such a way that it does not +depend on ephemeral things. + + +## Developing artifacts + +When developing artifacts using TDD it is useful to load the raw +artifact YAML without needing to build the binary each time. 
This way +we can iterate over the artifact yaml and see the results immediately +in the golden out yaml. + +An example command line is: + +``` +./output/velociraptor-v0.7.0-linux-amd64 -v --config artifacts/testdata/windows/test.config.yaml golden artifacts/testdata/server/testcases/ --env srcDir=`pwd` --filter=hostsfile --definitions artifacts/definitions/Generic/System/ +``` + +Here the binary will force load the raw yaml definition at runtime +overriding the built in artifact definition. It will then run the +Golden test `hostsfile.in.yaml` diff --git a/artifacts/testdata/server/testcases/hostsfile.in.yaml b/artifacts/testdata/server/testcases/hostsfile.in.yaml index c1906561634..53b5293fbd0 100644 --- a/artifacts/testdata/server/testcases/hostsfile.in.yaml +++ b/artifacts/testdata/server/testcases/hostsfile.in.yaml @@ -1,12 +1,30 @@ Queries: - - SELECT * FROM Artifact.Generic.System.HostsFile( + - SELECT * FROM Artifact.Windows.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', HostnameRegex = 'second.com') - - SELECT * FROM Artifact.Generic.System.HostsFile( + - SELECT * FROM Artifact.Windows.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', ResolutionRegex = '127.0.0') - - SELECT * FROM Artifact.Generic.System.HostsFile( + - SELECT * FROM Artifact.Windows.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', - ResolutionRegex = '0.3.3$') + ResolutionRegex = '127.0.3.3') + + - LET hostsFileGlobs = (dict(HostsFileGlobs=srcDir + '/artifacts/testdata/files/hosts'),) + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, AddressRegex = '127.0.0', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + 
hostsFileGlobs=hostsFileGlobs, AddressRegex = '0.3.3$', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='Hosts') diff --git a/artifacts/testdata/server/testcases/hostsfile.out.yaml b/artifacts/testdata/server/testcases/hostsfile.out.yaml index c60b4723981..bc95b9c7af6 100644 --- a/artifacts/testdata/server/testcases/hostsfile.out.yaml +++ b/artifacts/testdata/server/testcases/hostsfile.out.yaml @@ -28,4 +28,35 @@ SELECT * FROM Artifact.Windows.System.HostsFile( HostsFile=srcDir + '/artifacts/ "Comment": "testing standard comment", "_Source": "Windows.System.HostsFile" } +]LET hostsFileGlobs = (dict(HostsFileGlobs=srcDir + '/artifacts/testdata/files/hosts'),)[]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='HostsFlattened')[ + { + "Address": "127.0.3.4", + "Hostname": "second.com", + "Comment": "testing standard comment" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, AddressRegex = '127.0.0', source='HostsFlattened')[ + { + "Address": "127.0.0.1", + "Hostname": "test.com", + "Comment": "" + }, + { + "Address": "127.0.0.2", + "Hostname": "test2.com", + "Comment": "" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, AddressRegex = '0.3.3$', source='HostsFlattened')[ + { + "Address": "127.0.3.3", + "Hostname": "standardcomment.com", + "Comment": "testing standard comment" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='Hosts')[ + { + "Address": "127.0.3.4", + "Hostname": [ + "second.com" + ], + "Comment": "testing standard comment" + } ] \ No newline at end of file From a8cee653d6aa2509b1d5ed07d6adda781fc4a0d2 Mon Sep 17 
00:00:00 2001 From: Mike Cohen Date: Fri, 8 Sep 2023 16:35:28 +1000 Subject: [PATCH 5/7] Fix test --- artifacts/definitions/Generic/System/HostsFile.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml index 6d926e424e7..d97b988129c 100755 --- a/artifacts/definitions/Generic/System/HostsFile.yaml +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -40,7 +40,7 @@ export: | /* Remove any whitespace between comment character and comment: */ regex_replace(re='''^\s+''', source=Comment, replace='$1') AS Comment FROM parse_records_with_regex(file=OSPath, - regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n]+)?(?:[\t ]*#(?P<Comment>[^\n]+))?$''') + regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t ]*#(?P<Comment>[^\n\r]+))?$''') LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) From da635137e409008e15dcb49432b4a721ca54f44d Mon Sep 17 00:00:00 2001 From: Mike Cohen Date: Fri, 8 Sep 2023 18:00:17 +1000 Subject: [PATCH 6/7] Fix regex --- artifacts/definitions/Generic/System/HostsFile.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml index d97b988129c..fcbb4eaecc3 100755 --- a/artifacts/definitions/Generic/System/HostsFile.yaml +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -40,7 +40,7 @@ export: | /* Remove any whitespace between comment character and comment: */ regex_replace(re='''^\s+''', source=Comment, replace='$1') AS Comment FROM parse_records_with_regex(file=OSPath, - regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t ]*#(?P<Comment>[^\n\r]+))?$''') + regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t 
]*#(?P<Comment>[^\n\r]+)[\n\r]*)?$''') LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) From e1c850a8d404c36e77d3bad246332b2465580155 Mon Sep 17 00:00:00 2001 From: Mike Cohen Date: Fri, 8 Sep 2023 18:17:59 +1000 Subject: [PATCH 7/7] Refactored parser to use parse_string_with_regex --- .../definitions/Generic/System/HostsFile.yaml | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml index fcbb4eaecc3..b7b35e7d0fa 100755 --- a/artifacts/definitions/Generic/System/HostsFile.yaml +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -34,20 +34,26 @@ reference: - https://manpages.debian.org/bookworm/manpages/hosts.5.en.html export: | - LET parse_hostsfile(OSPath) = SELECT Address, Hostname, - filter(list=split(sep='''\s+''', string=Aliases), regex='.') AS Aliases, + LET _parse_hostsfile(OSPath) = SELECT parse_string_with_regex( + string=Line, + regex='''^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t ]*#(?P<Comment>.+))?''') AS Parsed + FROM parse_lines(filename=OSPath) + WHERE Parsed.Address - /* Remove any whitespace between comment character and comment: */ - regex_replace(re='''^\s+''', source=Comment, replace='$1') AS Comment - FROM parse_records_with_regex(file=OSPath, - regex='''(?m)^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t ]*#(?P<Comment>[^\n\r]+)[\n\r]*)?$''') + LET parse_hostsfile(OSPath) = SELECT Parsed.Address AS Address, + Parsed.Hostname AS Hostname, + filter(list=split(sep='''\s+''', string=Parsed.Aliases), regex='.') AS Aliases, - LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) + /* Remove any whitespace between comment character and comment: */ + regex_replace(re='''^\s+''', source=Parsed.Comment, replace='$1') AS Comment + FROM _parse_hostsfile(OSPath=OSPath) - LET HostsFiles = SELECT 
* FROM foreach(row=Files, query={ - SELECT OSPath, Address, Hostname, Aliases, Comment - FROM parse_hostsfile(OSPath=OSPath) - }) + LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs) + + LET HostsFiles = SELECT * FROM foreach(row=Files, query={ + SELECT OSPath, Address, Hostname, Aliases, Comment + FROM parse_hostsfile(OSPath=OSPath) + }) parameters: - name: hostsFileGlobs @@ -69,11 +75,11 @@ parameters: sources: - name: Hosts query: | - SELECT OSPath, Address, - filter(list= (Hostname, ) + Aliases, regex=HostnameRegex) AS Hostname, - Comment - FROM HostsFiles - WHERE Hostname AND Address =~ AddressRegex + SELECT OSPath, Address, + filter(list= (Hostname, ) + Aliases, regex=HostnameRegex) AS Hostname, + Comment + FROM HostsFiles + WHERE Hostname AND Address =~ AddressRegex - name: HostsFlattened query: |