From eee128805afa28a26fd66f852269748fde42803d Mon Sep 17 00:00:00 2001 From: Andreas Misje Date: Sat, 9 Sep 2023 18:07:08 +0200 Subject: [PATCH] Add a generic hostsfile artifact (#2930) I wrote an artifact for parsing hosts files on Linux/macOS. Afterwards, I realised there already [exists one for Windows](https://github.com/Velocidex/velociraptor/blob/master/artifacts/definitions/Windows/System/HostsFile.yaml). The file format is the same on all three supported OSes, so I don't see a reason for not having a generic artifact that runs on all of them. The differences now are - I match the array of hostnames against the regex, allowing one to use anchors like "^"/"$" in the regex (instead of matching the string of hostname/aliases) - I provide flattened query as well, with one hostname per row - The parsing function is exported - The hosts filename parameter is now a glob Let me know what you want to do with this. Include it and leave the existing Windows.System.HostsFile? Replace the existing? Suggest this to belong in the exchange instead? Something else? I'll add the test when I know how to proceed. --------- Co-authored-by: Mike Cohen --- .../definitions/Generic/System/HostsFile.yaml | 91 +++++++++++++++++++ artifacts/testdata/files/hosts | 3 + artifacts/testdata/server/testcases/README.md | 75 +++++++++++++++ .../server/testcases/hostsfile.in.yaml | 18 ++++ .../server/testcases/hostsfile.out.yaml | 31 +++++++ 5 files changed, 218 insertions(+) create mode 100755 artifacts/definitions/Generic/System/HostsFile.yaml create mode 100644 artifacts/testdata/server/testcases/README.md diff --git a/artifacts/definitions/Generic/System/HostsFile.yaml b/artifacts/definitions/Generic/System/HostsFile.yaml new file mode 100755 index 00000000000..b7b35e7d0fa --- /dev/null +++ b/artifacts/definitions/Generic/System/HostsFile.yaml @@ -0,0 +1,91 @@ +name: Generic.System.HostsFile +description: | + The system hosts file maps hostnames to IP addresses. 
In some cases,
+  entries in this file take precedence and override the results from
+  the system DNS service.
+
+  The file is a simple text file, with one line per IP address. Each
+  whitespace-separated word following the IP address is a hostname.
+  The Linux man page refers to the first hostname as *canonical_hostname*,
+  and any following words as *aliases*. They are treated the same by this
+  artifact.
+
+  The hosts file is typically present on all Unix-like systems (Linux, macOS),
+  with entries for localhost. The same file format is also supported on Windows.
+
+  The source *Hosts* returns the hosts file entries that match AddressRegex and
+  HostnameRegex, with matching names combined in one *Hostname* column. Columns:
+
+  - OSPath
+  - Address
+  - Hostname
+  - Comment
+
+  Only comments that follow the hostname on the same line are captured in Comment.
+  Comments on their own lines are ignored.
+
+  A second source *HostsFlattened* provides a flattened result, with each row
+  containing an IP address and a single hostname.
+
+  This artifact also exports a function `parse_hostsfile()` that returns Hostname
+  and Aliases individually.
+
+reference:
+  - https://manpages.debian.org/bookworm/manpages/hosts.5.en.html
+
+export: |
+  LET _parse_hostsfile(OSPath) = SELECT parse_string_with_regex(
+        string=Line,  /* the named groups below become the fields of Parsed */
+        regex='''^[\t ]*(?P<Address>[^\s#]+)[\t ]+(?P<Hostname>[^\s#]+)(?P<Aliases>[^#\n\r]+)?(?:[\t ]*#(?P<Comment>.+))?''') AS Parsed
+    FROM parse_lines(filename=OSPath)
+    WHERE Parsed.Address  /* skip lines the regex did not match (blank, comment-only) */
+
+  LET parse_hostsfile(OSPath) = SELECT Parsed.Address AS Address,
+      Parsed.Hostname AS Hostname,
+      filter(list=split(sep='''\s+''', string=Parsed.Aliases), regex='.') AS Aliases,  /* '.' drops empty strings */
+
+      /* Remove any whitespace between comment character and comment: */
+      regex_replace(re='''^\s+''', source=Parsed.Comment, replace='') AS Comment
+    FROM _parse_hostsfile(OSPath=OSPath)
+
+  LET Files = SELECT OSPath FROM glob(globs=hostsFileGlobs.HostsFileGlobs)
+
+  LET HostsFiles = SELECT * FROM foreach(row=Files, query={
+      SELECT OSPath, Address, Hostname, Aliases, Comment
+      FROM parse_hostsfile(OSPath=OSPath)
+  })
+
+parameters:
+  - name: hostsFileGlobs
+    description: Globs to find hosts files
+    type: csv
+    default: |
+      HostsFileGlobs
+      C:\Windows\System32\drivers\etc\hosts
+      /etc/hosts
+  - name: HostnameRegex
+    description: Hostname or aliases to match
+    default: .
+    type: regex
+  - name: AddressRegex
+    description: IP addresses to match
+    default: .
+ type: regex + +sources: + - name: Hosts + query: | + /* One row per hosts file entry; Hostname is the list of names (hostname, then aliases) that match HostnameRegex. */ SELECT OSPath, Address, + filter(list= (Hostname, ) + Aliases, regex=HostnameRegex) AS Hostname, + Comment + FROM HostsFiles + WHERE Hostname AND Address =~ AddressRegex /* NOTE(review): presumably drops entries whose filtered name list is empty - confirm WHERE sees the alias */ + + - name: HostsFlattened + query: | + /* flatten() expands the name list so that each output row carries a single Hostname. */ SELECT * FROM flatten(query={ + SELECT OSPath, Address, (Hostname, ) + Aliases AS Hostname, Comment /* hostname first, then aliases */ + FROM HostsFiles + }) + WHERE Address =~ AddressRegex + AND Hostname =~ HostnameRegex diff --git a/artifacts/testdata/files/hosts b/artifacts/testdata/files/hosts index e7a09f40d46..6582d41617b 100644 --- a/artifacts/testdata/files/hosts +++ b/artifacts/testdata/files/hosts @@ -19,7 +19,10 @@ # ::1 localhost 127.0.0.1 test.com +# Comment to ignore 127.0.0.2 test2.com +# Comment to ignore 127.0.3.3 standardcomment.com # testing standard comment +# Comment to ignore 127.0.3.4 second.com standardcomment2.com # testing standard comment 8.8.8.8 evil.com diff --git a/artifacts/testdata/server/testcases/README.md b/artifacts/testdata/server/testcases/README.md new file mode 100644 index 00000000000..b558caf458a --- /dev/null +++ b/artifacts/testdata/server/testcases/README.md @@ -0,0 +1,75 @@ +## Velociraptor Golden Tests + +The files in this directory are the golden test suite used by the CI +pipeline. + +What are Golden tests? Golden testing is a methodology to quickly and +efficiently write tests: + +1. First a test case is written with the VQL queries that should be + run. These queries are written in a file with a `.in.yaml` + extension. +2. The `golden` test runner can be run on the test files using `make + golden` at the top level of this repository. +3. If the output of the queries is different from the existing output + (stored in `.out.yaml` files) the test will fail. The golden runner + will then update the output file with the new data. +4. The user can compare the changes in the output file (e.g. using + `git diff`) and if the changes are OK then simply `git add` the new + output file. 
Running the golden tests again should produce no + change. + +By default the makefile rule runs the debug race detector binary (you +can build this using just `make` at the top level). This will produce a +debug build in `./output/velociraptor`. This binary includes the race +detector and so it is quite slow to run but worth it for tests. + +If you find you need to iterate more quickly you can manually run the +production binary (built using `make linux`) by modifying the command +run by the `make golden` command. + +Additionally you can run the `dlv` debugger in the golden output by +running `make debug_golden` at the top level. + +To filter the test cases (so they don't all have to run) you can set +the `GOLDEN` environment variable. For example, to only run the tests +in `pe.in.yaml`: + +``` +$ GOLDEN=pe make golden +./output/velociraptor -v --config artifacts/testdata/windows/test.config.yaml golden artifacts/testdata/server/testcases/ --env srcDir=`pwd` --filter=pe +``` + + +## NOTES + +Golden Testing requires the output to not change between subsequent +runs and when running in different environments. This means that +output that naturally changes should be avoided - for example output +that depends on: + +- Time +- File paths +- Operating systems + +You can use a combination of mocking plugin output and selecting +specific columns to format the output in such a way that it does not +depend on ephemeral things. + + +## Developing artifacts + +When developing artifacts using TDD it is useful to load the raw +artifact YAML without needing to build the binary each time. This way +we can iterate over the artifact YAML and see the results immediately +in the golden out yaml. 
+ +An example command line is: + +``` +./output/velociraptor-v0.7.0-linux-amd64 -v --config artifacts/testdata/windows/test.config.yaml golden artifacts/testdata/server/testcases/ --env srcDir=`pwd` --filter=hostsfile --definitions artifacts/definitions/Generic/System/ +``` + +Here the binary will force load the raw yaml definition at runtime +overriding the built in artifact definition. It will then run the +Golden test `hostsfile.in.yaml` diff --git a/artifacts/testdata/server/testcases/hostsfile.in.yaml b/artifacts/testdata/server/testcases/hostsfile.in.yaml index 60e6c925afa..53b5293fbd0 100644 --- a/artifacts/testdata/server/testcases/hostsfile.in.yaml +++ b/artifacts/testdata/server/testcases/hostsfile.in.yaml @@ -10,3 +10,21 @@ Queries: - SELECT * FROM Artifact.Windows.System.HostsFile( HostsFile=srcDir + '/artifacts/testdata/files/hosts', ResolutionRegex = '127.0.3.3') + + - LET hostsFileGlobs = (dict(HostsFileGlobs=srcDir + '/artifacts/testdata/files/hosts'),) + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, AddressRegex = '127.0.0', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, AddressRegex = '0.3.3$', source='HostsFlattened') + + - SELECT Address, Hostname, Comment + FROM Artifact.Generic.System.HostsFile( + hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='Hosts') diff --git a/artifacts/testdata/server/testcases/hostsfile.out.yaml b/artifacts/testdata/server/testcases/hostsfile.out.yaml index c60b4723981..bc95b9c7af6 100644 --- a/artifacts/testdata/server/testcases/hostsfile.out.yaml +++ b/artifacts/testdata/server/testcases/hostsfile.out.yaml @@ -28,4 +28,35 @@ SELECT * FROM Artifact.Windows.System.HostsFile( 
HostsFile=srcDir + '/artifacts/ "Comment": "testing standard comment", "_Source": "Windows.System.HostsFile" } +]LET hostsFileGlobs = (dict(HostsFileGlobs=srcDir + '/artifacts/testdata/files/hosts'),)[]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='HostsFlattened')[ + { + "Address": "127.0.3.4", + "Hostname": "second.com", + "Comment": "testing standard comment" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, AddressRegex = '127.0.0', source='HostsFlattened')[ + { + "Address": "127.0.0.1", + "Hostname": "test.com", + "Comment": "" + }, + { + "Address": "127.0.0.2", + "Hostname": "test2.com", + "Comment": "" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, AddressRegex = '0.3.3$', source='HostsFlattened')[ + { + "Address": "127.0.3.3", + "Hostname": "standardcomment.com", + "Comment": "testing standard comment" + } +]SELECT Address, Hostname, Comment FROM Artifact.Generic.System.HostsFile( hostsFileGlobs=hostsFileGlobs, HostnameRegex = 'second.com', source='Hosts')[ + { + "Address": "127.0.3.4", + "Hostname": [ + "second.com" + ], + "Comment": "testing standard comment" + } ] \ No newline at end of file