From f370c51729ad6cc6dcb99a92af197b29a2a6b2ae Mon Sep 17 00:00:00 2001 From: tmartin-s1 <121066578+tmartin-s1@users.noreply.github.com> Date: Tue, 19 Sep 2023 18:52:15 -0400 Subject: [PATCH] Added/updated dashboards and updated the readme --- README.md | 6 +- Splunk Dashboards/dataset_by_example.xml | 47 +- Splunk Dashboards/ingestion_summary.xml | 25 +- .../sentinelone_use_case_query_examples.xml | 27 +- Splunk Dashboards/soc_search_examples.xml | 131 +++++ Splunk Dashboards/splunk_app_usage.xml | 34 +- TA_dataset/default/data/ui/nav/default.xml | 8 + .../data/ui/views/dataset_by_example.xml | 551 +++++++++--------- .../data/ui/views/ingestion_summary.xml | 79 +++ .../sentinelone_use_case_query_examples.xml | 88 +++ .../data/ui/views/soc_search_examples.xml | 131 +++++ .../data/ui/views/splunk_app_usage.xml | 350 +++++++++++ 12 files changed, 1135 insertions(+), 342 deletions(-) create mode 100644 Splunk Dashboards/soc_search_examples.xml create mode 100644 TA_dataset/default/data/ui/views/ingestion_summary.xml create mode 100644 TA_dataset/default/data/ui/views/sentinelone_use_case_query_examples.xml create mode 100644 TA_dataset/default/data/ui/views/soc_search_examples.xml create mode 100644 TA_dataset/default/data/ui/views/splunk_app_usage.xml diff --git a/README.md b/README.md index 90452c00..5e278458 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# Security Data Lake Add-On for Splunk -The Security Data Lake Add-On for Splunk provides integration with [Singularity DataLake](https://www.sentinelone.com/platform/xdr-ingestion/) and [DataSet](https://www.dataset.com) by [SentinelOne](https://sentinelone.com). The key functions allow two-way integration: +# Singularity Data Lake Add-On for Splunk +The Singularity Data Lake Add-On for Splunk provides integration with [Singularity DataLake](https://www.sentinelone.com/platform/xdr-ingestion/) and [DataSet](https://www.dataset.com) by [SentinelOne](https://sentinelone.com). The key functions allow two-way integration: - SPL custom command to query directly from the Splunk UI. - Inputs to index alerts as CIM-compliant, or any user-defined query results. - Alert action to send events from Splunk. @@ -60,7 +60,7 @@ The add-on uses Splunk encrypted secrets storage, so admins require `admin_all_o 3. Optionally, configure logging level and proxy information on the associated tabs. 4. Click Save. -5. The included Security Data Lake by Example dashboard can be used to confirm connectivity and also shows example searches to get started. +5. The included Singularity Data Lake by Example dashboard can be used to confirm connectivity and also shows example searches to get started. ## SPL Command The `| dataset` command allows queries against the [DataSet APIs](https://app.scalyr.com/help/api) directly from Splunk's search bar. diff --git a/Splunk Dashboards/dataset_by_example.xml b/Splunk Dashboards/dataset_by_example.xml index 1d4c2eb8..c5343bb8 100644 --- a/Splunk Dashboards/dataset_by_example.xml +++ b/Splunk Dashboards/dataset_by_example.xml @@ -1,5 +1,5 @@
- + maxcount=10 @@ -12,7 +12,6 @@ Show Connection Test Results - 1 "1" @@ -24,6 +23,26 @@ + + + + + + + Searching Your Data in DataSet @@ -71,7 +90,7 @@
  • Timeseries Query - This will calculate numeric values over time. For repeated queries, summaries allow precomputed results (fastest).
  • - Depending on your use case, you may have a need for any or all of these. + Depending on your use case, you may have a need for any or all of these.

    Let's get started searching!

    @@ -101,7 +120,7 @@ serverHost serverHost - | dataset method=facet field=serverHost search="serverHost=* " + | dataset method=facet field=serverHost search="serverHost=* " | spath | rename value as serverHost | table serverHost count @@ -126,22 +145,6 @@ $baseQuery$ - - -
    @@ -149,7 +152,7 @@ SPL: | dataset method=query search="$baseQuery$" $myMaxCount$ | spath - | dataset method=query search="$baseQuery$" $myMaxCount$ + | dataset method=query search="$baseQuery$" $myMaxCount$ | spath $myTime.earliest$ $myTime.latest$ @@ -260,4 +263,4 @@ - + \ No newline at end of file diff --git a/Splunk Dashboards/ingestion_summary.xml b/Splunk Dashboards/ingestion_summary.xml index 39784b1e..e70e3339 100644 --- a/Splunk Dashboards/ingestion_summary.xml +++ b/Splunk Dashboards/ingestion_summary.xml @@ -2,10 +2,10 @@ This dashboard is provided to estimate daily ingestion for various sources of data in Splunk. - index="_internal" source="*metrics.log" group="per_sourcetype_thruput" + index="_internal" source="*metrics.log" group="per_sourcetype_thruput" | bucket _time span=1d | eval GB=kb/1024/1024 -| stats sum(GB) as "GB Ingest" avg(GB) as "Average GB" max(GB) as "Max GB" avg(eps) as "Events per Second" by _time, series +| stats sum(GB) as "GB Ingest" avg(GB) as "Average GB" max(GB) as "Max GB" avg(eps) as "Events per Second" by _time, series | stats sum("GB Ingest") as "Total Ingest(GB)", avg("GB Ingest") as "Daily Avg Ingest(GB)", max("GB Ingest") as "Daily Max Ingest(GB)" by series | eval "Total Ingest(GB)"=round('Total Ingest(GB)',4), "Daily Avg Ingest(GB)"=round('Daily Avg Ingest(GB)',4), "Daily Max Ingest(GB)"=round('Daily Max Ingest(GB)',4), "Events per Second"=round('Events per Second',4) | rename series as sourcetype @@ -23,7 +23,24 @@ - + + + + + + + + Ingestion by sourcetype @@ -59,4 +76,4 @@
    - + \ No newline at end of file diff --git a/Splunk Dashboards/sentinelone_use_case_query_examples.xml b/Splunk Dashboards/sentinelone_use_case_query_examples.xml index c740650c..71232311 100644 --- a/Splunk Dashboards/sentinelone_use_case_query_examples.xml +++ b/Splunk Dashboards/sentinelone_use_case_query_examples.xml @@ -23,26 +23,29 @@ A non-Windows process writes files to the temp directory Rundll or Regsvr executes a script - Bat or cmd files are dropped directly to a temp folder + Bat or cmd files are dropped directly to a temp folder A non-Windows process injects to a Windows process LOLBins command processors masquerade under a different name and path Rundll or Regsvr run content from a remote server - Suspicious Powershell with base64 in the commandline + Suspicious Powershell with base64 in the commandline New unsigned DLL is dropped in the Windows directory (possible DLL hijack attempt) NTDS Copy Removal of indicators on Host Suspicious data compression - Allow SMB and RDP on Defender Firewall + Allow SMB and RDP on Defender Firewall Unmanaged Powershell Signed Binary Proxy Execution: mshta - Signed Binary Proxy Execution: regsvr32 + Signed Binary Proxy Execution: regsvr32 Signed Binary Proxy Execution: Rundll32 Powershell Unnecessary Escaping Signed Binary Proxy Execution: CMSTP DHCP CalloutDLL os.name matches '^Windows' AND event.category = 'file' AND tgt.file.path contains 'temp' AND tgt.file.isExecutable = 'true' AND src.process.verifiedStatus != 'verified' AND src.process.publisher != 'MICROSOFT WINDOWS' os.name matches '^Windows' AND event.category = 'file' AND tgt.file.path contains 'temp' AND tgt.file.isExecutable = 'true' AND src.process.verifiedStatus != 'verified' AND src.process.publisher != 'MICROSOFT WINDOWS' + + $label$ + + + + + + + + Network Recon + + + | dataset account=xdr method=powerquery search="src.process.user = * (net_ipsubnet(dst.ip.address, '172.0.0.0/16') OR net_ipsubnet(dst.ip.address, '10.0.0.0/8')) NOT (net_ipsubnet(src.ip.address, '10.0.0.0/24')) +| group uniqueDestinations = estimate_distinct(dst.ip.address), uniquePorts = estimate_distinct(dst.port.number), fullPortList = array_sort(array_agg_distinct(dst.port.number)), dstList = array_sort(array_agg_distinct(dst.ip.address)) by src.ip.address, src.process.user +| filter (uniqueDestinations > 5) AND (uniquePorts > 5) +| let topPortList = array_slice(fullPortList, 0, 20) +| columns src.ip.address, src.process.user, uniqueDestinations, uniquePorts, dstList, fullPortList, topPortList" +| spath +| table src.ip.address, src.process.user, uniqueDestinations, uniquePorts, dstList, fullPortList, topPortList + -15m + now + + + + 
https://xdr.us1.sentinelone.net/query?filter=src.process.user+%3D+*+%28net_ipsubnet%28dst.ip.address%2C+%27172.0.0.0%2F16%27%29+OR+net_ipsubnet%28dst.ip.address%2C+%2710.0.0.0%2F8%27%29%29+NOT+%28net_ipsubnet%28src.ip.address%2C+%2710.0.0.0%2F24%27%29%29%0A%7C+group+uniqueDestinations+%3D+estimate_distinct%28dst.ip.address%29%2C+uniquePorts+%3D+estimate_distinct%28dst.port.number%29%2C+fullPortList+%3D+array_sort%28array_agg_distinct%28dst.port.number%29%29%2C+dstList+%3D+array_sort%28array_agg_distinct%28dst.ip.address%29%29+by+src.ip.address%2C+src.process.user%0A%7C+filter+%28uniqueDestinations+%3E+1%29+AND+%28uniquePorts+%3E+1%29%0A%7C+let+topPortList+%3D+array_slice%28fullPortList%2C+0%2C+20%29%0A%7C+columns+src.ip.address%2C+src.process.user%2C+uniqueDestinations%2C+uniquePorts%2C+dstList%2C+fullPortList%2C+topPortList%0A%2F%2F+src+user+is+not+null%2C+src+and+dst+IPs+are+within+defined+subnets%0A%2F%2F+get+distinct+count+of+dst+IPs+and+ports%2C+plus+arrays+of+dst+IPs+%28sorted%29+and+ports+for+each+src+IP+and+user%0A%2F%2F+filter+unique+destinations+and+ports+to+a+high+number%2C+in+this+case+a+static+number+of+1+for+testing%0A%2F%2F+create+a+2nd+shorter+array+of+ports+only+showing+the+first+20%0A%2F%2F+order+columns&teamEmails=-&view=xdr&startTime=10+min + +
    +
    +
    + + + Impossible Traveler + + + | dataset account=xdr method=powerquery search="//src.process.user = * src.ip.address = * +//| group first_ip = oldest(src.ip.address), last_ip = newest(src.ip.address) by src.process.user +| limit 1 +| let src.process.user = 'Matt Balcer', first_ip='87.203.45.78', last_ip='98.24.6.8' +| let first_location = geo_ip_location(first_ip), last_location = geo_ip_location(last_ip), first_country=geo_ip_country(first_ip), last_country=geo_ip_country(last_ip) +| let kilometers = geo_distance(first_location, last_location) +| let hours=(queryend() - querystart())/1000000000/60/60 +| let speed = kilometers / hours" +| spath +| table src.process.user, first_ip, last_ip, first_country, first_location, last_country, last_location, kilometers, speed + -24h@h + now + + + + + + + + + + + + + https://xdr.us1.sentinelone.net/query?view=edr&filter=%2F%2Fsrc.process.user+%3D+*+src.ip.address+%3D+*%0A%2F%2F%7C+group+first_ip+%3D+oldest%28src.ip.address%29%2C+last_ip+%3D+newest%28src.ip.address%29+by+src.process.user%0A%7C+limit+1%0A%7C+let+src.process.user+%3D+%27Matt+Balcer%27%2C+first_ip%3D%2787.203.45.78%27%2C+last_ip%3D%2798.24.6.8%27%0A%7C+let+first_location+%3D+geo_ip_location%28first_ip%29%2C+last_location+%3D+geo_ip_location%28last_ip%29%0A%7C+let+kilometers+%3D+geo_distance%28first_location%2C+last_location%29%0A%7C+let+hours%3D%28queryend%28%29+-+querystart%28%29%29%2F1000000000%2F60%2F60%0A%7C+let+speed+%3D+kilometers+%2F+hours%0A%7C+filter+speed+%3E+500%0A%2F%2F+logic%3A+get+IP+addresses%2C+get+geo+locations%2C+determine+distance%2C+convert+timestamps+from+nano+epoch+to+delta+in+hours%2C+then+filter+to+speed+%3E+500km%0A%2F%2F+usage%3A+for+real+use%2C+uncomment+lines+1%2C2+and+remove+lines+3%2C4&startTime=4+hours + +
    +
    +
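    As the comments carried in the deep link above note, the Impossible Traveler query ships with demo values hard-coded (the limit and let lines); for real use those demo lines are removed, the first two commented lines are uncommented, and a speed filter is applied. A rough sketch of that production form, using the 500 km/h threshold from the linked query, would be:

    | dataset account=xdr method=powerquery search="src.process.user = * src.ip.address = *
    | group first_ip = oldest(src.ip.address), last_ip = newest(src.ip.address) by src.process.user
    | let first_location = geo_ip_location(first_ip), last_location = geo_ip_location(last_ip), first_country=geo_ip_country(first_ip), last_country=geo_ip_country(last_ip)
    | let kilometers = geo_distance(first_location, last_location)
    | let hours=(queryend() - querystart())/1000000000/60/60
    | let speed = kilometers / hours
    | filter speed > 500"
    | spath
    | table src.process.user, first_ip, last_ip, first_country, first_location, last_country, last_location, kilometers, speed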
    + + + Network Traffic Off-Hours + + + | dataset account=xdr method=powerquery search="bytes.sent = * +| let time_hour = number(strftime(timestamp, '%H')) +| group bytes=sum(bytes.sent), hour = oldest(time_hour) by timebucket('1h') +| let gb = (bytes/1024/1024/1024) +| filter gb > 0 AND (hour <= 9 OR hour >= 18) +// logic: get numeric 2-digit hour from timestamp, group bytes per hour, convert to gb, then filter to volume and hours of day +// usage: change the last filter line to anomalous gb traffic and adjust to business hours" +| spath +| table hour, gb + -24h@h + now + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://xdr.us1.sentinelone.net/query?filter=bytes.sent+%3D+*%0A%7C+let+time_hour+%3D+number%28strftime%28timestamp%2C+%27%25H%27%29%29%0A%7C+group+bytes%3Dsum%28bytes.sent%29%2C+hour+%3D+oldest%28time_hour%29+by+timebucket%28%271h%27%29%0A%7C+let+gb+%3D+%28bytes%2F1024%2F1024%2F1024%29%0A%7C+filter+gb+%3E+0+AND+%28hour+%3C%3D+9+OR+hour+%3E%3D+18%29%0A%2F%2F+logic%3A+get+numeric+2-digit+hour+from+timestamp%2C+group+bytes+per+hour%2C+convert+to+gb%2C+then+filter+to+volume+and+hours+of+day%0A%2F%2F+usage%3A+change+the+last+filter+line+to+anomalous+gb+traffic+and+adjust+to+business+hours&startTime=24+hours&view=xdr + + + + + \ No newline at end of file diff --git a/Splunk Dashboards/splunk_app_usage.xml b/Splunk Dashboards/splunk_app_usage.xml index 221ff218..347e9088 100644 --- a/Splunk Dashboards/splunk_app_usage.xml +++ b/Splunk Dashboards/splunk_app_usage.xml @@ -6,11 +6,11 @@ index=_internal sourcetype=splunk_web_access host=* user=* -| rex field=uri_path ".*/(?<title>[^/]*)$" +| rex field=uri_path ".*/(?<title>[^/]*)$" | join title [| rest /servicesNS/-/-/data/ui/views splunk_server=* -| search isDashboard=1 isVisible=1 -| rename eai:acl.app as app +| search isDashboard=1 isVisible=1 +| rename eai:acl.app as app | fields title app ] | rename title as dashboard | search NOT app IN($myExcludedApps$) @@ -20,14 +20,14 @@ index=_internal sourcetype=splunk_web_access host=* user=* $myTime.latest$ - | rest /servicesNS/-/-/data/ui/views + | rest /servicesNS/-/-/data/ui/views ``` get fields we want from all the dashboards for all the apps ``` -| fields eai:acl.app label id eai:data +| fields eai:acl.app label id eai:data | rename eai:acl.app as app_name, eai:data as xml_source, label as dashboard_title ``` now separate all the panels to individual events (rows) ``` -| rex field=id "http(s)?://([^/]+/)+(?<file_name>[^/]+)" +| rex field=id "http(s)?://([^/]+/)+(?<file_name>[^/]+)" | spath input=xml_source output=panel path=form.row.panel | mvexpand panel | fields app_name dashboard_title file_name panel xml_source @@ -37,14 +37,14 @@ index=_internal sourcetype=splunk_web_access host=* user=* | spath input=panel output=panel_title path=title | xpath field=panel outfield=query "//*/search/query" | xpath field=panel outfield=drilldown "//*/drilldown/*/link" -| rex field=panel "\s+\<(?<panel_type>[^\<]+)\>" +| rex field=panel "\s+\<(?<panel_type>[^\<]+)\>" ``` filter for the types of panels that typically have searches ``` | search panel_type IN ("chart","single","table","event","search","viz","map") | search NOT app_name IN($myExcludedApps$) ``` total, sort and print the results ``` -| eventstats count as total_panels, dc(dashboard_title) as total_dashboards +| eventstats count as total_panels, dc(dashboard_title) as total_dashboards | eventstats dc(dashboard_title) as dashboards_in_app by app_name | eventstats count as panels_on_dashboard by app_name, 
dashboard_title | sort app_name file_name dashboard_title panel_title @@ -209,7 +209,7 @@ index=_internal sourcetype=splunk_web_access host=* user=* Splunk Usage - What are you're users accessing most often? + What are your users accessing most often? @@ -218,12 +218,12 @@ index=_internal sourcetype=splunk_web_access host=* user=* Most Used SPL Commands - index=_audit action=search info=completed search=* NOT "search_id='scheduler" NOT "search=|history" NOT "user=splunk-system-user" NOT "search='typeahead" NOT "search='| metadata type=* | search totalCount>0" app="*" -| fields search -| rex field=search "\|\s*(?<command>\w+)\s(?<attributes>[^|]*)" max_match=1000 -| mvexpand command + index=_audit action=search info=completed search=* NOT "search_id='scheduler" NOT "search=|history" NOT "user=splunk-system-user" NOT "search='typeahead" NOT "search='| metadata type=* | search totalCount>0" app="*" +| fields search +| rex field=search "\|\s*(?<command>\w+)\s(?<attributes>[^|]*)" max_match=1000 +| mvexpand command | search command!="" -| stats count as "execution", distinct_count(search) as "distinct_searches" by command +| stats count as "execution", distinct_count(search) as "distinct_searches" by command | sort -distinct_search, -execution | rename command as "SPL Command" $myTime.earliest$ $myTime.latest$ @@ -332,9 +332,9 @@ index=_internal sourcetype=splunk_web_access host=* user=* | search NOT app_name IN(Splunk_Security_Essentials,lookup_editor,splunk_monitoring_console,splunk_secure_gateway,splunk_instrumentation,Splunk_SA_CIM) ``` total, sort and print the results ``` -| eventstats count as total_searches +| eventstats count as total_searches | eventstats dc(searches) as searches_in_app by app_name -| sort app_name title +| sort app_name title | table app_name title search viz cron_schedule alert_type alert_comparator alert_threshold alert_condition | search app_name = "*" search="*" | dedup app_name, title, search
    - + \ No newline at end of file diff --git a/TA_dataset/default/data/ui/nav/default.xml b/TA_dataset/default/data/ui/nav/default.xml index 35cfb780..21af67a7 100644 --- a/TA_dataset/default/data/ui/nav/default.xml +++ b/TA_dataset/default/data/ui/nav/default.xml @@ -19,4 +19,12 @@ + + + + + + + + diff --git a/TA_dataset/default/data/ui/views/dataset_by_example.xml b/TA_dataset/default/data/ui/views/dataset_by_example.xml index 057416ed..c5343bb8 100644 --- a/TA_dataset/default/data/ui/views/dataset_by_example.xml +++ b/TA_dataset/default/data/ui/views/dataset_by_example.xml @@ -1,287 +1,266 @@
    - - - maxcount=10 - - | group count=count() by tag" | spath | table tag count - | spath | rename value as tag | table tag count - | spath | timechart values(rate) as rate - | spath | stats count by attributes.status - -
    - - - Show Connection Test Results - 1 - - - "1" - - - - - - - -
    - - - Searching Your Data in DataSet - - This dashboard will help get you started on your journey. The first thing you'll want to do after - configuring your DataSet Read API Key is to run a simple test to make sure you can access Dataset. - - - - - - - API Connection Test - - SPL: | dataset maxcount=5 - - | dataset maxcount=6 - | spath - - $myTime.earliest$ - $myTime.latest$ - - - - -
    -
    -
    - - - - If you see data, your API Keys are working! - - - - - - -

    Now let's talk about executing queries against DataSet.

    - The first thing you need to know is that this Add-On provides four methods to query DataSet: -
      -
    1. - Base Data Query - - This will return the raw event data (fast, but very verbose). This type of query returns all - evetn data so be mindful of the amount of data pushed across the wire and held in memory. -
    2. -
    3. - PowerQuery - - This will aggregrate data by any supported operator (sum, count, average, etc.) and return the - summary level statistics (faster). -
    4. -
    5. - Facet Query - - This will summarize data by a specific field and return the summary level statistics for the - most common values of the field (fastest). -
    6. -
    7. - Timeseries Query - - This will calculate numeric values over time. For repeated queries, summaries allow - precomputed results (fastest). -
    8. -
    - Depending on your use case, you may have a need for any or all of these. -

    Let's get started searching!

    - -
    -
    - - - 1. Base Data Query: Get the raw events - - - - -4h@m - now - - - - - 10 - 50 - 1000 - 5000 - maxcount=10 - maxcount=10 - - - - All - serverHost - serverHost - - | dataset method=facet field=serverHost search="serverHost=* " - | spath - | rename value as serverHost - | table serverHost count - | sort serverHost - - $myTime.earliest$ - $myTime.latest$ - - - - serverHost=* - tag - - - serverHost='$value$' - tag - - - All - All - - - - $baseQuery$ - - - - - - - - - - SPL: | dataset method=query search="$baseQuery$" $myMaxCount$ | spath - - | dataset method=query search="$baseQuery$" $myMaxCount$ - | spath - - $myTime.earliest$ - $myTime.latest$ - 1 - - - - -
    -
    -
    - - - Now let's select a field to aggregate statistics on in DataSet. (This is exponentially better - performance than returning all data and using SPL to summarize.) - - - - tag - status - severity - description - Application - tag - tag - - - | group count=count() by $value$" | spath | table $value$ count - - | spath | rename value as $value$ | table $value$ count - - - - - - - - 2. PowerQuery: Aggregate in DataSet and display in Splunk! - - - $basePowerQuery$ - - - - - - - SPL: | dataset method=powerquery search="$baseQuery$ $basePowerQuery$ - - | dataset method=powerquery search="$baseQuery$ $basePowerQuery$ - - $myTime.earliest$ - $myTime.latest$ - - - - - - - - - - 3. Facet Query: Aggregate in DataSet, Facet by a specific field and display in Splunk! - - - $baseFacetQuery$ - - - - - - - - SPL: | dataset method=facet field=$myTag$ search="$baseQuery$" $baseFacetQuery$ - - | dataset method=facet field=$myTag$ search="$baseQuery$" $baseFacetQuery$ - - $myTime.earliest$ - $myTime.latest$ - - - - - - - - - - 4. Timeseries Query: This will calculate numeric values over time. - - - $baseTimeseriesQuery$ - - - - - - - - SPL: | dataset method=timeseries search="$baseQuery$" function="rate" buckets=24 - createsummaries=false onlyusesummaries=false $baseTimeseriesQuery$ - - - | dataset method=timeseries search="$baseQuery$" function="rate" buckets=24 - createsummaries=false onlyusesummaries=false $baseTimeseriesQuery$ - - $myTime.earliest$ - $myTime.latest$ - - - - - - - - - -
    + + + maxcount=10 + + | group count=count() by tag" | spath | table tag count + | spath | rename value as tag | table tag count + | spath | timechart values(rate) as rate + | spath | stats count by attributes.status + +
    + + + Show Connection Test Results + + + "1" + + + + + + + +
    + + + + + + + + + + Searching Your Data in DataSet + + This dashboard will help get you started on your journey. The first thing you'll want to do after configuring your DataSet Read API Key is to run a simple test to make sure you can access Dataset. + + + + + + API Connection Test + + SPL: | dataset maxcount=5 + + | dataset maxcount=6 +| spath + $myTime.earliest$ + $myTime.latest$ + + + + +
    +
    +
    + + + + If you see data, your API Keys are working! + + + + + + +

    Now let's talk about executing queries against DataSet.

    + The first thing you need to know is that this Add-On provides four methods to query DataSet: +
      +
    1. + Base Data Query - This will return the raw event data (fast, but very verbose). This type of query returns all event data so be mindful of the amount of data pushed across the wire and held in memory.
    2. +
    3. + PowerQuery - This will aggregate data by any supported operator (sum, count, average, etc.) and return the summary level statistics (faster).
    4. +
    5. + Facet Query - This will summarize data by a specific field and return the summary level statistics for the most common values of the field (fastest).
    6. +
    7. + Timeseries Query - This will calculate numeric values over time. For repeated queries, summaries allow precomputed results (fastest).
    8. +
    + Depending on your use case, you may have a need for any or all of these. +

    Let's get started searching!
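    As a quick reference, here is roughly what each of the four methods looks like as a complete search; these are sketches assembled from the example panels below, so the serverHost=* filter, the tag field, and the rate function are placeholders to swap for your own data:

    SPL (base data query): | dataset method=query search="serverHost=*" maxcount=10 | spath
    SPL (PowerQuery): | dataset method=powerquery search="serverHost=* | group count=count() by tag" | spath | table tag count
    SPL (facet query): | dataset method=facet field=serverHost search="serverHost=*" | spath | rename value as serverHost | table serverHost count
    SPL (timeseries query): | dataset method=timeseries search="serverHost=*" function="rate" buckets=24 createsummaries=false onlyusesummaries=false | spath | timechart values(rate) as rate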

    + +
    +
    + + + 1. Base Data Query: Get the raw events + + + + -4h@m + now + + + + + 10 + 50 + 1000 + 5000 + maxcount=10 + maxcount=10 + + + + All + serverHost + serverHost + + | dataset method=facet field=serverHost search="serverHost=* " +| spath +| rename value as serverHost +| table serverHost count +| sort serverHost + $myTime.earliest$ + $myTime.latest$ + + + + serverHost=* + tag + + + serverHost='$value$' + tag + + + All + All + + + + $baseQuery$ + + + + + + + SPL: | dataset method=query search="$baseQuery$" $myMaxCount$ | spath + + | dataset method=query search="$baseQuery$" $myMaxCount$ +| spath + $myTime.earliest$ + $myTime.latest$ + 1 + + + + +
    +
    +
    + + + Now let's select a field to aggregate statistics on in DataSet. (This is exponentially better performance than returning all data and using SPL to summarize.) + + + tag + status + severity + description + Application + tag + tag + + + | group count=count() by $value$" | spath | table $value$ count + | spath | rename value as $value$ | table $value$ count + + + + + + + + 2. PowerQuery: Aggregate in DataSet and display in Splunk! + + + $basePowerQuery$ + + + + + + + SPL: | dataset method=powerquery search="$baseQuery$ $basePowerQuery$ + + | dataset method=powerquery search="$baseQuery$ $basePowerQuery$ + + $myTime.earliest$ + $myTime.latest$ + + + + + + + + + + 3. Facet Query: Aggregate in DataSet, Facet by a specific field and display in Splunk! + + + $baseFacetQuery$ + + + + + + + + SPL: | dataset method=facet field=$myTag$ search="$baseQuery$" $baseFacetQuery$ + + | dataset method=facet field=$myTag$ search="$baseQuery$" $baseFacetQuery$ + + $myTime.earliest$ + $myTime.latest$ + + + + + + + + + + 4. Timeseries Query: This will calculate numeric values over time. + + + $baseTimeseriesQuery$ + + + + + + + + SPL: | dataset method=timeseries search="$baseQuery$" function="rate" buckets=24 createsummaries=false onlyusesummaries=false $baseTimeseriesQuery$ + + | dataset method=timeseries search="$baseQuery$" function="rate" buckets=24 createsummaries=false onlyusesummaries=false $baseTimeseriesQuery$ + $myTime.earliest$ + $myTime.latest$ + + + + + + + + + + \ No newline at end of file diff --git a/TA_dataset/default/data/ui/views/ingestion_summary.xml b/TA_dataset/default/data/ui/views/ingestion_summary.xml new file mode 100644 index 00000000..e70e3339 --- /dev/null +++ b/TA_dataset/default/data/ui/views/ingestion_summary.xml @@ -0,0 +1,79 @@ +
    + + This dashboard is provided to estimate daily ingestion for various sources of data in Splunk. + + index="_internal" source="*metrics.log" group="per_sourcetype_thruput" +| bucket _time span=1d +| eval GB=kb/1024/1024 +| stats sum(GB) as "GB Ingest" avg(GB) as "Average GB" max(GB) as "Max GB" avg(eps) as "Events per Second" by _time, series +| stats sum("GB Ingest") as "Total Ingest(GB)", avg("GB Ingest") as "Daily Avg Ingest(GB)", max("GB Ingest") as "Daily Max Ingest(GB)" by series +| eval "Total Ingest(GB)"=round('Total Ingest(GB)',4), "Daily Avg Ingest(GB)"=round('Daily Avg Ingest(GB)',4), "Daily Max Ingest(GB)"=round('Daily Max Ingest(GB)',4), "Events per Second"=round('Events per Second',4) +| rename series as sourcetype +| sort -"Total Ingest(GB)" + $myTime.earliest$ + $myTime.latest$ + 1 + +
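    If you also want the estimate as a day-by-day trend straight from the search bar, a rough standalone variant of the same metrics.log search (same per_sourcetype_thruput assumptions as the base search above; not part of the shipped dashboard) is:

    index="_internal" source="*metrics.log" group="per_sourcetype_thruput"
    | timechart span=1d sum(eval(kb/1024/1024)) as "GB Ingest" by series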
    + + + + -7d@h + now + + +
    + + + + + + + + + + Ingestion by sourcetype + + + + $myTime.earliest$ + $myTime.latest$ + 1 + + + + + + + + + + + + + $myTime.earliest$ + $myTime.latest$ + 1 + + + + + + + + + +
    +
    +
    +
    \ No newline at end of file diff --git a/TA_dataset/default/data/ui/views/sentinelone_use_case_query_examples.xml b/TA_dataset/default/data/ui/views/sentinelone_use_case_query_examples.xml new file mode 100644 index 00000000..71232311 --- /dev/null +++ b/TA_dataset/default/data/ui/views/sentinelone_use_case_query_examples.xml @@ -0,0 +1,88 @@ +
    + +
    + + + Notes + + This dashboard uses example searches from SentinelOne at https://support.sentinelone.com/hc/en-us/articles/360057861574-Use-Case-Query-Example + + + + + + Queries + + + + -4h@m + now + + + + + A non-Windows process writes files to the temp directory + Rundll or Regsvr executes a script + Bat or cmd files are dropped directly to a temp folder + A non-Windows process injects to a Windows process + LOLBins command processors masquerade under a different name and path + Rundll or Regsvr run content from a remote server + Suspicious Powershell with base64 in the commandline + New unsigned DLL is dropped in the Windows directory (possible DLL hijack attempt) + NTDS Copy + Removal of indicators on Host + Suspicious data compression + Allow SMB and RDP on Defender Firewall + Unmanaged Powershell + Signed Binary Proxy Execution: mshta + Signed Binary Proxy Execution: regsvr32 + Signed Binary Proxy Execution: Rundll32 + Powershell Unnecessary Escaping + Signed Binary Proxy Execution: CMSTP + DHCP CalloutDLL + os.name matches '^Windows' AND event.category = 'file' AND tgt.file.path contains 'temp' AND tgt.file.isExecutable = 'true' AND src.process.verifiedStatus != 'verified' AND src.process.publisher != 'MICROSOFT WINDOWS' + os.name matches '^Windows' AND event.category = 'file' AND tgt.file.path contains 'temp' AND tgt.file.isExecutable = 'true' AND src.process.verifiedStatus != 'verified' AND src.process.publisher != 'MICROSOFT WINDOWS' + + $label$ + + + + + + + + + + + + Use Case: $useCase$
    + SentinelOne Search: $baseQuery$ + +
    +
    + + + + SPL: | dataset method=powerquery search="$baseQuery$ | columns endpoint.name | group count=count() by endpoint.name" | spath | table endpoint.name, count + + | dataset account=xdr method=powerquery search="$baseQuery$ | columns endpoint.name | group count=count() by endpoint.name" | spath | table endpoint.name, count + $myTime.earliest$ + $myTime.latest$ + 1 + + + +
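    To make the token plumbing concrete, here is roughly what the panel search expands to when the first use case (a non-Windows process writes files to the temp directory) is selected; the account=xdr value is whatever account name you configured for the add-on:

    | dataset account=xdr method=powerquery search="os.name matches '^Windows' AND event.category = 'file' AND tgt.file.path contains 'temp' AND tgt.file.isExecutable = 'true' AND src.process.verifiedStatus != 'verified' AND src.process.publisher != 'MICROSOFT WINDOWS' | columns endpoint.name | group count=count() by endpoint.name" | spath | table endpoint.name, count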
    +
    +
    +
    \ No newline at end of file diff --git a/TA_dataset/default/data/ui/views/soc_search_examples.xml b/TA_dataset/default/data/ui/views/soc_search_examples.xml new file mode 100644 index 00000000..f8891e4b --- /dev/null +++ b/TA_dataset/default/data/ui/views/soc_search_examples.xml @@ -0,0 +1,131 @@ + + + + + + + + + + + + Network Recon + + + | dataset account=xdr method=powerquery search="src.process.user = * (net_ipsubnet(dst.ip.address, '172.0.0.0/16') OR net_ipsubnet(dst.ip.address, '10.0.0.0/8')) NOT (net_ipsubnet(src.ip.address, '10.0.0.0/24')) +| group uniqueDestinations = estimate_distinct(dst.ip.address), uniquePorts = estimate_distinct(dst.port.number), fullPortList = array_sort(array_agg_distinct(dst.port.number)), dstList = array_sort(array_agg_distinct(dst.ip.address)) by src.ip.address, src.process.user +| filter (uniqueDestinations > 5) AND (uniquePorts > 5) +| let topPortList = array_slice(fullPortList, 0, 20) +| columns src.ip.address, src.process.user, uniqueDestinations, uniquePorts, dstList, fullPortList, topPortList" +| spath +| table src.ip.address, src.process.user, uniqueDestinations, uniquePorts, dstList, fullPortList, topPortList + -15m + now + + + + https://xdr.us1.sentinelone.net/query?filter=src.process.user+%3D+*+%28net_ipsubnet%28dst.ip.address%2C+%27172.0.0.0%2F16%27%29+OR+net_ipsubnet%28dst.ip.address%2C+%2710.0.0.0%2F8%27%29%29+NOT+%28net_ipsubnet%28src.ip.address%2C+%2710.0.0.0%2F24%27%29%29%0A%7C+group+uniqueDestinations+%3D+estimate_distinct%28dst.ip.address%29%2C+uniquePorts+%3D+estimate_distinct%28dst.port.number%29%2C+fullPortList+%3D+array_sort%28array_agg_distinct%28dst.port.number%29%29%2C+dstList+%3D+array_sort%28array_agg_distinct%28dst.ip.address%29%29+by+src.ip.address%2C+src.process.user%0A%7C+filter+%28uniqueDestinations+%3E+1%29+AND+%28uniquePorts+%3E+1%29%0A%7C+let+topPortList+%3D+array_slice%28fullPortList%2C+0%2C+20%29%0A%7C+columns+src.ip.address%2C+src.process.user%2C+uniqueDestinations%2C+uniquePorts%2C+dstList%2C+fullPortList%2C+topPortList%0A%2F%2F+src+user+is+not+null%2C+src+and+dst+IPs+are+within+defined+subnets%0A%2F%2F+get+distinct+count+of+dst+IPs+and+ports%2C+plus+arrays+of+dst+IPs+%28sorted%29+and+ports+for+each+src+IP+and+user%0A%2F%2F+filter+unique+destinations+and+ports+to+a+high+number%2C+in+this+case+a+static+number+of+1+for+testing%0A%2F%2F+create+a+2nd+shorter+array+of+ports+only+showing+the+first+20%0A%2F%2F+order+columns&teamEmails=-&view=xdr&startTime=10+min + +
    +
    +
    + + + Impossible Traveler + + + | dataset account=xdr method=powerquery search="//src.process.user = * src.ip.address = * +//| group first_ip = oldest(src.ip.address), last_ip = newest(src.ip.address) by src.process.user +| limit 1 +| let src.process.user = 'Matt Balcer', first_ip='87.203.45.78', last_ip='98.24.6.8' +| let first_location = geo_ip_location(first_ip), last_location = geo_ip_location(last_ip), first_country=geo_ip_country(first_ip), last_country=geo_ip_country(last_ip) +| let kilometers = geo_distance(first_location, last_location) +| let hours=(queryend() - querystart())/1000000000/60/60 +| let speed = kilometers / hours" +| spath +| table src.process.user, first_ip, last_ip, first_country, first_location, last_country, last_location, kilometers, speed + -24h@h + now + + + + + + + + + + + + + https://xdr.us1.sentinelone.net/query?view=edr&filter=%2F%2Fsrc.process.user+%3D+*+src.ip.address+%3D+*%0A%2F%2F%7C+group+first_ip+%3D+oldest%28src.ip.address%29%2C+last_ip+%3D+newest%28src.ip.address%29+by+src.process.user%0A%7C+limit+1%0A%7C+let+src.process.user+%3D+%27Matt+Balcer%27%2C+first_ip%3D%2787.203.45.78%27%2C+last_ip%3D%2798.24.6.8%27%0A%7C+let+first_location+%3D+geo_ip_location%28first_ip%29%2C+last_location+%3D+geo_ip_location%28last_ip%29%0A%7C+let+kilometers+%3D+geo_distance%28first_location%2C+last_location%29%0A%7C+let+hours%3D%28queryend%28%29+-+querystart%28%29%29%2F1000000000%2F60%2F60%0A%7C+let+speed+%3D+kilometers+%2F+hours%0A%7C+filter+speed+%3E+500%0A%2F%2F+logic%3A+get+IP+addresses%2C+get+geo+locations%2C+determine+distance%2C+convert+timestamps+from+nano+epoch+to+delta+in+hours%2C+then+filter+to+speed+%3E+500km%0A%2F%2F+usage%3A+for+real+use%2C+uncomment+lines+1%2C2+and+remove+lines+3%2C4&startTime=4+hours + +
    +
    +
    + + + Network Traffic Off-Hours + + + | dataset account=xdr method=powerquery search="bytes.sent = * +| let time_hour = number(strftime(timestamp, '%H')) +| group bytes=sum(bytes.sent), hour = oldest(time_hour) by timebucket('1h') +| let gb = (bytes/1024/1024/1024) +| filter gb > 0 AND (hour <= 9 OR hour >= 18) +// logic: get numeric 2-digit hour from timestamp, group bytes per hour, convert to gb, then filter to volume and hours of day +// usage: change the last filter line to anomalous gb traffic and adjust to business hours" +| spath +| table hour, gb + -24h@h + now + 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + https://xdr.us1.sentinelone.net/query?filter=bytes.sent+%3D+*%0A%7C+let+time_hour+%3D+number%28strftime%28timestamp%2C+%27%25H%27%29%29%0A%7C+group+bytes%3Dsum%28bytes.sent%29%2C+hour+%3D+oldest%28time_hour%29+by+timebucket%28%271h%27%29%0A%7C+let+gb+%3D+%28bytes%2F1024%2F1024%2F1024%29%0A%7C+filter+gb+%3E+0+AND+%28hour+%3C%3D+9+OR+hour+%3E%3D+18%29%0A%2F%2F+logic%3A+get+numeric+2-digit+hour+from+timestamp%2C+group+bytes+per+hour%2C+convert+to+gb%2C+then+filter+to+volume+and+hours+of+day%0A%2F%2F+usage%3A+change+the+last+filter+line+to+anomalous+gb+traffic+and+adjust+to+business+hours&startTime=24+hours&view=xdr + + + + +
    \ No newline at end of file diff --git a/TA_dataset/default/data/ui/views/splunk_app_usage.xml b/TA_dataset/default/data/ui/views/splunk_app_usage.xml new file mode 100644 index 00000000..347e9088 --- /dev/null +++ b/TA_dataset/default/data/ui/views/splunk_app_usage.xml @@ -0,0 +1,350 @@ +
    + + + InfoSec_App_for_Splunk,Splunk_Security_Essentials,lookup_editor,splunk_monitoring_console,splunk_secure_gateway,splunk_instrumentation,Splunk_SA_CIM + + + +index=_internal sourcetype=splunk_web_access host=* user=* +| rex field=uri_path ".*/(?<title>[^/]*)$" +| join title +[| rest /servicesNS/-/-/data/ui/views splunk_server=* +| search isDashboard=1 isVisible=1 +| rename eai:acl.app as app +| fields title app ] +| rename title as dashboard +| search NOT app IN($myExcludedApps$) +| stats count by _time user app dashboard host + + $myTime.earliest$ + $myTime.latest$ + + + | rest /servicesNS/-/-/data/ui/views + +``` get fields we want from all the dashboards for all the apps ``` +| fields eai:acl.app label id eai:data +| rename eai:acl.app as app_name, eai:data as xml_source, label as dashboard_title + +``` now separate all the panels to individual events (rows) ``` +| rex field=id "http(s)?://([^/]+/)+(?<file_name>[^/]+)" +| spath input=xml_source output=panel path=form.row.panel +| mvexpand panel +| fields app_name dashboard_title file_name panel xml_source +| search panel != "" + +``` now identify the chart types, searches and drilldown searches for each panel ``` +| spath input=panel output=panel_title path=title +| xpath field=panel outfield=query "//*/search/query" +| xpath field=panel outfield=drilldown "//*/drilldown/*/link" +| rex field=panel "\s+\<(?<panel_type>[^\<]+)\>" + +``` filter for the types of panels that typically have searches ``` +| search panel_type IN ("chart","single","table","event","search","viz","map") +| search NOT app_name IN($myExcludedApps$) + +``` total, sort and print the results ``` +| eventstats count as total_panels, dc(dashboard_title) as total_dashboards +| eventstats dc(dashboard_title) as dashboards_in_app by app_name +| eventstats count as panels_on_dashboard by app_name, dashboard_title +| sort app_name file_name dashboard_title panel_title +| table app_name dashboards_in_app file_name dashboard_title panels_on_dashboard panel_title panel_type query drilldown + $myTime.earliest$ + $myTime.latest$ + +
    + + + + -24h@h + now + + + + + All + app_name + app_name + + | stats count by app_name + + * + * + + + * + + + + + + app_name + app_name + + | stats count by app_name + + InfoSec_App_for_Splunk,Splunk_Security_Essentials,lookup_editor,splunk_monitoring_console,splunk_secure_gateway,splunk_instrumentation,Splunk_SA_CIM + InfoSec_App_for_Splunk,Splunk_Security_Essentials,lookup_editor,splunk_monitoring_console,splunk_secure_gateway,splunk_instrumentation,Splunk_SA_CIM + Splunk_SA_CIM + splunk_instrumentation + , + + + + All + dashboard_title + file_name + + | search app_name = "$myApp$" | stats count by file_name, dashboard_title + + * + * + +
    + + + + + + + + + + Applications + + + | search app_name = "$myApp$" file_name="$myDashboard$" +| stats dc(app_name) + + + + + + + + Interesting Dashboards + + + | search app_name = "$myApp$" file_name="$myDashboard$" +| stats dc(dashboard_title) + + + + + + + + Interesting Panels + + + | search app_name = "$myApp$" file_name="$myDashboard$" +| stats count + + + + + + + + + + Applications + + + | search app_name = "$myApp$" +| dedup app_name +| table app_name + + + + + * + $row.app_name$ + +
    +
    + + Interesting Dashboards + + + | search app_name = "$myApp$" file_name="$myDashboard$" +| dedup app_name, file_name dashboard_title +| table app_name file_name dashboard_title + + + + + $row.file_name$ + $row.app_name$ + +
    +
    + + Interesting Panels + + + | search app_name = "$myApp$" file_name="$myDashboard$" +| dedup panel_title +| table panel_title + + + +
    +
    +
    + + + Splunk Usage + + What are your users accessing most often? + + + + + + Most Used SPL Commands + + + index=_audit action=search info=completed search=* NOT "search_id='scheduler" NOT "search=|history" NOT "user=splunk-system-user" NOT "search='typeahead" NOT "search='| metadata type=* | search totalCount>0" app="*" +| fields search +| rex field=search "\|\s*(?<command>\w+)\s(?<attributes>[^|]*)" max_match=1000 +| mvexpand command +| search command!="" +| stats count as "execution", distinct_count(search) as "distinct_searches" by command +| sort -distinct_search, -execution +| rename command as "SPL Command" + $myTime.earliest$ + $myTime.latest$ + +
    +
    + + Most Viewed Dashboards + + + | search app = "$myApp$" dashboard = "$myDashboard$" +| stats count as Views dc(user) as Users by app, dashboard +| sort -Views + + + + $row.dashboard$ + $row.app$ + +
    +
    + + Most Active Users + + + | stats count by user +| sort -count + $myTime.earliest$ + $myTime.latest$ + + +
    +
    +
    + + + Individual Usage by User + + + | search app = "$myApp$" dashboard = "$myDashboard$" + + + + + + + + + +
    +
    +
    + + + Splunk Searches + + What are the underlying Splunk Searches for the panels on the selected dashboards? + + + + + + Dashboard Panels and Searches + + + | search app_name = "$myApp$" file_name="$myDashboard$" + + + +
    +
    +
    + + + Macros + + + | rest /servicesNS/-/-/data/macros +| fields eai:acl.app title definition args +| rename eai:acl.app as app_name, title as macro_name +| search definition != "()" +| search app_name = "$myApp$" (NOT app_name IN ($myExcludedApps$)) +| table app_name macro_name args definition + $myTime.earliest$ + $myTime.latest$ + + + +
    +
    +
    + + + Saved Searches and Alerts + + + +| rest /servicesNS/-/-/saved/searches + +``` get fields we want from all the dashboards for all the apps ``` +| fields eai:acl.app eai:data title search display.visualizations.type cron_schedule alert_type alert_comparator alert_threshold alert_condition +| rename eai:acl.app as app_name, display.visualizations.type as viz + +| search NOT app_name IN(Splunk_Security_Essentials,lookup_editor,splunk_monitoring_console,splunk_secure_gateway,splunk_instrumentation,Splunk_SA_CIM) + +``` total, sort and print the results ``` +| eventstats count as total_searches +| eventstats dc(searches) as searches_in_app by app_name +| sort app_name title +| table app_name title search viz cron_schedule alert_type alert_comparator alert_threshold alert_condition +| search app_name = "*" search="*" +| dedup app_name, title, search + + $myTime.earliest$ + $myTime.latest$ + + + +
    +
    +
    +
    \ No newline at end of file