From 5f6beba9cc18dc0a9290c4342961c4ade1c65ac3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kiewicz <krzysztof@quesma.com>
Date: Wed, 12 Jun 2024 13:23:08 +0200
Subject: [PATCH] Cleanup

---
 bin/save_test.py                            | 14 ++--
 docker/mitmproxy/test_saver.py              | 82 ++++++---------------
 quesma/queryparser/query_translator_test.go |  7 +-
 quesma/tests/end2end/e2e_test.go            | 79 +++++++++++++-------
 quesma/tests/end2end/http.go                | 45 -----------
 quesma/tests/end2end/http_request_parser.go | 14 +++-
 quesma/util/utils.go                        | 15 ++--
 7 files changed, 104 insertions(+), 152 deletions(-)
 delete mode 100644 quesma/tests/end2end/http.go

diff --git a/bin/save_test.py b/bin/save_test.py
index 261ac2189..716fbcdbb 100755
--- a/bin/save_test.py
+++ b/bin/save_test.py
@@ -3,6 +3,10 @@
 import os
 import shutil
 
+# Usage: usually just run this script without any arguments, it will save all requests
+# If you want to save only some requests, use flags: -f, -l, -s, -e
+# More info about flags: ./bin/save_test.py -h
+
 TESTS_SRC_DIR = "docker/mitmproxy/requests/"
 TESTS_DST_DIR = "quesma/tests/end2end/testcases/"
 
@@ -32,7 +36,7 @@ def save_test(test_nrs: list[str]):
         cur_dst_nr += 1
         copied_test_nrs += [int(cur_src_nr)]
 
-    print(f"Tests {copied_test_nrs} saved in {suite_dir}/")
+    print(f"Requests {copied_test_nrs} saved in {suite_dir}/")
 
 
 def get_requests_to_save(requests_available: list[int], args: dict[str, any]) -> list[str]:
@@ -66,14 +70,10 @@ def parse_arguments():
     return vars(ap.parse_args())
 
 
-def main():
+if __name__ == "__main__":
     args = parse_arguments()
     requests_available = sorted([int(file.name[:-5])
                                  for file in os.scandir(TESTS_SRC_DIR)
                                  if file.is_file() and file.name.endswith(".http") and file.name[:-5].isdigit()])
-    print("Available tests:", requests_available, "\n")
+    print("Available test requests:", requests_available, "\n")
     save_test(get_requests_to_save(requests_available, args))
-
-
-if __name__ == "__main__":
-    main()
diff --git a/docker/mitmproxy/test_saver.py b/docker/mitmproxy/test_saver.py
index ad54577d5..f7c30a214 100644
--- a/docker/mitmproxy/test_saver.py
+++ b/docker/mitmproxy/test_saver.py
@@ -1,87 +1,48 @@
 import glob
 import os
 import json
-from mitmproxy import http
 from urllib.parse import ParseResult, urlparse
-from typing import BinaryIO
+from threading import Lock
 
 from mitmproxy import http
 from mitmproxy import io
-import query
 
-LOG_FILE_PREFIX = "/var/mitmproxy/requests/"
-MITM_FILE = os.path.join(LOG_FILE_PREFIX, "requests.mitm")
-TXT_FILE = os.path.join(LOG_FILE_PREFIX, "requests.txt")
+LOG_DIR = "/var/mitmproxy/requests/"
 
 
 class Writer:
-    def __init__(self, path: str) -> None:
-        self.f: BinaryIO = open(path, "ab")
-        self.w = io.FlowWriter(self.f)
-        self.req_nr = 1
-        for file in glob.glob(os.path.join(LOG_FILE_PREFIX, '*.http')):
+    def __init__(self) -> None:
+        self.w = io.FlowWriter()
+        self.req_nr = 0
+        self.lock = Lock() # only for self.req_nr
+        # clean requests on (re)start
+        for file in glob.glob(os.path.join(LOG_DIR, '*.http')):
             os.remove(file)
 
     def response(self, flow: http.HTTPFlow) -> None:
         self.w.add(flow)
 
-    def done(self):
-        self.f.close()
-
-
-writer = Writer(MITM_FILE)
-
 
-def parse_json_body(index_name, method, body, ofile):
-    try:
-        json_body = json.loads(body)
-        if 'query' in json_body:
-            query.parsed_query_json(index_name, method, json_body['query'])
-            query_body = json_body['query']
-            filter_only = ('bool' in query_body and 'filter' in query_body['bool'])
-            for field in ['must', 'must_not', 'should']:
-                if field in query_body:
-                    if len(query_body[field]) > 0:
-                        filter_only = False
-            if filter_only:
-                ofile.write(b"\n Query filter:\n")
-                query_body = query_body['bool']['filter']
-            else:
-                ofile.write(b"\n Query:\n")
+writer = Writer()
 
-            ofile.write(json.dumps(query_body, indent=2).encode())
-    except:
-        pass
 
+def record_request(flow: http.HTTPFlow) -> None:
+    with writer.lock:
+        writer.req_nr += 1
+        cur_req_nr = writer.req_nr
 
-def record_request(index_name, method, flow: http.HTTPFlow) -> None:
-    with open(os.path.join(LOG_FILE_PREFIX, str(writer.req_nr) + '.http'), "ab") as ofile:
-        writer.req_nr += 1 # TODO add atomic
+    with open(os.path.join(LOG_DIR, str(cur_req_nr) + '.http'), "ab") as ofile:
         url = urlparse(flow.request.url)
-        trimmed_url = ParseResult('', '', *url[2:]).geturl()
-        print(trimmed_url)
-        ofile.write((trimmed_url + "\n").encode())
+        trimmed_url_to_save = ParseResult('', '', *url[2:]).geturl() # save only e.g. /(index/)/_search
+        ofile.write((trimmed_url_to_save + "\n").encode())
 
         body = flow.request.content.decode('utf-8')
         body_json = json.loads(body)
         ofile.write(json.dumps(body_json, indent=4).encode())
-        # parse_json_body(index_name, method, body, ofile)
 
     writer.response(flow)
 
 
-def extract_index_name(parsed_url, method):
-    result = parsed_url.path[:-len(method)]
-    if result.startswith('/'):
-        result = result[1:]
-
-    result = result.replace('*', 'X')  # For convience, replace wildcard with X
-
-    if len(result) == 0:
-        return 'default'
-    return result
-
-
 def request(flow: http.HTTPFlow) -> None:
     parsed_url = urlparse(flow.request.url)
     url_path = parsed_url.path
@@ -89,8 +50,11 @@ def request(flow: http.HTTPFlow) -> None:
 
     for method in search_methods:
         if url_path.endswith(method):
-            index_name = extract_index_name(parsed_url, method)
-            print(index_name)
-            if len(index_name) > 0 and index_name[0] != ".":
-                record_request(index_name, method, flow)
+            # so far we skip requests with prefixes: . and /.
+            # maybe that's to be changed
+            if len(url_path) > 0 and url_path[0] == '.':
+                break
+            if len(url_path) > 1 and url_path[:2] == '/.':
+                break
+            record_request(flow)
             break
diff --git a/quesma/queryparser/query_translator_test.go b/quesma/queryparser/query_translator_test.go
index 2cd8644e2..17f77328c 100644
--- a/quesma/queryparser/query_translator_test.go
+++ b/quesma/queryparser/query_translator_test.go
@@ -3,10 +3,8 @@ package queryparser
 import (
 	"context"
 	"encoding/json"
-	"github.com/k0kubun/pp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-	"math"
 	"mitmproxy/quesma/clickhouse"
 	"mitmproxy/quesma/concurrent"
 	"mitmproxy/quesma/model"
@@ -156,7 +154,7 @@ func TestMakeResponseSearchQuery(t *testing.T) {
 			)
 			ourResponse, err := ourResponseRaw.Marshal()
 			assert.NoError(t, err)
-			actualMinusExpected, expectedMinusActual, err := util.JsonDifference(string(ourResponse), args[i].elasticResponseJson, false, false, math.MaxInt)
+			actualMinusExpected, expectedMinusActual, err := util.JsonDifferenceWeak(string(ourResponse), args[i].elasticResponseJson)
 			if err != nil {
 				t.Error(err)
 			}
@@ -455,8 +453,7 @@ func TestMakeResponseAsyncSearchQuery(t *testing.T) {
 			ourResponseBuf, err2 := ourResponse.Marshal()
 			assert.NoError(t, err2)
 
-			actualMinusExpected, expectedMinusActual, err := util.JsonDifference(string(ourResponseBuf), args[i].elasticResponseJson, false, false, math.MaxInt)
-			pp.Println(actualMinusExpected, expectedMinusActual)
+			actualMinusExpected, expectedMinusActual, err := util.JsonDifferenceWeak(string(ourResponseBuf), args[i].elasticResponseJson)
 			assert.NoError(t, err)
 
 			acceptableDifference := []string{"sort", "_score", "_version"}
diff --git a/quesma/tests/end2end/e2e_test.go b/quesma/tests/end2end/e2e_test.go
index 1dc57ebf6..92a33bdc4 100644
--- a/quesma/tests/end2end/e2e_test.go
+++ b/quesma/tests/end2end/e2e_test.go
@@ -1,40 +1,36 @@
 package end2end
 
 import (
-	"fmt"
+	"bytes"
 	"github.com/k0kubun/pp"
+	"io"
 	"mitmproxy/quesma/util"
+	"net/http"
 	"testing"
 )
 
-const index = "kibana_sample_data_logs"
-
-type singleE2ETest struct {
-	name        string
-	requestBody string
-	urlSuffix   string // without "http://name:port", so /index-pattern/...
-}
-
-var httpClient = newHttpClient()
+var httpClient = http.Client{}
 
 // useful if you want to debug one single request
 func TestE2ESingleRequest(t *testing.T) {
-	const testSuite = "1"
-	const testNr = "87"
 	t.Skip("It fails now, there are differences in output for every testcase")
+	const testSuite = "1"
+	const testNr = "1"
 	e2eRunSingleTest(t, testSuite, testNr)
 }
 
 // useful if you want to debug one single test suite
 func TestE2ESingleSuite(t *testing.T) {
-	const testSuite = "1"
 	t.Skip("It fails now, there are differences in output for every testcase")
+	const testSuite = "1"
 	e2eRunSingleSuite(t, testSuite)
 }
 
 // all tests
 func TestE2EAll(t *testing.T) {
+	t.Skip("It fails now, there are differences in output for every testcase")
 	parser := httpRequestParser{}
+
 	testSuites, err := parser.getAllTestSuites()
 	if err != nil {
 		t.Error(err)
@@ -53,34 +49,42 @@ func e2eRunSingleTest(t *testing.T, testSuite, testNr string) {
 	if err != nil {
 		t.Error(err)
 	}
-	fmt.Println(test.urlSuffix, test.requestBody)
-	elasticResponseString, err := httpClient.sendRequestToElastic(test.urlSuffix, test.requestBody)
+
+	elasticResponse, err := sendRequestToElastic(test.urlSuffix, test.requestBody)
 	if err != nil {
-		pp.Println(err)
+		_, _ = pp.Println(err)
+		t.Error(err)
 	}
+	// possibly useful for debugging
 	// elasticResp, err := types.ParseJSON(elasticResponseString)
 	// pp.Println(elasticResp)
-
 	// pp.Println("elastic hits:", elasticResp["hits"])
 
-	quesmaResponseString, _ := httpClient.sendRequestToQuesma(test.urlSuffix, test.requestBody)
+	quesmaResponse, _ := sendRequestToQuesma(test.urlSuffix, test.requestBody)
+	if err != nil {
+		_, _ = pp.Println(err)
+		t.Error(err)
+	}
+	// possibly useful for debugging
 	// quesmaResp, _ := types.ParseJSON(quesmaResponseString)
+	// pp.Println(quesmaResp)
 	// pp.Println("quesma hits", quesmaResp["hits"])
 
 	elasticMinusQuesma, quesmaMinusElastic, err := util.JsonDifference(
-		elasticResponseString, quesmaResponseString, true, true, 5)
+		elasticResponse, quesmaResponse, true, true, 5)
 
+	// first print all errors, only then fail the test
 	if err != nil {
-		pp.Println(err)
+		_, _ = pp.Println(err)
 	}
 	// maybe change below to
 	// assert.True(t, util.AlmostEmpty(actualMinusExpected, acceptableDifference))
 	// assert.True(t, util.AlmostEmpty(expectedMinusActual, acceptableDifference))
 	if len(elasticMinusQuesma) != 0 {
-		pp.Println("elasticMinusQuesma", elasticMinusQuesma)
+		_, _ = pp.Println("Present in Elastic response, but not in Quesma:", elasticMinusQuesma)
 	}
 	if len(quesmaMinusElastic) != 0 {
-		pp.Println("quesmaMinusElastic", quesmaMinusElastic)
+		_, _ = pp.Println("Present in Quesma response, but not in Elastic:", quesmaMinusElastic)
 	}
 
 	if err != nil {
@@ -100,14 +104,39 @@ func e2eRunSingleSuite(t *testing.T, testSuite string) {
 		t.Error(err)
 	}
 	if len(tests) == 0 {
-		t.Error("no tests found")
+		t.Error("no tests found for suite:", testSuite)
 	}
 
-	t.Skip("It fails now, there are differences in output for every testcase")
 	for _, test := range tests {
 		t.Run(testSuite+"/"+test.name, func(t *testing.T) {
-			fmt.Println("running test", test.name)
 			e2eRunSingleTest(t, testSuite, test.name)
 		})
 	}
 }
+
+func sendPost(url, body string) (string, error) {
+	req, err := http.NewRequest("POST", url, bytes.NewBuffer([]byte(body)))
+	if err != nil {
+		return "", err
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	response, err := io.ReadAll(resp.Body)
+	return string(response), err
+}
+
+func sendRequestToElastic(urlSuffix, body string) (string, error) {
+	const urlPrefix = "http://localhost:9202"
+	return sendPost(urlPrefix+urlSuffix, body)
+}
+
+func sendRequestToQuesma(urlSuffix, body string) (string, error) {
+	const urlPrefix = "http://localhost:8080"
+	return sendPost(urlPrefix+urlSuffix, body)
+}
diff --git a/quesma/tests/end2end/http.go b/quesma/tests/end2end/http.go
deleted file mode 100644
index 183639255..000000000
--- a/quesma/tests/end2end/http.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package end2end
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-	"net/http"
-)
-
-type HttpClient struct {
-	client *http.Client
-}
-
-func newHttpClient() HttpClient {
-	return HttpClient{&http.Client{}}
-}
-
-func (cli *HttpClient) sendPost(url, body string) (string, error) {
-	fmt.Println("URL:>", url)
-
-	req, err := http.NewRequest("POST", url, bytes.NewBuffer([]byte(body)))
-	req.Header.Set("Content-Type", "application/json")
-
-	resp, err := cli.client.Do(req)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	fmt.Println("response Status:", resp.Status)
-	fmt.Println("response Headers:", resp.Header)
-
-	response, err := io.ReadAll(resp.Body)
-	return string(response), err
-}
-
-func (cli *HttpClient) sendRequestToElastic(urlSuffix, body string) (string, error) {
-	const urlPrefix = "http://localhost:9202"
-	return cli.sendPost(urlPrefix+urlSuffix, body)
-}
-
-func (cli *HttpClient) sendRequestToQuesma(urlSuffix, body string) (string, error) {
-	const urlPrefix = "http://localhost:8080"
-	return cli.sendPost(urlPrefix+urlSuffix, body)
-}
diff --git a/quesma/tests/end2end/http_request_parser.go b/quesma/tests/end2end/http_request_parser.go
index 3fa740f55..a83c92a22 100644
--- a/quesma/tests/end2end/http_request_parser.go
+++ b/quesma/tests/end2end/http_request_parser.go
@@ -2,12 +2,18 @@ package end2end
 
 import (
 	"bufio"
-	"fmt"
 	"os"
 	"strings"
 )
 
-type httpRequestParser struct{}
+type (
+	httpRequestParser struct{}
+	singleE2ETest     struct {
+		name        string
+		requestBody string
+		urlSuffix   string // without "http://name:port", so /index-pattern/...
+	}
+)
 
 const testCasesDir = "testcases/"
 
@@ -16,7 +22,6 @@ func (p *httpRequestParser) getSingleTest(testSuite, testNr string) (*singleE2ET
 	if err != nil {
 		return nil, err
 	}
-	fmt.Println(file.Name(), err)
 	defer file.Close()
 
 	scanner := bufio.NewScanner(file)
@@ -46,6 +51,9 @@ func (p *httpRequestParser) getSingleTestSuite(testSuite string) (tests []*singl
 		test, err := p.getSingleTest(testSuite, testNr)
 		if err == nil {
 			tests = append(tests, test)
+		} else {
+			// to be on the safe side, we shouldn't have any errors here
+			return nil, err
 		}
 	}
 	return
diff --git a/quesma/util/utils.go b/quesma/util/utils.go
index 1f8eff02d..81adeeee2 100644
--- a/quesma/util/utils.go
+++ b/quesma/util/utils.go
@@ -82,11 +82,12 @@ func JsonToMap(jsonn string) (JsonMap, error) {
 
 // MapDifference returns pair of maps with fields that are present in one of input maps and not in the other
 // specifically (mActual - mExpected, mExpected - mActual)
+// * mActual - uses JsonMap fully: values are []JsonMap, or JsonMap, or base types
+// * mExpected - value can also be []any, because it's generated from Golang's json.Unmarshal
 // * compareBaseTypes - if true, we compare base type values as well (e.g. if mActual["key1"]["key2"] == 1,
 // and mExpected["key1"]["key2"] == 2, we say that they are different)
 // * compareFullArrays - if true, we compare entire arrays, if false just first element ([0])
-// * mActual - uses JsonMap fully: values are []JsonMap, or JsonMap, or base types
-// * mExpected - value can also be []any, because it's generated from Golang's json.Unmarshal
+// * limitArrayIndices - how many elements of arrays to compare, set e.g. to math.MaxInt to compare all
 func MapDifference(mActual, mExpected JsonMap, compareBaseTypes, compareFullArrays bool, limitArrayIndices int) (JsonMap, JsonMap) {
 	// We're adding 'mapToAdd' to 'resultDiff' at the key keysNested + name (`keysNested` is a list of keys, as JSONs can be nested)
 	// (or before, if such map doesn't exist on previous nested levels)
@@ -103,10 +104,6 @@ func MapDifference(mActual, mExpected JsonMap, compareBaseTypes, compareFullArra
 		cur[name] = mapToAdd
 	}
 
-	if limitArrayIndices == -1 {
-		limitArrayIndices = math.MaxInt
-	}
-
 	var descendRec func(_, _, _, _ JsonMap, _ []string)
 
 	// 1. Add values that are present in 'mActualCur' but not in 'mExpectedCur' to 'resultDiffThis'
@@ -159,7 +156,7 @@ func MapDifference(mActual, mExpected JsonMap, compareBaseTypes, compareFullArra
 				if !compareFullArrays {
 					lenActual, lenExpected = min(1, lenActual), min(1, lenExpected)
 				}
-				for i := 0; i < min(min(lenActual, lenExpected), limitArrayIndices); i++ {
+				for i := 0; i < min(lenActual, lenExpected, limitArrayIndices); i++ {
 					// Code below doesn't cover 100% of cases. But covers all we see, so I don't want to
 					// waste time improving it until there's need for it.
 					// Assumption: elements of arrays are maps or base types. From observations, it's always true.
@@ -188,7 +185,7 @@ func MapDifference(mActual, mExpected JsonMap, compareBaseTypes, compareFullArra
 				}
 				lenActual, lenExpected := len(vActualTyped), len(vExpectedArr)
 
-				for i := 0; i < min(min(lenActual, lenExpected), limitArrayIndices); i++ {
+				for i := 0; i < min(lenActual, lenExpected, limitArrayIndices); i++ {
 					// Code below doesn't cover the case, when elements of arrays are subarrays.
 					// But it should return that they are different, so it should be fine - we should notice that.
 					actualArrElementAsMap, okActualAsMap := vActualTyped[i].(JsonMap)
@@ -257,6 +254,8 @@ func JsonDifferenceCompareEverything(jsonActual, jsonExpected string) (JsonMap,
 	return JsonDifference(jsonActual, jsonExpected, true, true, math.MaxInt)
 }
 
+// JsonDifferenceWeak returns pair of maps with fields that are present in one of input JSONs and not in the other
+// It's weak, because it doesn't compare base types, and it compares only first element of arrays
 func JsonDifferenceWeak(jsonActual, jsonExpected string) (JsonMap, JsonMap, error) {
 	return JsonDifference(jsonActual, jsonExpected, false, false, math.MaxInt)
 }