diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 90d5ed44b..d08569980 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,6 +94,12 @@ go tool pprof -http=":8000" cpu.prof go tool pprof -http=":8001" mem.prof ``` +**Additional flags for memory profiling** +- `inuse_space`: Amount of memory allocated and not released yet (default). +- `inuse_objects`: Amount of objects allocated and not released yet. +- `alloc_space`: Total amount of memory allocated (regardless of released). +- `alloc_objects`: Total amount of objects allocated (regardless of released). + More on profiling please visit https://go.dev/doc/diagnostics#profiling ## Contribution workflow diff --git a/pkg/collect/redact.go b/pkg/collect/redact.go index 18b477641..3f689bc59 100644 --- a/pkg/collect/redact.go +++ b/pkg/collect/redact.go @@ -5,7 +5,6 @@ import ( "bytes" "compress/gzip" "io" - "io/ioutil" "os" "path/filepath" "strings" @@ -17,18 +16,29 @@ import ( "k8s.io/klog/v2" ) +// Max number of concurrent redactors to run +// Ensure the number is low enough since each of the redactors +// also spawns goroutines to redact files in tar archives and +// other goroutines for each redactor spec. +const MAX_CONCURRENT_REDACTORS = 10 + func RedactResult(bundlePath string, input CollectorResult, additionalRedactors []*troubleshootv1beta2.Redact) error { wg := &sync.WaitGroup{} // Error channel to capture errors from goroutines errorCh := make(chan error, len(input)) + limitCh := make(chan struct{}, MAX_CONCURRENT_REDACTORS) + defer close(limitCh) for k, v := range input { + limitCh <- struct{}{} wg.Add(1) go func(file string, data []byte) { defer wg.Done() + defer func() { <-limitCh }() // free up after the function execution has run + var reader io.Reader if data == nil { @@ -84,7 +94,7 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors // If the file is .tar, .tgz or .tar.gz, it must not be redacted. Instead it is // decompressed and each file inside the tar redacted and compressed back into the archive. if filepath.Ext(file) == ".tar" || filepath.Ext(file) == ".tgz" || strings.HasSuffix(file, ".tar.gz") { - tmpDir, err := ioutil.TempDir("", "troubleshoot-subresult-") + tmpDir, err := os.MkdirTemp("", "troubleshoot-subresult-") if err != nil { errorCh <- errors.Wrap(err, "failed to create temp dir") return diff --git a/pkg/collect/result.go b/pkg/collect/result.go index 997de6104..efd9b7ee1 100644 --- a/pkg/collect/result.go +++ b/pkg/collect/result.go @@ -132,7 +132,7 @@ func (r CollectorResult) SaveResult(bundlePath string, relativePath string, read return errors.Wrap(err, "failed to stat file") } - klog.V(2).Infof("Added %q (%d MB) to bundle output", relativePath, fileInfo.Size()/(1024*1024)) + klog.V(2).Infof("Added %q (%d KB) to bundle output", relativePath, fileInfo.Size()/(1024)) return nil } diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index c6037630e..ad9703ca3 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -83,6 +83,11 @@ const ( MESSAGE_TEXT_PADDING = 4 MESSAGE_TEXT_LINES_MARGIN_TO_BOTTOM = 4 - // Bufio Reader Constants - MAX_BUFFER_CAPACITY = 1024 * 1024 + // This is the initial size of the buffer allocated. + // Under the hood, an array of size N is allocated in memory + BUF_INIT_SIZE = 4096 // 4KB + + // This is the muximum size the buffer can grow to + // Its not what the buffer will be allocated to initially + SCANNER_MAX_SIZE = 10 * 1024 * 1024 // 10MB ) diff --git a/pkg/redact/literal.go b/pkg/redact/literal.go index f507e9ede..9fad6f503 100644 --- a/pkg/redact/literal.go +++ b/pkg/redact/literal.go @@ -2,23 +2,26 @@ package redact import ( "bufio" + "bytes" "fmt" "io" - "strings" + + "github.com/replicatedhq/troubleshoot/pkg/constants" + "k8s.io/klog/v2" ) type literalRedactor struct { - matchString string - filePath string - redactName string - isDefault bool + match []byte + filePath string + redactName string + isDefault bool } -func literalString(matchString, path, name string) Redactor { +func literalString(match []byte, path, name string) Redactor { return literalRedactor{ - matchString: matchString, - filePath: path, - redactName: name, + match: match, + filePath: path, + redactName: name, } } @@ -28,32 +31,37 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader { go func() { var err error defer func() { - if err == io.EOF { + if err == nil || err == io.EOF { writer.Close() } else { + if err == bufio.ErrTooLong { + s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024) + klog.V(2).Info(s) + } else { + klog.V(2).Info(fmt.Sprintf("Error redacting %q: %v", path, err)) + } writer.CloseWithError(err) } }() - reader := bufio.NewReader(input) + buf := make([]byte, constants.BUF_INIT_SIZE) + scanner := bufio.NewScanner(input) + scanner.Buffer(buf, constants.SCANNER_MAX_SIZE) + lineNum := 0 - for { + for scanner.Scan() { lineNum++ - var line string - line, err = readLine(reader) - if err != nil { - return - } + line := scanner.Bytes() - clean := strings.ReplaceAll(line, r.matchString, MASK_TEXT) + clean := bytes.ReplaceAll(line, r.match, maskTextBytes) - // io.WriteString would be nicer, but scanner strips new lines - fmt.Fprintf(writer, "%s\n", clean) + // Append newline since scanner strips it + err = writeBytes(writer, clean, NEW_LINE) if err != nil { return } - if clean != line { + if !bytes.Equal(clean, line) { addRedaction(Redaction{ RedactorName: r.redactName, CharactersRemoved: len(line) - len(clean), @@ -63,6 +71,9 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader { }) } } + if scanErr := scanner.Err(); scanErr != nil { + err = scanErr + } }() return out } diff --git a/pkg/redact/multi_line.go b/pkg/redact/multi_line.go index e4476a937..ec212a5ba 100644 --- a/pkg/redact/multi_line.go +++ b/pkg/redact/multi_line.go @@ -2,10 +2,9 @@ package redact import ( "bufio" - "fmt" + "bytes" "io" "regexp" - "strings" ) type MultiLineRedactor struct { @@ -20,19 +19,19 @@ type MultiLineRedactor struct { func NewMultiLineRedactor(re1 LineRedactor, re2 string, maskText, path, name string, isDefault bool) (*MultiLineRedactor, error) { var scanCompiled *regexp.Regexp - compiled1, err := regexp.Compile(re1.regex) + compiled1, err := compileRegex(re1.regex) if err != nil { return nil, err } if re1.scan != "" { - scanCompiled, err = regexp.Compile(re1.scan) + scanCompiled, err = compileRegex(re1.scan) if err != nil { return nil, err } } - compiled2, err := regexp.Compile(re2) + compiled2, err := compileRegex(re2) if err != nil { return nil, err } @@ -48,14 +47,18 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader { writer.CloseWithError(err) }() - substStr := getReplacementPattern(r.re2, r.maskText) + substStr := []byte(getReplacementPattern(r.re2, r.maskText)) reader := bufio.NewReader(input) line1, line2, err := getNextTwoLines(reader, nil) if err != nil { // this will print 2 blank lines for empty input... - fmt.Fprintf(writer, "%s\n", line1) - fmt.Fprintf(writer, "%s\n", line2) + // Append newlines since scanner strips them + err = writeBytes(writer, line1, NEW_LINE, line2, NEW_LINE) + if err != nil { + return + } + return } @@ -66,33 +69,41 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader { // is scan is not nil, then check if line1 matches scan by lowercasing it if r.scan != nil { - lowerLine1 := strings.ToLower(line1) - if !r.scan.MatchString(lowerLine1) { - fmt.Fprintf(writer, "%s\n", line1) - line1, line2, err = getNextTwoLines(reader, &line2) + lowerLine1 := bytes.ToLower(line1) + if !r.scan.Match(lowerLine1) { + // Append newline since scanner strips it + err = writeBytes(writer, line1, NEW_LINE) + if err != nil { + return + } + line1, line2, err = getNextTwoLines(reader, line2) flushLastLine = true continue } } // If line1 matches re1, then transform line2 using re2 - if !r.re1.MatchString(line1) { - fmt.Fprintf(writer, "%s\n", line1) - line1, line2, err = getNextTwoLines(reader, &line2) + if !r.re1.Match(line1) { + // Append newline since scanner strips it + err = writeBytes(writer, line1, NEW_LINE) + if err != nil { + return + } + line1, line2, err = getNextTwoLines(reader, line2) flushLastLine = true continue } flushLastLine = false - clean := r.re2.ReplaceAllString(line2, substStr) + clean := r.re2.ReplaceAll(line2, substStr) - // io.WriteString would be nicer, but reader strips new lines - fmt.Fprintf(writer, "%s\n%s\n", line1, clean) + // Append newlines since scanner strips them + err = writeBytes(writer, line1, NEW_LINE, clean, NEW_LINE) if err != nil { return } // if clean is not equal to line2, a redaction was performed - if clean != line2 { + if !bytes.Equal(clean, line2) { addRedaction(Redaction{ RedactorName: r.redactName, CharactersRemoved: len(line2) - len(clean), @@ -106,15 +117,18 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader { } if flushLastLine { - fmt.Fprintf(writer, "%s\n", line1) + // Append newline since scanner strip it + err = writeBytes(writer, line1, NEW_LINE) + if err != nil { + return + } } }() return out } -func getNextTwoLines(reader *bufio.Reader, curLine2 *string) (line1 string, line2 string, err error) { - line1 = "" - line2 = "" +func getNextTwoLines(reader *bufio.Reader, curLine2 []byte) (line1 []byte, line2 []byte, err error) { + line2 = []byte{} if curLine2 == nil { line1, err = readLine(reader) @@ -126,7 +140,7 @@ func getNextTwoLines(reader *bufio.Reader, curLine2 *string) (line1 string, line return } - line1 = *curLine2 + line1 = curLine2 line2, err = readLine(reader) if err != nil { return @@ -134,3 +148,15 @@ func getNextTwoLines(reader *bufio.Reader, curLine2 *string) (line1 string, line return } + +// writeBytes writes all byte slices to the writer +// in the order they are passed in the variadic argument +func writeBytes(w io.Writer, bs ...[]byte) error { + for _, b := range bs { + _, err := w.Write(b) + if err != nil { + return err + } + } + return nil +} diff --git a/pkg/redact/multi_line_test.go b/pkg/redact/multi_line_test.go index 984cec7a7..70665d117 100644 --- a/pkg/redact/multi_line_test.go +++ b/pkg/redact/multi_line_test.go @@ -2,13 +2,15 @@ package redact import ( "bytes" - "io/ioutil" + "io" + "strings" "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -func Test_NewMultiLineRedactorr(t *testing.T) { +func Test_NewMultiLineRedactor(t *testing.T) { tests := []struct { name string selector LineRedactor @@ -101,7 +103,7 @@ func Test_NewMultiLineRedactorr(t *testing.T) { req.NoError(err) outReader := reRunner.Redact(bytes.NewReader([]byte(tt.inputString)), "") - gotBytes, err := ioutil.ReadAll(outReader) + gotBytes, err := io.ReadAll(outReader) req.NoError(err) req.Equal(tt.wantString, string(gotBytes)) GetRedactionList() @@ -109,3 +111,40 @@ func Test_NewMultiLineRedactorr(t *testing.T) { }) } } + +func Test_writeBytes(t *testing.T) { + tests := []struct { + name string + inputBytes [][]byte + want string + }{ + { + name: "No newline", + inputBytes: [][]byte{[]byte("hello"), []byte("world")}, + want: "helloworld", + }, + { + name: "With newline", + inputBytes: [][]byte{[]byte("hello"), NEW_LINE, []byte("world"), NEW_LINE}, + want: "hello\nworld\n", + }, + { + name: "Empty line", + inputBytes: [][]byte{NEW_LINE}, + want: "\n", + }, + { + name: "Nothing", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var w strings.Builder + err := writeBytes(&w, tt.inputBytes...) + require.NoError(t, err) + + assert.Equal(t, tt.want, w.String()) + }) + } +} diff --git a/pkg/redact/redact.go b/pkg/redact/redact.go index 499c1009c..a7e5f6a00 100644 --- a/pkg/redact/redact.go +++ b/pkg/redact/redact.go @@ -17,9 +17,16 @@ const ( MASK_TEXT = "***HIDDEN***" ) -var allRedactions RedactionList -var redactionListMut sync.Mutex -var pendingRedactions sync.WaitGroup +var ( + allRedactions RedactionList + redactionListMut sync.Mutex + pendingRedactions sync.WaitGroup + + // A regex cache to avoid recompiling the same regexes over and over + regexCache = map[string]*regexp.Regexp{} + regexCacheLock sync.Mutex + maskTextBytes = []byte(MASK_TEXT) +) func init() { allRedactions = RedactionList{ @@ -28,6 +35,24 @@ func init() { } } +// A regex cache to avoid recompiling the same regexes over and over +func compileRegex(pattern string) (*regexp.Regexp, error) { + regexCacheLock.Lock() + defer regexCacheLock.Unlock() + + if cached, ok := regexCache[pattern]; ok { + return cached, nil + } + + compiled, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + regexCache[pattern] = compiled + return compiled, nil +} + type Redactor interface { Redact(input io.Reader, path string) io.Reader } @@ -85,10 +110,19 @@ func ResetRedactionList() { ByRedactor: map[string][]Redaction{}, ByFile: map[string][]Redaction{}, } + + // Clear the regex cache as well. We do not want + // to keep this around in long running processes + // that continually redact files + regexCacheLock.Lock() + defer regexCacheLock.Unlock() + + regexCache = map[string]*regexp.Regexp{} } func buildAdditionalRedactors(path string, redacts []*troubleshootv1beta2.Redact) ([]Redactor, error) { additionalRedactors := []Redactor{} + for i, redact := range redacts { if redact == nil { continue @@ -104,7 +138,7 @@ func buildAdditionalRedactors(path string, redacts []*troubleshootv1beta2.Redact } for j, literal := range redact.Removals.Values { - additionalRedactors = append(additionalRedactors, literalString(literal, path, redactorName(i, j, redact.Name, "literal"))) + additionalRedactors = append(additionalRedactors, literalString([]byte(literal), path, redactorName(i, j, redact.Name, "literal"))) } for j, re := range redact.Removals.Regex { @@ -458,13 +492,13 @@ func getReplacementPattern(re *regexp.Regexp, maskText string) string { return substStr } -func readLine(r *bufio.Reader) (string, error) { +func readLine(r *bufio.Reader) ([]byte, error) { var completeLine []byte for { var line []byte line, isPrefix, err := r.ReadLine() if err != nil { - return "", err + return nil, err } completeLine = append(completeLine, line...) @@ -472,7 +506,7 @@ func readLine(r *bufio.Reader) (string, error) { break } } - return string(completeLine), nil + return completeLine, nil } func addRedaction(redaction Redaction) { diff --git a/pkg/redact/single_line.go b/pkg/redact/single_line.go index 7fb7d4934..93ec26e51 100644 --- a/pkg/redact/single_line.go +++ b/pkg/redact/single_line.go @@ -2,12 +2,13 @@ package redact import ( "bufio" + "bytes" "fmt" "io" "regexp" - "strings" "github.com/replicatedhq/troubleshoot/pkg/constants" + "k8s.io/klog/v2" ) type SingleLineRedactor struct { @@ -19,15 +20,17 @@ type SingleLineRedactor struct { isDefault bool } +var NEW_LINE = []byte{'\n'} + func NewSingleLineRedactor(re LineRedactor, maskText, path, name string, isDefault bool) (*SingleLineRedactor, error) { var scanCompiled *regexp.Regexp - compiled, err := regexp.Compile(re.regex) + compiled, err := compileRegex(re.regex) if err != nil { return nil, err } if re.scan != "" { - scanCompiled, err = regexp.Compile(re.scan) + scanCompiled, err = compileRegex(re.scan) if err != nil { return nil, err } @@ -42,50 +45,62 @@ func (r *SingleLineRedactor) Redact(input io.Reader, path string) io.Reader { go func() { var err error defer func() { - if err == io.EOF { + if err == nil || err == io.EOF { writer.Close() } else { + if err == bufio.ErrTooLong { + s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024) + klog.V(2).Info(s) + } else { + klog.V(2).Info(fmt.Sprintf("Error redacting %q: %v", path, err)) + } writer.CloseWithError(err) } }() - substStr := getReplacementPattern(r.re, r.maskText) + substStr := []byte(getReplacementPattern(r.re, r.maskText)) - buf := make([]byte, constants.MAX_BUFFER_CAPACITY) + buf := make([]byte, constants.BUF_INIT_SIZE) scanner := bufio.NewScanner(input) - scanner.Buffer(buf, constants.MAX_BUFFER_CAPACITY) + scanner.Buffer(buf, constants.SCANNER_MAX_SIZE) lineNum := 0 for scanner.Scan() { lineNum++ - line := scanner.Text() + line := scanner.Bytes() // is scan is not nil, then check if line matches scan by lowercasing it if r.scan != nil { - lowerLine := strings.ToLower(line) - if !r.scan.MatchString(lowerLine) { - fmt.Fprintf(writer, "%s\n", line) + lowerLine := bytes.ToLower(line) + if !r.scan.Match(lowerLine) { + // Append newline since scanner strips it + err = writeBytes(writer, line, NEW_LINE) + if err != nil { + return + } continue } } // if scan matches, but re does not, do not redact - if !r.re.MatchString(line) { - fmt.Fprintf(writer, "%s\n", line) + if !r.re.Match(line) { + // Append newline since scanner strips it + err = writeBytes(writer, line, NEW_LINE) + if err != nil { + return + } continue } - clean := r.re.ReplaceAllString(line, substStr) - - // io.WriteString would be nicer, but scanner strips new lines - fmt.Fprintf(writer, "%s\n", clean) - + clean := r.re.ReplaceAll(line, substStr) + // Append newline since scanner strips it + err = writeBytes(writer, clean, NEW_LINE) if err != nil { return } // if clean is not equal to line, a redaction was performed - if clean != line { + if !bytes.Equal(clean, line) { addRedaction(Redaction{ RedactorName: r.redactName, CharactersRemoved: len(line) - len(clean), diff --git a/pkg/redact/single_line_test.go b/pkg/redact/single_line_test.go index 1968334b7..1d771f803 100644 --- a/pkg/redact/single_line_test.go +++ b/pkg/redact/single_line_test.go @@ -2,7 +2,7 @@ package redact import ( "bytes" - "io/ioutil" + "io" "testing" "github.com/stretchr/testify/require" @@ -320,7 +320,7 @@ func TestNewSingleLineRedactor(t *testing.T) { req.NoError(err) outReader := reRunner.Redact(bytes.NewReader([]byte(tt.inputString)), "") - gotBytes, err := ioutil.ReadAll(outReader) + gotBytes, err := io.ReadAll(outReader) req.NoError(err) req.Equal(tt.wantString, string(gotBytes))