diff --git a/alphabet/alphabet.go b/alphabet/alphabet.go
index 7355a83f..c1359aa5 100644
--- a/alphabet/alphabet.go
+++ b/alphabet/alphabet.go
@@ -3,12 +3,14 @@ Package alphabet provides structs for defining biological sequence alphabets.
 */
 package alphabet
 
+// TODO: add Alphabet for codons
+
 import "fmt"
 
 // Alphabet is a struct that holds a list of symbols and a map of symbols to their index in the list.
 type Alphabet struct {
 	symbols  []string
-	encoding map[interface{}]int
+	encoding map[interface{}]uint8
 }
 
 // Error is an error type that is returned when a symbol is not in the alphabet.
@@ -23,23 +25,47 @@ func (e *Error) Error() string {
 
 // NewAlphabet creates a new alphabet from a list of symbols.
 func NewAlphabet(symbols []string) *Alphabet {
-	encoding := make(map[interface{}]int)
+	encoding := make(map[interface{}]uint8)
 	for index, symbol := range symbols {
-		encoding[symbol] = index
-		encoding[index] = index
+		encoding[symbol] = uint8(index)
+		encoding[index] = uint8(index)
 	}
 	return &Alphabet{symbols, encoding}
 }
 
 // Encode returns the index of a symbol in the alphabet.
-func (alphabet *Alphabet) Encode(symbol interface{}) (int, error) {
+func (alphabet *Alphabet) Encode(symbol interface{}) (uint8, error) {
 	c, ok := alphabet.encoding[symbol]
 	if !ok {
-		return 0, &Error{fmt.Sprintf("Symbol %v not in alphabet", symbol)}
+		return 0, fmt.Errorf("Symbol %v not in alphabet", symbol)
 	}
 	return c, nil
 }
 
+// TODO: compress more when len(symbols) << 2^8
+// TODO: DecodeAll
+func (alphabet *Alphabet) EncodeAll(seq string) ([]uint8, error) {
+	encoded := make([]uint8, len(seq))
+	for i, r := range seq {
+		encoding, err := alphabet.Encode(string(r))
+		if err != nil {
+			return nil, fmt.Errorf("Symbol %c in position %d not in alphabet", r, i)
+		}
+		encoded[i] = uint8(encoding)
+	}
+	return encoded, nil
+}
+
+func (alphabet *Alphabet) Check(seq string) int {
+	for i, r := range seq {
+		encoding, err := alphabet.Encode(string(r))
+		if err != nil {
+			return i
+		}
+	}
+	return -1
+}
+
 // Decode returns the symbol at a given index in the alphabet.
 func (alphabet *Alphabet) Decode(code interface{}) (string, error) {
 	c, ok := code.(int)
diff --git a/alphabet/alphabet_test.go b/alphabet/alphabet_test.go
index 81dd70ad..dc8ea04f 100644
--- a/alphabet/alphabet_test.go
+++ b/alphabet/alphabet_test.go
@@ -16,7 +16,7 @@ func TestAlphabet(t *testing.T) {
 		if err != nil {
 			t.Errorf("Unexpected error encoding symbol %s: %v", symbol, err)
 		}
-		if code != i {
+		if int(code) != i {
 			t.Errorf("Incorrect encoding of symbol %s: expected %d, got %d", symbol, i, code)
 		}
 	}
@@ -48,7 +48,7 @@ func TestAlphabet(t *testing.T) {
 		if err != nil {
 			t.Errorf("Unexpected error encoding symbol %s: %v", symbol, err)
 		}
-		if code != i {
+		if int(code) != i {
 			t.Errorf("Incorrect encoding of symbol %s: expected %d, got %d", symbol, i, code)
 		}
 	}
@@ -57,7 +57,7 @@ func TestAlphabet(t *testing.T) {
 		if err != nil {
 			t.Errorf("Unexpected error encoding symbol %s: %v", symbol, err)
 		}
-		if code != i+len(symbols) {
+		if int(code) != i+len(symbols) {
 			t.Errorf("Incorrect encoding of symbol %s: expected %d, got %d", symbol, i+len(symbols), code)
 		}
 	}
diff --git a/alphabet/kmer_counter.go b/alphabet/kmer_counter.go
new file mode 100644
index 00000000..73fefc32
--- /dev/null
+++ b/alphabet/kmer_counter.go
@@ -0,0 +1,175 @@
+// TODO: create benchmark for space and time costs vs https://github.com/shenwei356/bio/tree/master/sketches
+// TODO: create efficient storage for children + counts in top node via multidimensional array
+// TODO: integrate with IO parsers
+// TODO: enable reading and writing to binary format
+// TODO: iterate over observed kmers (what is the appropriate iteration method?)
+// TODO: comparison between two KmerCounter's
+// TODO: initialize upper levels while respecting cache locality
+// TODO: add counts from other KmerCounter, to enable reducing parallel counts
+// TODO: lookup neighbors
+// TODO: path compression for internal nodes
+// TODO: have last succ point to next node from same level for faster iteration
+
+package alphabet
+
+import (
+	"fmt"
+)
+
+type KmerCounter struct {
+	alphabet    *Alphabet
+	num_symbols uint8
+	max_k       uint8
+	total       uint64
+	children    []Node
+}
+
+type Node struct {
+	succ     *Node
+	child    *Node
+	encoding uint8
+	count    uint32
+}
+
+func NewKmerCounter(a *Alphabet, max_k uint8) *KmerCounter {
+	kc := new(KmerCounter)
+	kc.alphabet = a
+	kc.num_symbols = uint8(len(a.symbols))
+	kc.max_k = max_k
+
+	kc.children = make([]Node, kc.num_symbols)
+	for i := uint8(1); i < kc.num_symbols; i++ {
+		kc.children[i].encoding = i
+		kc.children[i-1].succ = &kc.children[i]
+	}
+	return kc
+}
+
+func lookupChild(n *Node, encoding uint8) *Node {
+	for c := n.child; c != nil && encoding <= c.encoding; c = c.succ {
+		if encoding == c.encoding {
+			return c
+		}
+	}
+	return nil
+}
+
+func insertChild(p *Node, encoding uint8, n *Node) {
+	if p.child == nil {
+		p.child = n
+	} else if n.encoding < p.child.encoding {
+		n.succ = p.child
+		p.child = n
+	} else {
+		for c := p.child; c.encoding < encoding; c = c.succ {
+			if encoding < c.succ.encoding {
+				n.succ = c.succ
+				c.succ = n
+			}
+		}
+	}
+	n.count++ // not sure why this is needed
+	n.encoding = encoding
+}
+
+func Observe(kc *KmerCounter, seq string) error {
+	cbuf, err := kc.alphabet.EncodeAll(seq[:kc.max_k])
+	if err != nil {
+		return err
+	}
+
+	CreateChildren := func(index int, remaining int) *Node {
+		if remaining == 0 {
+			return nil
+		} // this condition should never happen
+		nodes := make([]Node, remaining)
+
+		for j, n := range nodes {
+			n.count = 1
+			if j != 0 {
+				nodes[j-1].child = &n
+				n.encoding = cbuf[(index+j)%int(kc.max_k)]
+			}
+		}
+
+		return &nodes[0]
+	}
+
+	UpdateBuffer := func(index int) (max_k int, err error) {
+		max_k = int(kc.max_k)
+		lookahead := index + max_k - 1
+		var encoding uint8
+		if lookahead < len(seq) {
+			next := string(seq[lookahead])
+			encoding, err = kc.alphabet.Encode(next)
+			if err != nil {
+				err = fmt.Errorf("in position %d: %w", index, err)
+				return
+			}
+			cbuf[lookahead%int(kc.max_k)] = encoding
+		} else {
+			max_k = len(seq) - index
+		}
+		return
+	}
+
+	var encoding uint8
+	for i := range seq {
+		max_k, err := UpdateBuffer(i)
+		if err != nil {
+			return err
+		}
+
+		p := &kc.children[cbuf[i%int(kc.max_k)]]
+		kc.total++
+		for k := 1; k <= max_k; k++ {
+			p.count++
+			// fmt.Printf("%d %d %v\n", i, k, p)
+
+			if k != max_k {
+				encoding = cbuf[(i+k)%int(kc.max_k)]
+				c := lookupChild(p, encoding)
+				if c == nil {
+					insertChild(p, encoding, CreateChildren(i+k, max_k-k))
+					break // inserted nodes already have count = 1 added
+				}
+				p = c
+			}
+		}
+	}
+	return nil
+}
+
+func LookupCount(kc *KmerCounter, kmer string) (count uint32, err error) {
+	if len(kmer) > int(kc.max_k) {
+		err = fmt.Errorf("kmer_counter: attempted to lookup count of %d-mer which exceeds max supported length %d", len(kmer), kc.max_k)
+		return
+	}
+	if len(kmer) == 0 {
+		err = fmt.Errorf("kmer_counter: attempted to lookup count of 0mer")
+		return
+	}
+	encoded, err := kc.alphabet.EncodeAll(kmer)
+	if err != nil {
+		return
+	}
+
+	var k int
+	var p *Node
+	for k, p = 1, &kc.children[encoded[0]]; k < len(kmer) && p != nil; k++ {
+		p = lookupChild(p, encoded[k])
+	}
+	if p == nil {
+		return
+	}
+	count = p.count
+	return
+}
+
+func LookupFrequency(kc *KmerCounter, kmer string) (float64, error) {
+	count, err := LookupCount(kc, kmer)
+	if err != nil {
+		return 0, err
+	}
+	return float64(count) / float64(kc.total-uint64(len(kmer)-1)), nil
+}
diff --git a/alphabet/kmer_counter_test.go b/alphabet/kmer_counter_test.go
new file mode 100644
index 00000000..a3788196
--- /dev/null
+++ b/alphabet/kmer_counter_test.go
@@ -0,0 +1,67 @@
+package alphabet
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/TimothyStiles/poly/transform/variants"
+)
+
+func Assertf(t *testing.T, cond bool, format string, args ...any) {
+	if !cond {
+		t.Errorf(format, args...)
+	}
+}
+
+func TestKmerCounterEmpty(t *testing.T) {
+	kc := NewKmerCounter(DNA, 3)
+
+	count, err := LookupCount(kc, "A")
+	assert.EqualValues(t, 0, count)
+	assert.NoError(t, err)
+	count, err = LookupCount(kc, "X")
+	assert.Error(t, err)
+}
+
+func TestKmerCounterRepeated(t *testing.T) {
+	kc := NewKmerCounter(DNA, 3)
+	seq := strings.Repeat("A", 12)
+	assert.Equal(t, 12, len(seq))
+	err := Observe(kc, seq)
+	assert.NoError(t, err)
+
+	assert.Equal(t, 12, int(kc.total))
+
+	onemers, _ := variants.AllVariantsIUPAC("N")
+	for _, kmer := range onemers {
+		count, err := LookupCount(kc, kmer)
+		assert.NoError(t, err)
+		if kmer == "A" {
+			assert.EqualValues(t, 12, count, "Wrong count")
+		} else {
+			assert.EqualValues(t, 0, count, "Reports nonzero count for 1mer not present in sequence %v", kmer)
+		}
+	}
+	twomers, _ := variants.AllVariantsIUPAC("NN")
+	for _, kmer := range twomers {
+		count, err := LookupCount(kc, kmer)
+		assert.NoError(t, err)
+		if kmer == "AA" {
+			assert.EqualValues(t, 11, count, "Wrong count")
+		} else {
+			assert.EqualValues(t, 0, count, "Reports nonzero count for 2mer not present in sequence %v", kmer)
+		}
+	}
+	threemers, _ := variants.AllVariantsIUPAC("NNN")
+	for _, kmer := range threemers {
+		count, err := LookupCount(kc, kmer)
+		assert.NoError(t, err)
+		if kmer == "AAA" {
+			assert.EqualValues(t, 10, count, "Wrong count")
+		} else {
+			assert.EqualValues(t, 0, count, "Reports nonzero count for 3mer not present in sequence %v", kmer)
+		}
+	}
+}
diff --git a/io/genbank/genbank.go b/io/genbank/genbank.go
index 34062b36..0f353750 100644
--- a/io/genbank/genbank.go
+++ b/io/genbank/genbank.go
@@ -811,8 +811,8 @@ func getSourceOrganism(metadataData []string) (string, string, []string) {
 func parseLocation(locationString string) (Location, error) {
 	var location Location
 	location.GbkLocationString = locationString
-	if !(strings.ContainsAny(locationString, "(")) { // Case checks for simple expression of x..x
-		if !(strings.ContainsAny(locationString, ".")) { //Case checks for simple expression x
+	if !strings.ContainsAny(locationString, "(") { // Case checks for simple expression of x..x
+		if !strings.ContainsAny(locationString, ".") { //Case checks for simple expression x
 			position, err := strconv.Atoi(locationString)
 			if err != nil {
 				return Location{}, err
@@ -841,26 +841,34 @@ func parseLocation(locationString string) (Location, error) {
 			if strings.ContainsAny(expression, "(") {
 				firstInnerParentheses := strings.Index(expression, "(")
 				ParenthesesCount := 1
-				comma := 0
-				for i := 1; ParenthesesCount > 0; i++ { // "(" is at 0, so we start at 1
-					comma = i
-					switch expression[firstInnerParentheses+i] {
-					case []byte("(")[0]:
+				prevSubLocationStart := 0
+				for i := firstInnerParentheses+1; i < len(expression); i++ { // "(" is at 0, so we start at 1
+					switch expression[i] {
+					case '(':
 						ParenthesesCount++
-					case []byte(")")[0]:
+					case ')':
 						ParenthesesCount--
+					case ',':
+						if ParenthesesCount == 0 {
+							parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:i])
+							if err != nil {
+								return Location{}, err
+							}
+							parsedSubLocation.GbkLocationString = locationString
+							location.SubLocations = append(location.SubLocations, parsedSubLocation)
+							prevSubLocationStart = i+1
+						}
 					}
 				}
-				parseLeftLocation, err := parseLocation(expression[:firstInnerParentheses+comma+1])
-				if err != nil {
-					return Location{}, err
+				if ParenthesesCount != 0 {
+					return Location{}, fmt.Errorf("Unbalanced parentheses")
 				}
-				parseRightLocation, err := parseLocation(expression[2+firstInnerParentheses+comma:])
+				parsedSubLocation, err := parseLocation(expression[prevSubLocationStart:])
 				if err != nil {
 					return Location{}, err
 				}
-
-				location.SubLocations = append(location.SubLocations, parseLeftLocation, parseRightLocation)
+				parsedSubLocation.GbkLocationString = locationString
+				location.SubLocations = append(location.SubLocations, parsedSubLocation)
 			} else { // This is the default join(x..x,x..x)
 				for _, numberRange := range strings.Split(expression, ",") {
 					joinLocation, err := parseLocation(numberRange)
@@ -899,6 +907,7 @@ func parseLocation(locationString string) (Location, error) {
 	return location, nil
 }
 
+
 // buildMetaString is a helper function to build the meta section of genbank files.
 func buildMetaString(name string, data string) string {
 	keyWhitespaceTrailLength := 12 - len(name) // I wish I was kidding.
diff --git a/io/genbank/genbank_test.go b/io/genbank/genbank_test.go
index 17090386..c4e0fb77 100644
--- a/io/genbank/genbank_test.go
+++ b/io/genbank/genbank_test.go
@@ -2,6 +2,7 @@ package genbank
 
 import (
 	"errors"
+	"fmt"
 	"io"
 	"os"
 	"path/filepath"
@@ -115,12 +116,11 @@ func TestGbkLocationStringBuilder(t *testing.T) {
 
 func TestGbLocationStringBuilder(t *testing.T) {
 	tmpDataDir, err := os.MkdirTemp("", "data-*")
-	if err != nil {
-		t.Error(err)
-	}
+	assert.NoError(t, err)
 	defer os.RemoveAll(tmpDataDir)
 
-	scrubbedGb, _ := Read("../../data/t4_intron.gb")
+	scrubbedGb, err := Read("../../data/t4_intron.gb")
+	assert.NoError(t, err)
 
 	// removing gbkLocationString from features to allow testing for gbkLocationBuilder
 	for featureIndex := range scrubbedGb.Features {
@@ -128,10 +128,13 @@ func TestGbLocationStringBuilder(t *testing.T) {
 	}
 
 	tmpGbFilePath := filepath.Join(tmpDataDir, "t4_intron_test.gb")
-	_ = Write(scrubbedGb, tmpGbFilePath)
+	err = Write(scrubbedGb, tmpGbFilePath)
+	assert.NoError(t, err)
 
-	testInputGb, _ := Read("../../data/t4_intron.gb")
-	testOutputGb, _ := Read(tmpGbFilePath)
+	testInputGb, err := Read("../../data/t4_intron.gb")
+	assert.NoError(t, err)
+	testOutputGb, err := Read(tmpGbFilePath)
+	assert.NoError(t, err)
 
 	if diff := cmp.Diff(testInputGb, testOutputGb, []cmp.Option{cmpopts.IgnoreFields(Feature{}, "ParentSequence")}...); diff != "" {
 		t.Errorf("Issue with either Join or complement location building. Parsing the output of Build() does not produce the same output as parsing the original file read with Read(). Got this diff:\n%s", diff)
@@ -160,6 +163,15 @@ func TestPartialLocationParseRegression(t *testing.T) {
 	}
 }
 
+func TestSubLocationStringParseRegression(t *testing.T) {
+	location := "join(complement(5306942..5307394),complement(5304401..5305029),complement(5303328..5303393),complement(5301928..5302004))"
+	parsedLocation, err := parseLocation(location)
+	if err != nil {
+		t.Errorf("Failed to parse location string. Got err: %s", err)
+	}
+	fmt.Println(parsedLocation)
+}
+
 func TestSnapgeneGenbankRegression(t *testing.T) {
 	snapgene, err := Read("../../data/puc19_snapgene.gb")
 
diff --git a/random/random.go b/random/random.go
index 7b933a9b..fa929974 100644
--- a/random/random.go
+++ b/random/random.go
@@ -6,8 +6,13 @@ package random
 import (
 	"errors"
 	"math/rand"
+
+	"github.com/TimothyStiles/poly/transform/variants"
 )
 
+var iupacToBases = variants.IUPAC2Bases()
+var iupacToAAs = variants.IUPAC2AAs()
+
 // ProteinSequence returns a random protein sequence string of a given length and seed.
 // All returned sequences start M (Methionine) and end with * (stop codon).
 func ProteinSequence(length int, seed int64) (string, error) {
@@ -39,11 +44,44 @@ func ProteinSequence(length int, seed int64) (string, error) {
 	return string(randomSequence), nil
 }
 
+func RandomRune(runes []rune) rune {
+	randomIndex := rand.Intn(len(runes))
+	return runes[randomIndex]
+}
+
+func ProteinSequenceFromPattern(pattern string, seed int64) string {
+	randomSequence := make([]rune, len(pattern))
+	rand.Seed(seed)
+
+	for res, peptide := range pattern {
+		if options, found := iupacToAAs[peptide]; found {
+			randomSequence[res] = RandomRune(options)
+		} else {
+			randomSequence[res] = peptide
+		}
+	}
+	return string(randomSequence)
+}
+
 // DNASequence returns a random DNA sequence string of a given length and seed.
 func DNASequence(length int, seed int64) (string, error) {
 	return randomNucelotideSequence(length, seed, []rune("ACTG")), nil
 }
 
+func DNASequenceFromPattern(pattern string, seed int64) string {
+	randomSequence := make([]rune, len(pattern))
+	rand.Seed(seed)
+
+	for i, base := range pattern {
+		if options, found := iupacToBases[base]; found {
+			randomSequence[i] = RandomRune(options)
+		} else {
+			randomSequence[i] = base
+		}
+	}
+	return string(randomSequence)
+}
+
 // RNASequence returns a random DNA sequence string of a given length and seed.
 func RNASequence(length int, seed int64) (string, error) {
 	return randomNucelotideSequence(length, seed, []rune("ACUG")), nil
diff --git a/synthesis/fix/patterns.go b/synthesis/fix/patterns.go
new file mode 100644
index 00000000..b13494d2
--- /dev/null
+++ b/synthesis/fix/patterns.go
@@ -0,0 +1,72 @@
+package fix
+
+import (
+	"fmt"
+	"log"
+	"regexp"
+	"strings"
+
+	"github.com/TimothyStiles/poly/transform/variants"
+)
+
+var translator = buildPatternTranslator(variants.IUPAC2Bases())
+
+func buildPatternTranslator(ambiguityCodes map[rune][]rune) map[rune]string {
+	trans := make(map[rune]string)
+	for r, options := range ambiguityCodes {
+		if len(options) > 1 {
+			trans[r] = "[" + string(options) + "]"
+		}
+	}
+	trans['('] = "(?:"
+
+	return trans
+}
+
+// TODO support (n/m) notation for point of cleavage for non-palindromic enzymes
+func patternToRegexp(buf *strings.Builder, pattern string, translator map[rune]string) {
+	buf.WriteRune('(')
+	for _, r := range pattern {
+		if match, found := translator[r]; found {
+			buf.WriteString(match)
+		} else {
+			buf.WriteRune(r)
+		}
+	}
+	buf.WriteRune(')')
+}
+
+func PatternsToRegexp(patterns []string, doubleStranded bool) (*regexp.Regexp, error) {
+	var buf strings.Builder
+	if len(patterns) == 0 {
+		return nil, nil
+	}
+
+	// expand to reduce reallocations
+	estSize := -1
+	for _, pat := range patterns {
+		estSize += len(pat) + 3
+	}
+	buf.Grow(estSize)
+
+	for i, pat := range patterns {
+		if i != 0 {
+			buf.WriteRune('|')
+		}
+		patternToRegexp(&buf, pat, translator)
+		if doubleStranded {
+			reverseComplementPat := transform.ReverseComplement(pat)
+			if reverseComplementPat != pat {
+				buf.WriteRune('|')
+				patternToRegexp(&buf, reverseComplementPat, translator)
+			}
+		}
+	}
+	// TODO: only print during testing
+	log.Println(buf.String())
+	return regexp.Compile(buf.String())
+}
+
+func HomopolymericRuns(length int) string {
+	return fmt.Sprintf("N{%d}", length)
+}
diff --git a/synthesis/fix/patterns_test.go b/synthesis/fix/patterns_test.go
new file mode 100644
index 00000000..3d0e3e82
--- /dev/null
+++ b/synthesis/fix/patterns_test.go
@@ -0,0 +1,66 @@
+package fix
+
+import (
+	"testing"
+
+	"github.com/TimothyStiles/poly/random"
+	"github.com/TimothyStiles/poly/transform/variants"
+)
+
+func Assertf(t *testing.T, cond bool, format string, args ...any) {
+	if !cond {
+		t.Errorf(format, args...)
+	}
+}
+
+func TestOverlapping(t *testing.T) {
+	re, err := patternsToRegexp([]string{"AA"})
+	Assertf(t, err == nil, "Encountered error building regexp")
+	matches := re.FindAllStringSubmatchIndex("AAAA", -1)
+	Assertf(t, len(matches) != 3, "Expected 3 matches")
+}
+
+func TestAmbiguous(t *testing.T) {
+	re, err := patternsToRegexp([]string{"N"})
+	Assertf(t, err == nil, "Encountered error building regexp")
+	matches := re.FindAllStringSubmatchIndex("AGCT", -1)
+	Assertf(t, len(matches) == 4, "Expected 4 matches, got %d", len(matches))
+}
+
+func TestMultiple(t *testing.T) {
+	re, err := patternsToRegexp([]string{"A", "C"})
+	Assertf(t, err == nil, "Encountered error building regexp")
+	matches := re.FindAllStringSubmatchIndex("AGCT", -1)
+	Assertf(t, len(matches) == 2, "Expected 2 matches, got %d", len(matches))
+}
+
+func keys[K comparable, V any](m map[K]V) []K {
+	array := make([]K, len(m))
+	var i = 0
+	for k := range m {
+		array[i] = k
+		i += 1
+	}
+	return array
+}
+
+func TestAcceptsRandomGen(t *testing.T) {
+	allBases := string(keys[rune, []rune](variants.IUPAC2Bases()))
+	re, err := patternsToRegexp([]string{allBases})
+	Assertf(t, err == nil, "Encountered error building regexp")
+	sample := random.DNASequenceFromPattern(allBases, 0)
+	Assertf(t, len(re.FindAllStringSubmatchIndex(sample, -1)) == 1, "Expected sample DNA sequence %s to match pattern %s", sample, allBases)
+}
+
+func TestCheckHomopolymericRuns(t *testing.T) {
+	testSeq := "GTAAAACGACGGCCAGT" // M13 fwd
+	if run := checkHomopolymericRuns(testSeq); run == false {
+		t.Errorf("checkHomopolymericRuns has changed on test. Got false instead of true")
+	}
+
+	testSeq = "ACGATGGCAGTAGCATGC" //"GTAAAACGACGGCCAGT" // M13 fwd
+
+	if run := checkHomopolymericRuns(testSeq); run == true {
+		t.Errorf("checkHomopolymericRuns has changed on test. Got true instead of false")
+	}
+}
diff --git a/synthesis/fix/synthesis.go b/synthesis/fix/synthesis.go
index 4aa50767..0799b7eb 100644
--- a/synthesis/fix/synthesis.go
+++ b/synthesis/fix/synthesis.go
@@ -54,9 +54,11 @@ type Change struct {
 	Reason   string `db:"reason"`
 }
 
+type ProblematicSequenceFunc func(string, chan DnaSuggestion, *sync.WaitGroup)
+
 // RemoveSequence is a generator for a problematicSequenceFuncs for specific
 // sequences.
-func RemoveSequence(sequencesToRemove []string, reason string) func(string, chan DnaSuggestion, *sync.WaitGroup) {
+func RemoveSequence(sequencesToRemove []string, reason string) ProblematicSequenceFunc {
 	return func(sequence string, c chan DnaSuggestion, waitgroup *sync.WaitGroup) {
 		var sequencesToRemoveForReverse []string
 		for _, seq := range sequencesToRemove {
@@ -82,8 +84,27 @@ func RemoveSequence(sequencesToRemove []string, reason string) func(string, chan
 	}
 }
 
+// TODO: support custom reasons per pattern
+// TODO: benchmark against RemoveSequence
+func RemovePatterns(patternsToRemove []string, reason string) (ProblematicSequenceFunc, err) {
+	re, err := PatternsToRegex(patternsToRemove)
+
+	return func(sequence string, c chan DnaSuggestion, waitgroup *sync.WaitGroup) {
+		for _, site := range sequencesToRemoveForReverse {
+			locations := re.FindAllStringSubmatchIndex(sequence, -1)
+			for _, location := range locations {
+				codonLength := 3
+				position := location[0] / codonLength
+				c <- DnaSuggestion{position, (location[1] / codonLength) - 1, "NA", 1, reason}
+			}
+		}
+		waitgroup.Done()
+	}
+}
+
 // RemoveRepeat is a generator to make a problematicSequenceFunc for repeats.
-func RemoveRepeat(repeatLen int) func(string, chan DnaSuggestion, *sync.WaitGroup) {
+// TODO: replace with more efficient kmer counter
+func RemoveRepeat(repeatLen int) ProblematicSequenceFunc {
 	return func(sequence string, c chan DnaSuggestion, waitgroup *sync.WaitGroup) {
 		// Get a kmer list
 		kmers := make(map[string]bool)
@@ -114,7 +135,7 @@ func RemoveRepeat(repeatLen int) func(string, chan DnaSuggestion, *sync.WaitGrou
 // base pairs in comparison to adenine and thymine base pairs. Usually, you
 // want the range to be somewhere around 50%, with a decent upperBound being
 // 80% GC and a decent lowerBound being 20%.
-func GcContentFixer(upperBound, lowerBound float64) func(string, chan DnaSuggestion, *sync.WaitGroup) {
+func GcContentFixer(upperBound, lowerBound float64) ProblematicSequenceFunc {
 	return func(sequence string, c chan DnaSuggestion, waitgroup *sync.WaitGroup) {
 		gcContent := checks.GcContent(sequence)
 		var numberOfChanges int
@@ -149,7 +170,7 @@ func getSuggestions(suggestions chan DnaSuggestion, suggestionOutputs chan []Dna
 
 // findProblems is a helper function in FixCDS that concurrently runs each
 // sequence check and returns a list of all the suggested changes.
-func findProblems(sequence string, problematicSequenceFuncs []func(string, chan DnaSuggestion, *sync.WaitGroup)) []DnaSuggestion {
+func findProblems(sequence string, problematicSequenceFuncs []ProblematicSequenceFunc) []DnaSuggestion {
 	// Run functions to get suggestions
 	suggestions := make(chan DnaSuggestion)
 	suggestionOutputs := make(chan []DnaSuggestion)
@@ -216,7 +237,7 @@ changes that were done to the sequence.
 // Cds fixes a CDS given the CDS sequence, a codon table, a list of
 // functions to solve for, and a number of iterations to attempt fixing.
 // Unless you are an advanced user, you should use CdsSimple.
-func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []func(string, chan DnaSuggestion, *sync.WaitGroup)) (string, []Change, error) {
+func Cds(sequence string, codontable codon.Table, problematicSequenceFuncs []ProblematicSequenceFunc) (string, []Change, error) {
 	codonLength := 3
 	if len(sequence)%codonLength != 0 {
 		return "", []Change{}, errors.New("this sequence isn't a complete CDS, please try to use a CDS without interrupted codons")
diff --git a/transform/variants/variants.go b/transform/variants/variants.go
index 8ea276a4..254cddec 100644
--- a/transform/variants/variants.go
+++ b/transform/variants/variants.go
@@ -13,32 +13,49 @@ import (
 	"strings"
 )
 
+var iupacToBases = map[rune][]rune{ // rune map of all iupac nucleotide variants
+	'G': {'G'},
+	'A': {'A'},
+	'T': {'T'},
+	'C': {'C'},
+	'R': {'G', 'A'},
+	'Y': {'T', 'C'},
+	'M': {'A', 'C'},
+	'K': {'G', 'T'},
+	'S': {'G', 'C'},
+	'W': {'A', 'T'},
+	'H': {'A', 'C', 'T'},
+	'B': {'G', 'T', 'C'},
+	'V': {'G', 'C', 'A'},
+	'D': {'G', 'A', 'T'},
+	'N': {'G', 'A', 'T', 'C'},
+}
+
+var iupacToAAs = map[rune][]rune{
+	'X': []rune("ACDEFGHIJLMNPQRSTVWY"), // Unknown
+	'B': {'D', 'N'},
+	'Z': {'E', 'Q'},
+	'J': {'I', 'L'},
+	'+': {'K', 'R', 'H'}, // Positively charged
+	'-': {'D', 'E'},      // Negatively charged
+}
+
+func IUPAC2Bases() map[rune][]rune {
+	return iupacToBases
+}
+
+func IUPAC2AAs() map[rune][]rune {
+	return iupacToAAs
+}
+
 // AllVariantsIUPAC takes a string as input
 // and returns all iupac variants as output
 func AllVariantsIUPAC(seq string) ([]string, error) {
 	seqVariantList := [][]rune{}
 	seqVariants := []string{}
 
-	iupac := map[rune][]rune{ // rune map of all iupac nucleotide variants
-		'G': {'G'},
-		'A': {'A'},
-		'T': {'T'},
-		'C': {'C'},
-		'R': {'G', 'A'},
-		'Y': {'T', 'C'},
-		'M': {'A', 'C'},
-		'K': {'G', 'T'},
-		'S': {'G', 'C'},
-		'W': {'A', 'T'},
-		'H': {'A', 'C', 'T'},
-		'B': {'G', 'T', 'C'},
-		'V': {'G', 'C', 'A'},
-		'D': {'G', 'A', 'T'},
-		'N': {'G', 'A', 'T', 'C'},
-	}
-
 	for _, s := range strings.ToUpper(seq) {
-		variantsIUPAC, ok := iupac[s]
+		variantsIUPAC, ok := iupacToBases[s]
 		if ok {
 			seqVariantList = append(seqVariantList, variantsIUPAC)
 		} else {