Source file test/bench/shootout/regex-dna.go
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
29
30 31 32 33 34
35
36 package main
37
38 import (
39 "fmt"
40 "io/ioutil"
41 "os"
42 "regexp"
43 )
44
// variants lists the regular expressions whose match counts main reports,
// one output line per pattern, in this order. Each entry is an alternation
// of two 8-character patterns over the letters a, c, g and t.
var variants = []string{
	"agggtaaa|tttaccct",
	"[cgt]gggtaaa|tttaccc[acg]",
	"a[act]ggtaaa|tttacc[agt]t",
	"ag[act]gtaaa|tttac[agt]ct",
	"agg[act]taaa|ttta[agt]cct",
	"aggg[acg]aaa|ttt[cgt]ccct",
	"agggt[cgt]aa|tt[acg]accct",
	"agggta[cgt]a|t[acg]taccct",
	"agggtaa[cgt]|[acg]ttaccct",
}
56
// Subst pairs a regular expression with the text that replaces each of its
// matches.
type Subst struct {
	pat  string // regular expression to search for
	repl string // replacement text for every match of pat
}
60
61 var substs = []Subst{
62 Subst{"B", "(c|g|t)"},
63 Subst{"D", "(a|g|t)"},
64 Subst{"H", "(a|c|t)"},
65 Subst{"K", "(g|t)"},
66 Subst{"M", "(a|c)"},
67 Subst{"N", "(a|c|g|t)"},
68 Subst{"R", "(a|g)"},
69 Subst{"S", "(c|g)"},
70 Subst{"V", "(a|c|g)"},
71 Subst{"W", "(a|t)"},
72 Subst{"Y", "(c|t)"},
73 }
74
// countMatches reports how many non-overlapping matches of the regular
// expression pat occur in bytes, scanning from left to right.
//
// pat is compiled with regexp.MustCompile, so an invalid pattern panics.
//
// Counting is delegated to FindAllIndex rather than a manual
// find-and-reslice loop. FindAllIndex always makes forward progress, so a
// pattern that can match the empty string (for example "x*") terminates
// instead of spinning forever on a zero-width match at offset 0. It also
// evaluates anchors such as ^ against the original input rather than against
// each re-sliced remainder.
func countMatches(pat string, bytes []byte) int {
	re := regexp.MustCompile(pat)
	return len(re.FindAllIndex(bytes, -1))
}
88
89 func main() {
90 bytes, err := ioutil.ReadAll(os.Stdin)
91 if err != nil {
92 fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
93 os.Exit(2)
94 }
95 ilen := len(bytes)
96
97 bytes = regexp.MustCompile("(>[^\n]+)?\n").ReplaceAll(bytes, []byte{})
98 clen := len(bytes)
99 for _, s := range variants {
100 fmt.Printf("%s %d\n", s, countMatches(s, bytes))
101 }
102 for _, sub := range substs {
103 bytes = regexp.MustCompile(sub.pat).ReplaceAll(bytes, []byte(sub.repl))
104 }
105 fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, len(bytes))
106 }
View as plain text