// This file is stolen from go/src/cmd/godoc/codewalk.go.
// It's an evaluator for the file address syntax implemented by acme and sam,
// but using Go-native regular expressions.
// To keep things reasonably close, this version uses (?m:re) for all user-provided
// regular expressions. Apart from bounds fixes in the address parser and in
// backward character offsets, that is the only change to the code from codewalk.go.
// See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II
// for details on the syntax.

// addrToByteRange evaluates the given address starting at offset start in data.
// It returns the lo and hi byte offset of the matched region within data.
//
// The address is consumed left to right. The recognized elements are:
//
//	n      line number n (absolute, or relative after + or -)
//	#n     character offset n (absolute, or relative after + or -)
//	/re/   forward search for the regular expression re
//	$      end of data
//	+, -   direction applied to the element that follows; a direction
//	       that is not followed by a number counts as 1
//	,      separates the start address from the end address
//
// An empty addr selects everything from start to the end of data.
// Any other byte is rejected as invalid syntax. On error both returned
// offsets are zero.
func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
	if addr == "" {
		return start, len(data), nil
	}
	var (
		dir        byte // pending direction: '+', '-', or 0 for none
		prevc      byte // first byte of the previously consumed element
		charOffset bool // true after '#': the next number counts characters
	)
	lo = start
	hi = start
	for addr != "" && err == nil {
		c := addr[0]
		switch c {
		default:
			err = errors.New("invalid address syntax near " + string(c))

		case ',':
			// Everything after the comma is the end address, evaluated
			// starting from the current hi. Only its hi offset is used.
			if len(addr) == 1 {
				hi = len(data)
			} else {
				_, hi, err = addrToByteRange(addr[1:], hi, data)
			}
			if err != nil {
				return 0, 0, err
			}
			return lo, hi, nil

		case '+', '-':
			// Two directions in a row: the first one had no count, so
			// apply it with an implicit count of 1.
			if prevc == '+' || prevc == '-' {
				lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
			}
			dir = c

		case '$':
			lo = len(data)
			hi = len(data)
			if len(addr) > 1 {
				dir = '+'
			}

		case '#':
			charOffset = true

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			// Consume the whole run of digits.
			i := 1
			for i < len(addr) && '0' <= addr[i] && addr[i] <= '9' {
				i++
			}
			var n int
			n, err = strconv.Atoi(addr[:i])
			if err != nil {
				// Leaves the switch; the loop condition then stops on err.
				break
			}
			lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
			dir = 0
			charOffset = false
			prevc = c
			addr = addr[i:]
			continue

		case '/':
			// end is the index just past the pattern; next is where parsing
			// resumes (0 until a closing slash has been seen).
			var end, next int
		Regexp:
			for end = 1; end < len(addr); end++ {
				switch addr[end] {
				case '\\':
					// Skip the escaped byte so that \/ does not end the pattern.
					end++
				case '/':
					next = end + 1
					break Regexp
				}
			}
			// A backslash as the very last byte moves end one past
			// len(addr); clamp it so the slices below stay in range.
			if end > len(addr) {
				end = len(addr)
			}
			if next == 0 {
				// No closing slash: the pattern runs to the end of addr.
				next = end
			}
			pattern := addr[1:end]
			lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
			prevc = c
			addr = addr[next:]
			continue
		}
		prevc = c
		addr = addr[1:]
	}

	// A direction still pending at the end of the address is applied
	// with an implicit count of 1.
	if err == nil && dir != 0 {
		lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
	}
	if err != nil {
		return 0, 0, err
	}
	return lo, hi, nil
}

// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n. Applying +n (or +#n) means to advance n lines
// (or characters) after hi. Applying -n (or -#n) means to back up n lines
// (or characters) before lo. A dir of 0 means the number is absolute:
// it is applied as +n starting from offset 0.
// The return value is the new lo, hi. If data does not contain enough
// lines or characters in the requested direction, the error
// "address out of range" is returned.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Step forward one UTF-8 encoded character at a time.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				// The next line is the one being selected.
				lo = hi + 1
			case 0:
				// Include the terminating newline in the range.
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Step backward one character at a time: move to the previous
			// byte, then keep going while it is a UTF-8 continuation byte.
			// Decrementing before indexing keeps lo == len(data) in range.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				pos--
				for pos > 0 && !utf8.RuneStart(data[pos]) {
					pos--
				}
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		for ; lo >= 0; lo-- {
			// Only positions at the start of a line count.
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}

// addrRegexp searches for pattern in the given direction starting at lo, hi.
// The direction dir is '+' (search forward from hi) or '-' (search backward from lo).
// Backward searches are unimplemented. Any dir other than '-' searches forward.
//
// A forward search that finds nothing at or after hi wraps around and
// searches again from the beginning of data. The result is the byte range
// of the match. An error is returned if pattern does not compile, if dir
// is '-', or if there is no match.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// We want ^ and $ to work as in sam/acme, so use ?m.
	re, err := regexp.Compile("(?m:" + pattern + ")")
	if err != nil {
		return 0, 0, err
	}
	if dir == '-' {
		// Could implement reverse search using binary search
		// through file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}
	m := re.FindIndex(data[hi:])
	if len(m) > 0 {
		// FindIndex offsets are relative to data[hi:]; make them absolute.
		m[0] += hi
		m[1] += hi
	} else if hi > 0 {
		// No match. Wrap to beginning of data.
		m = re.FindIndex(data)
	}
	if len(m) == 0 {
		return 0, 0, errors.New("no match for " + pattern)
	}
	return m[0], m[1], nil
}