1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package present
6
7import (
8	"errors"
9	"regexp"
10	"strconv"
11	"unicode/utf8"
12)
13
14// This file is stolen from go/src/cmd/godoc/codewalk.go.
15// It's an evaluator for the file address syntax implemented by acme and sam,
16// but using Go-native regular expressions.
17// To keep things reasonably close, this version uses (?m:re) for all user-provided
18// regular expressions. That is the only change to the code from codewalk.go.
19// See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II
20// for details on the syntax.
21
22// addrToByte evaluates the given address starting at offset start in data.
23// It returns the lo and hi byte offset of the matched region within data.
24func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
25	if addr == "" {
26		lo, hi = start, len(data)
27		return
28	}
29	var (
30		dir        byte
31		prevc      byte
32		charOffset bool
33	)
34	lo = start
35	hi = start
36	for addr != "" && err == nil {
37		c := addr[0]
38		switch c {
39		default:
40			err = errors.New("invalid address syntax near " + string(c))
41		case ',':
42			if len(addr) == 1 {
43				hi = len(data)
44			} else {
45				_, hi, err = addrToByteRange(addr[1:], hi, data)
46			}
47			return
48
49		case '+', '-':
50			if prevc == '+' || prevc == '-' {
51				lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
52			}
53			dir = c
54
55		case '$':
56			lo = len(data)
57			hi = len(data)
58			if len(addr) > 1 {
59				dir = '+'
60			}
61
62		case '#':
63			charOffset = true
64
65		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
66			var i int
67			for i = 1; i < len(addr); i++ {
68				if addr[i] < '0' || addr[i] > '9' {
69					break
70				}
71			}
72			var n int
73			n, err = strconv.Atoi(addr[0:i])
74			if err != nil {
75				break
76			}
77			lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
78			dir = 0
79			charOffset = false
80			prevc = c
81			addr = addr[i:]
82			continue
83
84		case '/':
85			var i, j int
86		Regexp:
87			for i = 1; i < len(addr); i++ {
88				switch addr[i] {
89				case '\\':
90					i++
91				case '/':
92					j = i + 1
93					break Regexp
94				}
95			}
96			if j == 0 {
97				j = i
98			}
99			pattern := addr[1:i]
100			lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
101			prevc = c
102			addr = addr[j:]
103			continue
104		}
105		prevc = c
106		addr = addr[1:]
107	}
108
109	if err == nil && dir != 0 {
110		lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
111	}
112	if err != nil {
113		return 0, 0, err
114	}
115	return lo, hi, nil
116}
117
// addrNumber applies the given dir, n, and charOffset to the address lo, hi.
// dir is '+' or '-', n is the count, and charOffset is true if the syntax
// used was #n.  Applying +n (or +#n) means to advance n lines
// (or characters) after hi.  Applying -n (or -#n) means to back up n lines
// (or characters) before lo.
// A dir of 0 is treated as '+' applied from offset 0, i.e. an absolute
// line or character number.
// The return value is the new lo, hi. If data does not extend far enough in
// the requested direction (or dir is not one of the values above), the result
// is 0, 0 and an "address out of range" error.
func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
	switch dir {
	case 0:
		// No direction: count from the beginning of data.
		lo = 0
		hi = 0
		fallthrough

	case '+':
		if charOffset {
			// Advance n runes from hi; an invalid byte counts as one rune.
			pos := hi
			for ; n > 0 && pos < len(data); n-- {
				_, size := utf8.DecodeRune(data[pos:])
				pos += size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find next beginning of line
		if hi > 0 {
			for hi < len(data) && data[hi-1] != '\n' {
				hi++
			}
		}
		lo = hi
		if n == 0 {
			return lo, hi, nil
		}
		// Count newlines. When exactly one more is needed, the line that
		// starts after the current newline is the one to select, so lo is
		// moved to its first byte; the final newline is included in the range.
		for ; hi < len(data); hi++ {
			if data[hi] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				lo = hi + 1
			case 0:
				return lo, hi + 1, nil
			}
		}

	case '-':
		if charOffset {
			// Back up n runes from lo by decoding the rune that ends at the
			// current position. Looking only at bytes before pos keeps the
			// scan in bounds when lo == len(data) and never stops inside a
			// multi-byte sequence; an invalid byte counts as one rune, as in
			// the forward case.
			pos := lo
			for ; n > 0 && pos > 0; n-- {
				_, size := utf8.DecodeLastRune(data[:pos])
				pos -= size
			}
			if n == 0 {
				return pos, pos, nil
			}
			break
		}
		// find earlier beginning of line
		for lo > 0 && data[lo-1] != '\n' {
			lo--
		}
		hi = lo
		if n == 0 {
			return lo, hi, nil
		}
		// Walk backward counting line starts (offset 0 or a byte preceded by
		// a newline). The start of the current line is counted first. When one
		// more line start is needed, hi is pinned to the current one so that
		// the result spans the line found last.
		for ; lo >= 0; lo-- {
			if lo > 0 && data[lo-1] != '\n' {
				continue
			}
			switch n--; n {
			case 1:
				hi = lo
			case 0:
				return lo, hi, nil
			}
		}
	}

	return 0, 0, errors.New("address out of range")
}
202
// addrRegexp looks for the first match of pattern in data and returns the
// byte offsets of that match.
// A dir of '-' requests a backward search from lo, which is not implemented
// and yields an error; any other dir searches forward from hi. When nothing
// matches at or after hi and hi is not 0, the search wraps around and is
// retried over all of data.
// The pattern is compiled on every call, inside a (?m:...) group.
func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
	// Multi-line mode lets ^ and $ anchor at line boundaries, which is how
	// they behave in sam/acme.
	compiled, compileErr := regexp.Compile("(?m:" + pattern + ")")
	switch {
	case compileErr != nil:
		return 0, 0, compileErr
	case dir == '-':
		// Could implement reverse search using binary search
		// through file, but that seems like overkill.
		return 0, 0, errors.New("reverse search not implemented")
	}

	// First attempt: from hi to the end. Offsets are relative to the
	// sub-slice, so shift them back into data's coordinates.
	if loc := compiled.FindIndex(data[hi:]); loc != nil {
		return loc[0] + hi, loc[1] + hi, nil
	}

	// Second attempt: wrap to the beginning. Skipped when hi is 0 because the
	// first attempt already covered all of data.
	if hi > 0 {
		if loc := compiled.FindIndex(data); loc != nil {
			return loc[0], loc[1], nil
		}
	}

	return 0, 0, errors.New("no match for " + pattern)
}
230