1//  Copyright (c) 2013 Couchbase, Inc.
2
3package collatejson
4
5import "bytes"
6import "errors"
7import "strconv"
8
9// error codes
10var ErrorSuffixDecoding = errors.New("collatejson.suffixDecoding")
11
12// Constants used in text representation of basic data types.
13const (
14	PLUS  = 43
15	MINUS = 45
16	LT    = 60
17	GT    = 62
18	DOT   = 46
19	ZERO  = 48
20)
21
22// Constants used to represent positive and negative numbers while encoding
23// them.
24const (
25	negPrefix = byte(MINUS)
26	posPrefix = byte(GT)
27)
28
29// Negative integers, in its text representation are 10's complement. This map
30// provides the lookup table to generate the complements.
31var negIntLookup = map[byte]byte{
32	48: 57, // 0 - 9
33	49: 56, // 1 - 8
34	50: 55, // 2 - 7
35	51: 54, // 3 - 6
36	52: 53, // 4 - 5
37	53: 52, // 5 - 4
38	54: 51, // 6 - 3
39	55: 50, // 7 - 2
40	56: 49, // 8 - 1
41	57: 48, // 9 - 0
42}
43
44// A simple lookup table to flip prefixes.
45var prefixOpp = map[byte]byte{
46	posPrefix: negPrefix,
47	negPrefix: posPrefix,
48}
49
50// EncodeInt encodes integer such that their natural order is preserved as a
51// lexicographic order of their representation. Additionally it must be
52// possible to get back the natural representation from its lexical
53// representation.
54//
55// Input `text` is also in textual representation, that is, strconv.Atoi(text)
56// is the actual integer that is encoded.
57//
58// Zero is encoded as '0'
59func EncodeInt(text, code []byte) []byte { // text -> code
60	if len(text) == 0 { // empty input
61		return code
62	}
63	if code, ok := isZero(text, code); ok {
64		return code
65	}
66
67	switch text[0] {
68	case PLUS: // positive int
69		code = encodePosInt(text[1:], code)
70	case MINUS: // negative int
71		code = encodeNegInt(text[1:], code)
72	default:
73		code = encodePosInt(text, code)
74	}
75	return code
76}
77
78// encode positive integer, local function gets called by EncodeInt
79//  encoding 7,          >7
80//  encoding 123,        >>3123
81//  encoding 1234567890, >>>2101234567890
82func encodePosInt(text []byte, code []byte) []byte {
83	code = append(code, posPrefix)
84	if len(text) > 1 {
85		// TODO: replace Itoa with more efficient function.
86		c := encodePosInt([]byte(strconv.Itoa(len(text))), code[1:])
87		code = code[:1+len(c)]
88	}
89	code = append(code, text...)
90	return code
91}
92
93// encode positive integer, local function gets called by EncodeInt
94//  encoding -1,         -8
95//  encoding -2,         -7
96//  encoding -9,         -0
97//  encoding -10,        --789
98//  encoding -11,        --788
99//  encoding -1234567891 ---7898765432108
100//  encoding -1234567890 ---7898765432109
101//  encoding -1234567889 ---7898765432110
102func encodeNegInt(text []byte, code []byte) []byte {
103	code = append(code, negPrefix)
104	if len(text) > 1 {
105		// TODO: replace Itoa with more efficient function.
106		c := encodeNegInt([]byte(strconv.Itoa(len(text))), code[1:])
107		code = code[:1+len(c)]
108	}
109	for _, x := range text {
110		code = append(code, negIntLookup[x])
111	}
112	return code
113}
114
115// Check whether input is a representation of zero, like,
116//    0 +0 -0
117func isZero(text, code []byte) ([]byte, bool) {
118	// Handle different forms of zero.
119	if (len(text) == 1) && (text[0] == ZERO) {
120		code = append(code, ZERO)
121		return code, true
122
123	} else if (len(text) == 2) && ((text[0] == PLUS) || (text[0] == MINUS)) &&
124		(text[1] == ZERO) {
125
126		code = append(code, ZERO)
127		return code, true
128	}
129	return code, false
130}
131
132// DecodeInt complements EncodeInt, it returns integer in text that can be
133// converted to integer value using strconv.AtoI(return_value)
134func DecodeInt(code, text []byte) (int, []byte) { // code -> text
135	if len(code) == 0 { // empty input
136		return 0, text
137	}
138	if code[0] == ZERO {
139		text = append(text, ZERO)
140		return 1, text
141	}
142
143	var skip int
144	var final []byte
145
146	switch code[0] {
147	case posPrefix:
148		text = append(text, PLUS)
149		skip, final = doDecodeInt(code[1:], text[1:])
150		text = append(text, final...)
151
152	case negPrefix:
153		text = append(text, MINUS)
154		skip, final = doDecodeInt(code[1:], text[1:])
155		for _, x := range final {
156			text = append(text, negIntLookup[x])
157		}
158	}
159	return skip + len(text), text
160}
161
162// local function called by DecodeInt
163func doDecodeInt(code, text []byte) (int, []byte) {
164	var skip int
165
166	if code[0] == posPrefix {
167		skip, text = doDecodeInt(code[1:], text)
168		s := skip + len(text) + 1
169		l, _ := strconv.Atoi(string(text))
170		text = append(text[:0], code[s:s+l]...)
171		return s, text
172
173	} else if code[0] == negPrefix {
174		skip, text = doDecodeInt(code[1:], text)
175		for i, x := range text {
176			text[i] = negIntLookup[x]
177		}
178		s := skip + len(text) + 1
179		l, _ := strconv.Atoi(string(text))
180		text = append(text[:0], code[s:s+l]...)
181		return s, text
182
183	} else {
184		text = append(text, code[0])
185		return 0, text
186	}
187}
188
189// EncodeFloat encodes floating point number such that their natural order is
190// preserved as lexicographic order of their representation. Additionally it
191// must be possible to get back the natural representation from its lexical
192// representation.
193//
194// A floating point number f takes a mantissa m ∈ [1/10 , 1) and an integer
195// exponent e such that f = (10^e) * ±m.
196//
197//  encoding −0.1 × 10^11    - --7888+
198//  encoding −0.1 × 10^10    - --7898+
199//  encoding -1.4            - -885+
200//  encoding -1.3            - -886+
201//  encoding -1              - -88+
202//  encoding -0.123          - 0876+
203//  encoding -0.0123         - +1876+
204//  encoding -0.001233       - +28766+
205//  encoding -0.00123        - +2876+
206//  encoding 0               0
207//  encoding +0.00123        + -7123-
208//  encoding +0.001233       + -71233-
209//  encoding +0.0123         + -8123-
210//  encoding +0.123          + 0123-
211//  encoding +1              + +11-
212//  encoding +1.3            + +113-
213//  encoding +1.4            + +114-
214//  encoding +0.1 × 10^10    + ++2101-
215//  encoding +0.1 × 10^11    + ++2111-
216func EncodeFloat(text, code []byte) []byte {
217	if len(text) == 0 { // empty input
218		return code
219	}
220	if val, e := strconv.ParseFloat(string(text), 64); e == nil && val == 0 {
221		code = append(code, ZERO)
222		return code
223	}
224
225	prefix, text := signPrefix(text)
226	code = append(code, prefix)
227
228	var exp, mant []byte
229	for i, x := range text {
230		if x == 'e' {
231			mant = text[:i]
232			exp = text[i+1:]
233			break
234		}
235	}
236
237	x := [128]byte{}
238	mantissa := append(x[:0], prefix, '0', '.')
239	for _, x := range mant {
240		if x == '.' {
241			continue
242		}
243		mantissa = append(mantissa, x)
244	}
245
246	expi, _ := strconv.Atoi(string(exp))
247	if prefix == negPrefix {
248		exp = []byte(strconv.Itoa(-(expi + 1)))
249	} else {
250		exp = []byte(strconv.Itoa(expi + 1))
251	}
252
253	// Encode integer.
254	code = append(code, EncodeInt(exp, code[1:])...)
255	// Encode and adjust the decimal part.
256	code = append(code, EncodeSD(mantissa, code[len(code):])[1:]...)
257	return code
258}
259
260var flipmap = map[byte]byte{PLUS: MINUS, MINUS: PLUS}
261
262// DecodeFloat complements EncodeFloat, it returns `exponent` and `mantissa`
263// in text format.
264func DecodeFloat(code, text []byte) []byte {
265	if len(code) == 0 { // empty input
266		return text
267	} else if len(code) == 1 && code[0] == ZERO {
268		text = append(text, ZERO)
269		return text
270	}
271
272	msign := code[0]
273
274	var skip int
275	var final, exponent []byte
276
277	x := [128]byte{}
278	switch code[1] {
279	case negPrefix, posPrefix:
280		skip, exponent = DecodeInt(code[1:], x[:0])
281
282	default:
283		exponent = append(x[:0], PLUS)
284		skip, final = DecodeInt(code[1:], text)
285		exponent = append(exponent, final...)
286	}
287
288	if msign == negPrefix {
289		exponent[0] = flipmap[exponent[0]]
290	}
291
292	y := [128]byte{}
293	mantissa := append(y[:0], msign)
294	mantissa = append(mantissa, code[1+skip:]...)
295	text = append(text, DecodeSD(mantissa, text)...)
296	text = append(text, 'e')
297	text = append(text, exponent...)
298	return text
299}
300
301// EncodeSD encodes small-decimal, values that are greater than -1.0 and less
302// than +1.0,such that their natural order is preserved as lexicographic order
303// of their representation. Additionally it must be possible to get back the
304// natural representation from its lexical representation.
305//
306// Small decimals is greater than -1.0 and less than 1.0
307//
308// Input `text` is also in textual representation, that is,
309// strconv.ParseFloat(text, 64) is the actual integer that is encoded.
310//
311//  encoding -0.9995    -0004>
312//  encoding -0.999     -000>
313//  encoding -0.0123    -9876>
314//  encoding -0.00123   -99876>
315//  encoding -0.0001233 -9998766>
316//  encoding -0.000123  -999876>
317//  encoding +0.000123  >000123-
318//  encoding +0.0001233 >0001233-
319//  encoding +0.00123   >00123-
320//  encoding +0.0123    >0123-
321//  encoding +0.999     >999-
322//  encoding +0.9995    >9995-
323//
324// Caveats:
325//  -0.0, 0.0 and +0.0 must be filtered out as integer ZERO `0`.
326func EncodeSD(text, code []byte) []byte {
327	if len(text) == 0 { // empty input
328		return code
329	}
330
331	var prefix byte
332	prefix, text = signPrefix(text)
333	code = append(code, prefix)
334
335	// Remove decimal point and all zeros before that.
336	for i, x := range text {
337		if x == '.' {
338			text = text[i+1:]
339			break
340		}
341	}
342	if prefix == negPrefix { // Do inversion if negative number
343		for _, x := range text {
344			code = append(code, negIntLookup[x])
345		}
346	} else { // if positive number just copy the text
347		code = append(code, text...)
348	}
349	code = append(code, prefixOpp[prefix])
350	return code
351}
352
353// DecodeSD complements EncodeSD, it returns integer in text that can be
354// converted to integer type using strconv.ParseFloat(return_value, 64).
355func DecodeSD(code, text []byte) []byte {
356	if len(code) == 0 {
357		return text
358	}
359
360	prefix, sign := code[0], prefixSign(code)
361	text = append(text, []byte{sign, ZERO, DOT}...)
362
363	// If negative number invert the digits.
364	if prefix == negPrefix {
365		for _, x := range code[1 : len(code)-1] {
366			text = append(text, negIntLookup[x])
367		}
368	} else {
369		text = append(text, code[1:len(code)-1]...)
370	}
371	return text
372}
373
374// EncodeLD encodes large-decimal, values that are greater than or equal to
375// +1.0 and less than or equal to -1.0, such that their natural order is
376// preserved as a lexicographic order of their representation. Additionally
377// it must be possible to get back the natural representation from its lexical
378// representation.
379//
380// Input `text` is also in textual representation, that is,
381// strconv.ParseFloat(text, 64) is the actual integer that is encoded.
382//
383//  encoding -100.5         --68994>
384//  encoding -10.5          --7>
385//  encoding -3.145         -3854>
386//  encoding -3.14          -385>
387//  encoding -1.01          -198>
388//  encoding -1             -1>
389//  encoding -0.0001233     -09998766>
390//  encoding -0.000123      -0999876>
391//  encoding +0.000123      >0000123-
392//  encoding +0.0001233     >00001233-
393//  encoding +1             >1-
394//  encoding +1.01          >101-
395//  encoding +3.14          >314-
396//  encoding +3.145         >3145-
397//  encoding +10.5          >>2105-
398//  encoding +100.5         >>31005-
399func EncodeLD(text, code []byte) []byte {
400	if len(text) == 0 { // empty input
401		return code
402	}
403
404	prefix, _ := signPrefix(text)
405
406	// Encode integer and decimal part.
407	texts := bytes.Split(text, []byte{DOT})
408	if len(texts[0]) == 1 && texts[0][0] == ZERO {
409		code = append(code, prefix, ZERO)
410	} else if len(texts[0]) == 2 {
411		if s := texts[0][0]; (s == PLUS || s == MINUS) && texts[0][1] == ZERO {
412			code = append(code, prefix, ZERO)
413		} else {
414			l := len(EncodeInt(texts[0], code))
415			code = code[:l]
416		}
417	} else {
418		l := len(EncodeInt(texts[0], code))
419		code = code[:l]
420	}
421
422	var codedec []byte
423	x := [128]byte{}
424	if len(texts) == 2 {
425		text = joinBytes([]byte{text[0], ZERO, DOT}, texts[1])
426		codedec = EncodeSD(text, x[:0])[1:]
427	} else {
428		// TODO: This is constant, optimize away.
429		text = []byte{text[0], ZERO, DOT, ZERO}
430		codedec = EncodeSD(text, x[:0])[1:]
431	}
432
433	// Adjust the decimal part
434	codedec[len(codedec)-1] = prefixOpp[prefix]
435	if len(codedec) == 2 && codedec[0] == ZERO {
436		codedec = codedec[1:]
437	}
438	code = append(code, codedec...)
439	return code
440}
441
442// DecodeLD complements EncodeLD, it returns integer in text that can be
443// converted to integer type using strconv.ParseFloat(return_value, 64).
444func DecodeLD(code, text []byte) []byte {
445	if len(code) == 0 { // empty input
446		return text
447	}
448
449	prefix, sign := code[0], prefixSign(code)
450	text = append(text, sign)
451	skip, textint := doDecodeInt(code[1:], text[1:])
452	l := len(textint) + 1
453	text = text[:l]
454
455	// If negative number invert the digits.
456	if sign == MINUS {
457		for i, x := range text[1:] {
458			text[i+1] = negIntLookup[x]
459		}
460	}
461
462	code = code[skip+l:]
463	textdec := DecodeSD(joinBytes([]byte{prefix}, code), text[len(text):])[2:]
464	text = append(text, textdec...)
465	return text
466}
467
468// local function that check the sign of the input number and returns the
469// encoding-prefix and remaining input. Used while encoding numbers.
470func signPrefix(text []byte) (byte, []byte) {
471	switch text[0] {
472	case PLUS:
473		return posPrefix, text[1:]
474	case MINUS:
475		return negPrefix, text[1:]
476	default:
477		return posPrefix, text
478	}
479}
480
481// local function that check the encoded sign of the coded number and returns
482// the actual sign of the number. Used while decoding numbers.
483func prefixSign(code []byte) byte {
484	var sign byte
485	switch code[0] {
486	case posPrefix:
487		sign = PLUS
488	case negPrefix:
489		sign = MINUS
490	}
491	return sign
492}
493
494// join byte slices into single byte slice.
495func joinBytes(args ...[]byte) []byte {
496	return bytes.Join(args, []byte{})
497}
498
499func suffixEncodeString(s []byte, code []byte) []byte {
500	text := []byte(s)
501	for _, x := range text {
502		code = append(code, x)
503		if x == Terminator {
504			code = append(code, 1)
505		}
506	}
507	code = append(code, Terminator)
508	return code
509}
510
511func suffixDecodeString(code []byte, text []byte) ([]byte, []byte, error) {
512	for i := 0; i < len(code); i++ {
513		x := code[i]
514		if x == Terminator {
515			i++
516			switch x = code[i]; x {
517			case 1:
518				text = append(text, 0)
519			case Terminator:
520				if i == (len(code) - 1) {
521					return text, nil, nil
522				}
523				return text, code[i+1:], nil
524			default:
525				return nil, nil, ErrorSuffixDecoding
526			}
527			continue
528		}
529		text = append(text, x)
530	}
531	return nil, nil, ErrorSuffixDecoding
532}
533