1// Copyright (c) 2013 Couchbase, Inc. 2 3package collatejson 4 5import "bytes" 6import "errors" 7import "strconv" 8 9// error codes 10var ErrorSuffixDecoding = errors.New("collatejson.suffixDecoding") 11 12// Constants used in text representation of basic data types. 13const ( 14 PLUS = 43 15 MINUS = 45 16 LT = 60 17 GT = 62 18 DOT = 46 19 ZERO = 48 20) 21 22// Constants used to represent positive and negative numbers while encoding 23// them. 24const ( 25 negPrefix = byte(MINUS) 26 posPrefix = byte(GT) 27) 28 29// Negative integers, in its text representation are 10's complement. This map 30// provides the lookup table to generate the complements. 31var negIntLookup = map[byte]byte{ 32 48: 57, // 0 - 9 33 49: 56, // 1 - 8 34 50: 55, // 2 - 7 35 51: 54, // 3 - 6 36 52: 53, // 4 - 5 37 53: 52, // 5 - 4 38 54: 51, // 6 - 3 39 55: 50, // 7 - 2 40 56: 49, // 8 - 1 41 57: 48, // 9 - 0 42} 43 44// A simple lookup table to flip prefixes. 45var prefixOpp = map[byte]byte{ 46 posPrefix: negPrefix, 47 negPrefix: posPrefix, 48} 49 50// EncodeInt encodes integer such that their natural order is preserved as a 51// lexicographic order of their representation. Additionally it must be 52// possible to get back the natural representation from its lexical 53// representation. 54// 55// Input `text` is also in textual representation, that is, strconv.Atoi(text) 56// is the actual integer that is encoded. 57// 58// Zero is encoded as '0' 59func EncodeInt(text, code []byte) []byte { // text -> code 60 if len(text) == 0 { // empty input 61 return code 62 } 63 if code, ok := isZero(text, code); ok { 64 return code 65 } 66 67 switch text[0] { 68 case PLUS: // positive int 69 code = encodePosInt(text[1:], code) 70 case MINUS: // negative int 71 code = encodeNegInt(text[1:], code) 72 default: 73 code = encodePosInt(text, code) 74 } 75 return code 76} 77 78// encode positive integer, local function gets called by EncodeInt 79// encoding 7, >7 80// encoding 123, >>3123 81// encoding 1234567890, >>>2101234567890 82func encodePosInt(text []byte, code []byte) []byte { 83 code = append(code, posPrefix) 84 if len(text) > 1 { 85 // TODO: replace Itoa with more efficient function. 86 c := encodePosInt([]byte(strconv.Itoa(len(text))), code[1:]) 87 code = code[:1+len(c)] 88 } 89 code = append(code, text...) 90 return code 91} 92 93// encode positive integer, local function gets called by EncodeInt 94// encoding -1, -8 95// encoding -2, -7 96// encoding -9, -0 97// encoding -10, --789 98// encoding -11, --788 99// encoding -1234567891 ---7898765432108 100// encoding -1234567890 ---7898765432109 101// encoding -1234567889 ---7898765432110 102func encodeNegInt(text []byte, code []byte) []byte { 103 code = append(code, negPrefix) 104 if len(text) > 1 { 105 // TODO: replace Itoa with more efficient function. 106 c := encodeNegInt([]byte(strconv.Itoa(len(text))), code[1:]) 107 code = code[:1+len(c)] 108 } 109 for _, x := range text { 110 code = append(code, negIntLookup[x]) 111 } 112 return code 113} 114 115// Check whether input is a representation of zero, like, 116// 0 +0 -0 117func isZero(text, code []byte) ([]byte, bool) { 118 // Handle different forms of zero. 119 if (len(text) == 1) && (text[0] == ZERO) { 120 code = append(code, ZERO) 121 return code, true 122 123 } else if (len(text) == 2) && ((text[0] == PLUS) || (text[0] == MINUS)) && 124 (text[1] == ZERO) { 125 126 code = append(code, ZERO) 127 return code, true 128 } 129 return code, false 130} 131 132// DecodeInt complements EncodeInt, it returns integer in text that can be 133// converted to integer value using strconv.AtoI(return_value) 134func DecodeInt(code, text []byte) (int, []byte) { // code -> text 135 if len(code) == 0 { // empty input 136 return 0, text 137 } 138 if code[0] == ZERO { 139 text = append(text, ZERO) 140 return 1, text 141 } 142 143 var skip int 144 var final []byte 145 146 switch code[0] { 147 case posPrefix: 148 text = append(text, PLUS) 149 skip, final = doDecodeInt(code[1:], text[1:]) 150 text = append(text, final...) 151 152 case negPrefix: 153 text = append(text, MINUS) 154 skip, final = doDecodeInt(code[1:], text[1:]) 155 for _, x := range final { 156 text = append(text, negIntLookup[x]) 157 } 158 } 159 return skip + len(text), text 160} 161 162// local function called by DecodeInt 163func doDecodeInt(code, text []byte) (int, []byte) { 164 var skip int 165 166 if code[0] == posPrefix { 167 skip, text = doDecodeInt(code[1:], text) 168 s := skip + len(text) + 1 169 l, _ := strconv.Atoi(string(text)) 170 text = append(text[:0], code[s:s+l]...) 171 return s, text 172 173 } else if code[0] == negPrefix { 174 skip, text = doDecodeInt(code[1:], text) 175 for i, x := range text { 176 text[i] = negIntLookup[x] 177 } 178 s := skip + len(text) + 1 179 l, _ := strconv.Atoi(string(text)) 180 text = append(text[:0], code[s:s+l]...) 181 return s, text 182 183 } else { 184 text = append(text, code[0]) 185 return 0, text 186 } 187} 188 189// EncodeFloat encodes floating point number such that their natural order is 190// preserved as lexicographic order of their representation. Additionally it 191// must be possible to get back the natural representation from its lexical 192// representation. 193// 194// A floating point number f takes a mantissa m ∈ [1/10 , 1) and an integer 195// exponent e such that f = (10^e) * ±m. 196// 197// encoding −0.1 × 10^11 - --7888+ 198// encoding −0.1 × 10^10 - --7898+ 199// encoding -1.4 - -885+ 200// encoding -1.3 - -886+ 201// encoding -1 - -88+ 202// encoding -0.123 - 0876+ 203// encoding -0.0123 - +1876+ 204// encoding -0.001233 - +28766+ 205// encoding -0.00123 - +2876+ 206// encoding 0 0 207// encoding +0.00123 + -7123- 208// encoding +0.001233 + -71233- 209// encoding +0.0123 + -8123- 210// encoding +0.123 + 0123- 211// encoding +1 + +11- 212// encoding +1.3 + +113- 213// encoding +1.4 + +114- 214// encoding +0.1 × 10^10 + ++2101- 215// encoding +0.1 × 10^11 + ++2111- 216func EncodeFloat(text, code []byte) []byte { 217 if len(text) == 0 { // empty input 218 return code 219 } 220 if val, e := strconv.ParseFloat(string(text), 64); e == nil && val == 0 { 221 code = append(code, ZERO) 222 return code 223 } 224 225 prefix, text := signPrefix(text) 226 code = append(code, prefix) 227 228 var exp, mant []byte 229 for i, x := range text { 230 if x == 'e' { 231 mant = text[:i] 232 exp = text[i+1:] 233 break 234 } 235 } 236 237 x := [128]byte{} 238 mantissa := append(x[:0], prefix, '0', '.') 239 for _, x := range mant { 240 if x == '.' { 241 continue 242 } 243 mantissa = append(mantissa, x) 244 } 245 246 expi, _ := strconv.Atoi(string(exp)) 247 if prefix == negPrefix { 248 exp = []byte(strconv.Itoa(-(expi + 1))) 249 } else { 250 exp = []byte(strconv.Itoa(expi + 1)) 251 } 252 253 // Encode integer. 254 code = append(code, EncodeInt(exp, code[1:])...) 255 // Encode and adjust the decimal part. 256 code = append(code, EncodeSD(mantissa, code[len(code):])[1:]...) 257 return code 258} 259 260var flipmap = map[byte]byte{PLUS: MINUS, MINUS: PLUS} 261 262// DecodeFloat complements EncodeFloat, it returns `exponent` and `mantissa` 263// in text format. 264func DecodeFloat(code, text []byte) []byte { 265 if len(code) == 0 { // empty input 266 return text 267 } else if len(code) == 1 && code[0] == ZERO { 268 text = append(text, ZERO) 269 return text 270 } 271 272 msign := code[0] 273 274 var skip int 275 var final, exponent []byte 276 277 x := [128]byte{} 278 switch code[1] { 279 case negPrefix, posPrefix: 280 skip, exponent = DecodeInt(code[1:], x[:0]) 281 282 default: 283 exponent = append(x[:0], PLUS) 284 skip, final = DecodeInt(code[1:], text) 285 exponent = append(exponent, final...) 286 } 287 288 if msign == negPrefix { 289 exponent[0] = flipmap[exponent[0]] 290 } 291 292 y := [128]byte{} 293 mantissa := append(y[:0], msign) 294 mantissa = append(mantissa, code[1+skip:]...) 295 text = append(text, DecodeSD(mantissa, text)...) 296 text = append(text, 'e') 297 text = append(text, exponent...) 298 return text 299} 300 301// EncodeSD encodes small-decimal, values that are greater than -1.0 and less 302// than +1.0,such that their natural order is preserved as lexicographic order 303// of their representation. Additionally it must be possible to get back the 304// natural representation from its lexical representation. 305// 306// Small decimals is greater than -1.0 and less than 1.0 307// 308// Input `text` is also in textual representation, that is, 309// strconv.ParseFloat(text, 64) is the actual integer that is encoded. 310// 311// encoding -0.9995 -0004> 312// encoding -0.999 -000> 313// encoding -0.0123 -9876> 314// encoding -0.00123 -99876> 315// encoding -0.0001233 -9998766> 316// encoding -0.000123 -999876> 317// encoding +0.000123 >000123- 318// encoding +0.0001233 >0001233- 319// encoding +0.00123 >00123- 320// encoding +0.0123 >0123- 321// encoding +0.999 >999- 322// encoding +0.9995 >9995- 323// 324// Caveats: 325// -0.0, 0.0 and +0.0 must be filtered out as integer ZERO `0`. 326func EncodeSD(text, code []byte) []byte { 327 if len(text) == 0 { // empty input 328 return code 329 } 330 331 var prefix byte 332 prefix, text = signPrefix(text) 333 code = append(code, prefix) 334 335 // Remove decimal point and all zeros before that. 336 for i, x := range text { 337 if x == '.' { 338 text = text[i+1:] 339 break 340 } 341 } 342 if prefix == negPrefix { // Do inversion if negative number 343 for _, x := range text { 344 code = append(code, negIntLookup[x]) 345 } 346 } else { // if positive number just copy the text 347 code = append(code, text...) 348 } 349 code = append(code, prefixOpp[prefix]) 350 return code 351} 352 353// DecodeSD complements EncodeSD, it returns integer in text that can be 354// converted to integer type using strconv.ParseFloat(return_value, 64). 355func DecodeSD(code, text []byte) []byte { 356 if len(code) == 0 { 357 return text 358 } 359 360 prefix, sign := code[0], prefixSign(code) 361 text = append(text, []byte{sign, ZERO, DOT}...) 362 363 // If negative number invert the digits. 364 if prefix == negPrefix { 365 for _, x := range code[1 : len(code)-1] { 366 text = append(text, negIntLookup[x]) 367 } 368 } else { 369 text = append(text, code[1:len(code)-1]...) 370 } 371 return text 372} 373 374// EncodeLD encodes large-decimal, values that are greater than or equal to 375// +1.0 and less than or equal to -1.0, such that their natural order is 376// preserved as a lexicographic order of their representation. Additionally 377// it must be possible to get back the natural representation from its lexical 378// representation. 379// 380// Input `text` is also in textual representation, that is, 381// strconv.ParseFloat(text, 64) is the actual integer that is encoded. 382// 383// encoding -100.5 --68994> 384// encoding -10.5 --7> 385// encoding -3.145 -3854> 386// encoding -3.14 -385> 387// encoding -1.01 -198> 388// encoding -1 -1> 389// encoding -0.0001233 -09998766> 390// encoding -0.000123 -0999876> 391// encoding +0.000123 >0000123- 392// encoding +0.0001233 >00001233- 393// encoding +1 >1- 394// encoding +1.01 >101- 395// encoding +3.14 >314- 396// encoding +3.145 >3145- 397// encoding +10.5 >>2105- 398// encoding +100.5 >>31005- 399func EncodeLD(text, code []byte) []byte { 400 if len(text) == 0 { // empty input 401 return code 402 } 403 404 prefix, _ := signPrefix(text) 405 406 // Encode integer and decimal part. 407 texts := bytes.Split(text, []byte{DOT}) 408 if len(texts[0]) == 1 && texts[0][0] == ZERO { 409 code = append(code, prefix, ZERO) 410 } else if len(texts[0]) == 2 { 411 if s := texts[0][0]; (s == PLUS || s == MINUS) && texts[0][1] == ZERO { 412 code = append(code, prefix, ZERO) 413 } else { 414 l := len(EncodeInt(texts[0], code)) 415 code = code[:l] 416 } 417 } else { 418 l := len(EncodeInt(texts[0], code)) 419 code = code[:l] 420 } 421 422 var codedec []byte 423 x := [128]byte{} 424 if len(texts) == 2 { 425 text = joinBytes([]byte{text[0], ZERO, DOT}, texts[1]) 426 codedec = EncodeSD(text, x[:0])[1:] 427 } else { 428 // TODO: This is constant, optimize away. 429 text = []byte{text[0], ZERO, DOT, ZERO} 430 codedec = EncodeSD(text, x[:0])[1:] 431 } 432 433 // Adjust the decimal part 434 codedec[len(codedec)-1] = prefixOpp[prefix] 435 if len(codedec) == 2 && codedec[0] == ZERO { 436 codedec = codedec[1:] 437 } 438 code = append(code, codedec...) 439 return code 440} 441 442// DecodeLD complements EncodeLD, it returns integer in text that can be 443// converted to integer type using strconv.ParseFloat(return_value, 64). 444func DecodeLD(code, text []byte) []byte { 445 if len(code) == 0 { // empty input 446 return text 447 } 448 449 prefix, sign := code[0], prefixSign(code) 450 text = append(text, sign) 451 skip, textint := doDecodeInt(code[1:], text[1:]) 452 l := len(textint) + 1 453 text = text[:l] 454 455 // If negative number invert the digits. 456 if sign == MINUS { 457 for i, x := range text[1:] { 458 text[i+1] = negIntLookup[x] 459 } 460 } 461 462 code = code[skip+l:] 463 textdec := DecodeSD(joinBytes([]byte{prefix}, code), text[len(text):])[2:] 464 text = append(text, textdec...) 465 return text 466} 467 468// local function that check the sign of the input number and returns the 469// encoding-prefix and remaining input. Used while encoding numbers. 470func signPrefix(text []byte) (byte, []byte) { 471 switch text[0] { 472 case PLUS: 473 return posPrefix, text[1:] 474 case MINUS: 475 return negPrefix, text[1:] 476 default: 477 return posPrefix, text 478 } 479} 480 481// local function that check the encoded sign of the coded number and returns 482// the actual sign of the number. Used while decoding numbers. 483func prefixSign(code []byte) byte { 484 var sign byte 485 switch code[0] { 486 case posPrefix: 487 sign = PLUS 488 case negPrefix: 489 sign = MINUS 490 } 491 return sign 492} 493 494// join byte slices into single byte slice. 495func joinBytes(args ...[]byte) []byte { 496 return bytes.Join(args, []byte{}) 497} 498 499func suffixEncodeString(s []byte, code []byte) []byte { 500 text := []byte(s) 501 for _, x := range text { 502 code = append(code, x) 503 if x == Terminator { 504 code = append(code, 1) 505 } 506 } 507 code = append(code, Terminator) 508 return code 509} 510 511func suffixDecodeString(code []byte, text []byte) ([]byte, []byte, error) { 512 for i := 0; i < len(code); i++ { 513 x := code[i] 514 if x == Terminator { 515 i++ 516 switch x = code[i]; x { 517 case 1: 518 text = append(text, 0) 519 case Terminator: 520 if i == (len(code) - 1) { 521 return text, nil, nil 522 } 523 return text, code[i+1:], nil 524 default: 525 return nil, nil, ErrorSuffixDecoding 526 } 527 continue 528 } 529 text = append(text, x) 530 } 531 return nil, nil, ErrorSuffixDecoding 532} 533