/* xref: /5.5.2/subjson/contrib/jsonsl/jsonsl.c (revision 5cf05eaf) */
/* Copyright (C) 2012-2015 Mark Nunberg.
 *
 * See included LICENSE file for license details.
 */
5
6#include "jsonsl.h"
7#include <assert.h>
8#include <limits.h>
9#include <ctype.h>
10
11#ifdef JSONSL_USE_METRICS
12#define XMETRICS \
13    X(STRINGY_INSIGNIFICANT) \
14    X(STRINGY_SLOWPATH) \
15    X(ALLOWED_WHITESPACE) \
16    X(QUOTE_FASTPATH) \
17    X(SPECIAL_FASTPATH) \
18    X(SPECIAL_WSPOP) \
19    X(SPECIAL_SLOWPATH) \
20    X(GENERIC) \
21    X(STRUCTURAL_TOKEN) \
22    X(SPECIAL_SWITCHFIRST) \
23    X(STRINGY_CATCH) \
24    X(NUMBER_FASTPATH) \
25    X(ESCAPES) \
26    X(TOTAL) \
27
28struct jsonsl_metrics_st {
29#define X(m) \
30    unsigned long metric_##m;
31    XMETRICS
32#undef X
33};
34
35static struct jsonsl_metrics_st GlobalMetrics = { 0 };
36static unsigned long GenericCounter[0x100] = { 0 };
37static unsigned long StringyCatchCounter[0x100] = { 0 };
38
39#define INCR_METRIC(m) \
40    GlobalMetrics.metric_##m++;
41
42#define INCR_GENERIC(c) \
43        INCR_METRIC(GENERIC); \
44        GenericCounter[c]++; \
45
46#define INCR_STRINGY_CATCH(c) \
47    INCR_METRIC(STRINGY_CATCH); \
48    StringyCatchCounter[c]++;
49
50JSONSL_API
51void jsonsl_dump_global_metrics(void)
52{
53    int ii;
54    printf("JSONSL Metrics:\n");
55#define X(m) \
56    printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57           (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58    XMETRICS
59#undef X
60    printf("Generic Characters:\n");
61    for (ii = 0; ii < 0xff; ii++) {
62        if (GenericCounter[ii]) {
63            printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64        }
65    }
66    printf("Weird string loop\n");
67    for (ii = 0; ii < 0xff; ii++) {
68        if (StringyCatchCounter[ii]) {
69            printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70        }
71    }
72}
73
74#else
75#define INCR_METRIC(m)
76#define INCR_GENERIC(c)
77#define INCR_STRINGY_CATCH(c)
78JSONSL_API
79void jsonsl_dump_global_metrics(void) { }
80#endif /* JSONSL_USE_METRICS */
81
/*
 * Case labels for the ten ASCII decimal digits. Place inside a switch on a
 * character value; the statements that follow run for any digit.
 */
#define CASE_DIGITS \
case '0': \
case '1': \
case '2': \
case '3': \
case '4': \
case '5': \
case '6': \
case '7': \
case '8': \
case '9':

/*
 * Character-classification helpers used by the lexer. Their definitions are
 * not part of this section of the file.
 */
static unsigned extract_special(unsigned c);
static int is_special_end(unsigned c);
static int is_allowed_whitespace(unsigned c);
static int is_allowed_escape(unsigned c);
static int is_simple_char(unsigned c);
static char get_escape_equiv(unsigned c);
100
101JSONSL_API
102jsonsl_t jsonsl_new(int nlevels)
103{
104    struct jsonsl_st *jsn = (struct jsonsl_st *)
105            calloc(1, sizeof (*jsn) +
106                    ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
107            );
108
109    jsn->levels_max = nlevels;
110    jsn->max_callback_level = -1;
111    jsonsl_reset(jsn);
112    return jsn;
113}
114
115JSONSL_API
116void jsonsl_reset(jsonsl_t jsn)
117{
118    unsigned int ii;
119    jsn->tok_last = 0;
120    jsn->can_insert = 1;
121    jsn->pos = 0;
122    jsn->level = 0;
123    jsn->stopfl = 0;
124    jsn->in_escape = 0;
125    jsn->expecting = 0;
126
127    memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
128
129    for (ii = 0; ii < jsn->levels_max; ii++) {
130        jsn->stack[ii].level = ii;
131    }
132}
133
134JSONSL_API
135void jsonsl_destroy(jsonsl_t jsn)
136{
137    if (jsn) {
138        free(jsn);
139    }
140}
141
142
143#define FASTPARSE_EXHAUSTED 1
144#define FASTPARSE_BREAK 0
145
146/*
147 * This function is meant to accelerate string parsing, reducing the main loop's
148 * check if we are indeed a string.
149 *
150 * @param jsn the parser
151 * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152 * @param[in,out] nbytes_p A pointer to the current size of the buffer
153 * @return true if all bytes have been exhausted (and thus the main loop can
154 * return), false if a special character was examined which requires greater
155 * examination.
156 */
157static int
158jsonsl__str_fastparse(jsonsl_t jsn,
159                      const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160{
161    const jsonsl_uchar_t *bytes = *bytes_p;
162    const jsonsl_uchar_t *end;
163    for (end = bytes + *nbytes_p; bytes != end; bytes++) {
164        if (
165#ifdef JSONSL_USE_WCHAR
166                *bytes >= 0x100 ||
167#endif /* JSONSL_USE_WCHAR */
168                (is_simple_char(*bytes))) {
169            INCR_METRIC(TOTAL);
170            INCR_METRIC(STRINGY_INSIGNIFICANT);
171        } else {
172            /* Once we're done here, re-calculate the position variables */
173            jsn->pos += (bytes - *bytes_p);
174            *nbytes_p -= (bytes - *bytes_p);
175            *bytes_p = bytes;
176            return FASTPARSE_BREAK;
177        }
178    }
179
180    /* Once we're done here, re-calculate the position variables */
181    jsn->pos += (bytes - *bytes_p);
182    return FASTPARSE_EXHAUSTED;
183}
184
185/* Functions exactly like str_fastparse, except it also accepts a 'state'
186 * argument, since the number's value is updated in the state. */
187static int
188jsonsl__num_fastparse(jsonsl_t jsn,
189                      const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
190                      struct jsonsl_state_st *state)
191{
192    int exhausted = 1;
193    size_t nbytes = *nbytes_p;
194    const jsonsl_uchar_t *bytes = *bytes_p;
195
196    for (; nbytes; nbytes--, bytes++) {
197        jsonsl_uchar_t c = *bytes;
198        if (isdigit(c)) {
199            INCR_METRIC(TOTAL);
200            INCR_METRIC(NUMBER_FASTPATH);
201            state->nelem = (state->nelem * 10) + (c - 0x30);
202        } else {
203            exhausted = 0;
204            break;
205        }
206    }
207    jsn->pos += (*nbytes_p - nbytes);
208    if (exhausted) {
209        return FASTPARSE_EXHAUSTED;
210    }
211    *nbytes_p = nbytes;
212    *bytes_p = bytes;
213    return FASTPARSE_BREAK;
214}
215
216JSONSL_API
217void
218jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
219{
220
221#define INVOKE_ERROR(eb) \
222    if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
223        goto GT_AGAIN; \
224    } \
225    return;
226
227#define STACK_PUSH \
228    if (jsn->level >= (levels_max-1)) { \
229        jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
230        return; \
231    } \
232    state = jsn->stack + (++jsn->level); \
233    state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
234    state->pos_begin = jsn->pos;
235
236#define STACK_POP_NOPOS \
237    state->pos_cur = jsn->pos; \
238    state = jsn->stack + (--jsn->level);
239
240
241#define STACK_POP \
242    STACK_POP_NOPOS; \
243    state->pos_cur = jsn->pos;
244
245#define CALLBACK_AND_POP_NOPOS(T) \
246        state->pos_cur = jsn->pos; \
247        DO_CALLBACK(T, POP); \
248        state->nescapes = 0; \
249        state = jsn->stack + (--jsn->level);
250
251#define CALLBACK_AND_POP(T) \
252        CALLBACK_AND_POP_NOPOS(T); \
253        state->pos_cur = jsn->pos;
254
255#define SPECIAL_POP \
256    CALLBACK_AND_POP(SPECIAL); \
257    jsn->expecting = 0; \
258    jsn->tok_last = 0; \
259
260#define CUR_CHAR (*(jsonsl_uchar_t*)c)
261
262#define DO_CALLBACK(T, action) \
263    if (jsn->call_##T && \
264            jsn->max_callback_level > state->level && \
265            state->ignore_callback == 0) { \
266        \
267        if (jsn->action_callback_##action) { \
268            jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
269        } else if (jsn->action_callback) { \
270            jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
271        } \
272        if (jsn->stopfl) { return; } \
273    }
274
275    /**
276     * Verifies that we are able to insert the (non-string) item into a hash.
277     */
278#define ENSURE_HVAL \
279    if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
280        INVOKE_ERROR(HKEY_EXPECTED); \
281    }
282
283#define VERIFY_SPECIAL(lit) \
284        if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
285            INVOKE_ERROR(SPECIAL_EXPECTED); \
286        }
287
288#define STATE_SPECIAL_LENGTH \
289    (state)->nescapes
290
291#define IS_NORMAL_NUMBER \
292    ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
293        (state)->special_flags == JSONSL_SPECIALf_SIGNED)
294
295#define STATE_NUM_LAST jsn->tok_last
296
297#define CONTINUE_NEXT_CHAR() continue
298
299    const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
300    size_t levels_max = jsn->levels_max;
301    struct jsonsl_state_st *state = jsn->stack + jsn->level;
302    jsn->base = bytes;
303
304    for (; nbytes; nbytes--, jsn->pos++, c++) {
305        unsigned state_type;
306        INCR_METRIC(TOTAL);
307
308        GT_AGAIN:
309        state_type = state->type;
310        /* Most common type is typically a string: */
311        if (state_type & JSONSL_Tf_STRINGY) {
312            /* Special escape handling for some stuff */
313            if (jsn->in_escape) {
314                jsn->in_escape = 0;
315                if (!is_allowed_escape(CUR_CHAR)) {
316                    INVOKE_ERROR(ESCAPE_INVALID);
317                } else if (CUR_CHAR == 'u') {
318                    DO_CALLBACK(UESCAPE, UESCAPE);
319                    if (jsn->return_UESCAPE) {
320                        return;
321                    }
322                }
323                CONTINUE_NEXT_CHAR();
324            }
325
326            if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
327                    FASTPARSE_EXHAUSTED) {
328                /* No need to readjust variables as we've exhausted the iterator */
329                return;
330            } else {
331                if (CUR_CHAR == '"') {
332                    goto GT_QUOTE;
333                } else if (CUR_CHAR == '\\') {
334                    goto GT_ESCAPE;
335                } else {
336                    INVOKE_ERROR(WEIRD_WHITESPACE);
337                }
338            }
339            INCR_METRIC(STRINGY_SLOWPATH);
340
341        } else if (state_type == JSONSL_T_SPECIAL) {
342            /* Fast track for signed/unsigned */
343            if (IS_NORMAL_NUMBER) {
344                if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
345                        FASTPARSE_EXHAUSTED) {
346                    return;
347                } else {
348                    goto GT_SPECIAL_NUMERIC;
349                }
350            } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
351                if (!isdigit(CUR_CHAR)) {
352                    INVOKE_ERROR(INVALID_NUMBER);
353                }
354
355                if (CUR_CHAR == '0') {
356                    state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
357                } else if (isdigit(CUR_CHAR)) {
358                    state->special_flags = JSONSL_SPECIALf_SIGNED;
359                    state->nelem = CUR_CHAR - 0x30;
360                } else {
361                    INVOKE_ERROR(INVALID_NUMBER);
362                }
363                CONTINUE_NEXT_CHAR();
364
365            } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
366                if (isdigit(CUR_CHAR)) {
367                    /* Following a zero! */
368                    INVOKE_ERROR(INVALID_NUMBER);
369                }
370                /* Unset the 'zero' flag: */
371                if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
372                    state->special_flags = JSONSL_SPECIALf_SIGNED;
373                } else {
374                    state->special_flags = JSONSL_SPECIALf_UNSIGNED;
375                }
376                goto GT_SPECIAL_NUMERIC;
377            }
378
379            if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
380                GT_SPECIAL_NUMERIC:
381                switch (CUR_CHAR) {
382                CASE_DIGITS
383                    STATE_NUM_LAST = '1';
384                    CONTINUE_NEXT_CHAR();
385
386                case '.':
387                    if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
388                        INVOKE_ERROR(INVALID_NUMBER);
389                    }
390                    state->special_flags |= JSONSL_SPECIALf_FLOAT;
391                    STATE_NUM_LAST = '.';
392                    CONTINUE_NEXT_CHAR();
393
394                case 'e':
395                case 'E':
396                    if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
397                        INVOKE_ERROR(INVALID_NUMBER);
398                    }
399                    state->special_flags |= JSONSL_SPECIALf_EXPONENT;
400                    STATE_NUM_LAST = 'e';
401                    CONTINUE_NEXT_CHAR();
402
403                case '-':
404                case '+':
405                    if (STATE_NUM_LAST != 'e') {
406                        INVOKE_ERROR(INVALID_NUMBER);
407                    }
408                    STATE_NUM_LAST = '-';
409                    CONTINUE_NEXT_CHAR();
410
411                default:
412                    if (is_special_end(CUR_CHAR)) {
413                        goto GT_SPECIAL_POP;
414                    }
415                    INVOKE_ERROR(INVALID_NUMBER);
416                    break;
417                }
418            }
419            /* else if (!NUMERIC) */
420            if (!is_special_end(CUR_CHAR)) {
421                STATE_SPECIAL_LENGTH++;
422
423                /* Verify TRUE, FALSE, NULL */
424                if (state->special_flags == JSONSL_SPECIALf_TRUE) {
425                    VERIFY_SPECIAL("true");
426                } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
427                    VERIFY_SPECIAL("false");
428                } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
429                    VERIFY_SPECIAL("null");
430                }
431                INCR_METRIC(SPECIAL_FASTPATH);
432                CONTINUE_NEXT_CHAR();
433            }
434
435            GT_SPECIAL_POP:
436            if (IS_NORMAL_NUMBER) {
437                /* Nothing */
438            } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
439                    state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
440                /* 0 is unsigned! */
441                state->special_flags = JSONSL_SPECIALf_UNSIGNED;
442            } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
443                /* Still in dash! */
444                INVOKE_ERROR(INVALID_NUMBER);
445            } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
446                /* Check that we're not at the end of a token */
447                if (STATE_NUM_LAST != '1') {
448                    INVOKE_ERROR(INVALID_NUMBER);
449                }
450            } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
451                if (STATE_SPECIAL_LENGTH != 4) {
452                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
453                }
454                state->nelem = 1;
455            } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
456                if (STATE_SPECIAL_LENGTH != 5) {
457                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
458                }
459            } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
460                if (STATE_SPECIAL_LENGTH != 4) {
461                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
462                }
463            }
464            SPECIAL_POP;
465            jsn->expecting = ',';
466            if (is_allowed_whitespace(CUR_CHAR)) {
467                CONTINUE_NEXT_CHAR();
468            }
469            /**
470             * This works because we have a non-whitespace token
471             * which is not a special token. If this is a structural
472             * character then it will be gracefully handled by the
473             * switch statement. Otherwise it will default to the 'special'
474             * state again,
475             */
476            goto GT_STRUCTURAL_TOKEN;
477        } else if (is_allowed_whitespace(CUR_CHAR)) {
478            INCR_METRIC(ALLOWED_WHITESPACE);
479            /* So we're not special. Harmless insignificant whitespace
480             * passthrough
481             */
482            CONTINUE_NEXT_CHAR();
483        } else if (extract_special(CUR_CHAR)) {
484            /* not a string, whitespace, or structural token. must be special */
485            goto GT_SPECIAL_BEGIN;
486        }
487
488        INCR_GENERIC(CUR_CHAR);
489
490        if (CUR_CHAR == '"') {
491            GT_QUOTE:
492            jsn->can_insert = 0;
493            switch (state_type) {
494
495            /* the end of a string or hash key */
496            case JSONSL_T_STRING:
497                CALLBACK_AND_POP(STRING);
498                CONTINUE_NEXT_CHAR();
499            case JSONSL_T_HKEY:
500                CALLBACK_AND_POP(HKEY);
501                CONTINUE_NEXT_CHAR();
502
503            case JSONSL_T_OBJECT:
504                state->nelem++;
505                if ( (state->nelem-1) % 2 ) {
506                    /* Odd, this must be a hash value */
507                    if (jsn->tok_last != ':') {
508                        INVOKE_ERROR(MISSING_TOKEN);
509                    }
510                    jsn->expecting = ','; /* Can't figure out what to expect next */
511                    jsn->tok_last = 0;
512
513                    STACK_PUSH;
514                    state->type = JSONSL_T_STRING;
515                    DO_CALLBACK(STRING, PUSH);
516
517                } else {
518                    /* hash key */
519                    if (jsn->expecting != '"') {
520                        INVOKE_ERROR(STRAY_TOKEN);
521                    }
522                    jsn->tok_last = 0;
523                    jsn->expecting = ':';
524
525                    STACK_PUSH;
526                    state->type = JSONSL_T_HKEY;
527                    DO_CALLBACK(HKEY, PUSH);
528                }
529                CONTINUE_NEXT_CHAR();
530
531            case JSONSL_T_LIST:
532                state->nelem++;
533                STACK_PUSH;
534                state->type = JSONSL_T_STRING;
535                jsn->expecting = ',';
536                jsn->tok_last = 0;
537                DO_CALLBACK(STRING, PUSH);
538                CONTINUE_NEXT_CHAR();
539
540            case JSONSL_T_SPECIAL:
541                INVOKE_ERROR(STRAY_TOKEN);
542                break;
543
544            default:
545                INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
546                break;
547            } /* switch(state->type) */
548        } else if (CUR_CHAR == '\\') {
549            GT_ESCAPE:
550            INCR_METRIC(ESCAPES);
551        /* Escape */
552            if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
553                INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
554            }
555            state->nescapes++;
556            jsn->in_escape = 1;
557            CONTINUE_NEXT_CHAR();
558        } /* " or \ */
559
560        GT_STRUCTURAL_TOKEN:
561        switch (CUR_CHAR) {
562        case ':':
563            INCR_METRIC(STRUCTURAL_TOKEN);
564            if (jsn->expecting != CUR_CHAR) {
565                INVOKE_ERROR(STRAY_TOKEN);
566            }
567            jsn->tok_last = ':';
568            jsn->can_insert = 1;
569            jsn->expecting = '"';
570            CONTINUE_NEXT_CHAR();
571
572        case ',':
573            INCR_METRIC(STRUCTURAL_TOKEN);
574            /**
575             * The comma is one of the more generic tokens.
576             * In the context of an OBJECT, the can_insert flag
577             * should never be set, and no other action is
578             * necessary.
579             */
580            if (jsn->expecting != CUR_CHAR) {
581                /* make this branch execute only when we haven't manually
582                 * just placed the ',' in the expecting register.
583                 */
584                INVOKE_ERROR(STRAY_TOKEN);
585            }
586
587            if (state->type == JSONSL_T_OBJECT) {
588                /* end of hash value, expect a string as a hash key */
589                jsn->expecting = '"';
590            } else {
591                jsn->can_insert = 1;
592            }
593
594            jsn->tok_last = ',';
595            jsn->expecting = '"';
596            CONTINUE_NEXT_CHAR();
597
598            /* new list or object */
599            /* hashes are more common */
600        case '{':
601        case '[':
602            INCR_METRIC(STRUCTURAL_TOKEN);
603            if (!jsn->can_insert) {
604                INVOKE_ERROR(CANT_INSERT);
605            }
606
607            ENSURE_HVAL;
608            state->nelem++;
609
610            STACK_PUSH;
611            /* because the constants match the opening delimiters, we can do this: */
612            state->type = CUR_CHAR;
613            state->nelem = 0;
614            jsn->can_insert = 1;
615            if (CUR_CHAR == '{') {
616                /* If we're a hash, we expect a key first, which is quouted */
617                jsn->expecting = '"';
618            }
619            if (CUR_CHAR == JSONSL_T_OBJECT) {
620                DO_CALLBACK(OBJECT, PUSH);
621            } else {
622                DO_CALLBACK(LIST, PUSH);
623            }
624            jsn->tok_last = 0;
625            CONTINUE_NEXT_CHAR();
626
627            /* closing of list or object */
628        case '}':
629        case ']':
630            INCR_METRIC(STRUCTURAL_TOKEN);
631            if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
632                INVOKE_ERROR(TRAILING_COMMA);
633            }
634
635            jsn->can_insert = 0;
636            jsn->level--;
637            jsn->expecting = ',';
638            jsn->tok_last = 0;
639            if (CUR_CHAR == ']') {
640                if (state->type != '[') {
641                    INVOKE_ERROR(BRACKET_MISMATCH);
642                }
643                DO_CALLBACK(LIST, POP);
644            } else {
645                if (state->type != '{') {
646                    INVOKE_ERROR(BRACKET_MISMATCH);
647                } else if (state->nelem && state->nelem % 2 != 0) {
648                    INVOKE_ERROR(VALUE_EXPECTED);
649                }
650                DO_CALLBACK(OBJECT, POP);
651            }
652            state = jsn->stack + jsn->level;
653            state->pos_cur = jsn->pos;
654            CONTINUE_NEXT_CHAR();
655
656        default:
657            GT_SPECIAL_BEGIN:
658            /**
659             * Not a string, not a structural token, and not benign whitespace.
660             * Technically we should iterate over the character always, but since
661             * we are not doing full numerical/value decoding anyway (but only hinting),
662             * we only check upon entry.
663             */
664            if (state->type != JSONSL_T_SPECIAL) {
665                int special_flags = extract_special(CUR_CHAR);
666                if (!special_flags) {
667                    /**
668                     * Try to do some heuristics here anyway to figure out what kind of
669                     * error this is. The 'special' case is a fallback scenario anyway.
670                     */
671                    if (CUR_CHAR == '\0') {
672                        INVOKE_ERROR(FOUND_NULL_BYTE);
673                    } else if (CUR_CHAR < 0x20) {
674                        INVOKE_ERROR(WEIRD_WHITESPACE);
675                    } else {
676                        INVOKE_ERROR(SPECIAL_EXPECTED);
677                    }
678                }
679                ENSURE_HVAL;
680                state->nelem++;
681                if (!jsn->can_insert) {
682                    INVOKE_ERROR(CANT_INSERT);
683                }
684                STACK_PUSH;
685                state->type = JSONSL_T_SPECIAL;
686                state->special_flags = special_flags;
687                STATE_SPECIAL_LENGTH = 1;
688
689                if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
690                    state->nelem = CUR_CHAR - 0x30;
691                    STATE_NUM_LAST = '1';
692                } else {
693                    STATE_NUM_LAST = '-';
694                    state->nelem = 0;
695                }
696                DO_CALLBACK(SPECIAL, PUSH);
697            }
698            CONTINUE_NEXT_CHAR();
699        }
700    }
701}
702
703JSONSL_API
704const char* jsonsl_strerror(jsonsl_error_t err)
705{
706    if (err == JSONSL_ERROR_SUCCESS) {
707        return "SUCCESS";
708    }
709#define X(t) \
710    if (err == JSONSL_ERROR_##t) \
711        return #t;
712    JSONSL_XERR;
713#undef X
714    return "<UNKNOWN_ERROR>";
715}
716
717JSONSL_API
718const char *jsonsl_strtype(jsonsl_type_t type)
719{
720#define X(o,c) \
721    if (type == JSONSL_T_##o) \
722        return #o;
723    JSONSL_XTYPE
724#undef X
725    return "UNKNOWN TYPE";
726
727}
728
729/*
730 *
731 * JPR/JSONPointer functions
732 *
733 *
734 */
735#ifndef JSONSL_NO_JPR
736static
737jsonsl_jpr_type_t
738populate_component(char *in,
739                   struct jsonsl_jpr_component_st *component,
740                   char **next,
741                   jsonsl_error_t *errp)
742{
743    unsigned long pctval;
744    char *c = NULL, *outp = NULL, *end = NULL;
745    size_t input_len;
746    jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
747
748    if (*next == NULL || *(*next) == '\0') {
749        return JSONSL_PATH_NONE;
750    }
751
752    /* Replace the next / with a NULL */
753    *next = strstr(in, "/");
754    if (*next != NULL) {
755        *(*next) = '\0'; /* drop the forward slash */
756        input_len = *next - in;
757        end = *next;
758        *next += 1; /* next character after the '/' */
759    } else {
760        input_len = strlen(in);
761        end = in + input_len + 1;
762    }
763
764    component->pstr = in;
765
766    /* Check for special components of interest */
767    if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
768        /* Lone wildcard */
769        ret = JSONSL_PATH_WILDCARD;
770        goto GT_RET;
771    } else if (isdigit(*in)) {
772        /* ASCII Numeric */
773        char *endptr;
774        component->idx = strtoul(in, &endptr, 10);
775        if (endptr && *endptr == '\0') {
776            ret = JSONSL_PATH_NUMERIC;
777            goto GT_RET;
778        }
779    }
780
781    /* Default, it's a string */
782    ret = JSONSL_PATH_STRING;
783    for (c = outp = in; c < end; c++, outp++) {
784        char origc;
785        if (*c != '%') {
786            goto GT_ASSIGN;
787        }
788        /*
789         * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
790         */
791
792        /* Need %XX */
793        if (c+2 >= end) {
794            *errp = JSONSL_ERROR_PERCENT_BADHEX;
795            return JSONSL_PATH_INVALID;
796        }
797        if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
798            *errp = JSONSL_ERROR_PERCENT_BADHEX;
799            return JSONSL_PATH_INVALID;
800        }
801
802        /* Temporarily null-terminate the characters */
803        origc = *(c+3);
804        *(c+3) = '\0';
805        pctval = strtoul(c+1, NULL, 16);
806        *(c+3) = origc;
807
808        *outp = (char) pctval;
809        c += 2;
810        continue;
811
812        GT_ASSIGN:
813        *outp = *c;
814    }
815    /* Null-terminate the string */
816    for (; outp < c; outp++) {
817        *outp = '\0';
818    }
819
820    GT_RET:
821    component->ptype = ret;
822    if (ret != JSONSL_PATH_WILDCARD) {
823        component->len = strlen(component->pstr);
824    }
825    return ret;
826}
827
828JSONSL_API
829jsonsl_jpr_t
830jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
831{
832    char *my_copy = NULL;
833    int count, curidx;
834    struct jsonsl_jpr_st *ret = NULL;
835    struct jsonsl_jpr_component_st *components = NULL;
836    size_t origlen;
837    jsonsl_error_t errstacked;
838
839#define JPR_BAIL(err) *errp = err; goto GT_ERROR;
840
841    if (errp == NULL) {
842        errp = &errstacked;
843    }
844
845    if (path == NULL || *path != '/') {
846        JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
847        return NULL;
848    }
849
850    count = 1;
851    path++;
852    {
853        const char *c = path;
854        for (; *c; c++) {
855            if (*c == '/') {
856                count++;
857                if (*(c+1) == '/') {
858                    JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
859                }
860            }
861        }
862    }
863    if(*path) {
864        count++;
865    }
866
867    components = (struct jsonsl_jpr_component_st *)
868            malloc(sizeof(*components) * count);
869    if (!components) {
870        JPR_BAIL(JSONSL_ERROR_ENOMEM);
871    }
872
873    my_copy = (char *)malloc(strlen(path) + 1);
874    if (!my_copy) {
875        JPR_BAIL(JSONSL_ERROR_ENOMEM);
876    }
877
878    strcpy(my_copy, path);
879
880    components[0].ptype = JSONSL_PATH_ROOT;
881
882    if (*my_copy) {
883        char *cur = my_copy;
884        int pathret = JSONSL_PATH_STRING;
885        curidx = 1;
886        while (pathret > 0 && curidx < count) {
887            pathret = populate_component(cur, components + curidx, &cur, errp);
888            if (pathret > 0) {
889                curidx++;
890            } else {
891                break;
892            }
893        }
894
895        if (pathret == JSONSL_PATH_INVALID) {
896            JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
897        }
898    } else {
899        curidx = 1;
900    }
901
902    path--; /*revert path to leading '/' */
903    origlen = strlen(path) + 1;
904    ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
905    if (!ret) {
906        JPR_BAIL(JSONSL_ERROR_ENOMEM);
907    }
908    ret->orig = (char *)malloc(origlen);
909    if (!ret->orig) {
910        JPR_BAIL(JSONSL_ERROR_ENOMEM);
911    }
912    ret->components = components;
913    ret->ncomponents = curidx;
914    ret->basestr = my_copy;
915    ret->norig = origlen-1;
916    strcpy(ret->orig, path);
917
918    return ret;
919
920    GT_ERROR:
921    free(my_copy);
922    free(components);
923    if (ret) {
924        free(ret->orig);
925    }
926    free(ret);
927    return NULL;
928#undef JPR_BAIL
929}
930
931void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
932{
933    free(jpr->components);
934    free(jpr->basestr);
935    free(jpr->orig);
936    free(jpr);
937}
938
939/**
940 * Call when there is a possibility of a match, either as a final match or
941 * as a path within a match
942 * @param jpr The JPR path
943 * @param component Component corresponding to the current element
944 * @param prlevel The level of the *parent*
945 * @param chtype The type of the child
946 * @return Match status
947 */
948static jsonsl_jpr_match_t
949jsonsl__match_continue(jsonsl_jpr_t jpr,
950                       const struct jsonsl_jpr_component_st *component,
951                       unsigned prlevel, unsigned chtype)
952{
953    const struct jsonsl_jpr_component_st *next_comp = component + 1;
954    if (prlevel == jpr->ncomponents - 1) {
955        /* This is the match. Check the expected type of the match against
956         * the child */
957        if (jpr->match_type == 0 || jpr->match_type == chtype) {
958            return JSONSL_MATCH_COMPLETE;
959        } else {
960            return JSONSL_MATCH_TYPE_MISMATCH;
961        }
962    }
963    if (chtype == JSONSL_T_LIST) {
964        if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
965            return JSONSL_MATCH_POSSIBLE;
966        } else {
967            return JSONSL_MATCH_TYPE_MISMATCH;
968        }
969    } else if (chtype == JSONSL_T_OBJECT) {
970        if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
971            return JSONSL_MATCH_TYPE_MISMATCH;
972        } else {
973            return JSONSL_MATCH_POSSIBLE;
974        }
975    } else {
976        return JSONSL_MATCH_TYPE_MISMATCH;
977    }
978}
979
980JSONSL_API
981jsonsl_jpr_match_t
982jsonsl_path_match(jsonsl_jpr_t jpr,
983                  const struct jsonsl_state_st *parent,
984                  const struct jsonsl_state_st *child,
985                  const char *key, size_t nkey)
986{
987    const struct jsonsl_jpr_component_st *comp;
988    if (!parent) {
989        /* No parent. Return immediately since it's always a match */
990        return jsonsl__match_continue(jpr, jpr->components, 0, child->type);
991    }
992
993    comp = jpr->components + parent->level;
994
995    /* note that we don't need to verify the type of the match, this is
996     * always done through the previous call to jsonsl__match_continue.
997     * If we are in a POSSIBLE tree then we can be certain the types (at
998     * least at this level) are correct */
999    if (parent->type == JSONSL_T_OBJECT) {
1000        if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) {
1001            return JSONSL_MATCH_NOMATCH;
1002        }
1003    } else {
1004        if (comp->idx != parent->nelem - 1) {
1005            return JSONSL_MATCH_NOMATCH;
1006        }
1007    }
1008    return jsonsl__match_continue(jpr, comp, parent->level, child->type);
1009}
1010
1011JSONSL_API
1012jsonsl_jpr_match_t
1013jsonsl_jpr_match(jsonsl_jpr_t jpr,
1014                   unsigned int parent_type,
1015                   unsigned int parent_level,
1016                   const char *key,
1017                   size_t nkey)
1018{
1019    /* find our current component. This is the child level */
1020    int cmpret;
1021    struct jsonsl_jpr_component_st *p_component;
1022    p_component = jpr->components + parent_level;
1023
1024    if (parent_level >= jpr->ncomponents) {
1025        return JSONSL_MATCH_NOMATCH;
1026    }
1027
1028    /* Lone query for 'root' element. Always matches */
1029    if (parent_level == 0) {
1030        if (jpr->ncomponents == 1) {
1031            return JSONSL_MATCH_COMPLETE;
1032        } else {
1033            return JSONSL_MATCH_POSSIBLE;
1034        }
1035    }
1036
1037    /* Wildcard, always matches */
1038    if (p_component->ptype == JSONSL_PATH_WILDCARD) {
1039        if (parent_level == jpr->ncomponents-1) {
1040            return JSONSL_MATCH_COMPLETE;
1041        } else {
1042            return JSONSL_MATCH_POSSIBLE;
1043        }
1044    }
1045
1046    /* Check numeric array index. This gets its special block so we can avoid
1047     * string comparisons */
1048    if (p_component->ptype == JSONSL_PATH_NUMERIC) {
1049        if (parent_type == JSONSL_T_LIST) {
1050            if (p_component->idx != nkey) {
1051                /* Wrong index */
1052                return JSONSL_MATCH_NOMATCH;
1053            } else {
1054                if (parent_level == jpr->ncomponents-1) {
1055                    /* This is the last element of the path */
1056                    return JSONSL_MATCH_COMPLETE;
1057                } else {
1058                    /* Intermediate element */
1059                    return JSONSL_MATCH_POSSIBLE;
1060                }
1061            }
1062        } else if (p_component->is_arridx) {
1063            /* Numeric and an array index (set explicitly by user). But not
1064             * a list for a parent */
1065            return JSONSL_MATCH_TYPE_MISMATCH;
1066        }
1067    } else if (parent_type == JSONSL_T_LIST) {
1068        return JSONSL_MATCH_TYPE_MISMATCH;
1069    }
1070
1071    /* Check lengths */
1072    if (p_component->len != nkey) {
1073        return JSONSL_MATCH_NOMATCH;
1074    }
1075
1076    /* Check string comparison */
1077    cmpret = strncmp(p_component->pstr, key, nkey);
1078    if (cmpret == 0) {
1079        if (parent_level == jpr->ncomponents-1) {
1080            return JSONSL_MATCH_COMPLETE;
1081        } else {
1082            return JSONSL_MATCH_POSSIBLE;
1083        }
1084    }
1085
1086    return JSONSL_MATCH_NOMATCH;
1087}
1088
1089JSONSL_API
1090void jsonsl_jpr_match_state_init(jsonsl_t jsn,
1091                                 jsonsl_jpr_t *jprs,
1092                                 size_t njprs)
1093{
1094    size_t ii, *firstjmp;
1095    if (njprs == 0) {
1096        return;
1097    }
1098    jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs);
1099    jsn->jpr_count = njprs;
1100    jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max);
1101    memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs);
1102    /* Set the initial jump table values */
1103
1104    firstjmp = jsn->jpr_root;
1105    for (ii = 0; ii < njprs; ii++) {
1106        firstjmp[ii] = ii+1;
1107    }
1108}
1109
1110JSONSL_API
1111void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)
1112{
1113    if (jsn->jpr_count == 0) {
1114        return;
1115    }
1116
1117    free(jsn->jpr_root);
1118    free(jsn->jprs);
1119    jsn->jprs = NULL;
1120    jsn->jpr_root = NULL;
1121    jsn->jpr_count = 0;
1122}
1123
1124/**
1125 * This function should be called exactly once on each element...
1126 * This should also be called in recursive order, since we rely
1127 * on the parent having been initalized for a match.
1128 *
1129 * Since the parent is checked for a match as well, we maintain a 'serial' counter.
1130 * Whenever we traverse an element, we expect the serial to be the same as a global
1131 * integer. If they do not match, we re-initialize the context, and set the serial.
1132 *
1133 * This ensures a type of consistency without having a proactive reset by the
1134 * main lexer itself.
1135 *
1136 */
1137JSONSL_API
1138jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
1139                                    struct jsonsl_state_st *state,
1140                                    const char *key,
1141                                    size_t nkey,
1142                                    jsonsl_jpr_match_t *out)
1143{
1144    struct jsonsl_state_st *parent_state;
1145    jsonsl_jpr_t ret = NULL;
1146
1147    /* Jump and JPR tables for our own state and the parent state */
1148    size_t *jmptable, *pjmptable;
1149    size_t jmp_cur, ii, ourjmpidx;
1150
1151    if (!jsn->jpr_root) {
1152        *out = JSONSL_MATCH_NOMATCH;
1153        return NULL;
1154    }
1155
1156    pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1));
1157    jmptable = pjmptable + jsn->jpr_count;
1158
1159    /* If the parent cannot match, then invalidate it */
1160    if (*pjmptable == 0) {
1161        *jmptable = 0;
1162        *out = JSONSL_MATCH_NOMATCH;
1163        return NULL;
1164    }
1165
1166    parent_state = jsn->stack + state->level - 1;
1167
1168    if (parent_state->type == JSONSL_T_LIST) {
1169        nkey = (size_t) parent_state->nelem;
1170    }
1171
1172    *jmptable = 0;
1173    ourjmpidx = 0;
1174    memset(jmptable, 0, sizeof(int) * jsn->jpr_count);
1175
1176    for (ii = 0; ii <  jsn->jpr_count; ii++) {
1177        jmp_cur = pjmptable[ii];
1178        if (jmp_cur) {
1179            jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1];
1180            *out = jsonsl_jpr_match(jpr,
1181                                    parent_state->type,
1182                                    parent_state->level,
1183                                    key, nkey);
1184            if (*out == JSONSL_MATCH_COMPLETE) {
1185                ret = jpr;
1186                *jmptable = 0;
1187                return ret;
1188            } else if (*out == JSONSL_MATCH_POSSIBLE) {
1189                jmptable[ourjmpidx] = ii+1;
1190                ourjmpidx++;
1191            }
1192        } else {
1193            break;
1194        }
1195    }
1196    if (!*jmptable) {
1197        *out = JSONSL_MATCH_NOMATCH;
1198    }
1199    return NULL;
1200}
1201
1202JSONSL_API
1203const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
1204{
1205#define X(T,v) \
1206    if ( match == JSONSL_MATCH_##T ) \
1207        return #T;
1208    JSONSL_XMATCH
1209#undef X
1210    return "<UNKNOWN>";
1211}
1212
1213#endif /* JSONSL_WITH_JPR */
1214
/**
 * Encode a code point as UTF-8.
 *
 * Writes one to four bytes depending on the magnitude of pt; no
 * terminator is appended and the value is not validated.
 *
 * @param pt The code point to encode
 * @param out Destination; must have room for the encoded bytes
 * @return Pointer just past the last byte written
 */
static char *
jsonsl__writeutf8(uint32_t pt, char *out)
{
    if (pt < 0x80) {
        /* 0xxxxxxx */
        *out++ = (char)pt;
        return out;
    }

    if (pt < 0x800) {
        /* 110xxxxx 10xxxxxx */
        *out++ = (char)(0xC0 | (pt >> 6));
        *out++ = (char)(0x80 | (pt & 0x3F));
        return out;
    }

    if (pt < 0x10000) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        *out++ = (char)(0xE0 | (pt >> 12));
        *out++ = (char)(0x80 | ((pt >> 6) & 0x3F));
        *out++ = (char)(0x80 | (pt & 0x3F));
        return out;
    }

    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    *out++ = (char)(0xF0 | (pt >> 18));
    *out++ = (char)(0x80 | ((pt >> 12) & 0x3F));
    *out++ = (char)(0x80 | ((pt >> 6) & 0x3F));
    *out++ = (char)(0x80 | (pt & 0x3F));
    return out;
}
1238
/* Thanks snej (https://github.com/mnunberg/jsonsl/issues/9) */
/**
 * Convert one hexadecimal digit to its numeric value.
 *
 * @param ch The character to convert; both letter cases are accepted
 * @return 0..15 for a hexadecimal digit, -1 for anything else
 */
static int
jsonsl__digit2int(char ch) {
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return 10 + (ch - 'a');
    }
    if (ch >= 'A' && ch <= 'F') {
        return 10 + (ch - 'A');
    }
    return -1;
}
1256
/**
 * Parse the four hexadecimal digits of a \uXXXX escape.
 *
 * Assume 's' is at least 4 bytes long.
 *
 * @param s Points at the first of the four digits
 * @return The 16-bit value, or -1 if any of the four characters is not a
 *         hexadecimal digit
 */
static int
jsonsl__get_uescape_16(const char *s)
{
    int value = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        int nibble = jsonsl__digit2int(s[pos]);
        if (nibble == -1) {
            return -1;
        }
        /* Most significant digit comes first */
        value = (value << 4) | nibble;
    }
    return value;
}
1276
1277/**
1278 * Utility function to convert escape sequences
1279 */
1280JSONSL_API
1281size_t jsonsl_util_unescape_ex(const char *in,
1282                               char *out,
1283                               size_t len,
1284                               const int toEscape[128],
1285                               unsigned *oflags,
1286                               jsonsl_error_t *err,
1287                               const char **errat)
1288{
1289    const unsigned char *c = (const unsigned char*)in;
1290    char *begin_p = out;
1291    unsigned oflags_s;
1292    uint16_t last_codepoint = 0;
1293
1294    if (!oflags) {
1295        oflags = &oflags_s;
1296    }
1297    *oflags = 0;
1298
1299    #define UNESCAPE_BAIL(e,offset) \
1300        *err = JSONSL_ERROR_##e; \
1301        if (errat) { \
1302            *errat = (const char*)(c+ (ptrdiff_t)(offset)); \
1303        } \
1304        return 0;
1305
1306    for (; len; len--, c++, out++) {
1307        int uescval;
1308        if (*c != '\\') {
1309            /* Not an escape, so we don't care about this */
1310            goto GT_ASSIGN;
1311        }
1312
1313        if (len < 2) {
1314            UNESCAPE_BAIL(ESCAPE_INVALID, 0);
1315        }
1316        if (!is_allowed_escape(c[1])) {
1317            UNESCAPE_BAIL(ESCAPE_INVALID, 1)
1318        }
1319        if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
1320                c[1] != '\\' && c[1] != '"')) {
1321            /* if we don't want to unescape this string, write the escape sequence to the output */
1322            *out++ = *c++;
1323            if (--len == 0)
1324                break;
1325            goto GT_ASSIGN;
1326        }
1327
1328        if (c[1] != 'u') {
1329            /* simple skip-and-replace using pre-defined maps.
1330             * TODO: should the maps actually reflect the desired
1331             * replacement character in toEscape?
1332             */
1333            char esctmp = get_escape_equiv(c[1]);
1334            if (esctmp) {
1335                /* Check if there is a corresponding replacement */
1336                *out = esctmp;
1337            } else {
1338                /* Just gobble up the 'reverse-solidus' */
1339                *out = c[1];
1340            }
1341            len--;
1342            c++;
1343            /* do not assign, just continue */
1344            continue;
1345        }
1346
1347        /* next == 'u' */
1348        if (len < 6) {
1349            /* Need at least six characters.. */
1350            UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2);
1351        }
1352
1353        uescval = jsonsl__get_uescape_16((const char *)c + 2);
1354        if (uescval == -1) {
1355            UNESCAPE_BAIL(PERCENT_BADHEX, -1);
1356        } else if (uescval == 0) {
1357            UNESCAPE_BAIL(INVALID_CODEPOINT, 2);
1358        }
1359
1360        if (last_codepoint) {
1361            uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval;
1362            uint32_t cp;
1363
1364            if (uescval < 0xDC00 || uescval > 0xDFFF) {
1365                UNESCAPE_BAIL(INVALID_CODEPOINT, -1);
1366            }
1367
1368            cp = (w1 & 0x3FF) << 10;
1369            cp |= (w2 & 0x3FF);
1370            cp += 0x10000;
1371
1372            out = jsonsl__writeutf8(cp, out) - 1;
1373            last_codepoint = 0;
1374
1375        } else if (uescval < 0xD800 || uescval > 0xDFFF) {
1376            *oflags |= JSONSL_SPECIALf_NONASCII;
1377            out = jsonsl__writeutf8(uescval, out) - 1;
1378
1379        } else if (uescval > 0xD7FF && uescval < 0xDC00) {
1380            *oflags |= JSONSL_SPECIALf_NONASCII;
1381            last_codepoint = (uint16_t)uescval;
1382            out--;
1383        } else {
1384            UNESCAPE_BAIL(INVALID_CODEPOINT, 2);
1385        }
1386
1387        /* Post uescape cleanup */
1388        len -= 5; /* Gobble up 5 chars after 'u' */
1389        c += 5;
1390        continue;
1391
1392        /* Only reached by previous branches */
1393        GT_ASSIGN:
1394        *out = *c;
1395    }
1396
1397    if (last_codepoint) {
1398        *err = JSONSL_ERROR_INVALID_CODEPOINT;
1399        return 0;
1400    }
1401
1402    *err = JSONSL_ERROR_SUCCESS;
1403    return out - begin_p;
1404}
1405
1406/**
1407 * Character Table definitions.
1408 * These were all generated via srcutil/genchartables.pl
1409 */
1410
1411/**
1412 * This table contains the beginnings of non-string
1413 * allowable (bareword) values.
1414 */
1415static unsigned short Special_Table[0x100] = {
1416        /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
1417        /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
1418        /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */
1419        /* 0x2e */ 0,0, /* 0x2f */
1420        /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */
1421        /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */
1422        /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */
1423        /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */
1424        /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */
1425        /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */
1426        /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */
1427        /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */
1428        /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */
1429        /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */
1430        /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x59 */
1431        /* 0x5a */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */
1432        /* 0x66 */ JSONSL_SPECIALf_FALSE /* <f> */, /* 0x66 */
1433        /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
1434        /* 0x6e */ JSONSL_SPECIALf_NULL /* <n> */, /* 0x6e */
1435        /* 0x6f */ 0,0,0,0,0, /* 0x73 */
1436        /* 0x74 */ JSONSL_SPECIALf_TRUE /* <t> */, /* 0x74 */
1437        /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
1438        /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
1439        /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
1440        /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
1441        /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */
1442};
1443
/**
 * Flags the characters which signal the termination of any of the
 * 'special' bareword values: TAB, LF, CR, space and the characters
 * " , : [ \ ] { }. Every other entry is 0.
 *
 * Laid out as 16 entries per row; the row comment gives the index of the
 * first entry.
 */
static int Special_Endings[0x100] = {
        /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,1,1,0, 0,1,0,0, /* TAB LF CR */
        /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x20 */ 1,0,1,0, 0,0,0,0, 0,0,0,0, 1,0,0,0, /* SP " , */
        /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,1,0, 0,0,0,0, /* : */
        /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,1, 1,1,0,0, /* [ \ ] */
        /* 0x60 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x70 */ 0,0,0,0, 0,0,0,0, 0,0,0,1, 0,1,0,0, /* { } */
        /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
1476
/**
 * Flags the whitespace characters allowed as per RFC 4627: TAB, LF, CR
 * and space. Every other entry is 0.
 *
 * Laid out as 16 entries per row; the row comment gives the index of the
 * first entry.
 */
static int Allowed_Whitespace[0x100] = {
        /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,1,1,0, 0,1,0,0, /* TAB LF CR */
        /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x20 */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* SP */
        /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x60 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x70 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
1496
/**
 * Flags the bytes that may NOT be passed through verbatim while inside a
 * string: every control character (0x00-0x1f), the double quote and the
 * backslash. Every other entry is 0.
 *
 * All 32 control characters are flagged; JSON requires U+0000 through
 * U+001F to be escaped inside strings, so 0x14-0x1f must not be treated
 * differently from 0x00-0x13.
 *
 * Laid out as 16 entries per row; the row comment gives the index of the
 * first entry.
 */
static const int String_No_Passthrough[0x100] = {
        /* 0x00 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* <NUL>..<SI> */
        /* 0x10 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, /* <DLE>..<US> */
        /* 0x20 */ 0,0,1,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, /* <"> */
        /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0, /* <\> */
        /* 0x60 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x70 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
1530
/**
 * Flags the characters that may follow a backslash in the 'common'
 * two-character escapes, plus 'u': " / \ b f n r t u. Every other entry
 * is 0.
 *
 * Laid out as 16 entries per row; the row comment gives the index of the
 * first entry.
 */
static int Allowed_Escapes[0x100] = {
        /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x20 */ 0,0,1,0, 0,0,0,0, 0,0,0,0, 0,0,0,1, /* <"> </> */
        /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0, /* <\> */
        /* 0x60 */ 0,0,1,0, 0,0,1,0, 0,0,0,0, 0,0,1,0, /* <b> <f> <n> */
        /* 0x70 */ 0,0,1,0, 1,1,0,0, 0,0,0,0, 0,0,0,0, /* <r> <t> <u> */
        /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
1560
/**
 * Maps a (single) escaped character to the _value_ it stands for:
 * b -> 8, f -> 12, n -> 10, r -> 13, t -> 9. Every other entry is 0,
 * meaning there is no replacement for that character.
 *
 * Laid out as 16 entries per row; the row comment gives the index of the
 * first entry.
 */
static unsigned char Escape_Equivs[0x100] = {
        /* 0x00 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x10 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x20 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x30 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x40 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x50 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x60 */ 0,0,8,0, 0,0,12,0, 0,0,0,0, 0,0,10,0, /* <b> <f> <n> */
        /* 0x70 */ 0,0,13,0, 9,0,0,0, 0,0,0,0, 0,0,0,0,  /* <r> <t> */
        /* 0x80 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0x90 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xa0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xb0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xc0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xd0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xe0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
        /* 0xf0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
1584
1585/* Definitions of above-declared static functions */
1586static char get_escape_equiv(unsigned c) {
1587    return Escape_Equivs[c & 0xff];
1588}
1589static unsigned extract_special(unsigned c) {
1590    return Special_Table[c & 0xff];
1591}
1592static int is_special_end(unsigned c) {
1593    return Special_Endings[c & 0xff];
1594}
1595static int is_allowed_whitespace(unsigned c) {
1596    return c == ' ' || Allowed_Whitespace[c & 0xff];
1597}
1598static int is_allowed_escape(unsigned c) {
1599    return Allowed_Escapes[c & 0xff];
1600}
1601static int is_simple_char(unsigned c) {
1602    return !String_No_Passthrough[c & 0xff];
1603}
1604
1605/* Clean up all our macros! */
1606#undef INCR_METRIC
1607#undef INCR_GENERIC
1608#undef INCR_STRINGY_CATCH
1609#undef CASE_DIGITS
1610#undef INVOKE_ERROR
1611#undef STACK_PUSH
1612#undef STACK_POP_NOPOS
1613#undef STACK_POP
1614#undef CALLBACK_AND_POP_NOPOS
1615#undef CALLBACK_AND_POP
1616#undef SPECIAL_POP
1617#undef CUR_CHAR
1618#undef DO_CALLBACK
1619#undef ENSURE_HVAL
1620#undef VERIFY_SPECIAL
1621#undef STATE_SPECIAL_LENGTH
1622#undef IS_NORMAL_NUMBER
1623#undef STATE_NUM_LAST
1624#undef FASTPARSE_EXHAUSTED
1625#undef FASTPARSE_BREAK
1626