1 /* Copyright (C) 2012-2015 Mark Nunberg.
2  *
3  * See included LICENSE file for license details.
4  */
5 
6 #include "jsonsl.h"
7 #include <assert.h>
8 #include <limits.h>
9 #include <ctype.h>
10 
11 #ifdef JSONSL_USE_METRICS
12 #define XMETRICS \
13     X(STRINGY_INSIGNIFICANT) \
14     X(STRINGY_SLOWPATH) \
15     X(ALLOWED_WHITESPACE) \
16     X(QUOTE_FASTPATH) \
17     X(SPECIAL_FASTPATH) \
18     X(SPECIAL_WSPOP) \
19     X(SPECIAL_SLOWPATH) \
20     X(GENERIC) \
21     X(STRUCTURAL_TOKEN) \
22     X(SPECIAL_SWITCHFIRST) \
23     X(STRINGY_CATCH) \
24     X(NUMBER_FASTPATH) \
25     X(ESCAPES) \
26     X(TOTAL) \
27 
28 struct jsonsl_metrics_st {
29 #define X(m) \
30     unsigned long metric_##m;
31     XMETRICS
32 #undef X
33 };
34 
35 static struct jsonsl_metrics_st GlobalMetrics = { 0 };
36 static unsigned long GenericCounter[0x100] = { 0 };
37 static unsigned long StringyCatchCounter[0x100] = { 0 };
38 
39 #define INCR_METRIC(m) \
40     GlobalMetrics.metric_##m++;
41 
42 #define INCR_GENERIC(c) \
43         INCR_METRIC(GENERIC); \
44         GenericCounter[c]++; \
45 
46 #define INCR_STRINGY_CATCH(c) \
47     INCR_METRIC(STRINGY_CATCH); \
48     StringyCatchCounter[c]++;
49 
50 JSONSL_API
jsonsl_dump_global_metrics(void)51 void jsonsl_dump_global_metrics(void)
52 {
53     int ii;
54     printf("JSONSL Metrics:\n");
55 #define X(m) \
56     printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57            (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58     XMETRICS
59 #undef X
60     printf("Generic Characters:\n");
61     for (ii = 0; ii < 0xff; ii++) {
62         if (GenericCounter[ii]) {
63             printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64         }
65     }
66     printf("Weird string loop\n");
67     for (ii = 0; ii < 0xff; ii++) {
68         if (StringyCatchCounter[ii]) {
69             printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70         }
71     }
72 }
73 
74 #else
75 #define INCR_METRIC(m)
76 #define INCR_GENERIC(c)
77 #define INCR_STRINGY_CATCH(c)
78 JSONSL_API
jsonsl_dump_global_metrics(void)79 void jsonsl_dump_global_metrics(void) { }
80 #endif /* JSONSL_USE_METRICS */
81 
/*
 * Case labels for the ten ASCII decimal digits. Place inside a switch; the
 * statements written after the macro become the shared body of the labels.
 */
#define CASE_DIGITS \
case '0': case '1': case '2': case '3': case '4': \
case '5': case '6': case '7': case '8': case '9':
93 
/*
 * Forward declarations of file-local helpers that take a single character
 * code. Their definitions are not in this part of the file.
 */
static unsigned extract_special(unsigned c);
static int      is_special_end(unsigned c);
static int      is_allowed_whitespace(unsigned c);
static int      is_allowed_escape(unsigned c);
static int      is_simple_char(unsigned c);
static char     get_escape_equiv(unsigned c);
100 
101 JSONSL_API
jsonsl_new(int nlevels)102 jsonsl_t jsonsl_new(int nlevels)
103 {
104     struct jsonsl_st *jsn = (struct jsonsl_st *)
105             calloc(1, sizeof (*jsn) +
106                     ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
107             );
108 
109     jsn->levels_max = nlevels;
110     jsn->max_callback_level = -1;
111     jsonsl_reset(jsn);
112     return jsn;
113 }
114 
115 JSONSL_API
jsonsl_reset(jsonsl_t jsn)116 void jsonsl_reset(jsonsl_t jsn)
117 {
118     unsigned int ii;
119     jsn->tok_last = 0;
120     jsn->can_insert = 1;
121     jsn->pos = 0;
122     jsn->level = 0;
123     jsn->stopfl = 0;
124     jsn->in_escape = 0;
125     jsn->expecting = 0;
126 
127     memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
128 
129     for (ii = 0; ii < jsn->levels_max; ii++) {
130         jsn->stack[ii].level = ii;
131     }
132 }
133 
134 JSONSL_API
jsonsl_destroy(jsonsl_t jsn)135 void jsonsl_destroy(jsonsl_t jsn)
136 {
137     if (jsn) {
138         free(jsn);
139     }
140 }
141 
142 
143 #define FASTPARSE_EXHAUSTED 1
144 #define FASTPARSE_BREAK 0
145 
146 /*
147  * This function is meant to accelerate string parsing, reducing the main loop's
148  * check if we are indeed a string.
149  *
150  * @param jsn the parser
151  * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152  * @param[in,out] nbytes_p A pointer to the current size of the buffer
153  * @return true if all bytes have been exhausted (and thus the main loop can
154  * return), false if a special character was examined which requires greater
155  * examination.
156  */
157 static int
jsonsl__str_fastparse(jsonsl_t jsn, const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)158 jsonsl__str_fastparse(jsonsl_t jsn,
159                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160 {
161     const jsonsl_uchar_t *bytes = *bytes_p;
162     const jsonsl_uchar_t *end;
163     for (end = bytes + *nbytes_p; bytes != end; bytes++) {
164         if (
165 #ifdef JSONSL_USE_WCHAR
166                 *bytes >= 0x100 ||
167 #endif /* JSONSL_USE_WCHAR */
168                 (is_simple_char(*bytes))) {
169             INCR_METRIC(TOTAL);
170             INCR_METRIC(STRINGY_INSIGNIFICANT);
171         } else {
172             /* Once we're done here, re-calculate the position variables */
173             jsn->pos += (bytes - *bytes_p);
174             *nbytes_p -= (bytes - *bytes_p);
175             *bytes_p = bytes;
176             return FASTPARSE_BREAK;
177         }
178     }
179 
180     /* Once we're done here, re-calculate the position variables */
181     jsn->pos += (bytes - *bytes_p);
182     return FASTPARSE_EXHAUSTED;
183 }
184 
185 /* Functions exactly like str_fastparse, except it also accepts a 'state'
186  * argument, since the number's value is updated in the state. */
187 static int
jsonsl__num_fastparse(jsonsl_t jsn, const jsonsl_uchar_t **bytes_p, size_t *nbytes_p, struct jsonsl_state_st *state)188 jsonsl__num_fastparse(jsonsl_t jsn,
189                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
190                       struct jsonsl_state_st *state)
191 {
192     int exhausted = 1;
193     size_t nbytes = *nbytes_p;
194     const jsonsl_uchar_t *bytes = *bytes_p;
195 
196     for (; nbytes; nbytes--, bytes++) {
197         jsonsl_uchar_t c = *bytes;
198         if (isdigit(c)) {
199             INCR_METRIC(TOTAL);
200             INCR_METRIC(NUMBER_FASTPATH);
201             state->nelem = (state->nelem * 10) + (c - 0x30);
202         } else {
203             exhausted = 0;
204             break;
205         }
206     }
207     jsn->pos += (*nbytes_p - nbytes);
208     if (exhausted) {
209         return FASTPARSE_EXHAUSTED;
210     }
211     *nbytes_p = nbytes;
212     *bytes_p = bytes;
213     return FASTPARSE_BREAK;
214 }
215 
216 JSONSL_API
217 void
jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)218 jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
219 {
220 
221 #define INVOKE_ERROR(eb) \
222     if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
223         goto GT_AGAIN; \
224     } \
225     return;
226 
227 #define STACK_PUSH \
228     if (jsn->level >= (levels_max-1)) { \
229         jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
230         return; \
231     } \
232     state = jsn->stack + (++jsn->level); \
233     state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
234     state->pos_begin = jsn->pos;
235 
236 #define STACK_POP_NOPOS \
237     state->pos_cur = jsn->pos; \
238     state = jsn->stack + (--jsn->level);
239 
240 
241 #define STACK_POP \
242     STACK_POP_NOPOS; \
243     state->pos_cur = jsn->pos;
244 
245 #define CALLBACK_AND_POP_NOPOS(T) \
246         state->pos_cur = jsn->pos; \
247         DO_CALLBACK(T, POP); \
248         state->nescapes = 0; \
249         state = jsn->stack + (--jsn->level);
250 
251 #define CALLBACK_AND_POP(T) \
252         CALLBACK_AND_POP_NOPOS(T); \
253         state->pos_cur = jsn->pos;
254 
255 #define SPECIAL_POP \
256     CALLBACK_AND_POP(SPECIAL); \
257     jsn->expecting = 0; \
258     jsn->tok_last = 0; \
259 
260 #define CUR_CHAR (*(jsonsl_uchar_t*)c)
261 
262 #define DO_CALLBACK(T, action) \
263     if (jsn->call_##T && \
264             jsn->max_callback_level > state->level && \
265             state->ignore_callback == 0) { \
266         \
267         if (jsn->action_callback_##action) { \
268             jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
269         } else if (jsn->action_callback) { \
270             jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
271         } \
272         if (jsn->stopfl) { return; } \
273     }
274 
275     /**
276      * Verifies that we are able to insert the (non-string) item into a hash.
277      */
278 #define ENSURE_HVAL \
279     if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
280         INVOKE_ERROR(HKEY_EXPECTED); \
281     }
282 
283 #define VERIFY_SPECIAL(lit) \
284         if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
285             INVOKE_ERROR(SPECIAL_EXPECTED); \
286         }
287 
288 #define STATE_SPECIAL_LENGTH \
289     (state)->nescapes
290 
291 #define IS_NORMAL_NUMBER \
292     ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
293         (state)->special_flags == JSONSL_SPECIALf_SIGNED)
294 
295 #define STATE_NUM_LAST jsn->tok_last
296 
297 #define CONTINUE_NEXT_CHAR() continue
298 
299     const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
300     size_t levels_max = jsn->levels_max;
301     struct jsonsl_state_st *state = jsn->stack + jsn->level;
302     jsn->base = bytes;
303 
304     for (; nbytes; nbytes--, jsn->pos++, c++) {
305         unsigned state_type;
306         INCR_METRIC(TOTAL);
307 
308         GT_AGAIN:
309         state_type = state->type;
310         /* Most common type is typically a string: */
311         if (state_type & JSONSL_Tf_STRINGY) {
312             /* Special escape handling for some stuff */
313             if (jsn->in_escape) {
314                 jsn->in_escape = 0;
315                 if (!is_allowed_escape(CUR_CHAR)) {
316                     INVOKE_ERROR(ESCAPE_INVALID);
317                 } else if (CUR_CHAR == 'u') {
318                     DO_CALLBACK(UESCAPE, UESCAPE);
319                     if (jsn->return_UESCAPE) {
320                         return;
321                     }
322                 }
323                 CONTINUE_NEXT_CHAR();
324             }
325 
326             if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
327                     FASTPARSE_EXHAUSTED) {
328                 /* No need to readjust variables as we've exhausted the iterator */
329                 return;
330             } else {
331                 if (CUR_CHAR == '"') {
332                     goto GT_QUOTE;
333                 } else if (CUR_CHAR == '\\') {
334                     goto GT_ESCAPE;
335                 } else {
336                     INVOKE_ERROR(WEIRD_WHITESPACE);
337                 }
338             }
339             INCR_METRIC(STRINGY_SLOWPATH);
340 
341         } else if (state_type == JSONSL_T_SPECIAL) {
342             /* Fast track for signed/unsigned */
343             if (IS_NORMAL_NUMBER) {
344                 if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
345                         FASTPARSE_EXHAUSTED) {
346                     return;
347                 } else {
348                     goto GT_SPECIAL_NUMERIC;
349                 }
350             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
351                 if (!isdigit(CUR_CHAR)) {
352                     INVOKE_ERROR(INVALID_NUMBER);
353                 }
354 
355                 if (CUR_CHAR == '0') {
356                     state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
357                 } else if (isdigit(CUR_CHAR)) {
358                     state->special_flags = JSONSL_SPECIALf_SIGNED;
359                     state->nelem = CUR_CHAR - 0x30;
360                 } else {
361                     INVOKE_ERROR(INVALID_NUMBER);
362                 }
363                 CONTINUE_NEXT_CHAR();
364 
365             } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
366                 if (isdigit(CUR_CHAR)) {
367                     /* Following a zero! */
368                     INVOKE_ERROR(INVALID_NUMBER);
369                 }
370                 /* Unset the 'zero' flag: */
371                 if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
372                     state->special_flags = JSONSL_SPECIALf_SIGNED;
373                 } else {
374                     state->special_flags = JSONSL_SPECIALf_UNSIGNED;
375                 }
376                 goto GT_SPECIAL_NUMERIC;
377             }
378 
379             if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
380                 GT_SPECIAL_NUMERIC:
381                 switch (CUR_CHAR) {
382                 CASE_DIGITS
383                     STATE_NUM_LAST = '1';
384                     CONTINUE_NEXT_CHAR();
385 
386                 case '.':
387                     if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
388                         INVOKE_ERROR(INVALID_NUMBER);
389                     }
390                     state->special_flags |= JSONSL_SPECIALf_FLOAT;
391                     STATE_NUM_LAST = '.';
392                     CONTINUE_NEXT_CHAR();
393 
394                 case 'e':
395                 case 'E':
396                     if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
397                         INVOKE_ERROR(INVALID_NUMBER);
398                     }
399                     state->special_flags |= JSONSL_SPECIALf_EXPONENT;
400                     STATE_NUM_LAST = 'e';
401                     CONTINUE_NEXT_CHAR();
402 
403                 case '-':
404                 case '+':
405                     if (STATE_NUM_LAST != 'e') {
406                         INVOKE_ERROR(INVALID_NUMBER);
407                     }
408                     STATE_NUM_LAST = '-';
409                     CONTINUE_NEXT_CHAR();
410 
411                 default:
412                     if (is_special_end(CUR_CHAR)) {
413                         goto GT_SPECIAL_POP;
414                     }
415                     INVOKE_ERROR(INVALID_NUMBER);
416                     break;
417                 }
418             }
419             /* else if (!NUMERIC) */
420             if (!is_special_end(CUR_CHAR)) {
421                 STATE_SPECIAL_LENGTH++;
422 
423                 /* Verify TRUE, FALSE, NULL */
424                 if (state->special_flags == JSONSL_SPECIALf_TRUE) {
425                     VERIFY_SPECIAL("true");
426                 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
427                     VERIFY_SPECIAL("false");
428                 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
429                     VERIFY_SPECIAL("null");
430                 }
431                 INCR_METRIC(SPECIAL_FASTPATH);
432                 CONTINUE_NEXT_CHAR();
433             }
434 
435             GT_SPECIAL_POP:
436             if (IS_NORMAL_NUMBER) {
437                 /* Nothing */
438             } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
439                     state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
440                 /* 0 is unsigned! */
441                 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
442             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
443                 /* Still in dash! */
444                 INVOKE_ERROR(INVALID_NUMBER);
445             } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
446                 /* Check that we're not at the end of a token */
447                 if (STATE_NUM_LAST != '1') {
448                     INVOKE_ERROR(INVALID_NUMBER);
449                 }
450             } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
451                 if (STATE_SPECIAL_LENGTH != 4) {
452                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
453                 }
454                 state->nelem = 1;
455             } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
456                 if (STATE_SPECIAL_LENGTH != 5) {
457                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
458                 }
459             } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
460                 if (STATE_SPECIAL_LENGTH != 4) {
461                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
462                 }
463             }
464             SPECIAL_POP;
465             jsn->expecting = ',';
466             if (is_allowed_whitespace(CUR_CHAR)) {
467                 CONTINUE_NEXT_CHAR();
468             }
469             /**
470              * This works because we have a non-whitespace token
471              * which is not a special token. If this is a structural
472              * character then it will be gracefully handled by the
473              * switch statement. Otherwise it will default to the 'special'
474              * state again,
475              */
476             goto GT_STRUCTURAL_TOKEN;
477         } else if (is_allowed_whitespace(CUR_CHAR)) {
478             INCR_METRIC(ALLOWED_WHITESPACE);
479             /* So we're not special. Harmless insignificant whitespace
480              * passthrough
481              */
482             CONTINUE_NEXT_CHAR();
483         } else if (extract_special(CUR_CHAR)) {
484             /* not a string, whitespace, or structural token. must be special */
485             goto GT_SPECIAL_BEGIN;
486         }
487 
488         INCR_GENERIC(CUR_CHAR);
489 
490         if (CUR_CHAR == '"') {
491             GT_QUOTE:
492             jsn->can_insert = 0;
493             switch (state_type) {
494 
495             /* the end of a string or hash key */
496             case JSONSL_T_STRING:
497                 CALLBACK_AND_POP(STRING);
498                 CONTINUE_NEXT_CHAR();
499             case JSONSL_T_HKEY:
500                 CALLBACK_AND_POP(HKEY);
501                 CONTINUE_NEXT_CHAR();
502 
503             case JSONSL_T_OBJECT:
504                 state->nelem++;
505                 if ( (state->nelem-1) % 2 ) {
506                     /* Odd, this must be a hash value */
507                     if (jsn->tok_last != ':') {
508                         INVOKE_ERROR(MISSING_TOKEN);
509                     }
510                     jsn->expecting = ','; /* Can't figure out what to expect next */
511                     jsn->tok_last = 0;
512 
513                     STACK_PUSH;
514                     state->type = JSONSL_T_STRING;
515                     DO_CALLBACK(STRING, PUSH);
516 
517                 } else {
518                     /* hash key */
519                     if (jsn->expecting != '"') {
520                         INVOKE_ERROR(STRAY_TOKEN);
521                     }
522                     jsn->tok_last = 0;
523                     jsn->expecting = ':';
524 
525                     STACK_PUSH;
526                     state->type = JSONSL_T_HKEY;
527                     DO_CALLBACK(HKEY, PUSH);
528                 }
529                 CONTINUE_NEXT_CHAR();
530 
531             case JSONSL_T_LIST:
532                 state->nelem++;
533                 STACK_PUSH;
534                 state->type = JSONSL_T_STRING;
535                 jsn->expecting = ',';
536                 jsn->tok_last = 0;
537                 DO_CALLBACK(STRING, PUSH);
538                 CONTINUE_NEXT_CHAR();
539 
540             case JSONSL_T_SPECIAL:
541                 INVOKE_ERROR(STRAY_TOKEN);
542                 break;
543 
544             default:
545                 INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
546                 break;
547             } /* switch(state->type) */
548         } else if (CUR_CHAR == '\\') {
549             GT_ESCAPE:
550             INCR_METRIC(ESCAPES);
551         /* Escape */
552             if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
553                 INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
554             }
555             state->nescapes++;
556             jsn->in_escape = 1;
557             CONTINUE_NEXT_CHAR();
558         } /* " or \ */
559 
560         GT_STRUCTURAL_TOKEN:
561         switch (CUR_CHAR) {
562         case ':':
563             INCR_METRIC(STRUCTURAL_TOKEN);
564             if (jsn->expecting != CUR_CHAR) {
565                 INVOKE_ERROR(STRAY_TOKEN);
566             }
567             jsn->tok_last = ':';
568             jsn->can_insert = 1;
569             jsn->expecting = '"';
570             CONTINUE_NEXT_CHAR();
571 
572         case ',':
573             INCR_METRIC(STRUCTURAL_TOKEN);
574             /**
575              * The comma is one of the more generic tokens.
576              * In the context of an OBJECT, the can_insert flag
577              * should never be set, and no other action is
578              * necessary.
579              */
580             if (jsn->expecting != CUR_CHAR) {
581                 /* make this branch execute only when we haven't manually
582                  * just placed the ',' in the expecting register.
583                  */
584                 INVOKE_ERROR(STRAY_TOKEN);
585             }
586 
587             if (state->type == JSONSL_T_OBJECT) {
588                 /* end of hash value, expect a string as a hash key */
589                 jsn->expecting = '"';
590             } else {
591                 jsn->can_insert = 1;
592             }
593 
594             jsn->tok_last = ',';
595             jsn->expecting = '"';
596             CONTINUE_NEXT_CHAR();
597 
598             /* new list or object */
599             /* hashes are more common */
600         case '{':
601         case '[':
602             INCR_METRIC(STRUCTURAL_TOKEN);
603             if (!jsn->can_insert) {
604                 INVOKE_ERROR(CANT_INSERT);
605             }
606 
607             ENSURE_HVAL;
608             state->nelem++;
609 
610             STACK_PUSH;
611             /* because the constants match the opening delimiters, we can do this: */
612             state->type = CUR_CHAR;
613             state->nelem = 0;
614             jsn->can_insert = 1;
615             if (CUR_CHAR == '{') {
616                 /* If we're a hash, we expect a key first, which is quouted */
617                 jsn->expecting = '"';
618             }
619             if (CUR_CHAR == JSONSL_T_OBJECT) {
620                 DO_CALLBACK(OBJECT, PUSH);
621             } else {
622                 DO_CALLBACK(LIST, PUSH);
623             }
624             jsn->tok_last = 0;
625             CONTINUE_NEXT_CHAR();
626 
627             /* closing of list or object */
628         case '}':
629         case ']':
630             INCR_METRIC(STRUCTURAL_TOKEN);
631             if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
632                 INVOKE_ERROR(TRAILING_COMMA);
633             }
634 
635             jsn->can_insert = 0;
636             jsn->level--;
637             jsn->expecting = ',';
638             jsn->tok_last = 0;
639             if (CUR_CHAR == ']') {
640                 if (state->type != '[') {
641                     INVOKE_ERROR(BRACKET_MISMATCH);
642                 }
643                 DO_CALLBACK(LIST, POP);
644             } else {
645                 if (state->type != '{') {
646                     INVOKE_ERROR(BRACKET_MISMATCH);
647                 } else if (state->nelem && state->nelem % 2 != 0) {
648                     INVOKE_ERROR(VALUE_EXPECTED);
649                 }
650                 DO_CALLBACK(OBJECT, POP);
651             }
652             state = jsn->stack + jsn->level;
653             state->pos_cur = jsn->pos;
654             CONTINUE_NEXT_CHAR();
655 
656         default:
657             GT_SPECIAL_BEGIN:
658             /**
659              * Not a string, not a structural token, and not benign whitespace.
660              * Technically we should iterate over the character always, but since
661              * we are not doing full numerical/value decoding anyway (but only hinting),
662              * we only check upon entry.
663              */
664             if (state->type != JSONSL_T_SPECIAL) {
665                 int special_flags = extract_special(CUR_CHAR);
666                 if (!special_flags) {
667                     /**
668                      * Try to do some heuristics here anyway to figure out what kind of
669                      * error this is. The 'special' case is a fallback scenario anyway.
670                      */
671                     if (CUR_CHAR == '\0') {
672                         INVOKE_ERROR(FOUND_NULL_BYTE);
673                     } else if (CUR_CHAR < 0x20) {
674                         INVOKE_ERROR(WEIRD_WHITESPACE);
675                     } else {
676                         INVOKE_ERROR(SPECIAL_EXPECTED);
677                     }
678                 }
679                 ENSURE_HVAL;
680                 state->nelem++;
681                 if (!jsn->can_insert) {
682                     INVOKE_ERROR(CANT_INSERT);
683                 }
684                 STACK_PUSH;
685                 state->type = JSONSL_T_SPECIAL;
686                 state->special_flags = special_flags;
687                 STATE_SPECIAL_LENGTH = 1;
688 
689                 if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
690                     state->nelem = CUR_CHAR - 0x30;
691                     STATE_NUM_LAST = '1';
692                 } else {
693                     STATE_NUM_LAST = '-';
694                     state->nelem = 0;
695                 }
696                 DO_CALLBACK(SPECIAL, PUSH);
697             }
698             CONTINUE_NEXT_CHAR();
699         }
700     }
701 }
702 
703 JSONSL_API
jsonsl_strerror(jsonsl_error_t err)704 const char* jsonsl_strerror(jsonsl_error_t err)
705 {
706     if (err == JSONSL_ERROR_SUCCESS) {
707         return "SUCCESS";
708     }
709 #define X(t) \
710     if (err == JSONSL_ERROR_##t) \
711         return #t;
712     JSONSL_XERR;
713 #undef X
714     return "<UNKNOWN_ERROR>";
715 }
716 
717 JSONSL_API
jsonsl_strtype(jsonsl_type_t type)718 const char *jsonsl_strtype(jsonsl_type_t type)
719 {
720 #define X(o,c) \
721     if (type == JSONSL_T_##o) \
722         return #o;
723     JSONSL_XTYPE
724 #undef X
725     return "UNKNOWN TYPE";
726 
727 }
728 
729 /*
730  *
731  * JPR/JSONPointer functions
732  *
733  *
734  */
735 #ifndef JSONSL_NO_JPR
736 static
737 jsonsl_jpr_type_t
populate_component(char *in, struct jsonsl_jpr_component_st *component, char **next, jsonsl_error_t *errp)738 populate_component(char *in,
739                    struct jsonsl_jpr_component_st *component,
740                    char **next,
741                    jsonsl_error_t *errp)
742 {
743     unsigned long pctval;
744     char *c = NULL, *outp = NULL, *end = NULL;
745     size_t input_len;
746     jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
747 
748     if (*next == NULL || *(*next) == '\0') {
749         return JSONSL_PATH_NONE;
750     }
751 
752     /* Replace the next / with a NULL */
753     *next = strstr(in, "/");
754     if (*next != NULL) {
755         *(*next) = '\0'; /* drop the forward slash */
756         input_len = *next - in;
757         end = *next;
758         *next += 1; /* next character after the '/' */
759     } else {
760         input_len = strlen(in);
761         end = in + input_len + 1;
762     }
763 
764     component->pstr = in;
765 
766     /* Check for special components of interest */
767     if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
768         /* Lone wildcard */
769         ret = JSONSL_PATH_WILDCARD;
770         goto GT_RET;
771     } else if (isdigit(*in)) {
772         /* ASCII Numeric */
773         char *endptr;
774         component->idx = strtoul(in, &endptr, 10);
775         if (endptr && *endptr == '\0') {
776             ret = JSONSL_PATH_NUMERIC;
777             goto GT_RET;
778         }
779     }
780 
781     /* Default, it's a string */
782     ret = JSONSL_PATH_STRING;
783     for (c = outp = in; c < end; c++, outp++) {
784         char origc;
785         if (*c != '%') {
786             goto GT_ASSIGN;
787         }
788         /*
789          * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
790          */
791 
792         /* Need %XX */
793         if (c+2 >= end) {
794             *errp = JSONSL_ERROR_PERCENT_BADHEX;
795             return JSONSL_PATH_INVALID;
796         }
797         if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
798             *errp = JSONSL_ERROR_PERCENT_BADHEX;
799             return JSONSL_PATH_INVALID;
800         }
801 
802         /* Temporarily null-terminate the characters */
803         origc = *(c+3);
804         *(c+3) = '\0';
805         pctval = strtoul(c+1, NULL, 16);
806         *(c+3) = origc;
807 
808         *outp = (char) pctval;
809         c += 2;
810         continue;
811 
812         GT_ASSIGN:
813         *outp = *c;
814     }
815     /* Null-terminate the string */
816     for (; outp < c; outp++) {
817         *outp = '\0';
818     }
819 
820     GT_RET:
821     component->ptype = ret;
822     if (ret != JSONSL_PATH_WILDCARD) {
823         component->len = strlen(component->pstr);
824     }
825     return ret;
826 }
827 
828 JSONSL_API
829 jsonsl_jpr_t
jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)830 jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
831 {
832     char *my_copy = NULL;
833     int count, curidx;
834     struct jsonsl_jpr_st *ret = NULL;
835     struct jsonsl_jpr_component_st *components = NULL;
836     size_t origlen;
837     jsonsl_error_t errstacked;
838 
839 #define JPR_BAIL(err) *errp = err; goto GT_ERROR;
840 
841     if (errp == NULL) {
842         errp = &errstacked;
843     }
844 
845     if (path == NULL || *path != '/') {
846         JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
847         return NULL;
848     }
849 
850     count = 1;
851     path++;
852     {
853         const char *c = path;
854         for (; *c; c++) {
855             if (*c == '/') {
856                 count++;
857                 if (*(c+1) == '/') {
858                     JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
859                 }
860             }
861         }
862     }
863     if(*path) {
864         count++;
865     }
866 
867     components = (struct jsonsl_jpr_component_st *)
868             malloc(sizeof(*components) * count);
869     if (!components) {
870         JPR_BAIL(JSONSL_ERROR_ENOMEM);
871     }
872 
873     my_copy = (char *)malloc(strlen(path) + 1);
874     if (!my_copy) {
875         JPR_BAIL(JSONSL_ERROR_ENOMEM);
876     }
877 
878     strcpy(my_copy, path);
879 
880     components[0].ptype = JSONSL_PATH_ROOT;
881 
882     if (*my_copy) {
883         char *cur = my_copy;
884         int pathret = JSONSL_PATH_STRING;
885         curidx = 1;
886         while (pathret > 0 && curidx < count) {
887             pathret = populate_component(cur, components + curidx, &cur, errp);
888             if (pathret > 0) {
889                 curidx++;
890             } else {
891                 break;
892             }
893         }
894 
895         if (pathret == JSONSL_PATH_INVALID) {
896             JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
897         }
898     } else {
899         curidx = 1;
900     }
901 
902     path--; /*revert path to leading '/' */
903     origlen = strlen(path) + 1;
904     ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
905     if (!ret) {
906         JPR_BAIL(JSONSL_ERROR_ENOMEM);
907     }
908     ret->orig = (char *)malloc(origlen);
909     if (!ret->orig) {
910         JPR_BAIL(JSONSL_ERROR_ENOMEM);
911     }
912     ret->components = components;
913     ret->ncomponents = curidx;
914     ret->basestr = my_copy;
915     ret->norig = origlen-1;
916     strcpy(ret->orig, path);
917 
918     return ret;
919 
920     GT_ERROR:
921     free(my_copy);
922     free(components);
923     if (ret) {
924         free(ret->orig);
925     }
926     free(ret);
927     return NULL;
928 #undef JPR_BAIL
929 }
930 
/**
 * Release a JPR object created by jsonsl_jpr_new(), including its component
 * array and both string copies.
 *
 * @param jpr the object to free; NULL is accepted and ignored, matching
 *        jsonsl_destroy()
 */
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
{
    if (jpr == NULL) {
        return;
    }
    free(jpr->components);
    free(jpr->basestr);
    free(jpr->orig);
    free(jpr);
}
938 
939 /**
940  * Call when there is a possibility of a match, either as a final match or
941  * as a path within a match
942  * @param jpr The JPR path
943  * @param component Component corresponding to the current element
944  * @param prlevel The level of the *parent*
945  * @param chtype The type of the child
946  * @return Match status
947  */
948 static jsonsl_jpr_match_t
jsonsl__match_continue(jsonsl_jpr_t jpr, const struct jsonsl_jpr_component_st *component, unsigned prlevel, unsigned chtype)949 jsonsl__match_continue(jsonsl_jpr_t jpr,
950                        const struct jsonsl_jpr_component_st *component,
951                        unsigned prlevel, unsigned chtype)
952 {
953     const struct jsonsl_jpr_component_st *next_comp = component + 1;
954     if (prlevel == jpr->ncomponents - 1) {
955         /* This is the match. Check the expected type of the match against
956          * the child */
957         if (jpr->match_type == 0 || jpr->match_type == chtype) {
958             return JSONSL_MATCH_COMPLETE;
959         } else {
960             return JSONSL_MATCH_TYPE_MISMATCH;
961         }
962     }
963     if (chtype == JSONSL_T_LIST) {
964         if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
965             return JSONSL_MATCH_POSSIBLE;
966         } else {
967             return JSONSL_MATCH_TYPE_MISMATCH;
968         }
969     } else if (chtype == JSONSL_T_OBJECT) {
970         if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
971             return JSONSL_MATCH_TYPE_MISMATCH;
972         } else {
973             return JSONSL_MATCH_POSSIBLE;
974         }
975     } else {
976         return JSONSL_MATCH_TYPE_MISMATCH;
977     }
978 }
979 
980 JSONSL_API
981 jsonsl_jpr_match_t
jsonsl_path_match(jsonsl_jpr_t jpr, const struct jsonsl_state_st *parent, const struct jsonsl_state_st *child, const char *key, size_t nkey)982 jsonsl_path_match(jsonsl_jpr_t jpr,
983                   const struct jsonsl_state_st *parent,
984                   const struct jsonsl_state_st *child,
985                   const char *key, size_t nkey)
986 {
987     const struct jsonsl_jpr_component_st *comp;
988     if (!parent) {
989         /* No parent. Return immediately since it's always a match */
990         return jsonsl__match_continue(jpr, jpr->components, 0, child->type);
991     }
992 
993     comp = jpr->components + parent->level;
994 
995     /* note that we don't need to verify the type of the match, this is
996      * always done through the previous call to jsonsl__match_continue.
997      * If we are in a POSSIBLE tree then we can be certain the types (at
998      * least at this level) are correct */
999     if (parent->type == JSONSL_T_OBJECT) {
1000         if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) {
1001             return JSONSL_MATCH_NOMATCH;
1002         }
1003     } else {
1004         if (comp->idx != parent->nelem - 1) {
1005             return JSONSL_MATCH_NOMATCH;
1006         }
1007     }
1008     return jsonsl__match_continue(jpr, comp, parent->level, child->type);
1009 }
1010 
1011 JSONSL_API
1012 jsonsl_jpr_match_t
jsonsl_jpr_match(jsonsl_jpr_t jpr, unsigned int parent_type, unsigned int parent_level, const char *key, size_t nkey)1013 jsonsl_jpr_match(jsonsl_jpr_t jpr,
1014                    unsigned int parent_type,
1015                    unsigned int parent_level,
1016                    const char *key,
1017                    size_t nkey)
1018 {
1019     /* find our current component. This is the child level */
1020     int cmpret;
1021     struct jsonsl_jpr_component_st *p_component;
1022     p_component = jpr->components + parent_level;
1023 
1024     if (parent_level >= jpr->ncomponents) {
1025         return JSONSL_MATCH_NOMATCH;
1026     }
1027 
1028     /* Lone query for 'root' element. Always matches */
1029     if (parent_level == 0) {
1030         if (jpr->ncomponents == 1) {
1031             return JSONSL_MATCH_COMPLETE;
1032         } else {
1033             return JSONSL_MATCH_POSSIBLE;
1034         }
1035     }
1036 
1037     /* Wildcard, always matches */
1038     if (p_component->ptype == JSONSL_PATH_WILDCARD) {
1039         if (parent_level == jpr->ncomponents-1) {
1040             return JSONSL_MATCH_COMPLETE;
1041         } else {
1042             return JSONSL_MATCH_POSSIBLE;
1043         }
1044     }
1045 
1046     /* Check numeric array index. This gets its special block so we can avoid
1047      * string comparisons */
1048     if (p_component->ptype == JSONSL_PATH_NUMERIC) {
1049         if (parent_type == JSONSL_T_LIST) {
1050             if (p_component->idx != nkey) {
1051                 /* Wrong index */
1052                 return JSONSL_MATCH_NOMATCH;
1053             } else {
1054                 if (parent_level == jpr->ncomponents-1) {
1055                     /* This is the last element of the path */
1056                     return JSONSL_MATCH_COMPLETE;
1057                 } else {
1058                     /* Intermediate element */
1059                     return JSONSL_MATCH_POSSIBLE;
1060                 }
1061             }
1062         } else if (p_component->is_arridx) {
1063             /* Numeric and an array index (set explicitly by user). But not
1064              * a list for a parent */
1065             return JSONSL_MATCH_TYPE_MISMATCH;
1066         }
1067     } else if (parent_type == JSONSL_T_LIST) {
1068         return JSONSL_MATCH_TYPE_MISMATCH;
1069     }
1070 
1071     /* Check lengths */
1072     if (p_component->len != nkey) {
1073         return JSONSL_MATCH_NOMATCH;
1074     }
1075 
1076     /* Check string comparison */
1077     cmpret = strncmp(p_component->pstr, key, nkey);
1078     if (cmpret == 0) {
1079         if (parent_level == jpr->ncomponents-1) {
1080             return JSONSL_MATCH_COMPLETE;
1081         } else {
1082             return JSONSL_MATCH_POSSIBLE;
1083         }
1084     }
1085 
1086     return JSONSL_MATCH_NOMATCH;
1087 }
1088 
1089 JSONSL_API
jsonsl_jpr_match_state_init(jsonsl_t jsn, jsonsl_jpr_t *jprs, size_t njprs)1090 void jsonsl_jpr_match_state_init(jsonsl_t jsn,
1091                                  jsonsl_jpr_t *jprs,
1092                                  size_t njprs)
1093 {
1094     size_t ii, *firstjmp;
1095     if (njprs == 0) {
1096         return;
1097     }
1098     jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs);
1099     jsn->jpr_count = njprs;
1100     jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max);
1101     memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs);
1102     /* Set the initial jump table values */
1103 
1104     firstjmp = jsn->jpr_root;
1105     for (ii = 0; ii < njprs; ii++) {
1106         firstjmp[ii] = ii+1;
1107     }
1108 }
1109 
1110 JSONSL_API
jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)1111 void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn)
1112 {
1113     if (jsn->jpr_count == 0) {
1114         return;
1115     }
1116 
1117     free(jsn->jpr_root);
1118     free(jsn->jprs);
1119     jsn->jprs = NULL;
1120     jsn->jpr_root = NULL;
1121     jsn->jpr_count = 0;
1122 }
1123 
1124 /**
1125  * This function should be called exactly once on each element...
1126  * This should also be called in recursive order, since we rely
1127  * on the parent having been initalized for a match.
1128  *
1129  * Since the parent is checked for a match as well, we maintain a 'serial' counter.
1130  * Whenever we traverse an element, we expect the serial to be the same as a global
1131  * integer. If they do not match, we re-initialize the context, and set the serial.
1132  *
1133  * This ensures a type of consistency without having a proactive reset by the
1134  * main lexer itself.
1135  *
1136  */
1137 JSONSL_API
jsonsl_jpr_match_state(jsonsl_t jsn, struct jsonsl_state_st *state, const char *key, size_t nkey, jsonsl_jpr_match_t *out)1138 jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
1139                                     struct jsonsl_state_st *state,
1140                                     const char *key,
1141                                     size_t nkey,
1142                                     jsonsl_jpr_match_t *out)
1143 {
1144     struct jsonsl_state_st *parent_state;
1145     jsonsl_jpr_t ret = NULL;
1146 
1147     /* Jump and JPR tables for our own state and the parent state */
1148     size_t *jmptable, *pjmptable;
1149     size_t jmp_cur, ii, ourjmpidx;
1150 
1151     if (!jsn->jpr_root) {
1152         *out = JSONSL_MATCH_NOMATCH;
1153         return NULL;
1154     }
1155 
1156     pjmptable = jsn->jpr_root + (jsn->jpr_count * (state->level-1));
1157     jmptable = pjmptable + jsn->jpr_count;
1158 
1159     /* If the parent cannot match, then invalidate it */
1160     if (*pjmptable == 0) {
1161         *jmptable = 0;
1162         *out = JSONSL_MATCH_NOMATCH;
1163         return NULL;
1164     }
1165 
1166     parent_state = jsn->stack + state->level - 1;
1167 
1168     if (parent_state->type == JSONSL_T_LIST) {
1169         nkey = (size_t) parent_state->nelem;
1170     }
1171 
1172     *jmptable = 0;
1173     ourjmpidx = 0;
1174     memset(jmptable, 0, sizeof(int) * jsn->jpr_count);
1175 
1176     for (ii = 0; ii <  jsn->jpr_count; ii++) {
1177         jmp_cur = pjmptable[ii];
1178         if (jmp_cur) {
1179             jsonsl_jpr_t jpr = jsn->jprs[jmp_cur-1];
1180             *out = jsonsl_jpr_match(jpr,
1181                                     parent_state->type,
1182                                     parent_state->level,
1183                                     key, nkey);
1184             if (*out == JSONSL_MATCH_COMPLETE) {
1185                 ret = jpr;
1186                 *jmptable = 0;
1187                 return ret;
1188             } else if (*out == JSONSL_MATCH_POSSIBLE) {
1189                 jmptable[ourjmpidx] = ii+1;
1190                 ourjmpidx++;
1191             }
1192         } else {
1193             break;
1194         }
1195     }
1196     if (!*jmptable) {
1197         *out = JSONSL_MATCH_NOMATCH;
1198     }
1199     return NULL;
1200 }
1201 
1202 JSONSL_API
jsonsl_strmatchtype(jsonsl_jpr_match_t match)1203 const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
1204 {
1205 #define X(T,v) \
1206     if ( match == JSONSL_MATCH_##T ) \
1207         return #T;
1208     JSONSL_XMATCH
1209 #undef X
1210     return "<UNKNOWN>";
1211 }
1212 
1213 #endif /* JSONSL_WITH_JPR */
1214 
/**
 * Encode the code point `pt` as UTF-8 at `out`.
 *
 * Writes 1 byte for pt < 0x80, 2 bytes for pt < 0x800, 3 bytes for
 * pt < 0x10000 and 4 bytes otherwise. No validation of `pt` is performed.
 *
 * @return Pointer just past the last byte written.
 */
static char *
jsonsl__writeutf8(uint32_t pt, char *out)
{
    uint32_t lead;  /* marker bits of the leading byte */
    int ntrail;     /* number of continuation bytes still to emit */

    if (pt < 0x80) {
        *out++ = (char)pt;
        return out;
    }

    if (pt < 0x800) {
        lead = 0xC0;
        ntrail = 1;
    } else if (pt < 0x10000) {
        lead = 0xE0;
        ntrail = 2;
    } else {
        lead = 0xF0;
        ntrail = 3;
    }

    /* Leading byte carries the topmost bits, then 6 bits per trailer */
    *out++ = (char)((pt >> (6 * ntrail)) | lead);
    while (ntrail-- > 0) {
        *out++ = (char)(((pt >> (6 * ntrail)) & 0x3F) | 0x80);
    }
    return out;
}
1238 
/* Thanks snej (https://github.com/mnunberg/jsonsl/issues/9) */
/**
 * Convert one hexadecimal digit character to its value.
 *
 * @return 0-15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for anything else.
 */
static int
jsonsl__digit2int(char ch) {
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return ch - 'a' + 10;
    }
    if (ch >= 'A' && ch <= 'F') {
        return ch - 'A' + 10;
    }
    return -1;
}
1256 
/**
 * Parse the four hex digits of a \uXXXX escape into a 16-bit value.
 * Assume 's' is at least 4 bytes long.
 *
 * @return The value (0-0xFFFF), or -1 if any of the four characters is not
 *         a hexadecimal digit.
 */
static int
jsonsl__get_uescape_16(const char *s)
{
    int value = 0;
    int pos;

    for (pos = 0; pos < 4; pos++) {
        int nibble = jsonsl__digit2int(s[pos]);
        if (nibble == -1) {
            return -1;
        }
        /* First digit is the most significant nibble */
        value |= (nibble << (12 - (pos * 4)));
    }
    return value;
}
1276 
1277 /**
1278  * Utility function to convert escape sequences
1279  */
1280 JSONSL_API
jsonsl_util_unescape_ex(const char *in, char *out, size_t len, const int toEscape[128], unsigned *oflags, jsonsl_error_t *err, const char **errat)1281 size_t jsonsl_util_unescape_ex(const char *in,
1282                                char *out,
1283                                size_t len,
1284                                const int toEscape[128],
1285                                unsigned *oflags,
1286                                jsonsl_error_t *err,
1287                                const char **errat)
1288 {
1289     const unsigned char *c = (const unsigned char*)in;
1290     char *begin_p = out;
1291     unsigned oflags_s;
1292     uint16_t last_codepoint = 0;
1293 
1294     if (!oflags) {
1295         oflags = &oflags_s;
1296     }
1297     *oflags = 0;
1298 
1299     #define UNESCAPE_BAIL(e,offset) \
1300         *err = JSONSL_ERROR_##e; \
1301         if (errat) { \
1302             *errat = (const char*)(c+ (ptrdiff_t)(offset)); \
1303         } \
1304         return 0;
1305 
1306     for (; len; len--, c++, out++) {
1307         int uescval;
1308         if (*c != '\\') {
1309             /* Not an escape, so we don't care about this */
1310             goto GT_ASSIGN;
1311         }
1312 
1313         if (len < 2) {
1314             UNESCAPE_BAIL(ESCAPE_INVALID, 0);
1315         }
1316         if (!is_allowed_escape(c[1])) {
1317             UNESCAPE_BAIL(ESCAPE_INVALID, 1)
1318         }
1319         if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
1320                 c[1] != '\\' && c[1] != '"')) {
1321             /* if we don't want to unescape this string, write the escape sequence to the output */
1322             *out++ = *c++;
1323             if (--len == 0)
1324                 break;
1325             goto GT_ASSIGN;
1326         }
1327 
1328         if (c[1] != 'u') {
1329             /* simple skip-and-replace using pre-defined maps.
1330              * TODO: should the maps actually reflect the desired
1331              * replacement character in toEscape?
1332              */
1333             char esctmp = get_escape_equiv(c[1]);
1334             if (esctmp) {
1335                 /* Check if there is a corresponding replacement */
1336                 *out = esctmp;
1337             } else {
1338                 /* Just gobble up the 'reverse-solidus' */
1339                 *out = c[1];
1340             }
1341             len--;
1342             c++;
1343             /* do not assign, just continue */
1344             continue;
1345         }
1346 
1347         /* next == 'u' */
1348         if (len < 6) {
1349             /* Need at least six characters.. */
1350             UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2);
1351         }
1352 
1353         uescval = jsonsl__get_uescape_16((const char *)c + 2);
1354         if (uescval == -1) {
1355             UNESCAPE_BAIL(PERCENT_BADHEX, -1);
1356         } else if (uescval == 0) {
1357             UNESCAPE_BAIL(INVALID_CODEPOINT, 2);
1358         }
1359 
1360         if (last_codepoint) {
1361             uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval;
1362             uint32_t cp;
1363 
1364             if (uescval < 0xDC00 || uescval > 0xDFFF) {
1365                 UNESCAPE_BAIL(INVALID_CODEPOINT, -1);
1366             }
1367 
1368             cp = (w1 & 0x3FF) << 10;
1369             cp |= (w2 & 0x3FF);
1370             cp += 0x10000;
1371 
1372             out = jsonsl__writeutf8(cp, out) - 1;
1373             last_codepoint = 0;
1374 
1375         } else if (uescval < 0xD800 || uescval > 0xDFFF) {
1376             *oflags |= JSONSL_SPECIALf_NONASCII;
1377             out = jsonsl__writeutf8(uescval, out) - 1;
1378 
1379         } else if (uescval > 0xD7FF && uescval < 0xDC00) {
1380             *oflags |= JSONSL_SPECIALf_NONASCII;
1381             last_codepoint = (uint16_t)uescval;
1382             out--;
1383         } else {
1384             UNESCAPE_BAIL(INVALID_CODEPOINT, 2);
1385         }
1386 
1387         /* Post uescape cleanup */
1388         len -= 5; /* Gobble up 5 chars after 'u' */
1389         c += 5;
1390         continue;
1391 
1392         /* Only reached by previous branches */
1393         GT_ASSIGN:
1394         *out = *c;
1395     }
1396 
1397     if (last_codepoint) {
1398         *err = JSONSL_ERROR_INVALID_CODEPOINT;
1399         return 0;
1400     }
1401 
1402     *err = JSONSL_ERROR_SUCCESS;
1403     return out - begin_p;
1404 }
1405 
1406 /**
1407  * Character Table definitions.
1408  * These were all generated via srcutil/genchartables.pl
1409  */
1410 
1411 /**
1412  * This table contains the beginnings of non-string
1413  * allowable (bareword) values.
1414  */
1415 static unsigned short Special_Table[0x100] = {
1416         /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
1417         /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
1418         /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */
1419         /* 0x2e */ 0,0, /* 0x2f */
1420         /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */
1421         /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */
1422         /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */
1423         /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */
1424         /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */
1425         /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */
1426         /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */
1427         /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */
1428         /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */
1429         /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */
1430         /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x59 */
1431         /* 0x5a */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */
1432         /* 0x66 */ JSONSL_SPECIALf_FALSE /* <f> */, /* 0x66 */
1433         /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
1434         /* 0x6e */ JSONSL_SPECIALf_NULL /* <n> */, /* 0x6e */
1435         /* 0x6f */ 0,0,0,0,0, /* 0x73 */
1436         /* 0x74 */ JSONSL_SPECIALf_TRUE /* <t> */, /* 0x74 */
1437         /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
1438         /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
1439         /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
1440         /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
1441         /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */
1442 };
1443 
/**
 * Contains characters which signal the termination of any of the 'special' bareword
 * values.
 *
 * Indexed by byte value; 1 for TAB, LF, CR, space, '"', ',', ':', '[', '\',
 * ']', '{' and '}', 0 for everything else (entry 0xff is implicitly 0).
 * The table is only ever read, hence const.
 */
static const int Special_Endings[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
        /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
        /* 0x0a */ 1 /* <LF> */, /* 0x0a */
        /* 0x0b */ 0,0, /* 0x0c */
        /* 0x0d */ 1 /* <CR> */, /* 0x0d */
        /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
        /* 0x20 */ 1 /* <SP> */, /* 0x20 */
        /* 0x21 */ 0, /* 0x21 */
        /* 0x22 */ 1 /* " */, /* 0x22 */
        /* 0x23 */ 0,0,0,0,0,0,0,0,0, /* 0x2b */
        /* 0x2c */ 1 /* , */, /* 0x2c */
        /* 0x2d */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x39 */
        /* 0x3a */ 1 /* : */, /* 0x3a */
        /* 0x3b */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5a */
        /* 0x5b */ 1 /* [ */, /* 0x5b */
        /* 0x5c */ 1 /* \ */, /* 0x5c */
        /* 0x5d */ 1 /* ] */, /* 0x5d */
        /* 0x5e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7a */
        /* 0x7b */ 1 /* { */, /* 0x7b */
        /* 0x7c */ 0, /* 0x7c */
        /* 0x7d */ 1 /* } */, /* 0x7d */
        /* 0x7e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9d */
        /* 0x9e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbd */
        /* 0xbe */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdd */
        /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */
        /* 0xfe */ 0 /* 0xfe */
};
1476 
/**
 * This table contains entries for the allowed whitespace as per RFC 4627
 *
 * Indexed by byte value; 1 for TAB, LF, CR and space, 0 otherwise
 * (entry 0xff is implicitly 0). The table is only ever read, hence const.
 */
static const int Allowed_Whitespace[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
        /* 0x09 */ 1 /* <TAB> */, /* 0x09 */
        /* 0x0a */ 1 /* <LF> */, /* 0x0a */
        /* 0x0b */ 0,0, /* 0x0c */
        /* 0x0d */ 1 /* <CR> */, /* 0x0d */
        /* 0x0e */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
        /* 0x20 */ 1 /* <SP> */, /* 0x20 */
        /* 0x21 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40 */
        /* 0x41 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 */
        /* 0x61 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80 */
        /* 0x81 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0 */
        /* 0xa1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xc0 */
        /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */
        /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1496 
/**
 * Bytes that may NOT be passed through verbatim while scanning a string.
 *
 * Indexed by byte value; 1 for every control character 0x00-0x1f (JSON
 * requires all of them to be escaped inside strings), for the closing
 * quote '"' and for the escape introducer '\'. Everything else is 0
 * (entry 0xff is implicitly 0).
 */
static const int String_No_Passthrough[0x100] = {
        /* 0x00 */ 1 /* <NUL> */, /* 0x00 */
        /* 0x01 */ 1 /* <SOH> */, /* 0x01 */
        /* 0x02 */ 1 /* <STX> */, /* 0x02 */
        /* 0x03 */ 1 /* <ETX> */, /* 0x03 */
        /* 0x04 */ 1 /* <EOT> */, /* 0x04 */
        /* 0x05 */ 1 /* <ENQ> */, /* 0x05 */
        /* 0x06 */ 1 /* <ACK> */, /* 0x06 */
        /* 0x07 */ 1 /* <BEL> */, /* 0x07 */
        /* 0x08 */ 1 /* <BS> */, /* 0x08 */
        /* 0x09 */ 1 /* <HT> */, /* 0x09 */
        /* 0x0a */ 1 /* <LF> */, /* 0x0a */
        /* 0x0b */ 1 /* <VT> */, /* 0x0b */
        /* 0x0c */ 1 /* <FF> */, /* 0x0c */
        /* 0x0d */ 1 /* <CR> */, /* 0x0d */
        /* 0x0e */ 1 /* <SO> */, /* 0x0e */
        /* 0x0f */ 1 /* <SI> */, /* 0x0f */
        /* 0x10 */ 1 /* <DLE> */, /* 0x10 */
        /* 0x11 */ 1 /* <DC1> */, /* 0x11 */
        /* 0x12 */ 1 /* <DC2> */, /* 0x12 */
        /* 0x13 */ 1 /* <DC3> */, /* 0x13 */
        /* 0x14 */ 1 /* <DC4> */, /* 0x14 */
        /* 0x15 */ 1 /* <NAK> */, /* 0x15 */
        /* 0x16 */ 1 /* <SYN> */, /* 0x16 */
        /* 0x17 */ 1 /* <ETB> */, /* 0x17 */
        /* 0x18 */ 1 /* <CAN> */, /* 0x18 */
        /* 0x19 */ 1 /* <EM> */, /* 0x19 */
        /* 0x1a */ 1 /* <SUB> */, /* 0x1a */
        /* 0x1b */ 1 /* <ESC> */, /* 0x1b */
        /* 0x1c */ 1 /* <FS> */, /* 0x1c */
        /* 0x1d */ 1 /* <GS> */, /* 0x1d */
        /* 0x1e */ 1 /* <RS> */, /* 0x1e */
        /* 0x1f */ 1 /* <US> */, /* 0x1f */
        /* 0x20 */ 0,0, /* 0x21 */
        /* 0x22 */ 1 /* <"> */, /* 0x22 */
        /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */
        /* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */
        /* 0x5c */ 1 /* <\> */, /* 0x5c */
        /* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */
        /* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */
        /* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */
        /* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */
        /* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */
        /* 0xfd */ 0,0, /* 0xfe */
};
1530 
/**
 * Allowable two-character 'common' escapes:
 *
 * Indexed by the byte that follows the backslash; 1 for '"', '/', '\',
 * 'b', 'f', 'n', 'r', 't' and 'u', 0 otherwise (entry 0xff is implicitly 0).
 * The table is only ever read, hence const.
 */
static const int Allowed_Escapes[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
        /* 0x20 */ 0,0, /* 0x21 */
        /* 0x22 */ 1 /* <"> */, /* 0x22 */
        /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */
        /* 0x2f */ 1 /* </> */, /* 0x2f */
        /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */
        /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */
        /* 0x5c */ 1 /* <\> */, /* 0x5c */
        /* 0x5d */ 0,0,0,0,0, /* 0x61 */
        /* 0x62 */ 1 /* <b> */, /* 0x62 */
        /* 0x63 */ 0,0,0, /* 0x65 */
        /* 0x66 */ 1 /* <f> */, /* 0x66 */
        /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
        /* 0x6e */ 1 /* <n> */, /* 0x6e */
        /* 0x6f */ 0,0,0, /* 0x71 */
        /* 0x72 */ 1 /* <r> */, /* 0x72 */
        /* 0x73 */ 0, /* 0x73 */
        /* 0x74 */ 1 /* <t> */, /* 0x74 */
        /* 0x75 */ 1 /* <u> */, /* 0x75 */
        /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */
        /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */
        /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */
        /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */
        /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */
};
1560 
/**
 * This table contains the _values_ for a given (single) escaped character.
 *
 * Indexed by the byte that follows the backslash: 'b' -> 8, 'f' -> 12,
 * 'n' -> 10, 'r' -> 13, 't' -> 9. A 0 entry means there is no replacement
 * byte for that character (entry 0xff is implicitly 0).
 * The table is only ever read, hence const.
 */
static const unsigned char Escape_Equivs[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
        /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */
        /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */
        /* 0x60 */ 0,0, /* 0x61 */
        /* 0x62 */ 8 /* <b> */, /* 0x62 */
        /* 0x63 */ 0,0,0, /* 0x65 */
        /* 0x66 */ 12 /* <f> */, /* 0x66 */
        /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
        /* 0x6e */ 10 /* <n> */, /* 0x6e */
        /* 0x6f */ 0,0,0, /* 0x71 */
        /* 0x72 */ 13 /* <r> */, /* 0x72 */
        /* 0x73 */ 0, /* 0x73 */
        /* 0x74 */ 9 /* <t> */, /* 0x74 */
        /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
        /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
        /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
        /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
        /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
1584 
1585 /* Definitions of above-declared static functions */
get_escape_equiv(unsigned c)1586 static char get_escape_equiv(unsigned c) {
1587     return Escape_Equivs[c & 0xff];
1588 }
extract_special(unsigned c)1589 static unsigned extract_special(unsigned c) {
1590     return Special_Table[c & 0xff];
1591 }
is_special_end(unsigned c)1592 static int is_special_end(unsigned c) {
1593     return Special_Endings[c & 0xff];
1594 }
is_allowed_whitespace(unsigned c)1595 static int is_allowed_whitespace(unsigned c) {
1596     return c == ' ' || Allowed_Whitespace[c & 0xff];
1597 }
is_allowed_escape(unsigned c)1598 static int is_allowed_escape(unsigned c) {
1599     return Allowed_Escapes[c & 0xff];
1600 }
is_simple_char(unsigned c)1601 static int is_simple_char(unsigned c) {
1602     return !String_No_Passthrough[c & 0xff];
1603 }
1604 
1605 /* Clean up all our macros! */
1606 #undef INCR_METRIC
1607 #undef INCR_GENERIC
1608 #undef INCR_STRINGY_CATCH
1609 #undef CASE_DIGITS
1610 #undef INVOKE_ERROR
1611 #undef STACK_PUSH
1612 #undef STACK_POP_NOPOS
1613 #undef STACK_POP
1614 #undef CALLBACK_AND_POP_NOPOS
1615 #undef CALLBACK_AND_POP
1616 #undef SPECIAL_POP
1617 #undef CUR_CHAR
1618 #undef DO_CALLBACK
1619 #undef ENSURE_HVAL
1620 #undef VERIFY_SPECIAL
1621 #undef STATE_SPECIAL_LENGTH
1622 #undef IS_NORMAL_NUMBER
1623 #undef STATE_NUM_LAST
1624 #undef FASTPARSE_EXHAUSTED
1625 #undef FASTPARSE_BREAK
1626