/* xref: /6.6.0/subjson/contrib/jsonsl/jsonsl.c (revision 5cf05eaf) */
/* Copyright (C) 2012-2015 Mark Nunberg.
 *
 * See included LICENSE file for license details.
 */

#include "jsonsl.h"

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

11e649f75aSMark Nunberg #ifdef JSONSL_USE_METRICS
12e649f75aSMark Nunberg #define XMETRICS \
13e649f75aSMark Nunberg     X(STRINGY_INSIGNIFICANT) \
14e649f75aSMark Nunberg     X(STRINGY_SLOWPATH) \
15e649f75aSMark Nunberg     X(ALLOWED_WHITESPACE) \
16e649f75aSMark Nunberg     X(QUOTE_FASTPATH) \
17e649f75aSMark Nunberg     X(SPECIAL_FASTPATH) \
18e649f75aSMark Nunberg     X(SPECIAL_WSPOP) \
19e649f75aSMark Nunberg     X(SPECIAL_SLOWPATH) \
20e649f75aSMark Nunberg     X(GENERIC) \
21e649f75aSMark Nunberg     X(STRUCTURAL_TOKEN) \
22e649f75aSMark Nunberg     X(SPECIAL_SWITCHFIRST) \
23e649f75aSMark Nunberg     X(STRINGY_CATCH) \
24431730afSMark Nunberg     X(NUMBER_FASTPATH) \
25e649f75aSMark Nunberg     X(ESCAPES) \
26e649f75aSMark Nunberg     X(TOTAL) \
27e649f75aSMark Nunberg 
28e649f75aSMark Nunberg struct jsonsl_metrics_st {
29e649f75aSMark Nunberg #define X(m) \
30e649f75aSMark Nunberg     unsigned long metric_##m;
31e649f75aSMark Nunberg     XMETRICS
32e649f75aSMark Nunberg #undef X
33e649f75aSMark Nunberg };
34e649f75aSMark Nunberg 
35e649f75aSMark Nunberg static struct jsonsl_metrics_st GlobalMetrics = { 0 };
36e649f75aSMark Nunberg static unsigned long GenericCounter[0x100] = { 0 };
37e649f75aSMark Nunberg static unsigned long StringyCatchCounter[0x100] = { 0 };
38e649f75aSMark Nunberg 
39e649f75aSMark Nunberg #define INCR_METRIC(m) \
40e649f75aSMark Nunberg     GlobalMetrics.metric_##m++;
41e649f75aSMark Nunberg 
42e649f75aSMark Nunberg #define INCR_GENERIC(c) \
43e649f75aSMark Nunberg         INCR_METRIC(GENERIC); \
44e649f75aSMark Nunberg         GenericCounter[c]++; \
45e649f75aSMark Nunberg 
46e649f75aSMark Nunberg #define INCR_STRINGY_CATCH(c) \
47e649f75aSMark Nunberg     INCR_METRIC(STRINGY_CATCH); \
48e649f75aSMark Nunberg     StringyCatchCounter[c]++;
49e649f75aSMark Nunberg 
50e649f75aSMark Nunberg JSONSL_API
jsonsl_dump_global_metrics(void)51e649f75aSMark Nunberg void jsonsl_dump_global_metrics(void)
52e649f75aSMark Nunberg {
53e649f75aSMark Nunberg     int ii;
54e649f75aSMark Nunberg     printf("JSONSL Metrics:\n");
55e649f75aSMark Nunberg #define X(m) \
56e649f75aSMark Nunberg     printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57e649f75aSMark Nunberg            (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58e649f75aSMark Nunberg     XMETRICS
59e649f75aSMark Nunberg #undef X
60e649f75aSMark Nunberg     printf("Generic Characters:\n");
61e649f75aSMark Nunberg     for (ii = 0; ii < 0xff; ii++) {
62e649f75aSMark Nunberg         if (GenericCounter[ii]) {
63e649f75aSMark Nunberg             printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64e649f75aSMark Nunberg         }
65e649f75aSMark Nunberg     }
66e649f75aSMark Nunberg     printf("Weird string loop\n");
67e649f75aSMark Nunberg     for (ii = 0; ii < 0xff; ii++) {
68e649f75aSMark Nunberg         if (StringyCatchCounter[ii]) {
69e649f75aSMark Nunberg             printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70e649f75aSMark Nunberg         }
71e649f75aSMark Nunberg     }
72e649f75aSMark Nunberg }
73e649f75aSMark Nunberg 
74e649f75aSMark Nunberg #else
75e649f75aSMark Nunberg #define INCR_METRIC(m)
76e649f75aSMark Nunberg #define INCR_GENERIC(c)
77e649f75aSMark Nunberg #define INCR_STRINGY_CATCH(c)
78e649f75aSMark Nunberg JSONSL_API
jsonsl_dump_global_metrics(void)79e649f75aSMark Nunberg void jsonsl_dump_global_metrics(void) { }
80e649f75aSMark Nunberg #endif /* JSONSL_USE_METRICS */
81e649f75aSMark Nunberg 
/* Expands to the ten `case` labels '0'..'9', for use inside a switch body. */
#define CASE_DIGITS \
case '0': \
case '1': \
case '2': \
case '3': \
case '4': \
case '5': \
case '6': \
case '7': \
case '8': \
case '9':

/* Forward declarations of the file-local character helpers used by the
 * parser below; being static, they are defined elsewhere in this file. */
static unsigned extract_special(unsigned c);
static int is_special_end(unsigned c);
static int is_allowed_whitespace(unsigned c);
static int is_allowed_escape(unsigned c);
static int is_simple_char(unsigned c);
static char get_escape_equiv(unsigned c);

101e649f75aSMark Nunberg JSONSL_API
jsonsl_new(int nlevels)102e649f75aSMark Nunberg jsonsl_t jsonsl_new(int nlevels)
103e649f75aSMark Nunberg {
104bfb89cbcSMark Nunberg     struct jsonsl_st *jsn = (struct jsonsl_st *)
105e649f75aSMark Nunberg             calloc(1, sizeof (*jsn) +
106e649f75aSMark Nunberg                     ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
107e649f75aSMark Nunberg             );
108e649f75aSMark Nunberg 
109e649f75aSMark Nunberg     jsn->levels_max = nlevels;
110e649f75aSMark Nunberg     jsn->max_callback_level = -1;
111e649f75aSMark Nunberg     jsonsl_reset(jsn);
112e649f75aSMark Nunberg     return jsn;
113e649f75aSMark Nunberg }
114e649f75aSMark Nunberg 
115e649f75aSMark Nunberg JSONSL_API
jsonsl_reset(jsonsl_t jsn)116e649f75aSMark Nunberg void jsonsl_reset(jsonsl_t jsn)
117e649f75aSMark Nunberg {
118e649f75aSMark Nunberg     unsigned int ii;
119e649f75aSMark Nunberg     jsn->tok_last = 0;
120e649f75aSMark Nunberg     jsn->can_insert = 1;
121e649f75aSMark Nunberg     jsn->pos = 0;
122e649f75aSMark Nunberg     jsn->level = 0;
1239c3aeb87SMark Nunberg     jsn->stopfl = 0;
124e649f75aSMark Nunberg     jsn->in_escape = 0;
125e649f75aSMark Nunberg     jsn->expecting = 0;
126e649f75aSMark Nunberg 
127e649f75aSMark Nunberg     memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
128e649f75aSMark Nunberg 
129e649f75aSMark Nunberg     for (ii = 0; ii < jsn->levels_max; ii++) {
130e649f75aSMark Nunberg         jsn->stack[ii].level = ii;
131e649f75aSMark Nunberg     }
132e649f75aSMark Nunberg }
133e649f75aSMark Nunberg 
134e649f75aSMark Nunberg JSONSL_API
jsonsl_destroy(jsonsl_t jsn)135e649f75aSMark Nunberg void jsonsl_destroy(jsonsl_t jsn)
136e649f75aSMark Nunberg {
137e649f75aSMark Nunberg     if (jsn) {
138e649f75aSMark Nunberg         free(jsn);
139e649f75aSMark Nunberg     }
140e649f75aSMark Nunberg }
141e649f75aSMark Nunberg 
142431730afSMark Nunberg 
/* Return codes of the jsonsl__*_fastparse() helpers below:
 * EXHAUSTED - every remaining input byte was consumed by the fast path;
 * BREAK     - the fast path stopped at a byte it does not handle itself. */
#define FASTPARSE_EXHAUSTED 1
#define FASTPARSE_BREAK 0

146431730afSMark Nunberg /*
147431730afSMark Nunberg  * This function is meant to accelerate string parsing, reducing the main loop's
148431730afSMark Nunberg  * check if we are indeed a string.
149431730afSMark Nunberg  *
150431730afSMark Nunberg  * @param jsn the parser
151431730afSMark Nunberg  * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152431730afSMark Nunberg  * @param[in,out] nbytes_p A pointer to the current size of the buffer
153431730afSMark Nunberg  * @return true if all bytes have been exhausted (and thus the main loop can
154431730afSMark Nunberg  * return), false if a special character was examined which requires greater
155431730afSMark Nunberg  * examination.
156431730afSMark Nunberg  */
157431730afSMark Nunberg static int
jsonsl__str_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p)158431730afSMark Nunberg jsonsl__str_fastparse(jsonsl_t jsn,
159431730afSMark Nunberg                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160431730afSMark Nunberg {
161431730afSMark Nunberg     const jsonsl_uchar_t *bytes = *bytes_p;
162*5cf05eafSMark Nunberg     const jsonsl_uchar_t *end;
163*5cf05eafSMark Nunberg     for (end = bytes + *nbytes_p; bytes != end; bytes++) {
164431730afSMark Nunberg         if (
165431730afSMark Nunberg #ifdef JSONSL_USE_WCHAR
166431730afSMark Nunberg                 *bytes >= 0x100 ||
167431730afSMark Nunberg #endif /* JSONSL_USE_WCHAR */
168*5cf05eafSMark Nunberg                 (is_simple_char(*bytes))) {
169431730afSMark Nunberg             INCR_METRIC(TOTAL);
170431730afSMark Nunberg             INCR_METRIC(STRINGY_INSIGNIFICANT);
171431730afSMark Nunberg         } else {
172*5cf05eafSMark Nunberg             /* Once we're done here, re-calculate the position variables */
173*5cf05eafSMark Nunberg             jsn->pos += (bytes - *bytes_p);
174*5cf05eafSMark Nunberg             *nbytes_p -= (bytes - *bytes_p);
175*5cf05eafSMark Nunberg             *bytes_p = bytes;
176*5cf05eafSMark Nunberg             return FASTPARSE_BREAK;
177431730afSMark Nunberg         }
178431730afSMark Nunberg     }
179431730afSMark Nunberg 
180431730afSMark Nunberg     /* Once we're done here, re-calculate the position variables */
181*5cf05eafSMark Nunberg     jsn->pos += (bytes - *bytes_p);
182431730afSMark Nunberg     return FASTPARSE_EXHAUSTED;
183431730afSMark Nunberg }
184431730afSMark Nunberg 
185431730afSMark Nunberg /* Functions exactly like str_fastparse, except it also accepts a 'state'
186431730afSMark Nunberg  * argument, since the number's value is updated in the state. */
187431730afSMark Nunberg static int
jsonsl__num_fastparse(jsonsl_t jsn,const jsonsl_uchar_t ** bytes_p,size_t * nbytes_p,struct jsonsl_state_st * state)188431730afSMark Nunberg jsonsl__num_fastparse(jsonsl_t jsn,
189431730afSMark Nunberg                       const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
190431730afSMark Nunberg                       struct jsonsl_state_st *state)
191431730afSMark Nunberg {
192431730afSMark Nunberg     int exhausted = 1;
193431730afSMark Nunberg     size_t nbytes = *nbytes_p;
194431730afSMark Nunberg     const jsonsl_uchar_t *bytes = *bytes_p;
195431730afSMark Nunberg 
196431730afSMark Nunberg     for (; nbytes; nbytes--, bytes++) {
197431730afSMark Nunberg         jsonsl_uchar_t c = *bytes;
198431730afSMark Nunberg         if (isdigit(c)) {
199431730afSMark Nunberg             INCR_METRIC(TOTAL);
200431730afSMark Nunberg             INCR_METRIC(NUMBER_FASTPATH);
201431730afSMark Nunberg             state->nelem = (state->nelem * 10) + (c - 0x30);
202431730afSMark Nunberg         } else {
203431730afSMark Nunberg             exhausted = 0;
204431730afSMark Nunberg             break;
205431730afSMark Nunberg         }
206431730afSMark Nunberg     }
207431730afSMark Nunberg     jsn->pos += (*nbytes_p - nbytes);
208431730afSMark Nunberg     if (exhausted) {
209431730afSMark Nunberg         return FASTPARSE_EXHAUSTED;
210431730afSMark Nunberg     }
211431730afSMark Nunberg     *nbytes_p = nbytes;
212431730afSMark Nunberg     *bytes_p = bytes;
213431730afSMark Nunberg     return FASTPARSE_BREAK;
214431730afSMark Nunberg }
215431730afSMark Nunberg 
216e649f75aSMark Nunberg JSONSL_API
217e649f75aSMark Nunberg void
jsonsl_feed(jsonsl_t jsn,const jsonsl_char_t * bytes,size_t nbytes)218e649f75aSMark Nunberg jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
219e649f75aSMark Nunberg {
220e649f75aSMark Nunberg 
221e649f75aSMark Nunberg #define INVOKE_ERROR(eb) \
222e649f75aSMark Nunberg     if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
223e649f75aSMark Nunberg         goto GT_AGAIN; \
224e649f75aSMark Nunberg     } \
225e649f75aSMark Nunberg     return;
226e649f75aSMark Nunberg 
227e649f75aSMark Nunberg #define STACK_PUSH \
228e649f75aSMark Nunberg     if (jsn->level >= (levels_max-1)) { \
229e649f75aSMark Nunberg         jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
230e649f75aSMark Nunberg         return; \
231e649f75aSMark Nunberg     } \
232e649f75aSMark Nunberg     state = jsn->stack + (++jsn->level); \
233e649f75aSMark Nunberg     state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
234e649f75aSMark Nunberg     state->pos_begin = jsn->pos;
235e649f75aSMark Nunberg 
236e649f75aSMark Nunberg #define STACK_POP_NOPOS \
237e649f75aSMark Nunberg     state->pos_cur = jsn->pos; \
238e649f75aSMark Nunberg     state = jsn->stack + (--jsn->level);
239e649f75aSMark Nunberg 
240e649f75aSMark Nunberg 
241e649f75aSMark Nunberg #define STACK_POP \
242e649f75aSMark Nunberg     STACK_POP_NOPOS; \
243e649f75aSMark Nunberg     state->pos_cur = jsn->pos;
244e649f75aSMark Nunberg 
245e649f75aSMark Nunberg #define CALLBACK_AND_POP_NOPOS(T) \
246e649f75aSMark Nunberg         state->pos_cur = jsn->pos; \
247e649f75aSMark Nunberg         DO_CALLBACK(T, POP); \
248e649f75aSMark Nunberg         state->nescapes = 0; \
249e649f75aSMark Nunberg         state = jsn->stack + (--jsn->level);
250e649f75aSMark Nunberg 
251e649f75aSMark Nunberg #define CALLBACK_AND_POP(T) \
252e649f75aSMark Nunberg         CALLBACK_AND_POP_NOPOS(T); \
253e649f75aSMark Nunberg         state->pos_cur = jsn->pos;
254e649f75aSMark Nunberg 
255e649f75aSMark Nunberg #define SPECIAL_POP \
256e649f75aSMark Nunberg     CALLBACK_AND_POP(SPECIAL); \
257e649f75aSMark Nunberg     jsn->expecting = 0; \
258e649f75aSMark Nunberg     jsn->tok_last = 0; \
259e649f75aSMark Nunberg 
260e649f75aSMark Nunberg #define CUR_CHAR (*(jsonsl_uchar_t*)c)
261e649f75aSMark Nunberg 
262e649f75aSMark Nunberg #define DO_CALLBACK(T, action) \
263e649f75aSMark Nunberg     if (jsn->call_##T && \
264e649f75aSMark Nunberg             jsn->max_callback_level > state->level && \
265e649f75aSMark Nunberg             state->ignore_callback == 0) { \
266e649f75aSMark Nunberg         \
267e649f75aSMark Nunberg         if (jsn->action_callback_##action) { \
268e649f75aSMark Nunberg             jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
269e649f75aSMark Nunberg         } else if (jsn->action_callback) { \
270e649f75aSMark Nunberg             jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
271e649f75aSMark Nunberg         } \
2729c3aeb87SMark Nunberg         if (jsn->stopfl) { return; } \
273e649f75aSMark Nunberg     }
274e649f75aSMark Nunberg 
275e649f75aSMark Nunberg     /**
276e649f75aSMark Nunberg      * Verifies that we are able to insert the (non-string) item into a hash.
277e649f75aSMark Nunberg      */
278e649f75aSMark Nunberg #define ENSURE_HVAL \
279e649f75aSMark Nunberg     if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
280e649f75aSMark Nunberg         INVOKE_ERROR(HKEY_EXPECTED); \
281e649f75aSMark Nunberg     }
282e649f75aSMark Nunberg 
283e649f75aSMark Nunberg #define VERIFY_SPECIAL(lit) \
284e649f75aSMark Nunberg         if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
285e649f75aSMark Nunberg             INVOKE_ERROR(SPECIAL_EXPECTED); \
286e649f75aSMark Nunberg         }
287e649f75aSMark Nunberg 
2881f330d76SMark Nunberg #define STATE_SPECIAL_LENGTH \
2891f330d76SMark Nunberg     (state)->nescapes
2901f330d76SMark Nunberg 
2911f330d76SMark Nunberg #define IS_NORMAL_NUMBER \
2921f330d76SMark Nunberg     ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
2931f330d76SMark Nunberg         (state)->special_flags == JSONSL_SPECIALf_SIGNED)
2941f330d76SMark Nunberg 
2951f330d76SMark Nunberg #define STATE_NUM_LAST jsn->tok_last
2961f330d76SMark Nunberg 
297431730afSMark Nunberg #define CONTINUE_NEXT_CHAR() continue
298431730afSMark Nunberg 
299e649f75aSMark Nunberg     const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
300e649f75aSMark Nunberg     size_t levels_max = jsn->levels_max;
301e649f75aSMark Nunberg     struct jsonsl_state_st *state = jsn->stack + jsn->level;
302e649f75aSMark Nunberg     jsn->base = bytes;
303e649f75aSMark Nunberg 
304e649f75aSMark Nunberg     for (; nbytes; nbytes--, jsn->pos++, c++) {
3051efa455eSTrond Norbye         unsigned state_type;
306e649f75aSMark Nunberg         INCR_METRIC(TOTAL);
307431730afSMark Nunberg 
308431730afSMark Nunberg         GT_AGAIN:
309431730afSMark Nunberg         state_type = state->type;
310431730afSMark Nunberg         /* Most common type is typically a string: */
311431730afSMark Nunberg         if (state_type & JSONSL_Tf_STRINGY) {
312e649f75aSMark Nunberg             /* Special escape handling for some stuff */
313e649f75aSMark Nunberg             if (jsn->in_escape) {
314e649f75aSMark Nunberg                 jsn->in_escape = 0;
315e649f75aSMark Nunberg                 if (!is_allowed_escape(CUR_CHAR)) {
316e649f75aSMark Nunberg                     INVOKE_ERROR(ESCAPE_INVALID);
317e649f75aSMark Nunberg                 } else if (CUR_CHAR == 'u') {
318e649f75aSMark Nunberg                     DO_CALLBACK(UESCAPE, UESCAPE);
319e649f75aSMark Nunberg                     if (jsn->return_UESCAPE) {
320e649f75aSMark Nunberg                         return;
321e649f75aSMark Nunberg                     }
322e649f75aSMark Nunberg                 }
323431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
324e649f75aSMark Nunberg             }
325431730afSMark Nunberg 
326431730afSMark Nunberg             if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
327431730afSMark Nunberg                     FASTPARSE_EXHAUSTED) {
328431730afSMark Nunberg                 /* No need to readjust variables as we've exhausted the iterator */
329431730afSMark Nunberg                 return;
330431730afSMark Nunberg             } else {
331431730afSMark Nunberg                 if (CUR_CHAR == '"') {
332e649f75aSMark Nunberg                     goto GT_QUOTE;
333e649f75aSMark Nunberg                 } else if (CUR_CHAR == '\\') {
334e649f75aSMark Nunberg                     goto GT_ESCAPE;
335e649f75aSMark Nunberg                 } else {
336e649f75aSMark Nunberg                     INVOKE_ERROR(WEIRD_WHITESPACE);
337e649f75aSMark Nunberg                 }
338431730afSMark Nunberg             }
339e649f75aSMark Nunberg             INCR_METRIC(STRINGY_SLOWPATH);
340e649f75aSMark Nunberg 
341e649f75aSMark Nunberg         } else if (state_type == JSONSL_T_SPECIAL) {
3421f330d76SMark Nunberg             /* Fast track for signed/unsigned */
3431f330d76SMark Nunberg             if (IS_NORMAL_NUMBER) {
344431730afSMark Nunberg                 if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
345431730afSMark Nunberg                         FASTPARSE_EXHAUSTED) {
346431730afSMark Nunberg                     return;
3471f330d76SMark Nunberg                 } else {
3481f330d76SMark Nunberg                     goto GT_SPECIAL_NUMERIC;
3491f330d76SMark Nunberg                 }
3501f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
3511f330d76SMark Nunberg                 if (!isdigit(CUR_CHAR)) {
3521f330d76SMark Nunberg                     INVOKE_ERROR(INVALID_NUMBER);
3531f330d76SMark Nunberg                 }
3541f330d76SMark Nunberg 
3551f330d76SMark Nunberg                 if (CUR_CHAR == '0') {
3561f330d76SMark Nunberg                     state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
3571f330d76SMark Nunberg                 } else if (isdigit(CUR_CHAR)) {
3581f330d76SMark Nunberg                     state->special_flags = JSONSL_SPECIALf_SIGNED;
3591f330d76SMark Nunberg                     state->nelem = CUR_CHAR - 0x30;
3601f330d76SMark Nunberg                 } else {
3611f330d76SMark Nunberg                     INVOKE_ERROR(INVALID_NUMBER);
3621f330d76SMark Nunberg                 }
363431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
3641f330d76SMark Nunberg 
3651f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
3661f330d76SMark Nunberg                 if (isdigit(CUR_CHAR)) {
3671f330d76SMark Nunberg                     /* Following a zero! */
3681f330d76SMark Nunberg                     INVOKE_ERROR(INVALID_NUMBER);
3691f330d76SMark Nunberg                 }
3701f330d76SMark Nunberg                 /* Unset the 'zero' flag: */
3711f330d76SMark Nunberg                 if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
3721f330d76SMark Nunberg                     state->special_flags = JSONSL_SPECIALf_SIGNED;
3731f330d76SMark Nunberg                 } else {
3741f330d76SMark Nunberg                     state->special_flags = JSONSL_SPECIALf_UNSIGNED;
3751f330d76SMark Nunberg                 }
3761f330d76SMark Nunberg                 goto GT_SPECIAL_NUMERIC;
3771f330d76SMark Nunberg             }
3781f330d76SMark Nunberg 
379e649f75aSMark Nunberg             if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
3801f330d76SMark Nunberg                 GT_SPECIAL_NUMERIC:
381e649f75aSMark Nunberg                 switch (CUR_CHAR) {
382e649f75aSMark Nunberg                 CASE_DIGITS
3831f330d76SMark Nunberg                     STATE_NUM_LAST = '1';
384431730afSMark Nunberg                     CONTINUE_NEXT_CHAR();
3851f330d76SMark Nunberg 
3861f330d76SMark Nunberg                 case '.':
3871f330d76SMark Nunberg                     if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
3881f330d76SMark Nunberg                         INVOKE_ERROR(INVALID_NUMBER);
3891f330d76SMark Nunberg                     }
3901f330d76SMark Nunberg                     state->special_flags |= JSONSL_SPECIALf_FLOAT;
3911f330d76SMark Nunberg                     STATE_NUM_LAST = '.';
392431730afSMark Nunberg                     CONTINUE_NEXT_CHAR();
393e649f75aSMark Nunberg 
394e649f75aSMark Nunberg                 case 'e':
395e649f75aSMark Nunberg                 case 'E':
3961f330d76SMark Nunberg                     if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
3971f330d76SMark Nunberg                         INVOKE_ERROR(INVALID_NUMBER);
3981f330d76SMark Nunberg                     }
3991f330d76SMark Nunberg                     state->special_flags |= JSONSL_SPECIALf_EXPONENT;
4001f330d76SMark Nunberg                     STATE_NUM_LAST = 'e';
401431730afSMark Nunberg                     CONTINUE_NEXT_CHAR();
4021f330d76SMark Nunberg 
403e649f75aSMark Nunberg                 case '-':
404e649f75aSMark Nunberg                 case '+':
4051f330d76SMark Nunberg                     if (STATE_NUM_LAST != 'e') {
4061f330d76SMark Nunberg                         INVOKE_ERROR(INVALID_NUMBER);
4071f330d76SMark Nunberg                     }
4081f330d76SMark Nunberg                     STATE_NUM_LAST = '-';
409431730afSMark Nunberg                     CONTINUE_NEXT_CHAR();
4101f330d76SMark Nunberg 
411e649f75aSMark Nunberg                 default:
412e649f75aSMark Nunberg                     if (is_special_end(CUR_CHAR)) {
413e649f75aSMark Nunberg                         goto GT_SPECIAL_POP;
414e649f75aSMark Nunberg                     }
415e649f75aSMark Nunberg                     INVOKE_ERROR(INVALID_NUMBER);
416e649f75aSMark Nunberg                     break;
417e649f75aSMark Nunberg                 }
418e649f75aSMark Nunberg             }
419e649f75aSMark Nunberg             /* else if (!NUMERIC) */
420e649f75aSMark Nunberg             if (!is_special_end(CUR_CHAR)) {
4211f330d76SMark Nunberg                 STATE_SPECIAL_LENGTH++;
4221f330d76SMark Nunberg 
423e649f75aSMark Nunberg                 /* Verify TRUE, FALSE, NULL */
424e649f75aSMark Nunberg                 if (state->special_flags == JSONSL_SPECIALf_TRUE) {
425e649f75aSMark Nunberg                     VERIFY_SPECIAL("true");
426e649f75aSMark Nunberg                 } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
427e649f75aSMark Nunberg                     VERIFY_SPECIAL("false");
428e649f75aSMark Nunberg                 } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
429e649f75aSMark Nunberg                     VERIFY_SPECIAL("null");
430e649f75aSMark Nunberg                 }
431e649f75aSMark Nunberg                 INCR_METRIC(SPECIAL_FASTPATH);
432431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
433e649f75aSMark Nunberg             }
434e649f75aSMark Nunberg 
435e649f75aSMark Nunberg             GT_SPECIAL_POP:
4361f330d76SMark Nunberg             if (IS_NORMAL_NUMBER) {
4371f330d76SMark Nunberg                 /* Nothing */
4381f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
4391f330d76SMark Nunberg                     state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
4401f330d76SMark Nunberg                 /* 0 is unsigned! */
4411f330d76SMark Nunberg                 state->special_flags = JSONSL_SPECIALf_UNSIGNED;
4421f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
4431f330d76SMark Nunberg                 /* Still in dash! */
4441f330d76SMark Nunberg                 INVOKE_ERROR(INVALID_NUMBER);
4451f330d76SMark Nunberg             } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
4461f330d76SMark Nunberg                 /* Check that we're not at the end of a token */
4471f330d76SMark Nunberg                 if (STATE_NUM_LAST != '1') {
4481f330d76SMark Nunberg                     INVOKE_ERROR(INVALID_NUMBER);
4491f330d76SMark Nunberg                 }
4501f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
4511f330d76SMark Nunberg                 if (STATE_SPECIAL_LENGTH != 4) {
4521f330d76SMark Nunberg                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
4531f330d76SMark Nunberg                 }
4541f330d76SMark Nunberg                 state->nelem = 1;
4551f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
4561f330d76SMark Nunberg                 if (STATE_SPECIAL_LENGTH != 5) {
4571f330d76SMark Nunberg                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
4581f330d76SMark Nunberg                 }
4591f330d76SMark Nunberg             } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
4601f330d76SMark Nunberg                 if (STATE_SPECIAL_LENGTH != 4) {
4611f330d76SMark Nunberg                     INVOKE_ERROR(SPECIAL_INCOMPLETE);
4621f330d76SMark Nunberg                 }
4631f330d76SMark Nunberg             }
464e649f75aSMark Nunberg             SPECIAL_POP;
465e649f75aSMark Nunberg             jsn->expecting = ',';
466e649f75aSMark Nunberg             if (is_allowed_whitespace(CUR_CHAR)) {
467431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
468e649f75aSMark Nunberg             }
469e649f75aSMark Nunberg             /**
470e649f75aSMark Nunberg              * This works because we have a non-whitespace token
471e649f75aSMark Nunberg              * which is not a special token. If this is a structural
472e649f75aSMark Nunberg              * character then it will be gracefully handled by the
473e649f75aSMark Nunberg              * switch statement. Otherwise it will default to the 'special'
474e649f75aSMark Nunberg              * state again,
475e649f75aSMark Nunberg              */
476e649f75aSMark Nunberg             goto GT_STRUCTURAL_TOKEN;
477e649f75aSMark Nunberg         } else if (is_allowed_whitespace(CUR_CHAR)) {
478e649f75aSMark Nunberg             INCR_METRIC(ALLOWED_WHITESPACE);
479e649f75aSMark Nunberg             /* So we're not special. Harmless insignificant whitespace
480e649f75aSMark Nunberg              * passthrough
481e649f75aSMark Nunberg              */
482431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
483e649f75aSMark Nunberg         } else if (extract_special(CUR_CHAR)) {
484e649f75aSMark Nunberg             /* not a string, whitespace, or structural token. must be special */
485e649f75aSMark Nunberg             goto GT_SPECIAL_BEGIN;
486e649f75aSMark Nunberg         }
487e649f75aSMark Nunberg 
488e649f75aSMark Nunberg         INCR_GENERIC(CUR_CHAR);
489e649f75aSMark Nunberg 
490e649f75aSMark Nunberg         if (CUR_CHAR == '"') {
491e649f75aSMark Nunberg             GT_QUOTE:
492e649f75aSMark Nunberg             jsn->can_insert = 0;
493e649f75aSMark Nunberg             switch (state_type) {
494e649f75aSMark Nunberg 
495e649f75aSMark Nunberg             /* the end of a string or hash key */
496e649f75aSMark Nunberg             case JSONSL_T_STRING:
497e649f75aSMark Nunberg                 CALLBACK_AND_POP(STRING);
498431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
499e649f75aSMark Nunberg             case JSONSL_T_HKEY:
500e649f75aSMark Nunberg                 CALLBACK_AND_POP(HKEY);
501431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
502e649f75aSMark Nunberg 
503e649f75aSMark Nunberg             case JSONSL_T_OBJECT:
504e649f75aSMark Nunberg                 state->nelem++;
505e649f75aSMark Nunberg                 if ( (state->nelem-1) % 2 ) {
506e649f75aSMark Nunberg                     /* Odd, this must be a hash value */
507e649f75aSMark Nunberg                     if (jsn->tok_last != ':') {
508e649f75aSMark Nunberg                         INVOKE_ERROR(MISSING_TOKEN);
509e649f75aSMark Nunberg                     }
510e649f75aSMark Nunberg                     jsn->expecting = ','; /* Can't figure out what to expect next */
511e649f75aSMark Nunberg                     jsn->tok_last = 0;
512e649f75aSMark Nunberg 
513e649f75aSMark Nunberg                     STACK_PUSH;
514e649f75aSMark Nunberg                     state->type = JSONSL_T_STRING;
515e649f75aSMark Nunberg                     DO_CALLBACK(STRING, PUSH);
516e649f75aSMark Nunberg 
517e649f75aSMark Nunberg                 } else {
518e649f75aSMark Nunberg                     /* hash key */
519e649f75aSMark Nunberg                     if (jsn->expecting != '"') {
520e649f75aSMark Nunberg                         INVOKE_ERROR(STRAY_TOKEN);
521e649f75aSMark Nunberg                     }
522e649f75aSMark Nunberg                     jsn->tok_last = 0;
523e649f75aSMark Nunberg                     jsn->expecting = ':';
524e649f75aSMark Nunberg 
525e649f75aSMark Nunberg                     STACK_PUSH;
526e649f75aSMark Nunberg                     state->type = JSONSL_T_HKEY;
527e649f75aSMark Nunberg                     DO_CALLBACK(HKEY, PUSH);
528e649f75aSMark Nunberg                 }
529431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
530e649f75aSMark Nunberg 
531e649f75aSMark Nunberg             case JSONSL_T_LIST:
532e649f75aSMark Nunberg                 state->nelem++;
533e649f75aSMark Nunberg                 STACK_PUSH;
534e649f75aSMark Nunberg                 state->type = JSONSL_T_STRING;
535e649f75aSMark Nunberg                 jsn->expecting = ',';
536e649f75aSMark Nunberg                 jsn->tok_last = 0;
537e649f75aSMark Nunberg                 DO_CALLBACK(STRING, PUSH);
538431730afSMark Nunberg                 CONTINUE_NEXT_CHAR();
539e649f75aSMark Nunberg 
540e649f75aSMark Nunberg             case JSONSL_T_SPECIAL:
541e649f75aSMark Nunberg                 INVOKE_ERROR(STRAY_TOKEN);
542e649f75aSMark Nunberg                 break;
543e649f75aSMark Nunberg 
544e649f75aSMark Nunberg             default:
545e649f75aSMark Nunberg                 INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
546e649f75aSMark Nunberg                 break;
547e649f75aSMark Nunberg             } /* switch(state->type) */
548e649f75aSMark Nunberg         } else if (CUR_CHAR == '\\') {
549e649f75aSMark Nunberg             GT_ESCAPE:
550e649f75aSMark Nunberg             INCR_METRIC(ESCAPES);
551e649f75aSMark Nunberg         /* Escape */
552e649f75aSMark Nunberg             if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
553e649f75aSMark Nunberg                 INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
554e649f75aSMark Nunberg             }
555e649f75aSMark Nunberg             state->nescapes++;
556e649f75aSMark Nunberg             jsn->in_escape = 1;
557431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
558e649f75aSMark Nunberg         } /* " or \ */
559e649f75aSMark Nunberg 
560e649f75aSMark Nunberg         GT_STRUCTURAL_TOKEN:
561e649f75aSMark Nunberg         switch (CUR_CHAR) {
562e649f75aSMark Nunberg         case ':':
563e649f75aSMark Nunberg             INCR_METRIC(STRUCTURAL_TOKEN);
564e649f75aSMark Nunberg             if (jsn->expecting != CUR_CHAR) {
565e649f75aSMark Nunberg                 INVOKE_ERROR(STRAY_TOKEN);
566e649f75aSMark Nunberg             }
567e649f75aSMark Nunberg             jsn->tok_last = ':';
568e649f75aSMark Nunberg             jsn->can_insert = 1;
569e649f75aSMark Nunberg             jsn->expecting = '"';
570431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
571e649f75aSMark Nunberg 
572e649f75aSMark Nunberg         case ',':
573e649f75aSMark Nunberg             INCR_METRIC(STRUCTURAL_TOKEN);
574e649f75aSMark Nunberg             /**
575e649f75aSMark Nunberg              * The comma is one of the more generic tokens.
576e649f75aSMark Nunberg              * In the context of an OBJECT, the can_insert flag
577e649f75aSMark Nunberg              * should never be set, and no other action is
578e649f75aSMark Nunberg              * necessary.
579e649f75aSMark Nunberg              */
580e649f75aSMark Nunberg             if (jsn->expecting != CUR_CHAR) {
581e649f75aSMark Nunberg                 /* make this branch execute only when we haven't manually
582e649f75aSMark Nunberg                  * just placed the ',' in the expecting register.
583e649f75aSMark Nunberg                  */
584e649f75aSMark Nunberg                 INVOKE_ERROR(STRAY_TOKEN);
585e649f75aSMark Nunberg             }
586e649f75aSMark Nunberg 
587e649f75aSMark Nunberg             if (state->type == JSONSL_T_OBJECT) {
588e649f75aSMark Nunberg                 /* end of hash value, expect a string as a hash key */
589e649f75aSMark Nunberg                 jsn->expecting = '"';
590e649f75aSMark Nunberg             } else {
591e649f75aSMark Nunberg                 jsn->can_insert = 1;
592e649f75aSMark Nunberg             }
593e649f75aSMark Nunberg 
594e649f75aSMark Nunberg             jsn->tok_last = ',';
595e649f75aSMark Nunberg             jsn->expecting = '"';
596431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
597e649f75aSMark Nunberg 
598e649f75aSMark Nunberg             /* new list or object */
599e649f75aSMark Nunberg             /* hashes are more common */
600e649f75aSMark Nunberg         case '{':
601e649f75aSMark Nunberg         case '[':
602e649f75aSMark Nunberg             INCR_METRIC(STRUCTURAL_TOKEN);
603e649f75aSMark Nunberg             if (!jsn->can_insert) {
604e649f75aSMark Nunberg                 INVOKE_ERROR(CANT_INSERT);
605e649f75aSMark Nunberg             }
606e649f75aSMark Nunberg 
607e649f75aSMark Nunberg             ENSURE_HVAL;
608e649f75aSMark Nunberg             state->nelem++;
609e649f75aSMark Nunberg 
610e649f75aSMark Nunberg             STACK_PUSH;
611e649f75aSMark Nunberg             /* because the constants match the opening delimiters, we can do this: */
612e649f75aSMark Nunberg             state->type = CUR_CHAR;
613e649f75aSMark Nunberg             state->nelem = 0;
614e649f75aSMark Nunberg             jsn->can_insert = 1;
615e649f75aSMark Nunberg             if (CUR_CHAR == '{') {
616e649f75aSMark Nunberg                 /* If we're a hash, we expect a key first, which is quouted */
617e649f75aSMark Nunberg                 jsn->expecting = '"';
618e649f75aSMark Nunberg             }
619e649f75aSMark Nunberg             if (CUR_CHAR == JSONSL_T_OBJECT) {
620e649f75aSMark Nunberg                 DO_CALLBACK(OBJECT, PUSH);
621e649f75aSMark Nunberg             } else {
622e649f75aSMark Nunberg                 DO_CALLBACK(LIST, PUSH);
623e649f75aSMark Nunberg             }
624e649f75aSMark Nunberg             jsn->tok_last = 0;
625431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
626e649f75aSMark Nunberg 
627e649f75aSMark Nunberg             /* closing of list or object */
628e649f75aSMark Nunberg         case '}':
629e649f75aSMark Nunberg         case ']':
630e649f75aSMark Nunberg             INCR_METRIC(STRUCTURAL_TOKEN);
631e649f75aSMark Nunberg             if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
632e649f75aSMark Nunberg                 INVOKE_ERROR(TRAILING_COMMA);
633e649f75aSMark Nunberg             }
634e649f75aSMark Nunberg 
635e649f75aSMark Nunberg             jsn->can_insert = 0;
636e649f75aSMark Nunberg             jsn->level--;
637e649f75aSMark Nunberg             jsn->expecting = ',';
638e649f75aSMark Nunberg             jsn->tok_last = 0;
639e649f75aSMark Nunberg             if (CUR_CHAR == ']') {
640e649f75aSMark Nunberg                 if (state->type != '[') {
641e649f75aSMark Nunberg                     INVOKE_ERROR(BRACKET_MISMATCH);
642e649f75aSMark Nunberg                 }
643e649f75aSMark Nunberg                 DO_CALLBACK(LIST, POP);
644e649f75aSMark Nunberg             } else {
645e649f75aSMark Nunberg                 if (state->type != '{') {
646e649f75aSMark Nunberg                     INVOKE_ERROR(BRACKET_MISMATCH);
6471f330d76SMark Nunberg                 } else if (state->nelem && state->nelem % 2 != 0) {
6481f330d76SMark Nunberg                     INVOKE_ERROR(VALUE_EXPECTED);
649e649f75aSMark Nunberg                 }
650e649f75aSMark Nunberg                 DO_CALLBACK(OBJECT, POP);
651e649f75aSMark Nunberg             }
652e649f75aSMark Nunberg             state = jsn->stack + jsn->level;
653e649f75aSMark Nunberg             state->pos_cur = jsn->pos;
654431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
655e649f75aSMark Nunberg 
656e649f75aSMark Nunberg         default:
657e649f75aSMark Nunberg             GT_SPECIAL_BEGIN:
658e649f75aSMark Nunberg             /**
659e649f75aSMark Nunberg              * Not a string, not a structural token, and not benign whitespace.
660e649f75aSMark Nunberg              * Technically we should iterate over the character always, but since
661e649f75aSMark Nunberg              * we are not doing full numerical/value decoding anyway (but only hinting),
662e649f75aSMark Nunberg              * we only check upon entry.
663e649f75aSMark Nunberg              */
664e649f75aSMark Nunberg             if (state->type != JSONSL_T_SPECIAL) {
665e649f75aSMark Nunberg                 int special_flags = extract_special(CUR_CHAR);
666e649f75aSMark Nunberg                 if (!special_flags) {
667e649f75aSMark Nunberg                     /**
668e649f75aSMark Nunberg                      * Try to do some heuristics here anyway to figure out what kind of
669e649f75aSMark Nunberg                      * error this is. The 'special' case is a fallback scenario anyway.
670e649f75aSMark Nunberg                      */
671e649f75aSMark Nunberg                     if (CUR_CHAR == '\0') {
672e649f75aSMark Nunberg                         INVOKE_ERROR(FOUND_NULL_BYTE);
673e649f75aSMark Nunberg                     } else if (CUR_CHAR < 0x20) {
674e649f75aSMark Nunberg                         INVOKE_ERROR(WEIRD_WHITESPACE);
675e649f75aSMark Nunberg                     } else {
676e649f75aSMark Nunberg                         INVOKE_ERROR(SPECIAL_EXPECTED);
677e649f75aSMark Nunberg                     }
678e649f75aSMark Nunberg                 }
679e649f75aSMark Nunberg                 ENSURE_HVAL;
680e649f75aSMark Nunberg                 state->nelem++;
681e649f75aSMark Nunberg                 if (!jsn->can_insert) {
682e649f75aSMark Nunberg                     INVOKE_ERROR(CANT_INSERT);
683e649f75aSMark Nunberg                 }
684e649f75aSMark Nunberg                 STACK_PUSH;
685e649f75aSMark Nunberg                 state->type = JSONSL_T_SPECIAL;
686e649f75aSMark Nunberg                 state->special_flags = special_flags;
6871f330d76SMark Nunberg                 STATE_SPECIAL_LENGTH = 1;
6881f330d76SMark Nunberg 
689e649f75aSMark Nunberg                 if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
690e649f75aSMark Nunberg                     state->nelem = CUR_CHAR - 0x30;
6911f330d76SMark Nunberg                     STATE_NUM_LAST = '1';
692e649f75aSMark Nunberg                 } else {
6931f330d76SMark Nunberg                     STATE_NUM_LAST = '-';
694e649f75aSMark Nunberg                     state->nelem = 0;
695e649f75aSMark Nunberg                 }
696e649f75aSMark Nunberg                 DO_CALLBACK(SPECIAL, PUSH);
697e649f75aSMark Nunberg             }
698431730afSMark Nunberg             CONTINUE_NEXT_CHAR();
699e649f75aSMark Nunberg         }
700e649f75aSMark Nunberg     }
701e649f75aSMark Nunberg }
702e649f75aSMark Nunberg 
703e649f75aSMark Nunberg JSONSL_API
jsonsl_strerror(jsonsl_error_t err)704e649f75aSMark Nunberg const char* jsonsl_strerror(jsonsl_error_t err)
705e649f75aSMark Nunberg {
706e649f75aSMark Nunberg     if (err == JSONSL_ERROR_SUCCESS) {
707e649f75aSMark Nunberg         return "SUCCESS";
708e649f75aSMark Nunberg     }
709e649f75aSMark Nunberg #define X(t) \
710e649f75aSMark Nunberg     if (err == JSONSL_ERROR_##t) \
711e649f75aSMark Nunberg         return #t;
712e649f75aSMark Nunberg     JSONSL_XERR;
713e649f75aSMark Nunberg #undef X
714e649f75aSMark Nunberg     return "<UNKNOWN_ERROR>";
715e649f75aSMark Nunberg }
716e649f75aSMark Nunberg 
717e649f75aSMark Nunberg JSONSL_API
jsonsl_strtype(jsonsl_type_t type)718e649f75aSMark Nunberg const char *jsonsl_strtype(jsonsl_type_t type)
719e649f75aSMark Nunberg {
720e649f75aSMark Nunberg #define X(o,c) \
721e649f75aSMark Nunberg     if (type == JSONSL_T_##o) \
722e649f75aSMark Nunberg         return #o;
723e649f75aSMark Nunberg     JSONSL_XTYPE
724e649f75aSMark Nunberg #undef X
725e649f75aSMark Nunberg     return "UNKNOWN TYPE";
726e649f75aSMark Nunberg 
727e649f75aSMark Nunberg }
728e649f75aSMark Nunberg 
729e649f75aSMark Nunberg /*
730e649f75aSMark Nunberg  *
731e649f75aSMark Nunberg  * JPR/JSONPointer functions
732e649f75aSMark Nunberg  *
733e649f75aSMark Nunberg  *
734e649f75aSMark Nunberg  */
735e649f75aSMark Nunberg #ifndef JSONSL_NO_JPR
736e649f75aSMark Nunberg static
737e649f75aSMark Nunberg jsonsl_jpr_type_t
populate_component(char * in,struct jsonsl_jpr_component_st * component,char ** next,jsonsl_error_t * errp)738e649f75aSMark Nunberg populate_component(char *in,
739e649f75aSMark Nunberg                    struct jsonsl_jpr_component_st *component,
740e649f75aSMark Nunberg                    char **next,
741e649f75aSMark Nunberg                    jsonsl_error_t *errp)
742e649f75aSMark Nunberg {
743e649f75aSMark Nunberg     unsigned long pctval;
744e649f75aSMark Nunberg     char *c = NULL, *outp = NULL, *end = NULL;
745e649f75aSMark Nunberg     size_t input_len;
746e649f75aSMark Nunberg     jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
747e649f75aSMark Nunberg 
748e649f75aSMark Nunberg     if (*next == NULL || *(*next) == '\0') {
749e649f75aSMark Nunberg         return JSONSL_PATH_NONE;
750e649f75aSMark Nunberg     }
751e649f75aSMark Nunberg 
752e649f75aSMark Nunberg     /* Replace the next / with a NULL */
753e649f75aSMark Nunberg     *next = strstr(in, "/");
754e649f75aSMark Nunberg     if (*next != NULL) {
755e649f75aSMark Nunberg         *(*next) = '\0'; /* drop the forward slash */
756e649f75aSMark Nunberg         input_len = *next - in;
757e649f75aSMark Nunberg         end = *next;
758e649f75aSMark Nunberg         *next += 1; /* next character after the '/' */
759e649f75aSMark Nunberg     } else {
760e649f75aSMark Nunberg         input_len = strlen(in);
761e649f75aSMark Nunberg         end = in + input_len + 1;
762e649f75aSMark Nunberg     }
763e649f75aSMark Nunberg 
764e649f75aSMark Nunberg     component->pstr = in;
765e649f75aSMark Nunberg 
766e649f75aSMark Nunberg     /* Check for special components of interest */
767e649f75aSMark Nunberg     if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
768e649f75aSMark Nunberg         /* Lone wildcard */
769e649f75aSMark Nunberg         ret = JSONSL_PATH_WILDCARD;
770e649f75aSMark Nunberg         goto GT_RET;
771e649f75aSMark Nunberg     } else if (isdigit(*in)) {
772e649f75aSMark Nunberg         /* ASCII Numeric */
773e649f75aSMark Nunberg         char *endptr;
774e649f75aSMark Nunberg         component->idx = strtoul(in, &endptr, 10);
775e649f75aSMark Nunberg         if (endptr && *endptr == '\0') {
776e649f75aSMark Nunberg             ret = JSONSL_PATH_NUMERIC;
777e649f75aSMark Nunberg             goto GT_RET;
778e649f75aSMark Nunberg         }
779e649f75aSMark Nunberg     }
780e649f75aSMark Nunberg 
781e649f75aSMark Nunberg     /* Default, it's a string */
782e649f75aSMark Nunberg     ret = JSONSL_PATH_STRING;
783e649f75aSMark Nunberg     for (c = outp = in; c < end; c++, outp++) {
784e649f75aSMark Nunberg         char origc;
785e649f75aSMark Nunberg         if (*c != '%') {
786e649f75aSMark Nunberg             goto GT_ASSIGN;
787e649f75aSMark Nunberg         }
788e649f75aSMark Nunberg         /*
789e649f75aSMark Nunberg          * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
790e649f75aSMark Nunberg          */
791e649f75aSMark Nunberg 
792e649f75aSMark Nunberg         /* Need %XX */
793e649f75aSMark Nunberg         if (c+2 >= end) {
794e649f75aSMark Nunberg             *errp = JSONSL_ERROR_PERCENT_BADHEX;
795e649f75aSMark Nunberg             return JSONSL_PATH_INVALID;
796e649f75aSMark Nunberg         }
797e649f75aSMark Nunberg         if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
798e649f75aSMark Nunberg             *errp = JSONSL_ERROR_PERCENT_BADHEX;
799e649f75aSMark Nunberg             return JSONSL_PATH_INVALID;
800e649f75aSMark Nunberg         }
801e649f75aSMark Nunberg 
802e649f75aSMark Nunberg         /* Temporarily null-terminate the characters */
803e649f75aSMark Nunberg         origc = *(c+3);
804e649f75aSMark Nunberg         *(c+3) = '\0';
805e649f75aSMark Nunberg         pctval = strtoul(c+1, NULL, 16);
806e649f75aSMark Nunberg         *(c+3) = origc;
807e649f75aSMark Nunberg 
808e649f75aSMark Nunberg         *outp = (char) pctval;
809e649f75aSMark Nunberg         c += 2;
810e649f75aSMark Nunberg         continue;
811e649f75aSMark Nunberg 
812e649f75aSMark Nunberg         GT_ASSIGN:
813e649f75aSMark Nunberg         *outp = *c;
814e649f75aSMark Nunberg     }
815e649f75aSMark Nunberg     /* Null-terminate the string */
816e649f75aSMark Nunberg     for (; outp < c; outp++) {
817e649f75aSMark Nunberg         *outp = '\0';
818e649f75aSMark Nunberg     }
819e649f75aSMark Nunberg 
820e649f75aSMark Nunberg     GT_RET:
821e649f75aSMark Nunberg     component->ptype = ret;
822e649f75aSMark Nunberg     if (ret != JSONSL_PATH_WILDCARD) {
823e649f75aSMark Nunberg         component->len = strlen(component->pstr);
824e649f75aSMark Nunberg     }
825e649f75aSMark Nunberg     return ret;
826e649f75aSMark Nunberg }
827e649f75aSMark Nunberg 
828e649f75aSMark Nunberg JSONSL_API
829e649f75aSMark Nunberg jsonsl_jpr_t
jsonsl_jpr_new(const char * path,jsonsl_error_t * errp)830e649f75aSMark Nunberg jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
831e649f75aSMark Nunberg {
832e649f75aSMark Nunberg     char *my_copy = NULL;
833e649f75aSMark Nunberg     int count, curidx;
834e649f75aSMark Nunberg     struct jsonsl_jpr_st *ret = NULL;
835e649f75aSMark Nunberg     struct jsonsl_jpr_component_st *components = NULL;
836e649f75aSMark Nunberg     size_t origlen;
837e649f75aSMark Nunberg     jsonsl_error_t errstacked;
838e649f75aSMark Nunberg 
839e649f75aSMark Nunberg #define JPR_BAIL(err) *errp = err; goto GT_ERROR;
840e649f75aSMark Nunberg 
841e649f75aSMark Nunberg     if (errp == NULL) {
842e649f75aSMark Nunberg         errp = &errstacked;
843e649f75aSMark Nunberg     }
844e649f75aSMark Nunberg 
845e649f75aSMark Nunberg     if (path == NULL || *path != '/') {
846e649f75aSMark Nunberg         JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
847e649f75aSMark Nunberg         return NULL;
848e649f75aSMark Nunberg     }
849e649f75aSMark Nunberg 
850e649f75aSMark Nunberg     count = 1;
851e649f75aSMark Nunberg     path++;
852e649f75aSMark Nunberg     {
853e649f75aSMark Nunberg         const char *c = path;
854e649f75aSMark Nunberg         for (; *c; c++) {
855e649f75aSMark Nunberg             if (*c == '/') {
856e649f75aSMark Nunberg                 count++;
857e649f75aSMark Nunberg                 if (*(c+1) == '/') {
858e649f75aSMark Nunberg                     JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
859e649f75aSMark Nunberg                 }
860e649f75aSMark Nunberg             }
861e649f75aSMark Nunberg         }
862e649f75aSMark Nunberg     }
863e649f75aSMark Nunberg     if(*path) {
864e649f75aSMark Nunberg         count++;
865e649f75aSMark Nunberg     }
866e649f75aSMark Nunberg 
867bfb89cbcSMark Nunberg     components = (struct jsonsl_jpr_component_st *)
868bfb89cbcSMark Nunberg             malloc(sizeof(*components) * count);
869e649f75aSMark Nunberg     if (!components) {
870e649f75aSMark Nunberg         JPR_BAIL(JSONSL_ERROR_ENOMEM);
871e649f75aSMark Nunberg     }
872e649f75aSMark Nunberg 
873bfb89cbcSMark Nunberg     my_copy = (char *)malloc(strlen(path) + 1);
874e649f75aSMark Nunberg     if (!my_copy) {
875e649f75aSMark Nunberg         JPR_BAIL(JSONSL_ERROR_ENOMEM);
876e649f75aSMark Nunberg     }
877e649f75aSMark Nunberg 
878e649f75aSMark Nunberg     strcpy(my_copy, path);
879e649f75aSMark Nunberg 
880e649f75aSMark Nunberg     components[0].ptype = JSONSL_PATH_ROOT;
881e649f75aSMark Nunberg 
882e649f75aSMark Nunberg     if (*my_copy) {
883e649f75aSMark Nunberg         char *cur = my_copy;
884e649f75aSMark Nunberg         int pathret = JSONSL_PATH_STRING;
885e649f75aSMark Nunberg         curidx = 1;
886e649f75aSMark Nunberg         while (pathret > 0 && curidx < count) {
887e649f75aSMark Nunberg             pathret = populate_component(cur, components + curidx, &cur, errp);
888e649f75aSMark Nunberg             if (pathret > 0) {
889e649f75aSMark Nunberg                 curidx++;
890e649f75aSMark Nunberg             } else {
891e649f75aSMark Nunberg                 break;
892e649f75aSMark Nunberg             }
893e649f75aSMark Nunberg         }
894e649f75aSMark Nunberg 
895e649f75aSMark Nunberg         if (pathret == JSONSL_PATH_INVALID) {
896e649f75aSMark Nunberg             JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
897e649f75aSMark Nunberg         }
898e649f75aSMark Nunberg     } else {
899e649f75aSMark Nunberg         curidx = 1;
900e649f75aSMark Nunberg     }
901e649f75aSMark Nunberg 
902e649f75aSMark Nunberg     path--; /*revert path to leading '/' */
903e649f75aSMark Nunberg     origlen = strlen(path) + 1;
904bfb89cbcSMark Nunberg     ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
905e649f75aSMark Nunberg     if (!ret) {
906e649f75aSMark Nunberg         JPR_BAIL(JSONSL_ERROR_ENOMEM);
907e649f75aSMark Nunberg     }
908bfb89cbcSMark Nunberg     ret->orig = (char *)malloc(origlen);
909e649f75aSMark Nunberg     if (!ret->orig) {
910e649f75aSMark Nunberg         JPR_BAIL(JSONSL_ERROR_ENOMEM);
911e649f75aSMark Nunberg     }
912e649f75aSMark Nunberg     ret->components = components;
913e649f75aSMark Nunberg     ret->ncomponents = curidx;
914e649f75aSMark Nunberg     ret->basestr = my_copy;
915e649f75aSMark Nunberg     ret->norig = origlen-1;
916e649f75aSMark Nunberg     strcpy(ret->orig, path);
917e649f75aSMark Nunberg 
918e649f75aSMark Nunberg     return ret;
919e649f75aSMark Nunberg 
920e649f75aSMark Nunberg     GT_ERROR:
921e649f75aSMark Nunberg     free(my_copy);
922e649f75aSMark Nunberg     free(components);
923e649f75aSMark Nunberg     if (ret) {
924e649f75aSMark Nunberg         free(ret->orig);
925e649f75aSMark Nunberg     }
926e649f75aSMark Nunberg     free(ret);
927e649f75aSMark Nunberg     return NULL;
928e649f75aSMark Nunberg #undef JPR_BAIL
929e649f75aSMark Nunberg }
930e649f75aSMark Nunberg 
/**
 * Release a JPR object and the buffers it owns (component array, working
 * copy of the path, and original path string).
 *
 * @param jpr The object to free; NULL is accepted and ignored, mirroring
 *        free()
 */
void jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
{
    if (jpr == NULL) {
        return;
    }
    free(jpr->components);
    free(jpr->basestr);
    free(jpr->orig);
    free(jpr);
}
938e649f75aSMark Nunberg 
939*5cf05eafSMark Nunberg /**
940*5cf05eafSMark Nunberg  * Call when there is a possibility of a match, either as a final match or
941*5cf05eafSMark Nunberg  * as a path within a match
942*5cf05eafSMark Nunberg  * @param jpr The JPR path
943*5cf05eafSMark Nunberg  * @param component Component corresponding to the current element
944*5cf05eafSMark Nunberg  * @param prlevel The level of the *parent*
945*5cf05eafSMark Nunberg  * @param chtype The type of the child
946*5cf05eafSMark Nunberg  * @return Match status
947*5cf05eafSMark Nunberg  */
948*5cf05eafSMark Nunberg static jsonsl_jpr_match_t
jsonsl__match_continue(jsonsl_jpr_t jpr,const struct jsonsl_jpr_component_st * component,unsigned prlevel,unsigned chtype)949*5cf05eafSMark Nunberg jsonsl__match_continue(jsonsl_jpr_t jpr,
950*5cf05eafSMark Nunberg                        const struct jsonsl_jpr_component_st *component,
951*5cf05eafSMark Nunberg                        unsigned prlevel, unsigned chtype)
952*5cf05eafSMark Nunberg {
953*5cf05eafSMark Nunberg     const struct jsonsl_jpr_component_st *next_comp = component + 1;
954*5cf05eafSMark Nunberg     if (prlevel == jpr->ncomponents - 1) {
955*5cf05eafSMark Nunberg         /* This is the match. Check the expected type of the match against
956*5cf05eafSMark Nunberg          * the child */
957*5cf05eafSMark Nunberg         if (jpr->match_type == 0 || jpr->match_type == chtype) {
958*5cf05eafSMark Nunberg             return JSONSL_MATCH_COMPLETE;
959*5cf05eafSMark Nunberg         } else {
960*5cf05eafSMark Nunberg             return JSONSL_MATCH_TYPE_MISMATCH;
961*5cf05eafSMark Nunberg         }
962*5cf05eafSMark Nunberg     }
963*5cf05eafSMark Nunberg     if (chtype == JSONSL_T_LIST) {
964*5cf05eafSMark Nunberg         if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
965*5cf05eafSMark Nunberg             return JSONSL_MATCH_POSSIBLE;
966*5cf05eafSMark Nunberg         } else {
967*5cf05eafSMark Nunberg             return JSONSL_MATCH_TYPE_MISMATCH;
968*5cf05eafSMark Nunberg         }
969*5cf05eafSMark Nunberg     } else if (chtype == JSONSL_T_OBJECT) {
970*5cf05eafSMark Nunberg         if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
971*5cf05eafSMark Nunberg             return JSONSL_MATCH_TYPE_MISMATCH;
972*5cf05eafSMark Nunberg         } else {
973*5cf05eafSMark Nunberg             return JSONSL_MATCH_POSSIBLE;
974*5cf05eafSMark Nunberg         }
975*5cf05eafSMark Nunberg     } else {
976*5cf05eafSMark Nunberg         return JSONSL_MATCH_TYPE_MISMATCH;
977*5cf05eafSMark Nunberg     }
978*5cf05eafSMark Nunberg }
979*5cf05eafSMark Nunberg 
980*5cf05eafSMark Nunberg JSONSL_API
981*5cf05eafSMark Nunberg jsonsl_jpr_match_t
jsonsl_path_match(jsonsl_jpr_t jpr,const struct jsonsl_state_st * parent,const struct jsonsl_state_st * child,const char * key,size_t nkey)982*5cf05eafSMark Nunberg jsonsl_path_match(jsonsl_jpr_t jpr,
983*5cf05eafSMark Nunberg                   const struct jsonsl_state_st *parent,
984*5cf05eafSMark Nunberg                   const struct jsonsl_state_st *child,
985*5cf05eafSMark Nunberg                   const char *key, size_t nkey)
986*5cf05eafSMark Nunberg {
987*5cf05eafSMark Nunberg     const struct jsonsl_jpr_component_st *comp;
988*5cf05eafSMark Nunberg     if (!parent) {
989*5cf05eafSMark Nunberg         /* No parent. Return immediately since it's always a match */
990*5cf05eafSMark Nunberg         return jsonsl__match_continue(jpr, jpr->components, 0, child->type);
991*5cf05eafSMark Nunberg     }
992*5cf05eafSMark Nunberg 
993*5cf05eafSMark Nunberg     comp = jpr->components +