1bfb89cbcSMark Nunberg/* Copyright (C) 2012-2015 Mark Nunberg.
2bfb89cbcSMark Nunberg *
3bfb89cbcSMark Nunberg * See included LICENSE file for license details.
4bfb89cbcSMark Nunberg */
5bfb89cbcSMark Nunberg
6e649f75aSMark Nunberg#include "jsonsl.h"
7e649f75aSMark Nunberg#include <assert.h>
8e649f75aSMark Nunberg#include <limits.h>
9e649f75aSMark Nunberg#include <ctype.h>
10e649f75aSMark Nunberg
11e649f75aSMark Nunberg#ifdef JSONSL_USE_METRICS
12e649f75aSMark Nunberg#define XMETRICS \
13e649f75aSMark Nunberg    X(STRINGY_INSIGNIFICANT) \
14e649f75aSMark Nunberg    X(STRINGY_SLOWPATH) \
15e649f75aSMark Nunberg    X(ALLOWED_WHITESPACE) \
16e649f75aSMark Nunberg    X(QUOTE_FASTPATH) \
17e649f75aSMark Nunberg    X(SPECIAL_FASTPATH) \
18e649f75aSMark Nunberg    X(SPECIAL_WSPOP) \
19e649f75aSMark Nunberg    X(SPECIAL_SLOWPATH) \
20e649f75aSMark Nunberg    X(GENERIC) \
21e649f75aSMark Nunberg    X(STRUCTURAL_TOKEN) \
22e649f75aSMark Nunberg    X(SPECIAL_SWITCHFIRST) \
23e649f75aSMark Nunberg    X(STRINGY_CATCH) \
24431730afSMark Nunberg    X(NUMBER_FASTPATH) \
25e649f75aSMark Nunberg    X(ESCAPES) \
26e649f75aSMark Nunberg    X(TOTAL) \
27e649f75aSMark Nunberg
28e649f75aSMark Nunbergstruct jsonsl_metrics_st {
29e649f75aSMark Nunberg#define X(m) \
30e649f75aSMark Nunberg    unsigned long metric_##m;
31e649f75aSMark Nunberg    XMETRICS
32e649f75aSMark Nunberg#undef X
33e649f75aSMark Nunberg};
34e649f75aSMark Nunberg
35e649f75aSMark Nunbergstatic struct jsonsl_metrics_st GlobalMetrics = { 0 };
36e649f75aSMark Nunbergstatic unsigned long GenericCounter[0x100] = { 0 };
37e649f75aSMark Nunbergstatic unsigned long StringyCatchCounter[0x100] = { 0 };
38e649f75aSMark Nunberg
39e649f75aSMark Nunberg#define INCR_METRIC(m) \
40e649f75aSMark Nunberg    GlobalMetrics.metric_##m++;
41e649f75aSMark Nunberg
42e649f75aSMark Nunberg#define INCR_GENERIC(c) \
43e649f75aSMark Nunberg        INCR_METRIC(GENERIC); \
44e649f75aSMark Nunberg        GenericCounter[c]++; \
45e649f75aSMark Nunberg
46e649f75aSMark Nunberg#define INCR_STRINGY_CATCH(c) \
47e649f75aSMark Nunberg    INCR_METRIC(STRINGY_CATCH); \
48e649f75aSMark Nunberg    StringyCatchCounter[c]++;
49e649f75aSMark Nunberg
50e649f75aSMark NunbergJSONSL_API
51e649f75aSMark Nunbergvoid jsonsl_dump_global_metrics(void)
52e649f75aSMark Nunberg{
53e649f75aSMark Nunberg    int ii;
54e649f75aSMark Nunberg    printf("JSONSL Metrics:\n");
55e649f75aSMark Nunberg#define X(m) \
56e649f75aSMark Nunberg    printf("\t%-30s %20lu (%0.2f%%)\n", #m, GlobalMetrics.metric_##m, \
57e649f75aSMark Nunberg           (float)((float)(GlobalMetrics.metric_##m/(float)GlobalMetrics.metric_TOTAL)) * 100);
58e649f75aSMark Nunberg    XMETRICS
59e649f75aSMark Nunberg#undef X
60e649f75aSMark Nunberg    printf("Generic Characters:\n");
61e649f75aSMark Nunberg    for (ii = 0; ii < 0xff; ii++) {
62e649f75aSMark Nunberg        if (GenericCounter[ii]) {
63e649f75aSMark Nunberg            printf("\t[ %c ] %lu\n", ii, GenericCounter[ii]);
64e649f75aSMark Nunberg        }
65e649f75aSMark Nunberg    }
66e649f75aSMark Nunberg    printf("Weird string loop\n");
67e649f75aSMark Nunberg    for (ii = 0; ii < 0xff; ii++) {
68e649f75aSMark Nunberg        if (StringyCatchCounter[ii]) {
69e649f75aSMark Nunberg            printf("\t[ %c ] %lu\n", ii, StringyCatchCounter[ii]);
70e649f75aSMark Nunberg        }
71e649f75aSMark Nunberg    }
72e649f75aSMark Nunberg}
73e649f75aSMark Nunberg
74e649f75aSMark Nunberg#else
75e649f75aSMark Nunberg#define INCR_METRIC(m)
76e649f75aSMark Nunberg#define INCR_GENERIC(c)
77e649f75aSMark Nunberg#define INCR_STRINGY_CATCH(c)
78e649f75aSMark NunbergJSONSL_API
79e649f75aSMark Nunbergvoid jsonsl_dump_global_metrics(void) { }
80e649f75aSMark Nunberg#endif /* JSONSL_USE_METRICS */
81e649f75aSMark Nunberg
82e649f75aSMark Nunberg#define CASE_DIGITS \
83e649f75aSMark Nunbergcase '1': \
84e649f75aSMark Nunbergcase '2': \
85e649f75aSMark Nunbergcase '3': \
86e649f75aSMark Nunbergcase '4': \
87e649f75aSMark Nunbergcase '5': \
88e649f75aSMark Nunbergcase '6': \
89e649f75aSMark Nunbergcase '7': \
90e649f75aSMark Nunbergcase '8': \
91e649f75aSMark Nunbergcase '9': \
92e649f75aSMark Nunbergcase '0':
93e649f75aSMark Nunberg
/* Prototypes for file-local helpers referenced by the parsing code below.
 * Parameter names are descriptive only. */
static unsigned extract_special(unsigned c);
static int is_special_end(unsigned c);
static int is_allowed_whitespace(unsigned c);
static int is_allowed_escape(unsigned c);
static int is_simple_char(unsigned c);
static char get_escape_equiv(unsigned c);
100e649f75aSMark Nunberg
101e649f75aSMark NunbergJSONSL_API
102e649f75aSMark Nunbergjsonsl_t jsonsl_new(int nlevels)
103e649f75aSMark Nunberg{
104bfb89cbcSMark Nunberg    struct jsonsl_st *jsn = (struct jsonsl_st *)
105e649f75aSMark Nunberg            calloc(1, sizeof (*jsn) +
106e649f75aSMark Nunberg                    ( (nlevels-1) * sizeof (struct jsonsl_state_st) )
107e649f75aSMark Nunberg            );
108e649f75aSMark Nunberg
109e649f75aSMark Nunberg    jsn->levels_max = nlevels;
110e649f75aSMark Nunberg    jsn->max_callback_level = -1;
111e649f75aSMark Nunberg    jsonsl_reset(jsn);
112e649f75aSMark Nunberg    return jsn;
113e649f75aSMark Nunberg}
114e649f75aSMark Nunberg
115e649f75aSMark NunbergJSONSL_API
116e649f75aSMark Nunbergvoid jsonsl_reset(jsonsl_t jsn)
117e649f75aSMark Nunberg{
118e649f75aSMark Nunberg    unsigned int ii;
119e649f75aSMark Nunberg    jsn->tok_last = 0;
120e649f75aSMark Nunberg    jsn->can_insert = 1;
121e649f75aSMark Nunberg    jsn->pos = 0;
122e649f75aSMark Nunberg    jsn->level = 0;
1239c3aeb87SMark Nunberg    jsn->stopfl = 0;
124e649f75aSMark Nunberg    jsn->in_escape = 0;
125e649f75aSMark Nunberg    jsn->expecting = 0;
126e649f75aSMark Nunberg
127e649f75aSMark Nunberg    memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st)));
128e649f75aSMark Nunberg
129e649f75aSMark Nunberg    for (ii = 0; ii < jsn->levels_max; ii++) {
130e649f75aSMark Nunberg        jsn->stack[ii].level = ii;
131e649f75aSMark Nunberg    }
132e649f75aSMark Nunberg}
133e649f75aSMark Nunberg
134e649f75aSMark NunbergJSONSL_API
135e649f75aSMark Nunbergvoid jsonsl_destroy(jsonsl_t jsn)
136e649f75aSMark Nunberg{
137e649f75aSMark Nunberg    if (jsn) {
138e649f75aSMark Nunberg        free(jsn);
139e649f75aSMark Nunberg    }
140e649f75aSMark Nunberg}
141e649f75aSMark Nunberg
142431730afSMark Nunberg
143431730afSMark Nunberg#define FASTPARSE_EXHAUSTED 1
144431730afSMark Nunberg#define FASTPARSE_BREAK 0
145431730afSMark Nunberg
146431730afSMark Nunberg/*
147431730afSMark Nunberg * This function is meant to accelerate string parsing, reducing the main loop's
148431730afSMark Nunberg * check if we are indeed a string.
149431730afSMark Nunberg *
150431730afSMark Nunberg * @param jsn the parser
151431730afSMark Nunberg * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position)
152431730afSMark Nunberg * @param[in,out] nbytes_p A pointer to the current size of the buffer
153431730afSMark Nunberg * @return true if all bytes have been exhausted (and thus the main loop can
154431730afSMark Nunberg * return), false if a special character was examined which requires greater
155431730afSMark Nunberg * examination.
156431730afSMark Nunberg */
157431730afSMark Nunbergstatic int
158431730afSMark Nunbergjsonsl__str_fastparse(jsonsl_t jsn,
159431730afSMark Nunberg                      const jsonsl_uchar_t **bytes_p, size_t *nbytes_p)
160431730afSMark Nunberg{
161431730afSMark Nunberg    const jsonsl_uchar_t *bytes = *bytes_p;
1625cf05eafSMark Nunberg    const jsonsl_uchar_t *end;
1635cf05eafSMark Nunberg    for (end = bytes + *nbytes_p; bytes != end; bytes++) {
164431730afSMark Nunberg        if (
165431730afSMark Nunberg#ifdef JSONSL_USE_WCHAR
166431730afSMark Nunberg                *bytes >= 0x100 ||
167431730afSMark Nunberg#endif /* JSONSL_USE_WCHAR */
1685cf05eafSMark Nunberg                (is_simple_char(*bytes))) {
169431730afSMark Nunberg            INCR_METRIC(TOTAL);
170431730afSMark Nunberg            INCR_METRIC(STRINGY_INSIGNIFICANT);
171431730afSMark Nunberg        } else {
1725cf05eafSMark Nunberg            /* Once we're done here, re-calculate the position variables */
1735cf05eafSMark Nunberg            jsn->pos += (bytes - *bytes_p);
1745cf05eafSMark Nunberg            *nbytes_p -= (bytes - *bytes_p);
1755cf05eafSMark Nunberg            *bytes_p = bytes;
1765cf05eafSMark Nunberg            return FASTPARSE_BREAK;
177431730afSMark Nunberg        }
178431730afSMark Nunberg    }
179431730afSMark Nunberg
180431730afSMark Nunberg    /* Once we're done here, re-calculate the position variables */
1815cf05eafSMark Nunberg    jsn->pos += (bytes - *bytes_p);
1825cf05eafSMark Nunberg    return FASTPARSE_EXHAUSTED;
183431730afSMark Nunberg}
184431730afSMark Nunberg
185431730afSMark Nunberg/* Functions exactly like str_fastparse, except it also accepts a 'state'
186431730afSMark Nunberg * argument, since the number's value is updated in the state. */
187431730afSMark Nunbergstatic int
188431730afSMark Nunbergjsonsl__num_fastparse(jsonsl_t jsn,
189431730afSMark Nunberg                      const jsonsl_uchar_t **bytes_p, size_t *nbytes_p,
190431730afSMark Nunberg                      struct jsonsl_state_st *state)
191431730afSMark Nunberg{
192431730afSMark Nunberg    int exhausted = 1;
193431730afSMark Nunberg    size_t nbytes = *nbytes_p;
194431730afSMark Nunberg    const jsonsl_uchar_t *bytes = *bytes_p;
195431730afSMark Nunberg
196431730afSMark Nunberg    for (; nbytes; nbytes--, bytes++) {
197431730afSMark Nunberg        jsonsl_uchar_t c = *bytes;
198431730afSMark Nunberg        if (isdigit(c)) {
199431730afSMark Nunberg            INCR_METRIC(TOTAL);
200431730afSMark Nunberg            INCR_METRIC(NUMBER_FASTPATH);
201431730afSMark Nunberg            state->nelem = (state->nelem * 10) + (c - 0x30);
202431730afSMark Nunberg        } else {
203431730afSMark Nunberg            exhausted = 0;
204431730afSMark Nunberg            break;
205431730afSMark Nunberg        }
206431730afSMark Nunberg    }
207431730afSMark Nunberg    jsn->pos += (*nbytes_p - nbytes);
208431730afSMark Nunberg    if (exhausted) {
209431730afSMark Nunberg        return FASTPARSE_EXHAUSTED;
210431730afSMark Nunberg    }
211431730afSMark Nunberg    *nbytes_p = nbytes;
212431730afSMark Nunberg    *bytes_p = bytes;
213431730afSMark Nunberg    return FASTPARSE_BREAK;
214431730afSMark Nunberg}
215431730afSMark Nunberg
216e649f75aSMark NunbergJSONSL_API
217e649f75aSMark Nunbergvoid
218e649f75aSMark Nunbergjsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
219e649f75aSMark Nunberg{
220e649f75aSMark Nunberg
221e649f75aSMark Nunberg#define INVOKE_ERROR(eb) \
222e649f75aSMark Nunberg    if (jsn->error_callback(jsn, JSONSL_ERROR_##eb, state, (char*)c)) { \
223e649f75aSMark Nunberg        goto GT_AGAIN; \
224e649f75aSMark Nunberg    } \
225e649f75aSMark Nunberg    return;
226e649f75aSMark Nunberg
227e649f75aSMark Nunberg#define STACK_PUSH \
228e649f75aSMark Nunberg    if (jsn->level >= (levels_max-1)) { \
229e649f75aSMark Nunberg        jsn->error_callback(jsn, JSONSL_ERROR_LEVELS_EXCEEDED, state, (char*)c); \
230e649f75aSMark Nunberg        return; \
231e649f75aSMark Nunberg    } \
232e649f75aSMark Nunberg    state = jsn->stack + (++jsn->level); \
233e649f75aSMark Nunberg    state->ignore_callback = jsn->stack[jsn->level-1].ignore_callback; \
234e649f75aSMark Nunberg    state->pos_begin = jsn->pos;
235e649f75aSMark Nunberg
236e649f75aSMark Nunberg#define STACK_POP_NOPOS \
237e649f75aSMark Nunberg    state->pos_cur = jsn->pos; \
238e649f75aSMark Nunberg    state = jsn->stack + (--jsn->level);
239e649f75aSMark Nunberg
240e649f75aSMark Nunberg
241e649f75aSMark Nunberg#define STACK_POP \
242e649f75aSMark Nunberg    STACK_POP_NOPOS; \
243e649f75aSMark Nunberg    state->pos_cur = jsn->pos;
244e649f75aSMark Nunberg
245e649f75aSMark Nunberg#define CALLBACK_AND_POP_NOPOS(T) \
246e649f75aSMark Nunberg        state->pos_cur = jsn->pos; \
247e649f75aSMark Nunberg        DO_CALLBACK(T, POP); \
248e649f75aSMark Nunberg        state->nescapes = 0; \
249e649f75aSMark Nunberg        state = jsn->stack + (--jsn->level);
250e649f75aSMark Nunberg
251e649f75aSMark Nunberg#define CALLBACK_AND_POP(T) \
252e649f75aSMark Nunberg        CALLBACK_AND_POP_NOPOS(T); \
253e649f75aSMark Nunberg        state->pos_cur = jsn->pos;
254e649f75aSMark Nunberg
255e649f75aSMark Nunberg#define SPECIAL_POP \
256e649f75aSMark Nunberg    CALLBACK_AND_POP(SPECIAL); \
257e649f75aSMark Nunberg    jsn->expecting = 0; \
258e649f75aSMark Nunberg    jsn->tok_last = 0; \
259e649f75aSMark Nunberg
260e649f75aSMark Nunberg#define CUR_CHAR (*(jsonsl_uchar_t*)c)
261e649f75aSMark Nunberg
262e649f75aSMark Nunberg#define DO_CALLBACK(T, action) \
263e649f75aSMark Nunberg    if (jsn->call_##T && \
264e649f75aSMark Nunberg            jsn->max_callback_level > state->level && \
265e649f75aSMark Nunberg            state->ignore_callback == 0) { \
266e649f75aSMark Nunberg        \
267e649f75aSMark Nunberg        if (jsn->action_callback_##action) { \
268e649f75aSMark Nunberg            jsn->action_callback_##action(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
269e649f75aSMark Nunberg        } else if (jsn->action_callback) { \
270e649f75aSMark Nunberg            jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \
271e649f75aSMark Nunberg        } \
2729c3aeb87SMark Nunberg        if (jsn->stopfl) { return; } \
273e649f75aSMark Nunberg    }
274e649f75aSMark Nunberg
275e649f75aSMark Nunberg    /**
276e649f75aSMark Nunberg     * Verifies that we are able to insert the (non-string) item into a hash.
277e649f75aSMark Nunberg     */
278e649f75aSMark Nunberg#define ENSURE_HVAL \
279e649f75aSMark Nunberg    if (state->nelem % 2 == 0 && state->type == JSONSL_T_OBJECT) { \
280e649f75aSMark Nunberg        INVOKE_ERROR(HKEY_EXPECTED); \
281e649f75aSMark Nunberg    }
282e649f75aSMark Nunberg
283e649f75aSMark Nunberg#define VERIFY_SPECIAL(lit) \
284e649f75aSMark Nunberg        if (CUR_CHAR != (lit)[jsn->pos - state->pos_begin]) { \
285e649f75aSMark Nunberg            INVOKE_ERROR(SPECIAL_EXPECTED); \
286e649f75aSMark Nunberg        }
287e649f75aSMark Nunberg
2881f330d76SMark Nunberg#define STATE_SPECIAL_LENGTH \
2891f330d76SMark Nunberg    (state)->nescapes
2901f330d76SMark Nunberg
2911f330d76SMark Nunberg#define IS_NORMAL_NUMBER \
2921f330d76SMark Nunberg    ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \
2931f330d76SMark Nunberg        (state)->special_flags == JSONSL_SPECIALf_SIGNED)
2941f330d76SMark Nunberg
2951f330d76SMark Nunberg#define STATE_NUM_LAST jsn->tok_last
2961f330d76SMark Nunberg
297431730afSMark Nunberg#define CONTINUE_NEXT_CHAR() continue
298431730afSMark Nunberg
299e649f75aSMark Nunberg    const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes;
300e649f75aSMark Nunberg    size_t levels_max = jsn->levels_max;
301e649f75aSMark Nunberg    struct jsonsl_state_st *state = jsn->stack + jsn->level;
302e649f75aSMark Nunberg    jsn->base = bytes;
303e649f75aSMark Nunberg
304e649f75aSMark Nunberg    for (; nbytes; nbytes--, jsn->pos++, c++) {
3051efa455eSTrond Norbye        unsigned state_type;
306e649f75aSMark Nunberg        INCR_METRIC(TOTAL);
307431730afSMark Nunberg
308e649f75aSMark Nunberg        GT_AGAIN:
309e649f75aSMark Nunberg        state_type = state->type;
310431730afSMark Nunberg        /* Most common type is typically a string: */
311e649f75aSMark Nunberg        if (state_type & JSONSL_Tf_STRINGY) {
312431730afSMark Nunberg            /* Special escape handling for some stuff */
313431730afSMark Nunberg            if (jsn->in_escape) {
314431730afSMark Nunberg                jsn->in_escape = 0;
315431730afSMark Nunberg                if (!is_allowed_escape(CUR_CHAR)) {
316431730afSMark Nunberg                    INVOKE_ERROR(ESCAPE_INVALID);
317431730afSMark Nunberg                } else if (CUR_CHAR == 'u') {
318431730afSMark Nunberg                    DO_CALLBACK(UESCAPE, UESCAPE);
319431730afSMark Nunberg                    if (jsn->return_UESCAPE) {
320431730afSMark Nunberg                        return;
321431730afSMark Nunberg                    }
322431730afSMark Nunberg                }
323431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
324431730afSMark Nunberg            }
325431730afSMark Nunberg
326431730afSMark Nunberg            if (jsonsl__str_fastparse(jsn, &c, &nbytes) ==
327431730afSMark Nunberg                    FASTPARSE_EXHAUSTED) {
328431730afSMark Nunberg                /* No need to readjust variables as we've exhausted the iterator */
329431730afSMark Nunberg                return;
330e649f75aSMark Nunberg            } else {
331431730afSMark Nunberg                if (CUR_CHAR == '"') {
332431730afSMark Nunberg                    goto GT_QUOTE;
333431730afSMark Nunberg                } else if (CUR_CHAR == '\\') {
334431730afSMark Nunberg                    goto GT_ESCAPE;
335431730afSMark Nunberg                } else {
336431730afSMark Nunberg                    INVOKE_ERROR(WEIRD_WHITESPACE);
337431730afSMark Nunberg                }
338e649f75aSMark Nunberg            }
339e649f75aSMark Nunberg            INCR_METRIC(STRINGY_SLOWPATH);
340e649f75aSMark Nunberg
341e649f75aSMark Nunberg        } else if (state_type == JSONSL_T_SPECIAL) {
3421f330d76SMark Nunberg            /* Fast track for signed/unsigned */
3431f330d76SMark Nunberg            if (IS_NORMAL_NUMBER) {
344431730afSMark Nunberg                if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) ==
345431730afSMark Nunberg                        FASTPARSE_EXHAUSTED) {
346431730afSMark Nunberg                    return;
3471f330d76SMark Nunberg                } else {
3481f330d76SMark Nunberg                    goto GT_SPECIAL_NUMERIC;
3491f330d76SMark Nunberg                }
3501f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
3511f330d76SMark Nunberg                if (!isdigit(CUR_CHAR)) {
3521f330d76SMark Nunberg                    INVOKE_ERROR(INVALID_NUMBER);
3531f330d76SMark Nunberg                }
3541f330d76SMark Nunberg
3551f330d76SMark Nunberg                if (CUR_CHAR == '0') {
3561f330d76SMark Nunberg                    state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED;
3571f330d76SMark Nunberg                } else if (isdigit(CUR_CHAR)) {
3581f330d76SMark Nunberg                    state->special_flags = JSONSL_SPECIALf_SIGNED;
3591f330d76SMark Nunberg                    state->nelem = CUR_CHAR - 0x30;
3601f330d76SMark Nunberg                } else {
3611f330d76SMark Nunberg                    INVOKE_ERROR(INVALID_NUMBER);
3621f330d76SMark Nunberg                }
363431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
3641f330d76SMark Nunberg
3651f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_ZERO) {
3661f330d76SMark Nunberg                if (isdigit(CUR_CHAR)) {
3671f330d76SMark Nunberg                    /* Following a zero! */
3681f330d76SMark Nunberg                    INVOKE_ERROR(INVALID_NUMBER);
3691f330d76SMark Nunberg                }
3701f330d76SMark Nunberg                /* Unset the 'zero' flag: */
3711f330d76SMark Nunberg                if (state->special_flags & JSONSL_SPECIALf_SIGNED) {
3721f330d76SMark Nunberg                    state->special_flags = JSONSL_SPECIALf_SIGNED;
3731f330d76SMark Nunberg                } else {
3741f330d76SMark Nunberg                    state->special_flags = JSONSL_SPECIALf_UNSIGNED;
3751f330d76SMark Nunberg                }
3761f330d76SMark Nunberg                goto GT_SPECIAL_NUMERIC;
3771f330d76SMark Nunberg            }
3781f330d76SMark Nunberg
379e649f75aSMark Nunberg            if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
3801f330d76SMark Nunberg                GT_SPECIAL_NUMERIC:
381e649f75aSMark Nunberg                switch (CUR_CHAR) {
382e649f75aSMark Nunberg                CASE_DIGITS
3831f330d76SMark Nunberg                    STATE_NUM_LAST = '1';
384431730afSMark Nunberg                    CONTINUE_NEXT_CHAR();
3851f330d76SMark Nunberg
3861f330d76SMark Nunberg                case '.':
3871f330d76SMark Nunberg                    if (state->special_flags & JSONSL_SPECIALf_FLOAT) {
3881f330d76SMark Nunberg                        INVOKE_ERROR(INVALID_NUMBER);
3891f330d76SMark Nunberg                    }
3901f330d76SMark Nunberg                    state->special_flags |= JSONSL_SPECIALf_FLOAT;
3911f330d76SMark Nunberg                    STATE_NUM_LAST = '.';
392431730afSMark Nunberg                    CONTINUE_NEXT_CHAR();
393e649f75aSMark Nunberg
394e649f75aSMark Nunberg                case 'e':
395e649f75aSMark Nunberg                case 'E':
3961f330d76SMark Nunberg                    if (state->special_flags & JSONSL_SPECIALf_EXPONENT) {
3971f330d76SMark Nunberg                        INVOKE_ERROR(INVALID_NUMBER);
3981f330d76SMark Nunberg                    }
399e649f75aSMark Nunberg                    state->special_flags |= JSONSL_SPECIALf_EXPONENT;
4001f330d76SMark Nunberg                    STATE_NUM_LAST = 'e';
401431730afSMark Nunberg                    CONTINUE_NEXT_CHAR();
4021f330d76SMark Nunberg
4031f330d76SMark Nunberg                case '-':
4041f330d76SMark Nunberg                case '+':
4051f330d76SMark Nunberg                    if (STATE_NUM_LAST != 'e') {
4061f330d76SMark Nunberg                        INVOKE_ERROR(INVALID_NUMBER);
4071f330d76SMark Nunberg                    }
4081f330d76SMark Nunberg                    STATE_NUM_LAST = '-';
409431730afSMark Nunberg                    CONTINUE_NEXT_CHAR();
4101f330d76SMark Nunberg
411e649f75aSMark Nunberg                default:
412e649f75aSMark Nunberg                    if (is_special_end(CUR_CHAR)) {
413e649f75aSMark Nunberg                        goto GT_SPECIAL_POP;
414e649f75aSMark Nunberg                    }
415e649f75aSMark Nunberg                    INVOKE_ERROR(INVALID_NUMBER);
416e649f75aSMark Nunberg                    break;
417e649f75aSMark Nunberg                }
418e649f75aSMark Nunberg            }
419e649f75aSMark Nunberg            /* else if (!NUMERIC) */
420e649f75aSMark Nunberg            if (!is_special_end(CUR_CHAR)) {
4211f330d76SMark Nunberg                STATE_SPECIAL_LENGTH++;
4221f330d76SMark Nunberg
423e649f75aSMark Nunberg                /* Verify TRUE, FALSE, NULL */
424e649f75aSMark Nunberg                if (state->special_flags == JSONSL_SPECIALf_TRUE) {
425e649f75aSMark Nunberg                    VERIFY_SPECIAL("true");
426e649f75aSMark Nunberg                } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
427e649f75aSMark Nunberg                    VERIFY_SPECIAL("false");
428e649f75aSMark Nunberg                } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
429e649f75aSMark Nunberg                    VERIFY_SPECIAL("null");
430e649f75aSMark Nunberg                }
431e649f75aSMark Nunberg                INCR_METRIC(SPECIAL_FASTPATH);
432431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
433e649f75aSMark Nunberg            }
434e649f75aSMark Nunberg
435e649f75aSMark Nunberg            GT_SPECIAL_POP:
4361f330d76SMark Nunberg            if (IS_NORMAL_NUMBER) {
4371f330d76SMark Nunberg                /* Nothing */
4381f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_ZERO ||
4391f330d76SMark Nunberg                    state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) {
4401f330d76SMark Nunberg                /* 0 is unsigned! */
4411f330d76SMark Nunberg                state->special_flags = JSONSL_SPECIALf_UNSIGNED;
4421f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_DASH) {
4431f330d76SMark Nunberg                /* Still in dash! */
4441f330d76SMark Nunberg                INVOKE_ERROR(INVALID_NUMBER);
4451f330d76SMark Nunberg            } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) {
4461f330d76SMark Nunberg                /* Check that we're not at the end of a token */
4471f330d76SMark Nunberg                if (STATE_NUM_LAST != '1') {
4481f330d76SMark Nunberg                    INVOKE_ERROR(INVALID_NUMBER);
4491f330d76SMark Nunberg                }
4501f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_TRUE) {
4511f330d76SMark Nunberg                if (STATE_SPECIAL_LENGTH != 4) {
4521f330d76SMark Nunberg                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
4531f330d76SMark Nunberg                }
4541f330d76SMark Nunberg                state->nelem = 1;
4551f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_FALSE) {
4561f330d76SMark Nunberg                if (STATE_SPECIAL_LENGTH != 5) {
4571f330d76SMark Nunberg                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
4581f330d76SMark Nunberg                }
4591f330d76SMark Nunberg            } else if (state->special_flags == JSONSL_SPECIALf_NULL) {
4601f330d76SMark Nunberg                if (STATE_SPECIAL_LENGTH != 4) {
4611f330d76SMark Nunberg                    INVOKE_ERROR(SPECIAL_INCOMPLETE);
4621f330d76SMark Nunberg                }
4631f330d76SMark Nunberg            }
464e649f75aSMark Nunberg            SPECIAL_POP;
465e649f75aSMark Nunberg            jsn->expecting = ',';
466e649f75aSMark Nunberg            if (is_allowed_whitespace(CUR_CHAR)) {
467431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
468e649f75aSMark Nunberg            }
469e649f75aSMark Nunberg            /**
470e649f75aSMark Nunberg             * This works because we have a non-whitespace token
471e649f75aSMark Nunberg             * which is not a special token. If this is a structural
472e649f75aSMark Nunberg             * character then it will be gracefully handled by the
473e649f75aSMark Nunberg             * switch statement. Otherwise it will default to the 'special'
474e649f75aSMark Nunberg             * state again,
475e649f75aSMark Nunberg             */
476e649f75aSMark Nunberg            goto GT_STRUCTURAL_TOKEN;
477e649f75aSMark Nunberg        } else if (is_allowed_whitespace(CUR_CHAR)) {
478e649f75aSMark Nunberg            INCR_METRIC(ALLOWED_WHITESPACE);
479e649f75aSMark Nunberg            /* So we're not special. Harmless insignificant whitespace
480e649f75aSMark Nunberg             * passthrough
481e649f75aSMark Nunberg             */
482431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
483e649f75aSMark Nunberg        } else if (extract_special(CUR_CHAR)) {
484e649f75aSMark Nunberg            /* not a string, whitespace, or structural token. must be special */
485e649f75aSMark Nunberg            goto GT_SPECIAL_BEGIN;
486e649f75aSMark Nunberg        }
487e649f75aSMark Nunberg
488e649f75aSMark Nunberg        INCR_GENERIC(CUR_CHAR);
489e649f75aSMark Nunberg
490e649f75aSMark Nunberg        if (CUR_CHAR == '"') {
491e649f75aSMark Nunberg            GT_QUOTE:
492e649f75aSMark Nunberg            jsn->can_insert = 0;
493e649f75aSMark Nunberg            switch (state_type) {
494e649f75aSMark Nunberg
495e649f75aSMark Nunberg            /* the end of a string or hash key */
496e649f75aSMark Nunberg            case JSONSL_T_STRING:
497e649f75aSMark Nunberg                CALLBACK_AND_POP(STRING);
498431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
499e649f75aSMark Nunberg            case JSONSL_T_HKEY:
500e649f75aSMark Nunberg                CALLBACK_AND_POP(HKEY);
501431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
502e649f75aSMark Nunberg
503e649f75aSMark Nunberg            case JSONSL_T_OBJECT:
504e649f75aSMark Nunberg                state->nelem++;
505e649f75aSMark Nunberg                if ( (state->nelem-1) % 2 ) {
506e649f75aSMark Nunberg                    /* Odd, this must be a hash value */
507e649f75aSMark Nunberg                    if (jsn->tok_last != ':') {
508e649f75aSMark Nunberg                        INVOKE_ERROR(MISSING_TOKEN);
509e649f75aSMark Nunberg                    }
510e649f75aSMark Nunberg                    jsn->expecting = ','; /* Can't figure out what to expect next */
511e649f75aSMark Nunberg                    jsn->tok_last = 0;
512e649f75aSMark Nunberg
513e649f75aSMark Nunberg                    STACK_PUSH;
514e649f75aSMark Nunberg                    state->type = JSONSL_T_STRING;
515e649f75aSMark Nunberg                    DO_CALLBACK(STRING, PUSH);
516e649f75aSMark Nunberg
517e649f75aSMark Nunberg                } else {
518e649f75aSMark Nunberg                    /* hash key */
519e649f75aSMark Nunberg                    if (jsn->expecting != '"') {
520e649f75aSMark Nunberg                        INVOKE_ERROR(STRAY_TOKEN);
521e649f75aSMark Nunberg                    }
522e649f75aSMark Nunberg                    jsn->tok_last = 0;
523e649f75aSMark Nunberg                    jsn->expecting = ':';
524e649f75aSMark Nunberg
525e649f75aSMark Nunberg                    STACK_PUSH;
526e649f75aSMark Nunberg                    state->type = JSONSL_T_HKEY;
527e649f75aSMark Nunberg                    DO_CALLBACK(HKEY, PUSH);
528e649f75aSMark Nunberg                }
529431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
530e649f75aSMark Nunberg
531e649f75aSMark Nunberg            case JSONSL_T_LIST:
532e649f75aSMark Nunberg                state->nelem++;
533e649f75aSMark Nunberg                STACK_PUSH;
534e649f75aSMark Nunberg                state->type = JSONSL_T_STRING;
535e649f75aSMark Nunberg                jsn->expecting = ',';
536e649f75aSMark Nunberg                jsn->tok_last = 0;
537e649f75aSMark Nunberg                DO_CALLBACK(STRING, PUSH);
538431730afSMark Nunberg                CONTINUE_NEXT_CHAR();
539e649f75aSMark Nunberg
540e649f75aSMark Nunberg            case JSONSL_T_SPECIAL:
541e649f75aSMark Nunberg                INVOKE_ERROR(STRAY_TOKEN);
542e649f75aSMark Nunberg                break;
543e649f75aSMark Nunberg
544e649f75aSMark Nunberg            default:
545e649f75aSMark Nunberg                INVOKE_ERROR(STRING_OUTSIDE_CONTAINER);
546e649f75aSMark Nunberg                break;
547e649f75aSMark Nunberg            } /* switch(state->type) */
548e649f75aSMark Nunberg        } else if (CUR_CHAR == '\\') {
549e649f75aSMark Nunberg            GT_ESCAPE:
550e649f75aSMark Nunberg            INCR_METRIC(ESCAPES);
551e649f75aSMark Nunberg        /* Escape */
552e649f75aSMark Nunberg            if ( (state->type & JSONSL_Tf_STRINGY) == 0 ) {
553e649f75aSMark Nunberg                INVOKE_ERROR(ESCAPE_OUTSIDE_STRING);
554e649f75aSMark Nunberg            }
555e649f75aSMark Nunberg            state->nescapes++;
556e649f75aSMark Nunberg            jsn->in_escape = 1;
557431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
558e649f75aSMark Nunberg        } /* " or \ */
559e649f75aSMark Nunberg
560e649f75aSMark Nunberg        GT_STRUCTURAL_TOKEN:
561e649f75aSMark Nunberg        switch (CUR_CHAR) {
562e649f75aSMark Nunberg        case ':':
563e649f75aSMark Nunberg            INCR_METRIC(STRUCTURAL_TOKEN);
564e649f75aSMark Nunberg            if (jsn->expecting != CUR_CHAR) {
565e649f75aSMark Nunberg                INVOKE_ERROR(STRAY_TOKEN);
566e649f75aSMark Nunberg            }
567e649f75aSMark Nunberg            jsn->tok_last = ':';
568e649f75aSMark Nunberg            jsn->can_insert = 1;
569e649f75aSMark Nunberg            jsn->expecting = '"';
570431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
571e649f75aSMark Nunberg
572e649f75aSMark Nunberg        case ',':
573e649f75aSMark Nunberg            INCR_METRIC(STRUCTURAL_TOKEN);
574e649f75aSMark Nunberg            /**
575e649f75aSMark Nunberg             * The comma is one of the more generic tokens.
576e649f75aSMark Nunberg             * In the context of an OBJECT, the can_insert flag
577e649f75aSMark Nunberg             * should never be set, and no other action is
578e649f75aSMark Nunberg             * necessary.
579e649f75aSMark Nunberg             */
580e649f75aSMark Nunberg            if (jsn->expecting != CUR_CHAR) {
581e649f75aSMark Nunberg                /* make this branch execute only when we haven't manually
582e649f75aSMark Nunberg                 * just placed the ',' in the expecting register.
583e649f75aSMark Nunberg                 */
584e649f75aSMark Nunberg                INVOKE_ERROR(STRAY_TOKEN);
585e649f75aSMark Nunberg            }
586e649f75aSMark Nunberg
587e649f75aSMark Nunberg            if (state->type == JSONSL_T_OBJECT) {
588e649f75aSMark Nunberg                /* end of hash value, expect a string as a hash key */
589e649f75aSMark Nunberg                jsn->expecting = '"';
590e649f75aSMark Nunberg            } else {
591e649f75aSMark Nunberg                jsn->can_insert = 1;
592e649f75aSMark Nunberg            }
593e649f75aSMark Nunberg
594e649f75aSMark Nunberg            jsn->tok_last = ',';
595e649f75aSMark Nunberg            jsn->expecting = '"';
596431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
597e649f75aSMark Nunberg
598e649f75aSMark Nunberg            /* new list or object */
599e649f75aSMark Nunberg            /* hashes are more common */
600e649f75aSMark Nunberg        case '{':
601e649f75aSMark Nunberg        case '[':
602e649f75aSMark Nunberg            INCR_METRIC(STRUCTURAL_TOKEN);
603e649f75aSMark Nunberg            if (!jsn->can_insert) {
604e649f75aSMark Nunberg                INVOKE_ERROR(CANT_INSERT);
605e649f75aSMark Nunberg            }
606e649f75aSMark Nunberg
607e649f75aSMark Nunberg            ENSURE_HVAL;
608e649f75aSMark Nunberg            state->nelem++;
609e649f75aSMark Nunberg
610e649f75aSMark Nunberg            STACK_PUSH;
611e649f75aSMark Nunberg            /* because the constants match the opening delimiters, we can do this: */
612e649f75aSMark Nunberg            state->type = CUR_CHAR;
613e649f75aSMark Nunberg            state->nelem = 0;
614e649f75aSMark Nunberg            jsn->can_insert = 1;
615e649f75aSMark Nunberg            if (CUR_CHAR == '{') {
                /* If we're a hash, we expect a key first, which is quoted */
617e649f75aSMark Nunberg                jsn->expecting = '"';
618e649f75aSMark Nunberg            }
619e649f75aSMark Nunberg            if (CUR_CHAR == JSONSL_T_OBJECT) {
620e649f75aSMark Nunberg                DO_CALLBACK(OBJECT, PUSH);
621e649f75aSMark Nunberg            } else {
622e649f75aSMark Nunberg                DO_CALLBACK(LIST, PUSH);
623e649f75aSMark Nunberg            }
624e649f75aSMark Nunberg            jsn->tok_last = 0;
625431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
626e649f75aSMark Nunberg
627e649f75aSMark Nunberg            /* closing of list or object */
628e649f75aSMark Nunberg        case '}':
629e649f75aSMark Nunberg        case ']':
630e649f75aSMark Nunberg            INCR_METRIC(STRUCTURAL_TOKEN);
631e649f75aSMark Nunberg            if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
632e649f75aSMark Nunberg                INVOKE_ERROR(TRAILING_COMMA);
633e649f75aSMark Nunberg            }
634e649f75aSMark Nunberg
635e649f75aSMark Nunberg            jsn->can_insert = 0;
636e649f75aSMark Nunberg            jsn->level--;
637e649f75aSMark Nunberg            jsn->expecting = ',';
638e649f75aSMark Nunberg            jsn->tok_last = 0;
639e649f75aSMark Nunberg            if (CUR_CHAR == ']') {
640e649f75aSMark Nunberg                if (state->type != '[') {
641e649f75aSMark Nunberg                    INVOKE_ERROR(BRACKET_MISMATCH);
642e649f75aSMark Nunberg                }
643e649f75aSMark Nunberg                DO_CALLBACK(LIST, POP);
644e649f75aSMark Nunberg            } else {
645e649f75aSMark Nunberg                if (state->type != '{') {
646e649f75aSMark Nunberg                    INVOKE_ERROR(BRACKET_MISMATCH);
6471f330d76SMark Nunberg                } else if (state->nelem && state->nelem % 2 != 0) {
6481f330d76SMark Nunberg                    INVOKE_ERROR(VALUE_EXPECTED);
649e649f75aSMark Nunberg                }
650e649f75aSMark Nunberg                DO_CALLBACK(OBJECT, POP);
651e649f75aSMark Nunberg            }
652e649f75aSMark Nunberg            state = jsn->stack + jsn->level;
653e649f75aSMark Nunberg            state->pos_cur = jsn->pos;
654431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
655e649f75aSMark Nunberg
656e649f75aSMark Nunberg        default:
657e649f75aSMark Nunberg            GT_SPECIAL_BEGIN:
658e649f75aSMark Nunberg            /**
659e649f75aSMark Nunberg             * Not a string, not a structural token, and not benign whitespace.
660e649f75aSMark Nunberg             * Technically we should iterate over the character always, but since
661e649f75aSMark Nunberg             * we are not doing full numerical/value decoding anyway (but only hinting),
662e649f75aSMark Nunberg             * we only check upon entry.
663e649f75aSMark Nunberg             */
664e649f75aSMark Nunberg            if (state->type != JSONSL_T_SPECIAL) {
665e649f75aSMark Nunberg                int special_flags = extract_special(CUR_CHAR);
666e649f75aSMark Nunberg                if (!special_flags) {
667e649f75aSMark Nunberg                    /**
668e649f75aSMark Nunberg                     * Try to do some heuristics here anyway to figure out what kind of
669e649f75aSMark Nunberg                     * error this is. The 'special' case is a fallback scenario anyway.
670e649f75aSMark Nunberg                     */
671e649f75aSMark Nunberg                    if (CUR_CHAR == '\0') {
672e649f75aSMark Nunberg                        INVOKE_ERROR(FOUND_NULL_BYTE);
673e649f75aSMark Nunberg                    } else if (CUR_CHAR < 0x20) {
674e649f75aSMark Nunberg                        INVOKE_ERROR(WEIRD_WHITESPACE);
675e649f75aSMark Nunberg                    } else {
676e649f75aSMark Nunberg                        INVOKE_ERROR(SPECIAL_EXPECTED);
677e649f75aSMark Nunberg                    }
678e649f75aSMark Nunberg                }
679e649f75aSMark Nunberg                ENSURE_HVAL;
680e649f75aSMark Nunberg                state->nelem++;
681e649f75aSMark Nunberg                if (!jsn->can_insert) {
682e649f75aSMark Nunberg                    INVOKE_ERROR(CANT_INSERT);
683e649f75aSMark Nunberg                }
684e649f75aSMark Nunberg                STACK_PUSH;
685e649f75aSMark Nunberg                state->type = JSONSL_T_SPECIAL;
686e649f75aSMark Nunberg                state->special_flags = special_flags;
6871f330d76SMark Nunberg                STATE_SPECIAL_LENGTH = 1;
6881f330d76SMark Nunberg
689e649f75aSMark Nunberg                if (special_flags == JSONSL_SPECIALf_UNSIGNED) {
690e649f75aSMark Nunberg                    state->nelem = CUR_CHAR - 0x30;
6911f330d76SMark Nunberg                    STATE_NUM_LAST = '1';
692e649f75aSMark Nunberg                } else {
6931f330d76SMark Nunberg                    STATE_NUM_LAST = '-';
694e649f75aSMark Nunberg                    state->nelem = 0;
695e649f75aSMark Nunberg                }
696e649f75aSMark Nunberg                DO_CALLBACK(SPECIAL, PUSH);
697e649f75aSMark Nunberg            }
698431730afSMark Nunberg            CONTINUE_NEXT_CHAR();
699e649f75aSMark Nunberg        }
700e649f75aSMark Nunberg    }
701e649f75aSMark Nunberg}
702e649f75aSMark Nunberg
703e649f75aSMark NunbergJSONSL_API
704e649f75aSMark Nunbergconst char* jsonsl_strerror(jsonsl_error_t err)
705e649f75aSMark Nunberg{
706e649f75aSMark Nunberg    if (err == JSONSL_ERROR_SUCCESS) {
707e649f75aSMark Nunberg        return "SUCCESS";
708e649f75aSMark Nunberg    }
709e649f75aSMark Nunberg#define X(t) \
710e649f75aSMark Nunberg    if (err == JSONSL_ERROR_##t) \
711e649f75aSMark Nunberg        return #t;
712e649f75aSMark Nunberg    JSONSL_XERR;
713e649f75aSMark Nunberg#undef X
714e649f75aSMark Nunberg    return "<UNKNOWN_ERROR>";
715e649f75aSMark Nunberg}
716e649f75aSMark Nunberg
717e649f75aSMark NunbergJSONSL_API
718e649f75aSMark Nunbergconst char *jsonsl_strtype(jsonsl_type_t type)
719e649f75aSMark Nunberg{
720e649f75aSMark Nunberg#define X(o,c) \
721e649f75aSMark Nunberg    if (type == JSONSL_T_##o) \
722e649f75aSMark Nunberg        return #o;
723e649f75aSMark Nunberg    JSONSL_XTYPE
724e649f75aSMark Nunberg#undef X
725e649f75aSMark Nunberg    return "UNKNOWN TYPE";
726e649f75aSMark Nunberg
727e649f75aSMark Nunberg}
728e649f75aSMark Nunberg
729e649f75aSMark Nunberg/*
730e649f75aSMark Nunberg *
731e649f75aSMark Nunberg * JPR/JSONPointer functions
732e649f75aSMark Nunberg *
733e649f75aSMark Nunberg *
734e649f75aSMark Nunberg */
735e649f75aSMark Nunberg#ifndef JSONSL_NO_JPR
736e649f75aSMark Nunbergstatic
737e649f75aSMark Nunbergjsonsl_jpr_type_t
738e649f75aSMark Nunbergpopulate_component(char *in,
739e649f75aSMark Nunberg                   struct jsonsl_jpr_component_st *component,
740e649f75aSMark Nunberg                   char **next,
741e649f75aSMark Nunberg                   jsonsl_error_t *errp)
742e649f75aSMark Nunberg{
743e649f75aSMark Nunberg    unsigned long pctval;
744e649f75aSMark Nunberg    char *c = NULL, *outp = NULL, *end = NULL;
745e649f75aSMark Nunberg    size_t input_len;
746e649f75aSMark Nunberg    jsonsl_jpr_type_t ret = JSONSL_PATH_NONE;
747e649f75aSMark Nunberg
748e649f75aSMark Nunberg    if (*next == NULL || *(*next) == '\0') {
749e649f75aSMark Nunberg        return JSONSL_PATH_NONE;
750e649f75aSMark Nunberg    }
751e649f75aSMark Nunberg
752e649f75aSMark Nunberg    /* Replace the next / with a NULL */
753e649f75aSMark Nunberg    *next = strstr(in, "/");
754e649f75aSMark Nunberg    if (*next != NULL) {
755e649f75aSMark Nunberg        *(*next) = '\0'; /* drop the forward slash */
756e649f75aSMark Nunberg        input_len = *next - in;
757e649f75aSMark Nunberg        end = *next;
758e649f75aSMark Nunberg        *next += 1; /* next character after the '/' */
759e649f75aSMark Nunberg    } else {
760e649f75aSMark Nunberg        input_len = strlen(in);
761e649f75aSMark Nunberg        end = in + input_len + 1;
762e649f75aSMark Nunberg    }
763e649f75aSMark Nunberg
764e649f75aSMark Nunberg    component->pstr = in;
765e649f75aSMark Nunberg
766e649f75aSMark Nunberg    /* Check for special components of interest */
767e649f75aSMark Nunberg    if (*in == JSONSL_PATH_WILDCARD_CHAR && input_len == 1) {
768e649f75aSMark Nunberg        /* Lone wildcard */
769e649f75aSMark Nunberg        ret = JSONSL_PATH_WILDCARD;
770e649f75aSMark Nunberg        goto GT_RET;
771e649f75aSMark Nunberg    } else if (isdigit(*in)) {
772e649f75aSMark Nunberg        /* ASCII Numeric */
773e649f75aSMark Nunberg        char *endptr;
774e649f75aSMark Nunberg        component->idx = strtoul(in, &endptr, 10);
775e649f75aSMark Nunberg        if (endptr && *endptr == '\0') {
776e649f75aSMark Nunberg            ret = JSONSL_PATH_NUMERIC;
777e649f75aSMark Nunberg            goto GT_RET;
778e649f75aSMark Nunberg        }
779e649f75aSMark Nunberg    }
780e649f75aSMark Nunberg
781e649f75aSMark Nunberg    /* Default, it's a string */
782e649f75aSMark Nunberg    ret = JSONSL_PATH_STRING;
783e649f75aSMark Nunberg    for (c = outp = in; c < end; c++, outp++) {
784e649f75aSMark Nunberg        char origc;
785e649f75aSMark Nunberg        if (*c != '%') {
786e649f75aSMark Nunberg            goto GT_ASSIGN;
787e649f75aSMark Nunberg        }
788e649f75aSMark Nunberg        /*
789e649f75aSMark Nunberg         * c = { [+0] = '%', [+1] = 'b', [+2] = 'e', [+3] = '\0' }
790e649f75aSMark Nunberg         */
791e649f75aSMark Nunberg
792e649f75aSMark Nunberg        /* Need %XX */
793e649f75aSMark Nunberg        if (c+2 >= end) {
794e649f75aSMark Nunberg            *errp = JSONSL_ERROR_PERCENT_BADHEX;
795e649f75aSMark Nunberg            return JSONSL_PATH_INVALID;
796e649f75aSMark Nunberg        }
797e649f75aSMark Nunberg        if (! (isxdigit(*(c+1)) && isxdigit(*(c+2))) ) {
798e649f75aSMark Nunberg            *errp = JSONSL_ERROR_PERCENT_BADHEX;
799e649f75aSMark Nunberg            return JSONSL_PATH_INVALID;
800e649f75aSMark Nunberg        }
801e649f75aSMark Nunberg
802e649f75aSMark Nunberg        /* Temporarily null-terminate the characters */
803e649f75aSMark Nunberg        origc = *(c+3);
804e649f75aSMark Nunberg        *(c+3) = '\0';
805e649f75aSMark Nunberg        pctval = strtoul(c+1, NULL, 16);
806e649f75aSMark Nunberg        *(c+3) = origc;
807e649f75aSMark Nunberg
808e649f75aSMark Nunberg        *outp = (char) pctval;
809e649f75aSMark Nunberg        c += 2;
810e649f75aSMark Nunberg        continue;
811e649f75aSMark Nunberg
812e649f75aSMark Nunberg        GT_ASSIGN:
813e649f75aSMark Nunberg        *outp = *c;
814e649f75aSMark Nunberg    }
815e649f75aSMark Nunberg    /* Null-terminate the string */
816e649f75aSMark Nunberg    for (; outp < c; outp++) {
817e649f75aSMark Nunberg        *outp = '\0';
818e649f75aSMark Nunberg    }
819e649f75aSMark Nunberg
820e649f75aSMark Nunberg    GT_RET:
821e649f75aSMark Nunberg    component->ptype = ret;
822e649f75aSMark Nunberg    if (ret != JSONSL_PATH_WILDCARD) {
823e649f75aSMark Nunberg        component->len = strlen(component->pstr);
824e649f75aSMark Nunberg    }
825e649f75aSMark Nunberg    return ret;
826e649f75aSMark Nunberg}
827e649f75aSMark Nunberg
828e649f75aSMark NunbergJSONSL_API
829e649f75aSMark Nunbergjsonsl_jpr_t
830e649f75aSMark Nunbergjsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
831e649f75aSMark Nunberg{
832e649f75aSMark Nunberg    char *my_copy = NULL;
833e649f75aSMark Nunberg    int count, curidx;
834e649f75aSMark Nunberg    struct jsonsl_jpr_st *ret = NULL;
835e649f75aSMark Nunberg    struct jsonsl_jpr_component_st *components = NULL;
836e649f75aSMark Nunberg    size_t origlen;
837e649f75aSMark Nunberg    jsonsl_error_t errstacked;
838e649f75aSMark Nunberg
839e649f75aSMark Nunberg#define JPR_BAIL(err) *errp = err; goto GT_ERROR;
840e649f75aSMark Nunberg
841e649f75aSMark Nunberg    if (errp == NULL) {
842e649f75aSMark Nunberg        errp = &errstacked;
843e649f75aSMark Nunberg    }
844e649f75aSMark Nunberg
845e649f75aSMark Nunberg    if (path == NULL || *path != '/') {
846e649f75aSMark Nunberg        JPR_BAIL(JSONSL_ERROR_JPR_NOROOT);
847e649f75aSMark Nunberg        return NULL;
848e649f75aSMark Nunberg    }
849e649f75aSMark Nunberg
850e649f75aSMark Nunberg    count = 1;
851e649f75aSMark Nunberg    path++;
852e649f75aSMark Nunberg    {
853e649f75aSMark Nunberg        const char *c = path;
854e649f75aSMark Nunberg        for (; *c; c++) {
855e649f75aSMark Nunberg            if (*c == '/') {
856e649f75aSMark Nunberg                count++;
857e649f75aSMark Nunberg                if (*(c+1) == '/') {
858e649f75aSMark Nunberg                    JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH);
859e649f75aSMark Nunberg                }
860e649f75aSMark Nunberg            }
861e649f75aSMark Nunberg        }
862e649f75aSMark Nunberg    }
863e649f75aSMark Nunberg    if(*path) {
864e649f75aSMark Nunberg        count++;
865e649f75aSMark Nunberg    }
866e649f75aSMark Nunberg
867bfb89cbcSMark Nunberg    components = (struct jsonsl_jpr_component_st *)
868bfb89cbcSMark Nunberg            malloc(sizeof(*components) * count);
869e649f75aSMark Nunberg    if (!components) {
870e649f75aSMark Nunberg        JPR_BAIL(JSONSL_ERROR_ENOMEM);
871e649f75aSMark Nunberg    }
872e649f75aSMark Nunberg
873bfb89cbcSMark Nunberg    my_copy = (char *)malloc(strlen(path) + 1);
874e649f75aSMark Nunberg    if (!my_copy) {
875e649f75aSMark Nunberg        JPR_BAIL(JSONSL_ERROR_ENOMEM);
876e649f75aSMark Nunberg    }
877e649f75aSMark Nunberg
878e649f75aSMark Nunberg    strcpy(my_copy, path);
879e649f75aSMark Nunberg
880e649f75aSMark Nunberg    components[0].ptype = JSONSL_PATH_ROOT;
881e649f75aSMark Nunberg
882e649f75aSMark Nunberg    if (*my_copy) {
883e649f75aSMark Nunberg        char *cur = my_copy;
884e649f75aSMark Nunberg        int pathret = JSONSL_PATH_STRING;
885e649f75aSMark Nunberg        curidx = 1;
886e649f75aSMark Nunberg        while (pathret > 0 && curidx < count) {
887e649f75aSMark Nunberg            pathret = populate_component(cur, components + curidx, &cur, errp);
888e649f75aSMark Nunberg            if (pathret > 0) {
889e649f75aSMark Nunberg                curidx++;
890e649f75aSMark Nunberg            } else {
891e649f75aSMark Nunberg                break;
892e649f75aSMark Nunberg            }
893e649f75aSMark Nunberg        }
894e649f75aSMark Nunberg
895e649f75aSMark Nunberg        if (pathret == JSONSL_PATH_INVALID) {
896e649f75aSMark Nunberg            JPR_BAIL(JSONSL_ERROR_JPR_BADPATH);
897e649f75aSMark Nunberg        }
898e649f75aSMark Nunberg    } else {
899e649f75aSMark Nunberg        curidx = 1;
900e649f75aSMark Nunberg    }
901e649f75aSMark Nunberg
902e649f75aSMark Nunberg    path--; /*revert path to leading '/' */
903e649f75aSMark Nunberg    origlen = strlen(path) + 1;
904bfb89cbcSMark Nunberg    ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret));
905e649f75aSMark Nunberg    if (!ret) {
906e649f75aSMark Nunberg        JPR_BAIL(JSONSL_ERROR_ENOMEM);
907e649f75aSMark Nunberg    }
908bfb89cbcSMark Nunberg    ret->orig = (char *)malloc(origlen);
909e649f75aSMark Nunberg    if (!ret->orig) {
910e649f75aSMark Nunberg        JPR_BAIL(JSONSL_ERROR_ENOMEM);
911e649f75aSMark Nunberg    }
912e649f75aSMark Nunberg    ret->components = components;
913e649f75aSMark Nunberg    ret->ncomponents = curidx;
914e649f75aSMark Nunberg    ret->basestr = my_copy;
915e649f75aSMark Nunberg    ret->norig = origlen-1;
916e649f75aSMark Nunberg    strcpy(ret->orig, path);
917e649f75aSMark Nunberg
918e649f75aSMark Nunberg    return ret;
919e649f75aSMark Nunberg
920e649f75aSMark Nunberg    GT_ERROR:
921e649f75aSMark Nunberg    free(my_copy);
922e649f75aSMark Nunberg    free(components);
923e649f75aSMark Nunberg    if (ret) {
924e649f75aSMark Nunberg        free(ret->orig);
925e649f75aSMark Nunberg    }
926e649f75aSMark Nunberg    free(ret);
927e649f75aSMark Nunberg    return NULL;
928e649f75aSMark Nunberg#undef JPR_BAIL
929e649f75aSMark Nunberg}
930e649f75aSMark Nunberg
931e649f75aSMark Nunbergvoid jsonsl_jpr_destroy(jsonsl_jpr_t jpr)
932e649f75aSMark Nunberg{
933e649f75aSMark Nunberg    free(jpr->components);
934e649f75aSMark Nunberg    free(jpr->basestr);
935e649f75aSMark Nunberg    free(jpr->orig);
936e649f75aSMark Nunberg    free(jpr);
937e649f75aSMark Nunberg}
938e649f75aSMark Nunberg
9395cf05eafSMark Nunberg/**
9405cf05eafSMark Nunberg * Call when there is a possibility of a match, either as a final match or
9415cf05eafSMark Nunberg * as a path within a match
9425cf05eafSMark Nunberg * @param jpr The JPR path
9435cf05eafSMark Nunberg * @param component Component corresponding to the current element
9445cf05eafSMark Nunberg * @param prlevel The level of the *parent*
9455cf05eafSMark Nunberg * @param chtype The type of the child
9465cf05eafSMark Nunberg * @return Match status
9475cf05eafSMark Nunberg */
9485cf05eafSMark Nunbergstatic jsonsl_jpr_match_t
9495cf05eafSMark Nunbergjsonsl__match_continue(jsonsl_jpr_t jpr,
9505cf05eafSMark Nunberg                       const struct jsonsl_jpr_component_st *component,
9515cf05eafSMark Nunberg                       unsigned prlevel, unsigned chtype)
9525cf05eafSMark Nunberg{
9535cf05eafSMark Nunberg    const struct jsonsl_jpr_component_st *next_comp = component + 1;
9545cf05eafSMark Nunberg    if (prlevel == jpr->ncomponents - 1) {
9555cf05eafSMark Nunberg        /* This is the match. Check the expected type of the match against
9565cf05eafSMark Nunberg         * the child */
9575cf05eafSMark Nunberg        if (jpr->match_type == 0 || jpr->match_type == chtype) {
9585cf05eafSMark Nunberg            return JSONSL_MATCH_COMPLETE;
9595cf05eafSMark Nunberg        } else {
9605cf05eafSMark Nunberg            return JSONSL_MATCH_TYPE_MISMATCH;
9615cf05eafSMark Nunberg        }
9625cf05eafSMark Nunberg    }
9635cf05eafSMark Nunberg    if (chtype == JSONSL_T_LIST) {
9645cf05eafSMark Nunberg        if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
9655cf05eafSMark Nunberg            return JSONSL_MATCH_POSSIBLE;
9665cf05eafSMark Nunberg        } else {
9675cf05eafSMark Nunberg            return JSONSL_MATCH_TYPE_MISMATCH;
9685cf05eafSMark Nunberg        }
9695cf05eafSMark Nunberg    } else if (chtype == JSONSL_T_OBJECT) {
9705cf05eafSMark Nunberg        if (next_comp->ptype == JSONSL_PATH_NUMERIC) {
9715cf05eafSMark Nunberg            return JSONSL_MATCH_TYPE_MISMATCH;
972