xref: /4.0.0/platform/src/JSON_checker.c (revision a44eb642)
1/* JSON_checker.c */
2
3/* 2007-08-24 */
4
5/*
6Copyright (c) 2005 JSON.org
7
8Permission is hereby granted, free of charge, to any person obtaining a copy
9of this software and associated documentation files (the "Software"), to deal
10in the Software without restriction, including without limitation the rights
11to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12copies of the Software, and to permit persons to whom the Software is
13furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included in all
16copies or substantial portions of the Software.
17
18The Software shall be used for Good, not Evil.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26SOFTWARE.
27*/
28
#include <limits.h>
#include <stdlib.h>
#include "JSON_checker.h"
31
/*
    State of one validation run. JSON_checker is a pointer to this structure;
    it is created by new_JSON_checker and released by reject.
*/
typedef struct JSON_checker_struct {
    int state;   /* current row index into state_transition_table */
    int depth;   /* number of slots allocated for the mode stack */
    int top;     /* index of the topmost used slot, -1 while the stack is empty */
    int *stack;  /* heap-allocated array holding the pushed modes */
} *JSON_checker;
38
39
/* Boolean results used by the checker functions in this file. */
#define true  1
#define false 0
/*
    The universal error code: an entry of this value in a lookup table means
    "reject". The expansion is parenthesized so the negative literal cannot
    bind to neighbouring operators.
    NOTE(review): identifiers containing a double underscore are reserved for
    the implementation; the name is kept because the tables below rely on it.
*/
#define __   (-1)
43
44/*
45    Characters are mapped into these 31 character classes. This allows for
46    a significant reduction in the size of the state transition table.
47*/
48
enum classes {
    C_SPACE = 0, /* ' '                                   */
    C_WHITE,     /* tab, line feed, carriage return       */
    C_LCURB,     /* '{'                                   */
    C_RCURB,     /* '}'                                   */
    C_LSQRB,     /* '['                                   */
    C_RSQRB,     /* ']'                                   */
    C_COLON,     /* ':'                                   */
    C_COMMA,     /* ','                                   */
    C_QUOTE,     /* '"'                                   */
    C_BACKS,     /* '\\'                                  */
    C_SLASH,     /* '/'                                   */
    C_PLUS,      /* '+'                                   */
    C_MINUS,     /* '-'                                   */
    C_POINT,     /* '.'                                   */
    C_ZERO,      /* '0'                                   */
    C_DIGIT,     /* '1' through '9'                       */
    C_LOW_A,     /* 'a'                                   */
    C_LOW_B,     /* 'b'                                   */
    C_LOW_C,     /* 'c'                                   */
    C_LOW_D,     /* 'd'                                   */
    C_LOW_E,     /* 'e'                                   */
    C_LOW_F,     /* 'f'                                   */
    C_LOW_L,     /* 'l'                                   */
    C_LOW_N,     /* 'n'                                   */
    C_LOW_R,     /* 'r'                                   */
    C_LOW_S,     /* 's'                                   */
    C_LOW_T,     /* 't'                                   */
    C_LOW_U,     /* 'u'                                   */
    C_ABCDF,     /* upper-case hex digits other than 'E'  */
    C_E,         /* 'E'                                   */
    C_ETC,       /* any other accepted character          */
    NR_CLASSES   /* number of classes; not a class itself */
};
83
84static int ascii_class[128] = {
85/*
86    This array maps the 128 ASCII characters into character classes.
87    The remaining Unicode characters should be mapped to C_ETC.
88    Non-whitespace control characters are errors.
89*/
90    __,      __,      __,      __,      __,      __,      __,      __,
91    __,      C_WHITE, C_WHITE, __,      __,      C_WHITE, __,      __,
92    __,      __,      __,      __,      __,      __,      __,      __,
93    __,      __,      __,      __,      __,      __,      __,      __,
94
95    C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96    C_ETC,   C_ETC,   C_ETC,   C_PLUS,  C_COMMA, C_MINUS, C_POINT, C_SLASH,
97    C_ZERO,  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
98    C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
99
100    C_ETC,   C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E,     C_ABCDF, C_ETC,
101    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
102    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
103    C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_BACKS, C_RSQRB, C_ETC,   C_ETC,
104
105    C_ETC,   C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
106    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_L, C_ETC,   C_LOW_N, C_ETC,
107    C_ETC,   C_ETC,   C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC,   C_ETC,
108    C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
109};
110
111
112/*
113    The state codes.
114*/
enum states {
    GO = 0,   /* start: nothing consumed yet                    */
    OK,       /* a complete value has just been consumed        */
    OB,       /* just after '{'                                 */
    KE,       /* expecting an object key                        */
    CO,       /* expecting the ':' after a key                  */
    VA,       /* expecting a value                              */
    AR,       /* just after '['                                 */
    ST,       /* inside a string                                */
    ES,       /* just after a backslash inside a string         */
    U1,       /* \u escape, expecting 1st hex digit             */
    U2,       /* \u escape, expecting 2nd hex digit             */
    U3,       /* \u escape, expecting 3rd hex digit             */
    U4,       /* \u escape, expecting 4th hex digit             */
    MI,       /* just after a leading '-'                       */
    ZE,       /* just after a leading '0'                       */
    IN,       /* inside the integer part of a number            */
    FR,       /* inside the fraction part of a number           */
    E1,       /* just after 'e' or 'E'                          */
    E2,       /* just after the exponent sign                   */
    E3,       /* inside the exponent digits                     */
    T1,       /* seen "t",    expecting 'r'                     */
    T2,       /* seen "tr",   expecting 'u'                     */
    T3,       /* seen "tru",  expecting 'e'                     */
    F1,       /* seen "f",    expecting 'a'                     */
    F2,       /* seen "fa",   expecting 'l'                     */
    F3,       /* seen "fal",  expecting 's'                     */
    F4,       /* seen "fals", expecting 'e'                     */
    N1,       /* seen "n",    expecting 'u'                     */
    N2,       /* seen "nu",   expecting 'l'                     */
    N3,       /* seen "nul",  expecting 'l'                     */
    NR_STATES /* number of states; not a state itself           */
};
148
149
150static int state_transition_table[NR_STATES][NR_CLASSES] = {
151/*
152    The state transition table takes the current state and the current symbol,
153    and returns either a new state or an action. An action is represented as a
154    negative number. A JSON text is accepted if at the end of the text the
155    state is OK and if the mode is MODE_DONE.
156
157                 white                                      1-9                                   ABCDF  etc
158             space |  {  }  [  ]  :  ,  "  \  /  +  -  .  0  |  a  b  c  d  e  f  l  n  r  s  t  u  |  E  |*/
159/*start  GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
160/*ok     OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
161/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
162/*key    KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
163/*colon  CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
164/*value  VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
165/*array  AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
166/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
167/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__},
168/*u1     U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__},
169/*u2     U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__},
170/*u3     U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__},
171/*u4     U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__},
172/*minus  MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
173/*zero   ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
174/*int    IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
175/*frac   FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
176/*e      E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
177/*ex     E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
178/*exp    E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
179/*tr     T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__},
180/*tru    T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__},
181/*true   T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
182/*fa     F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
183/*fal    F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__},
184/*fals   F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__},
185/*false  F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
186/*nu     N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__},
187/*nul    N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__},
188/*null   N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__},
189};
190
191
192/*
193    These modes can be pushed on the stack.
194*/
enum modes {
    MODE_ARRAY = 0, /* inside an array                                   */
    MODE_DONE,      /* bottom-of-stack marker pushed when checking starts */
    MODE_KEY,       /* inside an object, at a key                         */
    MODE_OBJECT     /* inside an object, at a value                       */
};
201
202static int
203reject(JSON_checker jc)
204{
205/*
206    Delete the JSON_checker object.
207*/
208    free((void*)jc->stack);
209    free((void*)jc);
210    return false;
211}
212
213
214static int
215push(JSON_checker jc, int mode)
216{
217/*
218    Push a mode onto the stack. Return false if there is overflow.
219*/
220    jc->top += 1;
221    if (jc->top >= jc->depth) {
222        return false;
223    }
224    jc->stack[jc->top] = mode;
225    return true;
226}
227
228
229static int
230pop(JSON_checker jc, int mode)
231{
232/*
233    Pop the stack, assuring that the current mode matches the expectation.
234    Return false if there is underflow or if the modes mismatch.
235*/
236    if (jc->top < 0 || jc->stack[jc->top] != mode) {
237        return false;
238    }
239    jc->top -= 1;
240    return true;
241}
242
243
244static JSON_checker
245new_JSON_checker(int depth)
246{
247/*
248    new_JSON_checker starts the checking process by constructing a JSON_checker
249    object. It takes a depth parameter that restricts the level of maximum
250    nesting.
251
252    To continue the process, call JSON_checker_char for each character in the
253    JSON text, and then call JSON_checker_done to obtain the final result.
254    These functions are fully reentrant.
255
256    The JSON_checker object will be deleted by JSON_checker_done.
257    JSON_checker_char will delete the JSON_checker object if it sees an error.
258*/
259    JSON_checker jc = (JSON_checker)malloc(sizeof(struct JSON_checker_struct));
260    if (jc != NULL) {
261        /* Modified- we want to accept JSON values, not just JSON-Texts */
262        jc->state = VA;
263        jc->depth = depth;
264        jc->top = -1;
265        jc->stack = (int*)calloc(depth, sizeof(int));
266        push(jc, MODE_DONE);
267    }
268    return jc;
269}
270
271
272static int
273JSON_checker_char(JSON_checker jc, int next_char)
274{
275/*
276    After calling new_JSON_checker, call this function for each character (or
277    partial character) in your JSON text. It can accept UTF-8, UTF-16, or
278    UTF-32. It returns true if things are looking ok so far. If it rejects the
279    text, it deletes the JSON_checker object and returns false.
280*/
281    int next_class, next_state;
282/*
283    Determine the character's class.
284*/
285    if (next_char < 0) {
286        return reject(jc);
287    }
288    if (next_char >= 128) {
289        next_class = C_ETC;
290    } else {
291        next_class = ascii_class[next_char];
292        if (next_class <= __) {
293            return reject(jc);
294        }
295    }
296/*
297    Get the next state from the state transition table.
298*/
299    next_state = state_transition_table[jc->state][next_class];
300    if (next_state >= 0) {
301/*
302    Change the state.
303*/
304        jc->state = next_state;
305    } else {
306/*
307    Or perform one of the actions.
308*/
309        switch (next_state) {
310/* empty } */
311        case -9:
312            if (!pop(jc, MODE_KEY)) {
313                return reject(jc);
314            }
315            jc->state = OK;
316            break;
317
318/* } */ case -8:
319            if (!pop(jc, MODE_OBJECT)) {
320                return reject(jc);
321            }
322            jc->state = OK;
323            break;
324
325/* ] */ case -7:
326            if (!pop(jc, MODE_ARRAY)) {
327                return reject(jc);
328            }
329            jc->state = OK;
330            break;
331
332/* { */ case -6:
333            if (!push(jc, MODE_KEY)) {
334                return reject(jc);
335            }
336            jc->state = OB;
337            break;
338
339/* [ */ case -5:
340            if (!push(jc, MODE_ARRAY)) {
341                return reject(jc);
342            }
343            jc->state = AR;
344            break;
345
346/* " */ case -4:
347            switch (jc->stack[jc->top]) {
348            case MODE_KEY:
349                jc->state = CO;
350                break;
351            case MODE_ARRAY:
352            case MODE_OBJECT:
353            /*
354              Modified- we want to accept JSON values, not just JSON-Texts, this
355              allows us to accept bare strings.
356            */
357            case MODE_DONE:
358                jc->state = OK;
359                break;
360            default:
361                return reject(jc);
362            }
363            break;
364
365/* , */ case -3:
366            switch (jc->stack[jc->top]) {
367            case MODE_OBJECT:
368/*
369    A comma causes a flip from object mode to key mode.
370*/
371                if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
372                    return reject(jc);
373                }
374                jc->state = KE;
375                break;
376            case MODE_ARRAY:
377                jc->state = VA;
378                break;
379            default:
380                return reject(jc);
381            }
382            break;
383
384/* : */ case -2:
385/*
386    A colon causes a flip from key mode to object mode.
387*/
388            if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
389                return reject(jc);
390            }
391            jc->state = VA;
392            break;
393/*
394    Bad action.
395*/
396        default:
397            return reject(jc);
398        }
399    }
400    return true;
401}
402
403
404static int
405JSON_checker_done(JSON_checker jc)
406{
407/*
408    The JSON_checker_done function should be called after all of the characters
409    have been processed, but only if every call to JSON_checker_char returned
410    true. This function deletes the JSON_checker and returns true if the JSON
411    text was accepted.
412*/
413    int result = (jc->state == OK) && pop(jc, MODE_DONE);
414    reject(jc);
415    return result;
416}
417
418/* Check for both UTF-8ness and JSONness in one pass */
419int
420checkUTF8JSON(const unsigned char* data, size_t size) {
421    int expect = 0; /* Expect UTF code point to extend this many bytes */
422    int badjson = 0;
423    int badutf = 0;
424    const unsigned char *end = data + size;
425    JSON_checker jc = new_JSON_checker((int)(size/2) + 1);
426    for(;data < end; data++) {
427        if(!JSON_checker_char(jc, *data)) {
428            badjson = 1;
429            break;
430        }
431
432        if(*data <= 0x7F) {
433            if(expect != 0) {
434                /* Must not be expecting >0x7F. */
435                badutf = 1;
436                break;
437            }
438            continue;
439        }
440
441        if((*data & 0xC0) == 0xC0) {
442            if(expect != 0) {
443               /* Beginning of UTF-8 multi-byte sequence inside of another one. */
444                badutf = 1;
445                break;
446            }
447            expect++;
448            if(*data & 0x20) expect++;
449            if((*data & 0x10) && expect == 2) expect++;
450            /* Verify zero bit separates count bits and codepoint bits */
451            if(expect == 3 && (*data & 0x8)) {
452                badutf = 1;
453                break;
454            }
455            continue;
456        }
457
458        if(expect) {
459            expect--;
460        } else {
461           /* Got > 0x7F when not expecting it */
462            badutf = 1;
463            break;
464        }
465    }
466    if(!badjson) {
467        /* Feed fake space to the validator to force it to finish validating */
468        /* numerical values, iff it hasn't marked the current stream as valid */
469        if(jc->state != OK) {
470            badjson = !JSON_checker_char(jc, 32);
471        }
472        if(!badjson) {
473            badjson = !JSON_checker_done(jc);
474        }
475    }
476    return (!badjson && !badutf);
477}
478