xref: /3.0.3-GA/platform/src/JSON_checker.c (revision b8af0d5e)
1/* JSON_checker.c */
2
3/* 2007-08-24 */
4
5/*
6Copyright (c) 2005 JSON.org
7
8Permission is hereby granted, free of charge, to any person obtaining a copy
9of this software and associated documentation files (the "Software"), to deal
10in the Software without restriction, including without limitation the rights
11to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12copies of the Software, and to permit persons to whom the Software is
13furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included in all
16copies or substantial portions of the Software.
17
18The Software shall be used for Good, not Evil.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26SOFTWARE.
27*/
28
29#include <stdlib.h>
30#include "JSON_checker.h"
31
32typedef struct JSON_checker_struct {
33    int state;
34    int depth;
35    int top;
36    int* stack;
37} * JSON_checker;
38
39
40#define true  1
41#define false 0
42#define __   -1     /* the universal error code */
43
44/*
45    Characters are mapped into these 31 character classes. This allows for
46    a significant reduction in the size of the state transition table.
47*/
48
49enum classes {
50    C_SPACE,  /* space */
51    C_WHITE,  /* other whitespace */
52    C_LCURB,  /* {  */
53    C_RCURB,  /* } */
54    C_LSQRB,  /* [ */
55    C_RSQRB,  /* ] */
56    C_COLON,  /* : */
57    C_COMMA,  /* , */
58    C_QUOTE,  /* " */
59    C_BACKS,  /* \ */
60    C_SLASH,  /* / */
61    C_PLUS,   /* + */
62    C_MINUS,  /* - */
63    C_POINT,  /* . */
64    C_ZERO ,  /* 0 */
65    C_DIGIT,  /* 123456789 */
66    C_LOW_A,  /* a */
67    C_LOW_B,  /* b */
68    C_LOW_C,  /* c */
69    C_LOW_D,  /* d */
70    C_LOW_E,  /* e */
71    C_LOW_F,  /* f */
72    C_LOW_L,  /* l */
73    C_LOW_N,  /* n */
74    C_LOW_R,  /* r */
75    C_LOW_S,  /* s */
76    C_LOW_T,  /* t */
77    C_LOW_U,  /* u */
78    C_ABCDF,  /* ABCDF */
79    C_E,      /* E */
80    C_ETC,    /* everything else */
81    NR_CLASSES
82};
83
84static int ascii_class[128] = {
85/*
86    This array maps the 128 ASCII characters into character classes.
87    The remaining Unicode characters should be mapped to C_ETC.
88    Non-whitespace control characters are errors.
89*/
90    __,      __,      __,      __,      __,      __,      __,      __,
91    __,      C_WHITE, C_WHITE, __,      __,      C_WHITE, __,      __,
92    __,      __,      __,      __,      __,      __,      __,      __,
93    __,      __,      __,      __,      __,      __,      __,      __,
94
95    C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
96    C_ETC,   C_ETC,   C_ETC,   C_PLUS,  C_COMMA, C_MINUS, C_POINT, C_SLASH,
97    C_ZERO,  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
98    C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
99
100    C_ETC,   C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E,     C_ABCDF, C_ETC,
101    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
102    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
103    C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_BACKS, C_RSQRB, C_ETC,   C_ETC,
104
105    C_ETC,   C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
106    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_L, C_ETC,   C_LOW_N, C_ETC,
107    C_ETC,   C_ETC,   C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC,   C_ETC,
108    C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
109};
110
111
112/*
113    The state codes.
114*/
115enum states {
116    GO,  /* start    */
117    OK,  /* ok       */
118    OB,  /* object   */
119    KE,  /* key      */
120    CO,  /* colon    */
121    VA,  /* value    */
122    AR,  /* array    */
123    ST,  /* string   */
124    ES,  /* escape   */
125    U1,  /* u1       */
126    U2,  /* u2       */
127    U3,  /* u3       */
128    U4,  /* u4       */
129    MI,  /* minus    */
130    ZE,  /* zero     */
131    IN,  /* integer  */
132    FR,  /* fraction */
133    E1,  /* e        */
134    E2,  /* ex       */
135    E3,  /* exp      */
136    T1,  /* tr       */
137    T2,  /* tru      */
138    T3,  /* true     */
139    F1,  /* fa       */
140    F2,  /* fal      */
141    F3,  /* fals     */
142    F4,  /* false    */
143    N1,  /* nu       */
144    N2,  /* nul      */
145    N3,  /* null     */
146    NR_STATES
147};
148
149
150static int state_transition_table[NR_STATES][NR_CLASSES] = {
151/*
152    The state transition table takes the current state and the current symbol,
153    and returns either a new state or an action. An action is represented as a
154    negative number. A JSON text is accepted if at the end of the text the
155    state is OK and if the mode is MODE_DONE.
156
157                 white                                      1-9                                   ABCDF  etc
158             space |  {  }  [  ]  :  ,  "  \  /  +  -  .  0  |  a  b  c  d  e  f  l  n  r  s  t  u  |  E  |*/
159/*start  GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
160/*ok     OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
161/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
162/*key    KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
163/*colon  CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
164/*value  VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
165/*array  AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
166/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
167/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__},
168/*u1     U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__},
169/*u2     U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__},
170/*u3     U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__},
171/*u4     U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__},
172/*minus  MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
173/*zero   ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
174/*int    IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
175/*frac   FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
176/*e      E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
177/*ex     E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
178/*exp    E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
179/*tr     T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__},
180/*tru    T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__},
181/*true   T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
182/*fa     F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
183/*fal    F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__},
184/*fals   F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__},
185/*false  F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
186/*nu     N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__},
187/*nul    N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__},
188/*null   N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__},
189};
190
191
192/*
193    These modes can be pushed on the stack.
194*/
195enum modes {
196    MODE_ARRAY,
197    MODE_DONE,
198    MODE_KEY,
199    MODE_OBJECT
200};
201
202static int
203reject(JSON_checker jc)
204{
205/*
206    Delete the JSON_checker object.
207*/
208    free((void*)jc->stack);
209    free((void*)jc);
210    return false;
211}
212
213
214static int
215push(JSON_checker jc, int mode)
216{
217/*
218    Push a mode onto the stack. Return false if there is overflow.
219*/
220    jc->top += 1;
221    if (jc->top >= jc->depth) {
222        return false;
223    }
224    jc->stack[jc->top] = mode;
225    return true;
226}
227
228
229static int
230pop(JSON_checker jc, int mode)
231{
232/*
233    Pop the stack, assuring that the current mode matches the expectation.
234    Return false if there is underflow or if the modes mismatch.
235*/
236    if (jc->top < 0 || jc->stack[jc->top] != mode) {
237        return false;
238    }
239    jc->top -= 1;
240    return true;
241}
242
243
244static JSON_checker
245new_JSON_checker(int depth)
246{
247/*
248    new_JSON_checker starts the checking process by constructing a JSON_checker
249    object. It takes a depth parameter that restricts the level of maximum
250    nesting.
251
252    To continue the process, call JSON_checker_char for each character in the
253    JSON text, and then call JSON_checker_done to obtain the final result.
254    These functions are fully reentrant.
255
256    The JSON_checker object will be deleted by JSON_checker_done.
257    JSON_checker_char will delete the JSON_checker object if it sees an error.
258*/
259    JSON_checker jc = (JSON_checker)malloc(sizeof(struct JSON_checker_struct));
260    /* Modified- we want to accept JSON values, not just JSON-Texts */
261    jc->state = VA;
262    jc->depth = depth;
263    jc->top = -1;
264    jc->stack = (int*)calloc(depth, sizeof(int));
265    push(jc, MODE_DONE);
266    return jc;
267}
268
269
270static int
271JSON_checker_char(JSON_checker jc, int next_char)
272{
273/*
274    After calling new_JSON_checker, call this function for each character (or
275    partial character) in your JSON text. It can accept UTF-8, UTF-16, or
276    UTF-32. It returns true if things are looking ok so far. If it rejects the
277    text, it deletes the JSON_checker object and returns false.
278*/
279    int next_class, next_state;
280/*
281    Determine the character's class.
282*/
283    if (next_char < 0) {
284        return reject(jc);
285    }
286    if (next_char >= 128) {
287        next_class = C_ETC;
288    } else {
289        next_class = ascii_class[next_char];
290        if (next_class <= __) {
291            return reject(jc);
292        }
293    }
294/*
295    Get the next state from the state transition table.
296*/
297    next_state = state_transition_table[jc->state][next_class];
298    if (next_state >= 0) {
299/*
300    Change the state.
301*/
302        jc->state = next_state;
303    } else {
304/*
305    Or perform one of the actions.
306*/
307        switch (next_state) {
308/* empty } */
309        case -9:
310            if (!pop(jc, MODE_KEY)) {
311                return reject(jc);
312            }
313            jc->state = OK;
314            break;
315
316/* } */ case -8:
317            if (!pop(jc, MODE_OBJECT)) {
318                return reject(jc);
319            }
320            jc->state = OK;
321            break;
322
323/* ] */ case -7:
324            if (!pop(jc, MODE_ARRAY)) {
325                return reject(jc);
326            }
327            jc->state = OK;
328            break;
329
330/* { */ case -6:
331            if (!push(jc, MODE_KEY)) {
332                return reject(jc);
333            }
334            jc->state = OB;
335            break;
336
337/* [ */ case -5:
338            if (!push(jc, MODE_ARRAY)) {
339                return reject(jc);
340            }
341            jc->state = AR;
342            break;
343
344/* " */ case -4:
345            switch (jc->stack[jc->top]) {
346            case MODE_KEY:
347                jc->state = CO;
348                break;
349            case MODE_ARRAY:
350            case MODE_OBJECT:
351            /*
352              Modified- we want to accept JSON values, not just JSON-Texts, this
353              allows us to accept bare strings.
354            */
355            case MODE_DONE:
356                jc->state = OK;
357                break;
358            default:
359                return reject(jc);
360            }
361            break;
362
363/* , */ case -3:
364            switch (jc->stack[jc->top]) {
365            case MODE_OBJECT:
366/*
367    A comma causes a flip from object mode to key mode.
368*/
369                if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
370                    return reject(jc);
371                }
372                jc->state = KE;
373                break;
374            case MODE_ARRAY:
375                jc->state = VA;
376                break;
377            default:
378                return reject(jc);
379            }
380            break;
381
382/* : */ case -2:
383/*
384    A colon causes a flip from key mode to object mode.
385*/
386            if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
387                return reject(jc);
388            }
389            jc->state = VA;
390            break;
391/*
392    Bad action.
393*/
394        default:
395            return reject(jc);
396        }
397    }
398    return true;
399}
400
401
402static int
403JSON_checker_done(JSON_checker jc)
404{
405/*
406    The JSON_checker_done function should be called after all of the characters
407    have been processed, but only if every call to JSON_checker_char returned
408    true. This function deletes the JSON_checker and returns true if the JSON
409    text was accepted.
410*/
411    int result = (jc->state == OK) && pop(jc, MODE_DONE);
412    reject(jc);
413    return result;
414}
415
416/* Check for both UTF-8ness and JSONness in one pass */
417int
418checkUTF8JSON(const unsigned char* data, size_t size) {
419    int expect = 0; /* Expect UTF code point to extend this many bytes */
420    int badjson = 0;
421    int badutf = 0;
422    const unsigned char *end = data + size;
423    JSON_checker jc = new_JSON_checker((int)(size/2) + 1);
424    for(;data < end; data++) {
425        if(!JSON_checker_char(jc, *data)) {
426            badjson = 1;
427            break;
428        }
429
430        if(*data <= 0x7F) {
431            if(expect != 0) {
432                /* Must not be expecting >0x7F. */
433                badutf = 1;
434                break;
435            }
436            continue;
437        }
438
439        if((*data & 0xC0) == 0xC0) {
440            if(expect != 0) {
441               /* Beginning of UTF-8 multi-byte sequence inside of another one. */
442                badutf = 1;
443                break;
444            }
445            expect++;
446            if(*data & 0x20) expect++;
447            if((*data & 0x10) && expect == 2) expect++;
448            /* Verify zero bit separates count bits and codepoint bits */
449            if(expect == 3 && (*data & 0x8)) return false;
450            continue;
451        }
452
453        if(expect) {
454            expect--;
455        } else {
456           /* Got > 0x7F when not expecting it */
457            badutf = 1;
458            break;
459        }
460    }
461    if(!badjson) {
462        /* Feed fake space to the validator to force it to finish validating */
463        /* numerical values, iff it hasn't marked the current stream as valid */
464        if(jc->state != OK) {
465            badjson = !JSON_checker_char(jc, 32);
466        }
467        if(!badjson) {
468            badjson = !JSON_checker_done(jc);
469        }
470    }
471    return (!badjson && !badutf);
472}
473