xref: /6.6.0/platform/src/JSON_checker.cc (revision 03c56234)
1/* JSON_checker.c */
2
3/* 2016-11-11 */
4
5/*
6Copyright (c) 2005 JSON.org
7
8Permission is hereby granted, free of charge, to any person obtaining a copy
9of this software and associated documentation files (the "Software"), to deal
10in the Software without restriction, including without limitation the rights
11to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12copies of the Software, and to permit persons to whom the Software is
13furnished to do so, subject to the following conditions:
14
15The above copyright notice and this permission notice shall be included in all
16copies or substantial portions of the Software.
17
18The Software shall be used for Good, not Evil.
19
20THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26SOFTWARE.
27*/
28
29#include <stdlib.h>
30#include "JSON_checker.h"
31
/*
    The universal error code. It is used both as the "invalid character"
    marker in ascii_class and as the "no transition" entry in
    state_transition_table.

    The replacement list is parenthesized so the macro always expands to a
    single primary expression, whatever operators surround it.

    NOTE(review): identifiers containing a double underscore are reserved to
    the implementation in C++. The name is kept because both lookup tables
    are written in terms of it; renaming it is a file-wide change.
*/
#define __   (-1)
33
/*
    Every input character is reduced to one of the 31 classes below before it
    is looked up in the state transition table. Working with classes instead
    of raw characters keeps that table at one column per class.

    The enumerator values are used directly as column indices, so the order
    here must match the column order of state_transition_table.
*/

enum classes {
    C_SPACE = 0, /* ' '  the space character             */
    C_WHITE,     /*      whitespace other than space     */
    C_LCURB,     /* '{'                                  */
    C_RCURB,     /* '}'                                  */
    C_LSQRB,     /* '['                                  */
    C_RSQRB,     /* ']'                                  */
    C_COLON,     /* ':'                                  */
    C_COMMA,     /* ','                                  */
    C_QUOTE,     /* '"'                                  */
    C_BACKS,     /* '\'                                  */
    C_SLASH,     /* '/'                                  */
    C_PLUS,      /* '+'                                  */
    C_MINUS,     /* '-'                                  */
    C_POINT,     /* '.'                                  */
    C_ZERO,      /* '0'                                  */
    C_DIGIT,     /* '1' .. '9'                           */
    C_LOW_A,     /* 'a'                                  */
    C_LOW_B,     /* 'b'                                  */
    C_LOW_C,     /* 'c'                                  */
    C_LOW_D,     /* 'd'                                  */
    C_LOW_E,     /* 'e'                                  */
    C_LOW_F,     /* 'f'                                  */
    C_LOW_L,     /* 'l'                                  */
    C_LOW_N,     /* 'n'                                  */
    C_LOW_R,     /* 'r'                                  */
    C_LOW_S,     /* 's'                                  */
    C_LOW_T,     /* 't'                                  */
    C_LOW_U,     /* 'u'                                  */
    C_ABCDF,     /* 'A' 'B' 'C' 'D' 'F'                  */
    C_E,         /* 'E'                                  */
    C_ETC,       /* everything else                      */
    NR_CLASSES   /* number of classes; not a class       */
};
73
74static int ascii_class[128] = {
75/*
76    This array maps the 128 ASCII characters into character classes.
77    The remaining Unicode characters should be mapped to C_ETC.
78    Non-whitespace control characters are errors.
79*/
80    __,      __,      __,      __,      __,      __,      __,      __,
81    __,      C_WHITE, C_WHITE, __,      __,      C_WHITE, __,      __,
82    __,      __,      __,      __,      __,      __,      __,      __,
83    __,      __,      __,      __,      __,      __,      __,      __,
84
85    C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
86    C_ETC,   C_ETC,   C_ETC,   C_PLUS,  C_COMMA, C_MINUS, C_POINT, C_SLASH,
87    C_ZERO,  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
88    C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
89
90    C_ETC,   C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E,     C_ABCDF, C_ETC,
91    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
92    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
93    C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_BACKS, C_RSQRB, C_ETC,   C_ETC,
94
95    C_ETC,   C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
96    C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_L, C_ETC,   C_LOW_N, C_ETC,
97    C_ETC,   C_ETC,   C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC,   C_ETC,
98    C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
99};
100
101
/*
    The states of the checker. The enumerator values are used directly as row
    indices, so the order here must match the row order of
    state_transition_table.
*/
enum states {
    GO = 0, /* start: nothing seen yet                         */
    OK,     /* a complete value has just been seen             */
    OB,     /* just after '{': a key or '}' may follow         */
    KE,     /* after ',' inside an object: a key must follow   */
    CO,     /* after a key: ':' must follow                    */
    VA,     /* a value must follow                             */
    AR,     /* just after '[': a value or ']' may follow       */
    ST,     /* inside a string                                 */
    ES,     /* inside a string, just after '\'                 */
    U1,     /* \u escape: first hex digit expected             */
    U2,     /* \u escape: second hex digit expected            */
    U3,     /* \u escape: third hex digit expected             */
    U4,     /* \u escape: fourth hex digit expected            */
    MI,     /* after '-': a digit must follow                  */
    ZE,     /* after a leading '0'                             */
    IN,     /* inside the integer part                         */
    FR,     /* after '.': a digit must follow                  */
    FS,     /* inside the fraction digits                      */
    E1,     /* after 'e' / 'E': sign or digit must follow      */
    E2,     /* after the exponent sign: a digit must follow    */
    E3,     /* inside the exponent digits                      */
    T1,     /* "t" seen, 'r' expected                          */
    T2,     /* "tr" seen, 'u' expected                         */
    T3,     /* "tru" seen, 'e' expected                        */
    F1,     /* "f" seen, 'a' expected                          */
    F2,     /* "fa" seen, 'l' expected                         */
    F3,     /* "fal" seen, 's' expected                        */
    F4,     /* "fals" seen, 'e' expected                       */
    N1,     /* "n" seen, 'u' expected                          */
    N2,     /* "nu" seen, 'l' expected                         */
    N3,     /* "nul" seen, 'l' expected                        */
    NR_STATES /* number of states; not a state                 */
};
139
140
141static int state_transition_table[NR_STATES][NR_CLASSES] = {
142/*
143    The state transition table takes the current state and the current symbol,
144    and returns either a new state or an action. An action is represented as a
145    negative number. A JSON text is accepted if at the end of the text the
146    state is OK and if the mode is MODE_DONE.
147
148                 white                                      1-9                                   ABCDF  etc
149             space |  {  }  [  ]  :  ,  "  \  /  +  -  .  0  |  a  b  c  d  e  f  l  n  r  s  t  u  |  E  |*/
150/*start  GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
151/*ok     OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
152/*object OB*/ {OB,OB,__,-9,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
153/*key    KE*/ {KE,KE,__,__,__,__,__,__,ST,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
154/*colon  CO*/ {CO,CO,__,__,__,__,-2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
155/*value  VA*/ {VA,VA,-6,__,-5,__,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
156/*array  AR*/ {AR,AR,-6,__,-5,-7,__,__,ST,__,__,__,MI,__,ZE,IN,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__},
157/*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,ES,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
158/*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__},
159/*u1     U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__},
160/*u2     U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__},
161/*u3     U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__},
162/*u4     U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ST,ST,ST,ST,ST,ST,ST,ST,__,__,__,__,__,__,ST,ST,__},
163/*minus  MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IN,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
164/*zero   ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,__,__,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
165/*int    IN*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,FR,IN,IN,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
166/*frac   FR*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,FS,FS,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
167/*fracs  FS*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FS,FS,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__},
168/*e      E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
169/*ex     E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
170/*exp    E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
171/*tr     T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__},
172/*tru    T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__},
173/*true   T3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
174/*fa     F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
175/*fal    F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__},
176/*fals   F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__},
177/*false  F4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__},
178/*nu     N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__},
179/*nul    N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__},
180/*null   N3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__}
181};
182
183
184static bool JSON_checker_char(JSON_checker::Instance &jc, int next_char)
185{
186/*
187    After calling new_JSON_checker, call this function for each character (or
188    partial character) in your JSON text. It can accept UTF-8, UTF-16, or
189    UTF-32. It returns true if things are looking ok so far. If it rejects the
190    text, it deletes the JSON_checker object and returns false.
191*/
192    int next_class, next_state;
193/*
194    Determine the character's class.
195*/
196    if (next_char < 0) {
197        return false;
198    }
199    if (next_char >= 128) {
200        next_class = C_ETC;
201    } else {
202        next_class = ascii_class[next_char];
203        if (next_class <= __) {
204            return false;
205        }
206    }
207/*
208    Get the next state from the state transition table.
209*/
210    next_state = state_transition_table[jc.state][next_class];
211    if (next_state >= 0) {
212/*
213    Change the state.
214*/
215        jc.state = next_state;
216    } else {
217/*
218    Or perform one of the actions.
219*/
220        switch (next_state) {
221/* empty } */
222        case -9:
223            if (!jc.pop(JSON_checker::Modes::KEY)) {
224                return false;
225            }
226            jc.state = OK;
227            break;
228
229/* } */ case -8:
230            if (!jc.pop(JSON_checker::Modes::OBJECT)) {
231                return false;
232            }
233            jc.state = OK;
234            break;
235
236/* ] */ case -7:
237            if (!jc.pop(JSON_checker::Modes::ARRAY)) {
238                return false;
239            }
240            jc.state = OK;
241            break;
242
243/* { */ case -6:
244            if (!jc.push(JSON_checker::Modes::KEY)) {
245                return false;
246            }
247            jc.state = OB;
248            break;
249
250/* [ */ case -5:
251            if (!jc.push(JSON_checker::Modes::ARRAY)) {
252                return false;
253            }
254            jc.state = AR;
255            break;
256
257/* " */ case -4:
258            switch (jc.stack.top()) {
259            case JSON_checker::Modes::KEY:
260                jc.state = CO;
261                break;
262            case JSON_checker::Modes::ARRAY:
263            case JSON_checker::Modes::OBJECT:
264            /*
265              Modified- we want to accept JSON values, not just JSON-Texts, this
266              allows us to accept bare strings.
267            */
268            case JSON_checker::Modes::DONE:
269                jc.state = OK;
270                break;
271            default:
272                return false;
273            }
274            break;
275
276/* , */ case -3:
277            switch (jc.stack.top()) {
278            case JSON_checker::Modes::OBJECT:
279/*
280    A comma causes a flip from object mode to key mode.
281*/
282                if (!jc.pop(JSON_checker::Modes::OBJECT) || !jc.push(JSON_checker::Modes::KEY)) {
283                    return false;
284                }
285                jc.state = KE;
286                break;
287            case JSON_checker::Modes::ARRAY:
288                jc.state = VA;
289                break;
290            default:
291                return false;
292            }
293            break;
294
295/* : */ case -2:
296/*
297    A colon causes a flip from key mode to object mode.
298*/
299            if (!jc.pop(JSON_checker::Modes::KEY) || !jc.push(JSON_checker::Modes::OBJECT)) {
300                return false;
301            }
302            jc.state = VA;
303            break;
304/*
305    Bad action.
306*/
307        default:
308            return false;
309        }
310    }
311    return true;
312}
313
314static bool JSON_checker_done(JSON_checker::Instance &jc)
315{
316/*
317    The JSON_checker_done function should be called after all of the characters
318    have been processed, but only if every call to JSON_checker_char returned
319    true. This function deletes the JSON_checker and returns true if the JSON
320    text was accepted.
321*/
322    return (jc.state == OK) && jc.pop(JSON_checker::Modes::DONE);
323}
324
325/* Check for both UTF-8ness and JSONness in one pass */
326static bool checkUTF8JSON(JSON_checker::Instance &jc,
327                          const unsigned char* data,
328                          size_t size) {
329    int expect = 0; /* Expect UTF code point to extend this many bytes */
330    int badjson = 0;
331    int badutf = 0;
332    const unsigned char *end = data + size;
333    jc.reset();
334    for(;data < end; data++) {
335        if(!JSON_checker_char(jc, *data)) {
336            badjson = 1;
337            break;
338        }
339
340        if(*data <= 0x7F) {
341            if(expect != 0) {
342                /* Must not be expecting >0x7F. */
343                badutf = 1;
344                break;
345            }
346            continue;
347        }
348
349        if((*data & 0xC0) == 0xC0) {
350            if(expect != 0) {
351               /* Beginning of UTF-8 multi-byte sequence inside of another one. */
352                badutf = 1;
353                break;
354            }
355            expect++;
356            if(*data & 0x20) expect++;
357            if((*data & 0x10) && expect == 2) expect++;
358            /* Verify zero bit separates count bits and codepoint bits */
359            if(expect == 3 && (*data & 0x8)) {
360                badutf = 1;
361                break;
362            }
363            continue;
364        }
365
366        if(expect) {
367            expect--;
368        } else {
369           /* Got > 0x7F when not expecting it */
370            badutf = 1;
371            break;
372        }
373    }
374    if(!badjson) {
375        /* Feed fake space to the validator to force it to finish validating */
376        /* numerical values, iff it hasn't marked the current stream as valid */
377        if(jc.state != OK) {
378            badjson = !JSON_checker_char(jc, 32);
379        }
380        if(!badjson) {
381            badjson = !JSON_checker_done(jc);
382        }
383    }
384    return (!badjson && !badutf);
385}
386
387// Backwards compatible to avoid build breaks..
388bool checkUTF8JSON(const unsigned char* data, size_t size)
389{
390    JSON_checker::Instance instance;
391    try {
392        return checkUTF8JSON(instance, data, size);
393    } catch (std::bad_alloc&) {
394        return false;
395    }
396}
397
398JSON_checker::Validator::Validator() {
399    // empty
400}
401
402bool JSON_checker::Validator::validate(const uint8_t* data, size_t size) {
403    return checkUTF8JSON(instance, data, size);
404}
405
406bool JSON_checker::Validator::validate(const std::vector<uint8_t>& data) {
407    return validate(data.data(), static_cast<size_t>(data.size()));
408}
409
410bool JSON_checker::Validator::validate(const std::string& data) {
411    return validate(reinterpret_cast<const uint8_t*>(data.data()),
412                    static_cast<size_t>(data.length()));
413}
414
415JSON_checker::Instance::Instance() {
416    reset();
417}
418
419void JSON_checker::Instance::reset() {
420    state = VA;
421    stack.clear();
422    push(Modes::DONE);
423}
424
425bool JSON_checker::Instance::push(Modes mode) {
426    stack.push(mode);
427    return true;
428}
429
430bool JSON_checker::Instance::pop(Modes mode)
431{
432    if (stack.empty()) {
433        // stack underflow
434        return false;
435    }
436
437    Modes val = stack.top();
438    stack.pop();
439
440    if (val != mode) {
441        // unexpected element on the stack!!
442        return false;
443    }
444
445    return true;
446}
447