/* xref: /5.5.2/couchdb/src/ejson/yajl/yajl_parser.c (revision c6a29006) */
/*
 * Copyright 2010, Lloyd Hilaiel.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *
 *  3. Neither the name of Lloyd Hilaiel nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
32
33#include "yajl_lex.h"
34#include "yajl_parser.h"
35#include "yajl_encode.h"
36#include "yajl_bytestack.h"
37
38#include <stdlib.h>
39#include <limits.h>
40#include <errno.h>
41#include <stdio.h>
42#include <string.h>
43#include <ctype.h>
44#include <assert.h>
45#include <math.h>
46
47static const char *
48yajl_parser_error_to_string(yajl_parser_error error)
49{
50    switch (error) {
51        case yajl_parser_e_ok:
52            return "ok, no error";
53        case yajl_parser_client_cancelled:
54            return "client cancelled parse via callback return value";
55        case yajl_parser_integer_overflow:
56            return "integer overflow";
57        case yajl_parser_numeric_overflow:
58            return "numeric (floating point) overflow";
59        case yajl_parser_invalid_token:
60            return "unallowed token at this point in JSON text";
61        case yajl_parser_internal_invalid_token:
62            return "invalid token, internal error";
63        case yajl_parser_key_must_be_string:
64            return "invalid object key (must be a string)";
65        case yajl_parser_pair_missing_colon:
66            return "object key and value must be separated by a colon (':')";
67        case yajl_parser_bad_token_after_map_value:
68            return "after key and value, inside map, I expect ',' or '}'";
69        case yajl_parser_bad_token_after_array_value:
70            return "after array element, I expect ',' or ']'";
71    }
72    return "unknown error code";
73}
74
75
76unsigned char *
77yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
78                         unsigned int jsonTextLen, int verbose)
79{
80    unsigned int offset = hand->bytesConsumed;
81    unsigned char * str;
82    const char * errorType = NULL;
83    const char * errorText = NULL;
84    char text[72];
85    const char * arrow = "                     (right here) ------^\n";
86
87    if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
88        errorType = "parse";
89        errorText = yajl_parser_error_to_string(hand->parserError);
90    } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
91        errorType = "lexical";
92        errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
93    } else {
94        errorType = "unknown";
95    }
96
97    {
98        unsigned int memneeded = 0;
99        memneeded += strlen(errorType);
100        memneeded += strlen(" error");
101        if (errorText != NULL) {
102            memneeded += strlen(": ");
103            memneeded += strlen(errorText);
104        }
105        str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
106        str[0] = 0;
107        strcat((char *) str, errorType);
108        strcat((char *) str, " error");
109        if (errorText != NULL) {
110            strcat((char *) str, ": ");
111            strcat((char *) str, errorText);
112        }
113        strcat((char *) str, "\n");
114    }
115
116    /* now we append as many spaces as needed to make sure the error
117     * falls at char 41, if verbose was specified */
118    if (verbose) {
119        unsigned int start, end, i;
120        unsigned int spacesNeeded;
121
122        spacesNeeded = (offset < 30 ? 40 - offset : 10);
123        start = (offset >= 30 ? offset - 30 : 0);
124        end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
125
126        for (i=0;i<spacesNeeded;i++) text[i] = ' ';
127
128        for (;start < end;start++, i++) {
129            if (jsonText[start] != '\n' && jsonText[start] != '\r')
130            {
131                text[i] = jsonText[start];
132            }
133            else
134            {
135                text[i] = ' ';
136            }
137        }
138        assert(i <= 71);
139        text[i++] = '\n';
140        text[i] = 0;
141        {
142            char * newStr = (char *)
143                YA_MALLOC(&(hand->alloc), (strlen((char *) str) +
144                                           strlen((char *) text) +
145                                           strlen(arrow) + 1));
146            newStr[0] = 0;
147            strcat((char *) newStr, (char *) str);
148            strcat((char *) newStr, text);
149            strcat((char *) newStr, arrow);
150            YA_FREE(&(hand->alloc), str);
151            str = (unsigned char *) newStr;
152        }
153    }
154    return str;
155}
156
/* check for client cancellation: when the callback result (x) is zero,
 * mark the current state as a parse error, record the cancellation on
 * the handle and return from the enclosing function.  Expects a variable
 * named `hand` to be in scope.  The do { } while (0) wrapper makes the
 * invocation plus its trailing semicolon exactly one statement, so the
 * macro is safe as the body of an unbraced if/else. */
#define _CC_CHK(x)                                                    \
    do {                                                              \
        if (!(x)) {                                                   \
            yajl_bs_set(hand->stateStack, yajl_state_parse_error);    \
            hand->parserError = yajl_parser_client_cancelled;         \
            return yajl_status_client_canceled;                       \
        }                                                             \
    } while (0)
164
165
166yajl_status
167yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
168              unsigned int jsonTextLen)
169{
170    yajl_tok tok;
171    const unsigned char * buf;
172    unsigned int bufLen;
173    unsigned int * offset = &(hand->bytesConsumed);
174
175    *offset = 0;
176
177
178  around_again:
179    switch (yajl_bs_current(hand->stateStack)) {
180        case yajl_state_parse_complete:
181            return yajl_status_ok;
182        case yajl_state_lexical_error:
183        case yajl_state_parse_error:
184            return yajl_status_error;
185        case yajl_state_start:
186        case yajl_state_map_need_val:
187        case yajl_state_array_need_val:
188        case yajl_state_array_start: {
189            /* for arrays and maps, we advance the state for this
190             * depth, then push the state of the next depth.
191             * If an error occurs during the parsing of the nesting
192             * enitity, the state at this level will not matter.
193             * a state that needs pushing will be anything other
194             * than state_start */
195            yajl_state stateToPush = yajl_state_start;
196
197            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
198                               offset, &buf, &bufLen);
199
200            switch (tok) {
201                case yajl_tok_eof:
202                    return yajl_status_insufficient_data;
203                case yajl_tok_error:
204                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
205                    goto around_again;
206                case yajl_tok_string:
207                    if (hand->callbacks && hand->callbacks->yajl_string) {
208                        _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
209                                                             buf, bufLen));
210                    }
211                    break;
212                case yajl_tok_string_with_escapes:
213                    if (hand->callbacks && hand->callbacks->yajl_string) {
214                        yajl_buf_clear(hand->decodeBuf);
215                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
216                        _CC_CHK(hand->callbacks->yajl_string(
217                                    hand->ctx, yajl_buf_data(hand->decodeBuf),
218                                    yajl_buf_len(hand->decodeBuf)));
219                    }
220                    break;
221                case yajl_tok_bool:
222                    if (hand->callbacks && hand->callbacks->yajl_boolean) {
223                        _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
224                                                              *buf == 't'));
225                    }
226                    break;
227                case yajl_tok_null:
228                    if (hand->callbacks && hand->callbacks->yajl_null) {
229                        _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
230                    }
231                    break;
232                case yajl_tok_left_bracket:
233                    if (hand->callbacks && hand->callbacks->yajl_start_map) {
234                        _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
235                    }
236                    stateToPush = yajl_state_map_start;
237                    break;
238                case yajl_tok_left_brace:
239                    if (hand->callbacks && hand->callbacks->yajl_start_array) {
240                        _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
241                    }
242                    stateToPush = yajl_state_array_start;
243                    break;
244                case yajl_tok_integer:
245                    /*
246                     * note.  strtol does not respect the length of
247                     * the lexical token.  in a corner case where the
248                     * lexed number is a integer with a trailing zero,
249                     * immediately followed by the end of buffer,
250                     * sscanf could run off into oblivion and cause a
251                     * crash.  for this reason we copy the integer
252                     * (and doubles), into our parse buffer (the same
253                     * one used for unescaping strings), before
254                     * calling strtol.  yajl_buf ensures null padding,
255                     * so we're safe.
256                     */
257                    if (hand->callbacks) {
258                        if (hand->callbacks->yajl_number) {
259                            _CC_CHK(hand->callbacks->yajl_number(
260                                        hand->ctx,(const char *) buf, bufLen));
261                        } else if (hand->callbacks->yajl_integer) {
262                            long int i = 0;
263                            yajl_buf_clear(hand->decodeBuf);
264                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
265                            buf = yajl_buf_data(hand->decodeBuf);
266                            i = strtol((const char *) buf, NULL, 10);
267                            if ((i == LONG_MIN || i == LONG_MAX) &&
268                                errno == ERANGE)
269                            {
270                                yajl_bs_set(hand->stateStack,
271                                            yajl_state_parse_error);
272                                hand->parserError = yajl_parser_integer_overflow;
273                                /* try to restore error offset */
274                                if (*offset >= bufLen) *offset -= bufLen;
275                                else *offset = 0;
276                                goto around_again;
277                            }
278                            _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
279                                                                  i));
280                        }
281                    }
282                    break;
283                case yajl_tok_double:
284                    if (hand->callbacks) {
285                        if (hand->callbacks->yajl_number) {
286                            _CC_CHK(hand->callbacks->yajl_number(
287                                        hand->ctx, (const char *) buf, bufLen));
288                        } else if (hand->callbacks->yajl_double) {
289                            double d = 0.0;
290                            yajl_buf_clear(hand->decodeBuf);
291                            yajl_buf_append(hand->decodeBuf, buf, bufLen);
292                            buf = yajl_buf_data(hand->decodeBuf);
293                            d = strtod((char *) buf, NULL);
294                            if ((d == HUGE_VAL || d == -HUGE_VAL) &&
295                                errno == ERANGE)
296                            {
297                                yajl_bs_set(hand->stateStack,
298                                            yajl_state_parse_error);
299                                hand->parserError = yajl_parser_numeric_overflow;
300                                /* try to restore error offset */
301                                if (*offset >= bufLen) *offset -= bufLen;
302                                else *offset = 0;
303                                goto around_again;
304                            }
305                            _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
306                                                                 d));
307                        }
308                    }
309                    break;
310                case yajl_tok_right_brace: {
311                    if (yajl_bs_current(hand->stateStack) ==
312                        yajl_state_array_start)
313                    {
314                        if (hand->callbacks &&
315                            hand->callbacks->yajl_end_array)
316                        {
317                            _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
318                        }
319                        yajl_bs_pop(hand->stateStack);
320                        goto around_again;
321                    }
322                    /* intentional fall-through */
323                }
324                case yajl_tok_colon:
325                case yajl_tok_comma:
326                case yajl_tok_right_bracket:
327                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
328                    hand->parserError = yajl_parser_invalid_token;
329                    goto around_again;
330                default:
331                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
332                    hand->parserError = yajl_parser_invalid_token;
333                    goto around_again;
334            }
335            /* got a value.  transition depends on the state we're in. */
336            {
337                yajl_state s = yajl_bs_current(hand->stateStack);
338                if (s == yajl_state_start) {
339                    yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
340                } else if (s == yajl_state_map_need_val) {
341                    yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
342                } else {
343                    yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
344                }
345            }
346            if (stateToPush != yajl_state_start) {
347                yajl_bs_push(hand->stateStack, stateToPush);
348            }
349
350            goto around_again;
351        }
352        case yajl_state_map_start:
353        case yajl_state_map_need_key: {
354            /* only difference between these two states is that in
355             * start '}' is valid, whereas in need_key, we've parsed
356             * a comma, and a string key _must_ follow */
357            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
358                               offset, &buf, &bufLen);
359            switch (tok) {
360                case yajl_tok_eof:
361                    return yajl_status_insufficient_data;
362                case yajl_tok_error:
363                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
364                    goto around_again;
365                case yajl_tok_string_with_escapes:
366                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
367                        yajl_buf_clear(hand->decodeBuf);
368                        yajl_string_decode(hand->decodeBuf, buf, bufLen);
369                        buf = yajl_buf_data(hand->decodeBuf);
370                        bufLen = yajl_buf_len(hand->decodeBuf);
371                    }
372                    /* intentional fall-through */
373                case yajl_tok_string:
374                    if (hand->callbacks && hand->callbacks->yajl_map_key) {
375                        _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
376                                                              bufLen));
377                    }
378                    yajl_bs_set(hand->stateStack, yajl_state_map_sep);
379                    goto around_again;
380                case yajl_tok_right_bracket:
381                    if (yajl_bs_current(hand->stateStack) ==
382                        yajl_state_map_start)
383                    {
384                        if (hand->callbacks && hand->callbacks->yajl_end_map) {
385                            _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
386                        }
387                        yajl_bs_pop(hand->stateStack);
388                        goto around_again;
389                    }
390                default:
391                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
392                    hand->parserError = yajl_parser_key_must_be_string;
393                    goto around_again;
394            }
395        }
396        case yajl_state_map_sep: {
397            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
398                               offset, &buf, &bufLen);
399            switch (tok) {
400                case yajl_tok_colon:
401                    yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
402                    goto around_again;
403                case yajl_tok_eof:
404                    return yajl_status_insufficient_data;
405                case yajl_tok_error:
406                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
407                    goto around_again;
408                default:
409                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
410                    hand->parserError = yajl_parser_pair_missing_colon;
411                    goto around_again;
412            }
413        }
414        case yajl_state_map_got_val: {
415            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
416                               offset, &buf, &bufLen);
417            switch (tok) {
418                case yajl_tok_right_bracket:
419                    if (hand->callbacks && hand->callbacks->yajl_end_map) {
420                        _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
421                    }
422                    yajl_bs_pop(hand->stateStack);
423                    goto around_again;
424                case yajl_tok_comma:
425                    yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
426                    goto around_again;
427                case yajl_tok_eof:
428                    return yajl_status_insufficient_data;
429                case yajl_tok_error:
430                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
431                    goto around_again;
432                default:
433                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
434                    hand->parserError = yajl_parser_bad_token_after_map_value;
435                    /* try to restore error offset */
436                    if (*offset >= bufLen) *offset -= bufLen;
437                    else *offset = 0;
438                    goto around_again;
439            }
440        }
441        case yajl_state_array_got_val: {
442            tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
443                               offset, &buf, &bufLen);
444            switch (tok) {
445                case yajl_tok_right_brace:
446                    if (hand->callbacks && hand->callbacks->yajl_end_array) {
447                        _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
448                    }
449                    yajl_bs_pop(hand->stateStack);
450                    goto around_again;
451                case yajl_tok_comma:
452                    yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
453                    goto around_again;
454                case yajl_tok_eof:
455                    return yajl_status_insufficient_data;
456                case yajl_tok_error:
457                    yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
458                    goto around_again;
459                default:
460                    yajl_bs_set(hand->stateStack, yajl_state_parse_error);
461                    hand->parserError = yajl_parser_bad_token_after_array_value;
462                    goto around_again;
463            }
464        }
465    }
466
467    abort();
468    return yajl_status_error;
469}
470
471