xref: /5.5.2/couchdb/src/ejson/yajl/yajl_lex.h (revision 3925e856)
1/*
2 * Copyright 2010, Lloyd Hilaiel.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *  1. Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 *
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in
13 *     the documentation and/or other materials provided with the
14 *     distribution.
15 *
16 *  3. Neither the name of Lloyd Hilaiel nor the names of its
17 *     contributors may be used to endorse or promote products derived
18 *     from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#ifndef __YAJL_LEX_H__
34#define __YAJL_LEX_H__
35
36#include "yajl_common.h"
37
/**
 * Token types produced by the lexer.  This is the return type of
 * yajl_lex_lex() and yajl_lex_peek().
 */
38typedef enum {
39    yajl_tok_bool,
40    yajl_tok_colon,
41    yajl_tok_comma,
42    yajl_tok_eof,
43    yajl_tok_error, /* details are available via yajl_lex_get_error() */
44    yajl_tok_left_brace,
45    yajl_tok_left_bracket,
46    yajl_tok_null,
47    yajl_tok_right_brace,
48    yajl_tok_right_bracket,
49
50    /* Integers and doubles are distinct token types so that the
51     * parser can interpret the number without re-scanning it. */
52    yajl_tok_integer,
53    yajl_tok_double,
54
55    /* Strings that require further processing (escapes) are
56     * distinguished from strings that do not. */
57    yajl_tok_string,
58    yajl_tok_string_with_escapes,
59
60    /* Comment tokens are not currently returned to the parser, ever. */
61    yajl_tok_comment
62} yajl_tok;
63
/** Opaque lexer handle: a pointer to struct yajl_lexer_t, whose layout
 *  is not defined in this header. */
64typedef struct yajl_lexer_t * yajl_lexer;
65
/**
 * Create a lexer instance and return its handle.
 *
 * alloc         - allocation routines; yajl_alloc_funcs is declared
 *                 outside this header (presumably in yajl_common.h).
 * allowComments - NOTE(review): presumably nonzero lets the lexer accept
 *                 comments instead of reporting yajl_lex_unallowed_comment;
 *                 confirm against the implementation.
 * validateUTF8  - NOTE(review): presumably nonzero enables UTF-8 checking
 *                 of strings (see yajl_lex_string_invalid_utf8); confirm
 *                 against the implementation.
 *
 * Behavior on allocation failure is not specified in this header.
 */
66yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc,
67                          unsigned int allowComments,
68                          unsigned int validateUTF8);
69
/** Dispose of a lexer.  NOTE(review): presumably the counterpart of
 *  yajl_lex_alloc(); whether a NULL handle is accepted is not stated
 *  in this header. */
70void yajl_lex_free(yajl_lexer lexer);
71
72/**
73 * run/continue a lex. "offset" is an input/output parameter.
74 * It should be initialized to zero for a
75 * new chunk of target text, and upon subsequent calls with the same
76 * target text should be passed with the value of the previous invocation.
77 *
78 * the client may be interested in the value of offset when an error is
79 * returned from the lexer.  This allows the client to render useful
80 * error messages.
81 *
82 * When you pass the next chunk of data, context should be reinitialized
83 * to zero.
 * NOTE(review): no parameter is named "context"; this presumably refers
 * to "offset" -- confirm against the implementation.
84 *
85 * Finally, the output buffer is usually just a pointer into the jsonText,
86 * however in cases where the entity being lexed spans multiple chunks,
87 * the lexer will buffer the entity and the data returned will be
88 * a pointer into that buffer.
89 *
90 * This behavior is abstracted from client code except for the performance
91 * implications which require that the client choose a reasonable chunk
92 * size to get adequate performance.
 *
 * Parameters:
 *   lexer       - lexer handle.
 *   jsonText    - the current chunk of target text.
 *   jsonTextLen - length of jsonText (presumably in bytes).
 *   offset      - in/out position within jsonText, as described above.
 *   outBuf      - receives a pointer to the lexed entity's data.
 *   outLen      - receives the length of the data at *outBuf.
 *
 * Returns the type of the token lexed.  When yajl_tok_error is returned,
 * yajl_lex_get_error() gives more specific information.
93 */
94yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
95                      unsigned int jsonTextLen, unsigned int * offset,
96                      const unsigned char ** outBuf, unsigned int * outLen);
97
98/** Have a peek at the next token, but don't move the lexer forward.
 *  Unlike yajl_lex_lex(), "offset" is passed by value here and there are
 *  no output buffer parameters, so only the token type is returned. */
99yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
100                       unsigned int jsonTextLen, unsigned int offset);
101
102
/**
 * Specific lexical error codes.  A value of this type is obtained from
 * yajl_lex_get_error() after yajl_lex_lex() returns yajl_tok_error, and
 * can be passed to yajl_lex_error_to_string().
 */
103typedef enum {
104    yajl_lex_e_ok = 0, /* the only enumerator with an explicit value */
105    yajl_lex_string_invalid_utf8,
106    yajl_lex_string_invalid_escaped_char,
107    yajl_lex_string_invalid_json_char,
108    yajl_lex_string_invalid_hex_char,
109    yajl_lex_invalid_char,
110    yajl_lex_invalid_string,
111    yajl_lex_missing_integer_after_decimal,
112    yajl_lex_missing_integer_after_exponent,
113    yajl_lex_missing_integer_after_minus,
114    yajl_lex_unallowed_comment
115} yajl_lex_error;
116
/** Map a lexer error code to a string.  NOTE(review): presumably a
 *  human-readable description; ownership and lifetime of the returned
 *  string are not specified in this header. */
117const char * yajl_lex_error_to_string(yajl_lex_error error);
118
119/** Allows access to more specific information about the lexical
120 *  error when yajl_lex_lex returns yajl_tok_error.  The result is a
 *  yajl_lex_error code for the given lexer. */
121yajl_lex_error yajl_lex_get_error(yajl_lexer lexer);
122
123/** Get the current offset into the most recently lexed json string.
 *  NOTE(review): units are presumably bytes from the start of that
 *  string; confirm against the implementation. */
124unsigned int yajl_lex_current_offset(yajl_lexer lexer);
125
126/** Get the number of lines lexed by this lexer instance.
 *  NOTE(review): whether counting starts at 0 or 1 is not stated here. */
127unsigned int yajl_lex_current_line(yajl_lexer lexer);
128
129/** Get the number of chars lexed by this lexer instance since the last
130 *  \n or \r, i.e. the position within the current line. */
131unsigned int yajl_lex_current_char(yajl_lexer lexer);
132
133#endif
134