xref: /5.5.2/subjson/contrib/jsonsl/jsonsl.h (revision 5cf05eaf)
1/**
2 * JSON Simple/Stacked/Stateful Lexer.
3 * - Does not buffer data
4 * - Maintains state
5 * - Callback oriented
6 * - Lightweight and fast. One source file and one header file
7 *
8 * Copyright (C) 2012-2015 Mark Nunberg
9 * See included LICENSE file for license details.
10 */
11
12#ifndef JSONSL_H_
13#define JSONSL_H_
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <stddef.h>
18#include <string.h>
19#include <sys/types.h>
20#include <wchar.h>
21
22#ifdef __cplusplus
23extern "C" {
24#endif /* __cplusplus */
25
/*
 * Character types used by the lexer API.
 *
 * jsonsl_char_t  - the type of one input unit as passed to jsonsl_feed().
 * jsonsl_uchar_t - companion type for the same unit; `unsigned char` in the
 *                  default (narrow) build.
 *
 * Define JSONSL_USE_WCHAR before including this header to select wide
 * characters instead.
 */
#ifdef JSONSL_USE_WCHAR
typedef wchar_t jsonsl_char_t;
/* C has no `unsigned wchar_t` (the signedness of wchar_t is fixed by the
 * implementation), so the companion type is simply wchar_t as well. */
typedef wchar_t jsonsl_uchar_t;
#else
typedef char jsonsl_char_t;
typedef unsigned char jsonsl_uchar_t;
#endif /* JSONSL_USE_WCHAR */
33
34/* Stolen from http-parser.h, and possibly others */
35#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600)
36typedef __int8 int8_t;
37typedef unsigned __int8 uint8_t;
38typedef __int16 int16_t;
39typedef unsigned __int16 uint16_t;
40typedef __int32 int32_t;
41typedef unsigned __int32 uint32_t;
42typedef __int64 int64_t;
43typedef unsigned __int64 uint64_t;
44#if !defined(_MSC_VER) || _MSC_VER<1400
45typedef unsigned int size_t;
46typedef int ssize_t;
47#endif
48#else
49#include <stdint.h>
50#endif
51
52
/*
 * Per-state user storage.
 *
 * If the application configured neither JSONSL_STATE_GENERIC nor its own
 * JSONSL_STATE_USER_FIELDS, fall back to the generic layout. In the generic
 * layout JSONSL_STATE_USER_FIELDS expands to nothing.
 */
#if !defined(JSONSL_STATE_GENERIC) && !defined(JSONSL_STATE_USER_FIELDS)
#  define JSONSL_STATE_GENERIC
#endif /* neither GENERIC nor USER_FIELDS supplied */

#if defined(JSONSL_STATE_GENERIC)
#  define JSONSL_STATE_USER_FIELDS
#endif /* JSONSL_STATE_GENERIC */

/* Extra members for the JPR component object; empty unless overridden. */
#if !defined(JSONSL_JPR_COMPONENT_USER_FIELDS)
#  define JSONSL_JPR_COMPONENT_USER_FIELDS
#endif /* !JSONSL_JPR_COMPONENT_USER_FIELDS */
65
/**
 * Linkage decoration for public functions.
 *
 * Export decoration is only applied when building on Windows with
 * /DJSONSL_DLL, so that users who already use this as a static or embedded
 * library don't get confused. A pre-existing definition of JSONSL_API is
 * respected.
 */
#if !defined(JSONSL_API)
#  if defined(_WIN32) && defined(JSONSL_DLL)
#    define JSONSL_API __declspec(dllexport)
#  else
#    define JSONSL_API
#  endif /* _WIN32 && JSONSL_DLL */
#endif /* !JSONSL_API */
78
/*
 * JSONSL_INLINE: spelling of the inline keyword for the current compiler.
 * A definition supplied by the application takes precedence.
 */
#if !defined(JSONSL_INLINE)
#  if defined(_MSC_VER)
#    define JSONSL_INLINE __inline
#  elif defined(__GNUC__)
#    define JSONSL_INLINE __inline__
#  else
#    define JSONSL_INLINE inline
#  endif /* _MSC_VER or __GNUC__ */
#endif /* !JSONSL_INLINE */
88
89#define JSONSL_MAX_LEVELS 512
90
91struct jsonsl_st;
92typedef struct jsonsl_st *jsonsl_t;
93
94typedef struct jsonsl_jpr_st* jsonsl_jpr_t;
95
/**
 * Flag bits carried by every type whose value must appear in "quotes",
 * i.e. T_HKEY and T_STRING. AND a type against this mask to test for it.
 */
#define JSONSL_Tf_STRINGY 0xffff00

/**
 * The special JSON types.
 *
 * Each value is a character chosen as a mnemonic for the type; for OBJECT
 * and LIST it is the literal opening character, which aids speed.
 *
 * Any type added here must stay within the printable ASCII range, so that
 * AND-ing it with 0x7f still yields something meaningful.
 */
#define JSONSL_XTYPE \
    X(STRING,  '"'|JSONSL_Tf_STRINGY) \
    X(HKEY,    '#'|JSONSL_Tf_STRINGY) \
    X(OBJECT,  '{') \
    X(LIST,    '[') \
    X(SPECIAL, '^') \
    X(UESCAPE, 'u')

#define X(name, value) JSONSL_T_##name = value,
typedef enum {
    JSONSL_XTYPE
    JSONSL_T_UNKNOWN = '?',
    /* Abstract 'root' object */
    JSONSL_T_ROOT = 0
} jsonsl_type_t;
#undef X
130
/**
 * Subtypes of T_SPECIAL.
 *
 * These are bit flags rather than plain enumerators because several of them
 * may apply to the same object at once.
 */
#define JSONSL_XSPECIAL \
    X(NONE,     0)    \
    X(SIGNED,   1<<0) \
    X(UNSIGNED, 1<<1) \
    X(TRUE,     1<<2) \
    X(FALSE,    1<<3) \
    X(NULL,     1<<4) \
    X(FLOAT,    1<<5) \
    X(EXPONENT, 1<<6) \
    X(NONASCII, 1<<7)

#define X(name, bits) JSONSL_SPECIALf_##name = bits,
typedef enum {
    JSONSL_XSPECIAL

    /* Handy flags for checking */

    JSONSL_SPECIALf_UNKNOWN = 1 << 8,

    /** @private Private */
    JSONSL_SPECIALf_ZERO    = 1 << 9 | JSONSL_SPECIALf_UNSIGNED,
    /** @private */
    JSONSL_SPECIALf_DASH    = 1 << 10,

    /** Type is numeric */
    JSONSL_SPECIALf_NUMERIC  = (JSONSL_SPECIALf_SIGNED | JSONSL_SPECIALf_UNSIGNED),

    /** Type is a boolean */
    JSONSL_SPECIALf_BOOLEAN  = (JSONSL_SPECIALf_TRUE | JSONSL_SPECIALf_FALSE),

    /** Type is an "extended", not integral type (but numeric) */
    JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT | JSONSL_SPECIALf_EXPONENT)
} jsonsl_special_t;
#undef X
170
171
/**
 * The stack (or other) events which trigger a callback.
 * As with the type constants, the values are mnemonic characters.
 */
#define JSONSL_XACTION \
    X(PUSH,    '+') \
    X(POP,     '-') \
    X(UESCAPE, 'U') \
    X(ERROR,   '!')

#define X(name, ch) JSONSL_ACTION_##name = ch,
typedef enum {
    JSONSL_XACTION
    JSONSL_ACTION_UNKNOWN = '?'
} jsonsl_action_t;
#undef X
189
190
/**
 * Errors which may be reported while parsing JSON (or a JPR path).
 *
 * The list is an X-macro: every X(NAME) entry becomes the enumerator
 * JSONSL_ERROR_NAME, numbered consecutively after JSONSL_ERROR_SUCCESS.
 */
#define JSONSL_XERR \
    /* Trailing garbage characters */ \
    X(GARBAGE_TRAILING) \
    /* A 'special' (numeric, true, false, null) was expected */ \
    X(SPECIAL_EXPECTED) \
    /* The 'special' value was incomplete */ \
    X(SPECIAL_INCOMPLETE) \
    /* Found a stray token */ \
    X(STRAY_TOKEN) \
    /* A token was expected before this one */ \
    X(MISSING_TOKEN) \
    /* Cannot insert because the container is not ready */ \
    X(CANT_INSERT) \
    /* Found a backslash outside a string */ \
    X(ESCAPE_OUTSIDE_STRING) \
    /* Found a ':' outside of a hash */ \
    X(KEY_OUTSIDE_OBJECT) \
    /* Found a string outside of a container */ \
    X(STRING_OUTSIDE_CONTAINER) \
    /* Found a null byte in the middle of a string */ \
    X(FOUND_NULL_BYTE) \
    /* Current level exceeds the limit specified in the constructor */ \
    X(LEVELS_EXCEEDED) \
    /* Got a } as a result of an opening [ or vice versa */ \
    X(BRACKET_MISMATCH) \
    /* A key was expected, but something else was found instead */ \
    X(HKEY_EXPECTED) \
    /* Illegal control character (bad whitespace or something) */ \
    X(WEIRD_WHITESPACE) \
    /* Found a \u-escape followed by fewer than 4 hex digits */ \
    X(UESCAPE_TOOSHORT) \
    /* Invalid two-character escape */ \
    X(ESCAPE_INVALID) \
    /* Trailing comma */ \
    X(TRAILING_COMMA) \
    /* An invalid number was passed in a numeric field */ \
    X(INVALID_NUMBER) \
    /* Value is missing for object */ \
    X(VALUE_EXPECTED) \
    \
    /* ---- The following are for JPR ---- */ \
    \
    /* Literal '%' followed by only a single valid hex digit */ \
    X(PERCENT_BADHEX) \
    /* jsonpointer URI is malformed '/' */ \
    X(JPR_BADPATH) \
    /* Duplicate slash */ \
    X(JPR_DUPSLASH) \
    /* No leading root */ \
    X(JPR_NOROOT) \
    /* Allocation failure */ \
    X(ENOMEM) \
    /* Invalid unicode codepoint detected (in case of escapes) */ \
    X(INVALID_CODEPOINT)

#define X(name) JSONSL_ERROR_##name,
typedef enum {
    JSONSL_ERROR_SUCCESS = 0,
    JSONSL_XERR
    JSONSL_ERROR_GENERIC
} jsonsl_error_t;
#undef X
256
257
258/**
259 * A state is a single level of the stack.
260 * Non-private data (i.e. the 'data' field, see the STATE_GENERIC section)
261 * will remain in tact until the item is popped.
262 *
263 * As a result, it means a parent state object may be accessed from a child
264 * object, (the parents fields will all be valid). This allows a user to create
265 * an ad-hoc hierarchy on top of the JSON one.
266 *
267 */
268struct jsonsl_state_st {
269    /**
270     * The JSON object type
271     */
272    unsigned type;
273
274    /** If this element is special, then its extended type is here */
275    unsigned special_flags;
276
277    /**
278     * The position (in terms of number of bytes since the first call to
279     * jsonsl_feed()) at which the state was first pushed. This includes
280     * opening tokens, if applicable.
281     *
282     * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will
283     * be the position of the first quote.
284     *
285     * @see jsonsl_st::pos which contains the _current_ position and can be
286     * used during a POP callback to get the length of the element.
287     */
288    size_t pos_begin;
289
290    /**FIXME: This is redundant as the same information can be derived from
291     * jsonsl_st::pos at pop-time */
292    size_t pos_cur;
293
294    /**
295     * Level of recursion into nesting. This is mainly a convenience
296     * variable, as this can technically be deduced from the lexer's
297     * level parameter (though the logic is not that simple)
298     */
299    unsigned int level;
300
301
302    /**
303     * how many elements in the object/list.
304     * For objects (hashes), an element is either
305     * a key or a value. Thus for one complete pair,
306     * nelem will be 2.
307     *
308     * For special types, this will hold the sum of the digits.
309     * This only holds true for values which are simple signed/unsigned
310     * numbers. Otherwise a special flag is set, and extra handling is not
311     * performed.
312     */
313    uint64_t nelem;
314
315
316
317    /*TODO: merge this and special_flags into a union */
318
319
320    /**
321     * Useful for an opening nest, this will prevent a callback from being
322     * invoked on this item or any of its children
323     */
324    int ignore_callback;
325
326    /**
327     * Counter which is incremented each time an escape ('\') is encountered.
328     * This is used internally for non-string types and should only be
329     * inspected by the user if the state actually represents a string
330     * type.
331     */
332    unsigned int nescapes;
333
334    /**
335     * Put anything you want here. if JSONSL_STATE_USER_FIELDS is here, then
336     * the macro expansion happens here.
337     *
338     * You can use these fields to store hierarchical or 'tagging' information
339     * for specific objects.
340     *
341     * See the documentation above for the lifetime of the state object (i.e.
342     * if the private data points to allocated memory, it should be freed
343     * when the object is popped, as the state object will be re-used)
344     */
345#ifndef JSONSL_STATE_GENERIC
346    JSONSL_STATE_USER_FIELDS
347#else
348
349    /**
350     * Otherwise, this is a simple void * pointer for anything you want
351     */
352    void *data;
353#endif /* JSONSL_STATE_USER_FIELDS */
354};
355
/**
 * Number of elements in a list.
 * @param st The state. Must be of type JSONSL_T_LIST
 * @return the element count of the list
 */
#define JSONSL_LIST_SIZE(st) ((st)->nelem)

/**
 * Number of key-value pairs in an object. Keys and values are counted
 * separately in nelem, hence the division by two.
 * @param st The state. Must be of type JSONSL_T_OBJECT
 * @return the number of key-value pairs in the object
 */
#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2)

/**
 * Numeric value of a special.
 * @param st The state. Must be of type JSONSL_T_SPECIAL, and its
 *           special_flags must have the JSONSL_SPECIALf_NUMERIC flag set.
 * @return the numeric value of the state.
 */
#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem)
375
376/*
377 * So now we need some special structure for keeping the
 * JPR info in sync. Preferably all in a single block
 * of memory (there's no need for separate allocations).
380 * So we will define a 'table' with the following layout
381 *
382 * Level    nPosbl  JPR1_last   JPR2_last   JPR3_last
383 *
384 * 0        1       NOMATCH     POSSIBLE    POSSIBLE
385 * 1        0       NOMATCH     NOMATCH     COMPLETE
386 * [ table ends here because no further path is possible]
387 *
388 * Where the JPR..n corresponds to the number of JPRs
389 * requested, and nPosble is a quick flag to determine
390 *
391 * the number of possibilities. In the future this might
392 * be made into a proper 'jump' table,
393 *
394 * Since we always mark JPRs from the higher levels descending
395 * into the lower ones, a prospective child match would first
396 * look at the parent table to check the possibilities, and then
397 * see which ones were possible..
398 *
399 * Thus, the size of this blob would be (and these are all ints here)
400 * nLevels * nJPR * 2.
401 *
402 * the 'Width' of the table would be nJPR*2, and the 'height' would be
403 * nlevels
404 */
405
406/**
 * This is called when a stack change occurs.
408 *
409 * @param jsn The lexer
410 * @param action The type of action, this can be PUSH or POP
411 * @param state A pointer to the stack currently affected by the action
412 * @param at A pointer to the position of the input buffer which triggered
413 * this action.
414 */
415typedef void (*jsonsl_stack_callback)(
416        jsonsl_t jsn,
417        jsonsl_action_t action,
418        struct jsonsl_state_st* state,
419        const jsonsl_char_t *at);
420
421
422/**
423 * This is called when an error is encountered.
424 * Sometimes it's possible to 'erase' characters (by replacing them
425 * with whitespace). If you think you have corrected the error, you
426 * can return a true value, in which case the parser will backtrack
427 * and try again.
428 *
429 * @param jsn The lexer
430 * @param error The error which was thrown
431 * @param state the current state
 * @param at a pointer to the position of the input buffer which triggered
433 * the error. Note that this is not const, this is because you have the
434 * possibility of modifying the character in an attempt to correct the
435 * error
436 *
437 * @return zero to bail, nonzero to try again (this only makes sense if
438 * the input buffer has been modified by this callback)
439 */
440typedef int (*jsonsl_error_callback)(
441        jsonsl_t jsn,
442        jsonsl_error_t error,
443        struct jsonsl_state_st* state,
444        jsonsl_char_t *at);
445
446struct jsonsl_st {
447    /** Public, read-only */
448
449    /** This is the current level of the stack */
450    unsigned int level;
451
452    /** Flag set to indicate we should stop processing */
453    unsigned int stopfl;
454
455    /**
456     * This is the current position, relative to the beginning
457     * of the stream.
458     */
459    size_t pos;
460
461    /** This is the 'bytes' variable passed to feed() */
462    const jsonsl_char_t *base;
463
464    /** Callback invoked for PUSH actions */
465    jsonsl_stack_callback action_callback_PUSH;
466
467    /** Callback invoked for POP actions */
468    jsonsl_stack_callback action_callback_POP;
469
470    /** Default callback for any action, if neither PUSH or POP callbacks are defined */
471    jsonsl_stack_callback action_callback;
472
473    /**
474     * Do not invoke callbacks for objects deeper than this level.
475     * NOTE: This field establishes the lower bound for ignored callbacks,
476     * and is thus misnamed. `min_ignore_level` would actually make more
477     * sense, but we don't want to break API.
478     */
479    unsigned int max_callback_level;
480
481    /** The error callback. Invoked when an error happens. Should not be NULL */
482    jsonsl_error_callback error_callback;
483
484    /* these are boolean flags you can modify. You will be called
485     * about notification for each of these types if the corresponding
486     * variable is true.
487     */
488
489    /**
490     * @name Callback Booleans.
491     * These determine whether a callback is to be invoked for certain types of objects
492     * @{*/
493
494    /** Boolean flag to enable or disable the invokcation for events on this type*/
495    int call_SPECIAL;
496    int call_OBJECT;
497    int call_LIST;
498    int call_STRING;
499    int call_HKEY;
500    /*@}*/
501
502    /**
503     * @name u-Escape handling
504     * Special handling for the \\u-f00d type sequences. These are meant
505     * to be translated back into the corresponding octet(s).
506     * A special callback (if set) is invoked with *at=='u'. An application
507     * may wish to temporarily suspend parsing and handle the 'u-' sequence
508     * internally (or not).
509     */
510
511     /*@{*/
512
513    /** Callback to be invoked for a u-escape */
514    jsonsl_stack_callback action_callback_UESCAPE;
515
516    /** Boolean flag, whether to invoke the callback */
517    int call_UESCAPE;
518
519    /** Boolean flag, whether we should return after encountering a u-escape:
520     * the callback is invoked and then we return if this is true
521     */
522    int return_UESCAPE;
523    /*@}*/
524
525    struct {
526        int allow_trailing_comma;
527    } options;
528
529    /** Put anything here */
530    void *data;
531
532    /*@{*/
533    /** Private */
534    int in_escape;
535    char expecting;
536    char tok_last;
537    int can_insert;
538    unsigned int levels_max;
539
540#ifndef JSONSL_NO_JPR
541    size_t jpr_count;
542    jsonsl_jpr_t *jprs;
543
544    /* Root pointer for JPR matching information */
545    size_t *jpr_root;
546#endif /* JSONSL_NO_JPR */
547    /*@}*/
548
549    /**
550     * This is the stack. Its upper bound is levels_max, or the
551     * nlevels argument passed to jsonsl_new. If you modify this structure,
552     * make sure that this member is last.
553     */
554    struct jsonsl_state_st stack[1];
555};
556
557
558/**
559 * Creates a new lexer object, with capacity for recursion up to nlevels
560 *
561 * @param nlevels maximum recursion depth
562 */
563JSONSL_API
564jsonsl_t jsonsl_new(int nlevels);
565
566/**
567 * Feeds data into the lexer.
568 *
569 * @param jsn the lexer object
570 * @param bytes new data to be fed
571 * @param nbytes size of new data
572 */
573JSONSL_API
574void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes);
575
576/**
577 * Resets the internal parser state. This does not free the parser
578 * but does clean it internally, so that the next time feed() is called,
579 * it will be treated as a new stream
580 *
581 * @param jsn the lexer
582 */
583JSONSL_API
584void jsonsl_reset(jsonsl_t jsn);
585
586/**
587 * Frees the lexer, cleaning any allocated memory taken
588 *
589 * @param jsn the lexer
590 */
591JSONSL_API
592void jsonsl_destroy(jsonsl_t jsn);
593
594/**
595 * Gets the 'parent' element, given the current one
596 *
597 * @param jsn the lexer
598 * @param cur the current nest, which should be a struct jsonsl_nest_st
599 */
600static JSONSL_INLINE
601struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn,
602                                          const struct jsonsl_state_st *state)
603{
604    /* Don't complain about overriding array bounds */
605    if (state->level > 1) {
606        return jsn->stack + state->level - 1;
607    } else {
608        return NULL;
609    }
610}
611
612/**
613 * Gets the state of the last fully consumed child of this parent. This is
614 * only valid in the parent's POP callback.
615 *
616 * @param the lexer
617 * @return A pointer to the child.
618 */
619static JSONSL_INLINE
620struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn,
621                                          const struct jsonsl_state_st *parent)
622{
623    return jsn->stack + (parent->level + 1);
624}
625
626/**Call to instruct the parser to stop parsing and return. This is valid
627 * only from within a callback */
628static JSONSL_INLINE
629void jsonsl_stop(jsonsl_t jsn)
630{
631    jsn->stopfl = 1;
632}
633
634/**
635 * This enables receiving callbacks on all events. Doesn't do
636 * anything special but helps avoid some boilerplate.
637 * This does not touch the UESCAPE callbacks or flags.
638 */
639static JSONSL_INLINE
640void jsonsl_enable_all_callbacks(jsonsl_t jsn)
641{
642    jsn->call_HKEY = 1;
643    jsn->call_STRING = 1;
644    jsn->call_OBJECT = 1;
645    jsn->call_SPECIAL = 1;
646    jsn->call_LIST = 1;
647}
648
/**
 * A macro which returns true if the given state object can have children,
 * i.e. it is a list type or an object type.
 *
 * @param state pointer to a struct jsonsl_state_st. The argument is
 * parenthesized in the expansion so that any pointer expression (for
 * example `base + 1`) may be passed; note that it is evaluated twice, so it
 * should be free of side effects.
 */
#define JSONSL_STATE_IS_CONTAINER(state) \
        ((state)->type == JSONSL_T_OBJECT || (state)->type == JSONSL_T_LIST)
655
656/**
657 * These two functions, dump a string representation
658 * of the error or type, respectively. They will never
659 * return NULL
660 */
661JSONSL_API
662const char* jsonsl_strerror(jsonsl_error_t err);
663JSONSL_API
664const char* jsonsl_strtype(jsonsl_type_t jt);
665
666/**
667 * Dumps global metrics to the screen. This is a noop unless
668 * jsonsl was compiled with JSONSL_USE_METRICS
669 */
670JSONSL_API
671void jsonsl_dump_global_metrics(void);
672
673/* This macro just here for editors to do code folding */
674#ifndef JSONSL_NO_JPR
675
676/**
677 * @name JSON Pointer API
678 *
679 * JSONPointer API. This isn't really related to the lexer (at least not yet)
680 * JSONPointer provides an extremely simple specification for providing
681 * locations within JSON objects. We will extend it a bit and allow for
682 * providing 'wildcard' characters by which to be able to 'query' the stream.
683 *
684 * See http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-00
685 *
686 * Currently I'm implementing the 'single query' API which can only use a single
687 * query component. In the future I will integrate my yet-to-be-published
688 * Boyer-Moore-esque prefix searching implementation, in order to allow
689 * multiple paths to be merged into one for quick and efficient searching.
690 *
691 *
692 * JPR (as we'll refer to it within the source) can be used by splitting
 * the components into multiple sections, and incrementally 'track' each
694 * component. When JSONSL delivers a 'pop' callback for a string, or a 'push'
695 * callback for an object, we will check to see whether the index matching
696 * the component corresponding to the current level contains a match
697 * for our path.
698 *
699 * In order to do this properly, a structure must be maintained within the
700 * parent indicating whether its children are possible matches. This flag
 * will be 'inherited' by all children which may conform to the match
702 * specification, and discarded by all which do not (thereby eliminating
703 * their children from inheriting it).
704 *
705 * A successful match is a complete one. One can provide multiple paths with
706 * multiple levels of matches e.g.
707 *  /foo/bar/baz/^/blah
708 *
709 *  @{
710 */
711
712/** The wildcard character */
713#ifndef JSONSL_PATH_WILDCARD_CHAR
714#define JSONSL_PATH_WILDCARD_CHAR '^'
715#endif /* WILDCARD_CHAR */
716
/**
 * Result of matching a JSON element against a JPR path.
 *
 * Each X(NAME, value) entry becomes the enumerator JSONSL_MATCH_NAME.
 */
#define JSONSL_XMATCH \
    X(COMPLETE,       1) \
    X(POSSIBLE,       0) \
    X(NOMATCH,       -1) \
    X(TYPE_MISMATCH, -2)

#define X(T,v) JSONSL_MATCH_##T = v,
typedef enum {
    JSONSL_XMATCH

    /* NOTE(review): this enumerator has no explicit value, so it takes the
     * value after TYPE_MISMATCH, i.e. -1, which is numerically identical to
     * JSONSL_MATCH_NOMATCH. The value is left as-is because callers may
     * depend on it; do not use both constants as `case` labels of the same
     * switch. */
    JSONSL_MATCH_UNKNOWN
} jsonsl_jpr_match_t;
#undef X
732
/** Kind of a single JPR path component. */
typedef enum {
    /* Special */
    JSONSL_PATH_INVALID  = -1,
    JSONSL_PATH_NONE     = 0,

    /* Regular component kinds */
    JSONSL_PATH_STRING   = 1,
    JSONSL_PATH_WILDCARD = 2,
    JSONSL_PATH_NUMERIC  = 3,
    JSONSL_PATH_ROOT     = 4
} jsonsl_jpr_type_t;
743
744struct jsonsl_jpr_component_st {
745    /** The string the component points to */
746    char *pstr;
747    /** if this is a numeric type, the number is 'cached' here */
748    unsigned long idx;
749    /** The length of the string */
750    size_t len;
751    /** The type of component (NUMERIC or STRING) */
752    jsonsl_jpr_type_t ptype;
753
754    /** Set this to true to enforce type checking between dict keys and array
755     * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects
756     * that an array index is actually a child of a dictionary. */
757    short is_arridx;
758
759    /* Extra fields (for more advanced searches. Default is empty) */
760    JSONSL_JPR_COMPONENT_USER_FIELDS
761};
762
/** A parsed JPR path; see jsonsl_jpr_new() and jsonsl_jpr_destroy(). */
struct jsonsl_jpr_st {
    /** Path components and their count */
    struct jsonsl_jpr_component_st *components;
    size_t ncomponents;

    /** Type of the match to be expected. When nonzero it is compared
     * against the actual type */
    unsigned match_type;

    /** Base of the string allocated for the components */
    char *basestr;

    /** The original match string (and its length); useful for returning
     * to the user */
    char *orig;
    size_t norig;
};
779
780/**
781 * Create a new JPR object.
782 *
783 * @param path the JSONPointer path specification.
784 * @param errp a pointer to a jsonsl_error_t. If this function returns NULL,
785 * then more details will be in this variable.
786 *
787 * @return a new jsonsl_jpr_t object, or NULL on error.
788 */
789JSONSL_API
790jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp);
791
792/**
793 * Destroy a JPR object
794 */
795JSONSL_API
796void jsonsl_jpr_destroy(jsonsl_jpr_t jpr);
797
798/**
799 * Match a JSON object against a type and specific level
800 *
801 * @param jpr the JPR object
802 * @param parent_type the type of the parent (should be T_LIST or T_OBJECT)
803 * @param parent_level the level of the parent
804 * @param key the 'key' of the child. If the parent is an array, this should be
805 * empty.
806 * @param nkey - the length of the key. If the parent is an array (T_LIST), then
807 * this should be the current index.
808 *
809 * NOTE: The key of the child means any kind of associative data related to the
810 * element. Thus: <<< { "foo" : [ >>,
811 * the opening array's key is "foo".
812 *
813 * @return a status constant. This indicates whether a match was excluded, possible,
814 * or successful.
815 */
816JSONSL_API
817jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr,
818                                    unsigned int parent_type,
819                                    unsigned int parent_level,
820                                    const char *key, size_t nkey);
821
822/**
823 * Alternate matching algorithm. This matching algorithm does not use
824 * JSONPointer but relies on a more structured searching mechanism. It
825 * assumes that there is a clear distinction between array indices and
 * object keys. In this case, the jsonsl_jpr_component_st::ptype should
 * be set to @ref JSONSL_PATH_NUMERIC for an array index (the
 * jsonsl_jpr_component_st::is_arridx field will be removed in a future
 * version).
830 *
831 * @param jpr The path
832 * @param parent The parent structure. Can be NULL if this is the root object
833 * @param child The child structure. Should not be NULL
834 * @param key Object key, if an object
835 * @param nkey Length of object key
836 * @return Status constant if successful
837 *
838 * @note
839 * For successful matching, both the key and the path itself should be normalized
840 * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This
841 * should currently be done in the application. Another version of this function
842 * may use a temporary buffer in such circumstances (allocated by the application).
843 *
844 * Since this function also checks the state of the child, it should only
845 * be called on PUSH callbacks, and not POP callbacks
846 */
847JSONSL_API
848jsonsl_jpr_match_t
849jsonsl_path_match(jsonsl_jpr_t jpr,
850                  const struct jsonsl_state_st *parent,
851                  const struct jsonsl_state_st *child,
852                  const char *key, size_t nkey);
853
854
855/**
856 * Associate a set of JPR objects with a lexer instance.
857 * This should be called before the lexer has been fed any data (and
858 * behavior is undefined if you don't adhere to this).
859 *
860 * After using this function, you may subsequently call match_state() on
861 * given states (presumably from within the callbacks).
862 *
863 * Note that currently the first JPR is the quickest and comes
864 * pre-allocated with the state structure. Further JPR objects
865 * are chained.
866 *
867 * @param jsn The lexer
868 * @param jprs An array of jsonsl_jpr_t objects
869 * @param njprs How many elements in the jprs array.
870 */
871JSONSL_API
872void jsonsl_jpr_match_state_init(jsonsl_t jsn,
873                                 jsonsl_jpr_t *jprs,
874                                 size_t njprs);
875
876/**
877 * This follows the same semantics as the normal match,
878 * except we infer parent and type information from the relevant state objects.
879 * The match status (for all possible JPR objects) is set in the *out parameter.
880 *
881 * If a match has succeeded, then its JPR object will be returned. In all other
882 * instances, NULL is returned;
883 *
 * @param jsn The lexer, previously set up with jsonsl_jpr_match_state_init()
885 * @param state The jsonsl_state_st which is a candidate
886 * @param key The hash key (if applicable, can be NULL if parent is list)
887 * @param nkey Length of hash key (if applicable, can be zero if parent is list)
888 * @param out A pointer to a jsonsl_jpr_match_t. This will be populated with
889 * the match result
890 *
891 * @return If a match was completed in full, then the JPR object containing
892 * the matching path will be returned. Otherwise, the return is NULL (note, this
893 * does not mean matching has failed, it can still be part of the match: check
894 * the out parameter).
895 */
896JSONSL_API
897jsonsl_jpr_t jsonsl_jpr_match_state(jsonsl_t jsn,
898                                    struct jsonsl_state_st *state,
899                                    const char *key,
900                                    size_t nkey,
901                                    jsonsl_jpr_match_t *out);
902
903
904/**
905 * Cleanup any memory allocated and any states set by
906 * match_state_init() and match_state()
907 * @param jsn The lexer
908 */
909JSONSL_API
910void jsonsl_jpr_match_state_cleanup(jsonsl_t jsn);
911
912/**
913 * Return a string representation of the match result returned by match()
914 */
915JSONSL_API
916const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
917
918/* @}*/
919
920/**
921 * Utility function to convert escape sequences into their original form.
922 *
923 * The decoders I've sampled do not seem to specify a standard behavior of what
924 * to escape/unescape.
925 *
 * RFC 4627 mandates only that the quote, backslash, and ASCII control
927 * characters (0x00-0x1f) be escaped. It is often common for applications
928 * to escape a '/' - however this may also be desired behavior. the JSON
929 * spec is not clear on this, and therefore jsonsl leaves it up to you.
930 *
931 * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically
932 * true when dealing with 'u-escapes' which can be expressed perfectly fine
933 * as utf8. One use case for normalization is JPR string comparison, in which
934 * case two effectively equivalent strings may not match because one is using
935 * u-escapes and the other proper utf8. To normalize u-escapes only, pass in
936 * an empty `toEscape` table, enabling only the `u` index.
937 *
938 * @param in The input string.
939 * @param out An allocated output (should be the same size as in)
940 * @param len the size of the buffer
941 * @param toEscape - A sparse array of characters to unescape. Characters
942 * which are not present in this array, e.g. toEscape['c'] == 0 will be
943 * ignored and passed to the output in their original form.
944 * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
945 * then this variable will have the SPECIALf_NONASCII flag on.
946 *
 * @param err A pointer to an error variable. If an error occurs, it will be
948 * set in this variable
949 * @param errat If not null and an error occurs, this will be set to point
950 * to the position within the string at which the offending character was
951 * encountered.
952 *
953 * @return The effective size of the output buffer.
954 *
955 * @note
956 * This function now encodes the UTF8 equivalents of utf16 escapes (i.e.
957 * 'u-escapes'). Previously this would encode the escapes as utf16 literals,
958 * which while still correct in some sense was confusing for many (especially
959 * considering that the inputs were variations of char).
960 *
961 * @note
962 * The output buffer will never be larger than the input buffer, since
963 * standard escape sequences (i.e. '\t') occupy two bytes in the source
964 * but only one byte (when unescaped) in the output. Likewise u-escapes
 * (i.e. \uXXXX) will occupy six bytes in the source, but at the most
 * three bytes (as UTF-8) when unescaped.
967 */
968JSONSL_API
969size_t jsonsl_util_unescape_ex(const char *in,
970                               char *out,
971                               size_t len,
972                               const int toEscape[128],
973                               unsigned *oflags,
974                               jsonsl_error_t *err,
975                               const char **errat);
976
977/**
978 * Convenience macro to avoid passing too many parameters
979 */
980#define jsonsl_util_unescape(in, out, len, toEscape, err) \
981    jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
982
983#endif /* JSONSL_NO_JPR */
984
985#ifdef __cplusplus
986}
987#endif /* __cplusplus */
988
989#endif /* JSONSL_H_ */
990