xref: /5.5.2/subjson/subdoc/path.cc (revision 5a685f44)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3*     Copyright 2015 Couchbase, Inc
4*
5*   Licensed under the Apache License, Version 2.0 (the "License");
6*   you may not use this file except in compliance with the License.
7*   You may obtain a copy of the License at
8*
9*       http://www.apache.org/licenses/LICENSE-2.0
10*
11*   Unless required by applicable law or agreed to in writing, software
12*   distributed under the License is distributed on an "AS IS" BASIS,
13*   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*   See the License for the specific language governing permissions and
15*   limitations under the License.
16*/
17
18#define INCLUDE_JSONSL_SRC
19#include "subdoc-api.h"
20#include "path.h"
21
22using namespace Subdoc;
23
24const char *
25Path::convert_escaped(const char *src, size_t& len)
26{
27    if (m_cached.empty()) {
28        m_used.push_back(new std::string());
29    } else {
30        m_used.push_back(m_cached.back());
31        m_cached.pop_back();
32    }
33    std::string& s = *m_used.back();
34
35    for (size_t ii = 0; ii < len; ii++) {
36        if (src[ii] != '`') {
37            s += src[ii];
38        } else if(src[ii] == '`' && ii+1 < len && src[ii+1] == '`') {
39            s += src[ii++];
40        }
41    }
42    len = s.size();
43    return s.c_str();
44}
45
46/* Adds a numeric component */
47int
48Path::add_num_component(const char *component, size_t len)
49{
50    unsigned ii;
51    size_t numval = 0;
52
53    if (component[0] == '-') {
54        if (len != 2 || component[1] != '1') {
55            return JSONSL_ERROR_INVALID_NUMBER;
56        } else {
57            return add_array_index(-1);
58        }
59    }
60
61    for (ii = 0; ii < len; ii++) {
62        const char *c = &component[ii];
63        if (*c < 0x30 || *c > 0x39) {
64            return JSONSL_ERROR_INVALID_NUMBER;
65        } else {
66            size_t tmpval = numval;
67            tmpval *= 10;
68            tmpval += *c - 0x30;
69
70            /* check for overflow */
71            if (tmpval < numval) {
72                return JSONSL_ERROR_INVALID_NUMBER;
73            } else {
74                numval = tmpval;
75            }
76        }
77    }
78    return add_array_index(numval);
79}
80
81int
82Path::add_str_component(const char *component, size_t len, int n_backtick)
83{
84    /* Allocate first component: */
85    if (len > 1 && component[0] == '`' && component[len-1] == '`') {
86        component++;
87        n_backtick -= 2;
88        len -= 2;
89    }
90
91    if (size() == Limits::MAX_COMPONENTS) {
92        return JSONSL_ERROR_LEVELS_EXCEEDED;
93    }
94    if (len == 0) {
95        return JSONSL_ERROR_JPR_BADPATH;
96    }
97
98    if (n_backtick) {
99        /* OHNOEZ! Slow path */
100        component = convert_escaped(component, len);
101    }
102
103    Component& jpr_comp = add(JSONSL_PATH_STRING);
104    jpr_comp.pstr = const_cast<char*>(component);
105    jpr_comp.len = len;
106    jpr_comp.is_neg = 0;
107    return 0;
108}
109
110jsonsl_error_t
111Path::add_array_index(long ixnum)
112{
113    if (size() == Limits::MAX_COMPONENTS) {
114        return JSONSL_ERROR_LEVELS_EXCEEDED;
115    }
116
117    Component& comp = add(JSONSL_PATH_NUMERIC);
118    comp.len = 0;
119    comp.idx = ixnum;
120    comp.pstr = NULL;
121    if (ixnum == -1) {
122        has_negix = true;
123        comp.is_neg = 1;
124    } else {
125        comp.is_neg = 0;
126    }
127    return JSONSL_ERROR_SUCCESS;
128}
129
/*
 * Lookup table, copied over from jsonsl, indexed by byte value: an entry is 1
 * when that byte may follow a backslash in a JSON string escape
 * (" / \ b f n r t u) and 0 otherwise. Sixteen entries per row; the row label
 * is the index of the row's first entry.
 */
static const int allowed_json_escapes[0x100] = {
        /* 0x00 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x10 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x20 */ 0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,1, /* 0x22 <">, 0x2f </> */
        /* 0x30 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x40 */ 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
        /* 0x50 */ 0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0, /* 0x5c <\> */
        /* 0x60 */ 0,0,1,0,0,0,1,0, 0,0,0,0,0,0,1,0, /* 0x62 <b>, 0x66 <f>, 0x6e <n> */
        /* 0x70 */ 0,0,1,0,1,1,0,0, 0,0,0,0,0,0,0,0, /* 0x72 <r>, 0x74 <t>, 0x75 <u> */
        /* 0x80 through 0xff have no initializer and are therefore zero. */
};
157
158int
159Path::parse_bracket(const char *path, size_t len, size_t *n_consumed)
160{
161    // Check if 0 before decreasing! */
162    if (len == 0) {
163        return JSONSL_ERROR_JPR_BADPATH;
164    }
165
166    // Adjust positions so we don't parse the first '[':
167    len--, path++, *n_consumed = 1;
168
169    for (size_t ii = 0; ii < len; ii++) {
170        if (path[ii] == ']') {
171            *n_consumed += (ii + 1);
172            return add_num_component(path, ii);
173        }
174    }
175
176    // Didn't find the closing ']'
177    return JSONSL_ERROR_JPR_BADPATH;
178}
179
180int
181Path::parse_string(const char *path, size_t len, size_t *n_consumed)
182{
183    bool in_n1ql_escape = false;
184    bool in_json_escape = false;
185    int n_backticks = 0;
186
187    if (len == 0) {
188        return JSONSL_ERROR_JPR_BADPATH;
189    }
190
191    for (size_t ii = 0; ii < len; ii++) {
192        // Escape handling
193        int can_jescape = allowed_json_escapes[static_cast<int>(path[ii])];
194        if (in_json_escape) {
195            if (!can_jescape) {
196                return JSONSL_ERROR_JPR_BADPATH;
197            } else if (path[ii] == 'u') {
198                /* We can't handle \u-escapes in paths now! */
199                return JSONSL_ERROR_JPR_BADPATH;
200            }
201            in_json_escape = false;
202        } else if (path[ii] == '\\') {
203            in_json_escape = true;
204        } else if (path[ii] == '"' || path[ii] < 0x1F) {
205            // Needs escape!
206            return JSONSL_ERROR_JPR_BADPATH;
207        }
208
209        if (path[ii] == '`') {
210            n_backticks++;
211            in_n1ql_escape = !in_n1ql_escape;
212        }
213        if (in_n1ql_escape) {
214            continue;
215        }
216
217        // Token handling
218        if (path[ii] == ']') {
219            return JSONSL_ERROR_JPR_BADPATH;
220        } else if (path[ii] == '[' || path[ii] == '.') {
221            *n_consumed = ii;
222            if (path[ii] == '.') {
223                *n_consumed += 1;
224            }
225
226            if (in_n1ql_escape || in_json_escape) {
227                return JSONSL_ERROR_JPR_BADPATH;
228            }
229            return add_str_component(path, ii, n_backticks);
230        }
231    }
232
233    if (in_n1ql_escape || in_json_escape) {
234        return JSONSL_ERROR_JPR_BADPATH;
235    }
236
237    *n_consumed = len;
238    return add_str_component(path, len, n_backticks);
239}
240
241/* So this should somehow give us a 'JPR' object.. */
242int
243Path::parse(const char *path, size_t len)
244{
245    /* Path's buffers cannot change */
246    ncomponents = 0;
247    has_negix = false;
248    add(JSONSL_PATH_ROOT);
249
250    size_t ii = 0;
251
252    while (ii < len) {
253        size_t to_adv = 0;
254        int rv;
255
256        if (path[ii] == '[') {
257            rv = parse_bracket(path + ii, len-ii, &to_adv);
258            if (rv == 0) {
259                ii += to_adv;
260                if (ii == len) {
261                    // Last character. Will implicitly break
262
263                } else if (path[ii] == '[') {
264                    // Parse it on the next iteration
265
266                } else if (path[ii] == '.') {
267                    // Skip another character. Ignore the '.'
268                    ii++;
269                } else {
270                    return JSONSL_ERROR_JPR_BADPATH;
271                }
272            }
273        } else {
274            rv = parse_string(path + ii, len - ii, &to_adv);
275            ii += to_adv;
276        }
277
278        if (rv != 0) {
279            return rv;
280        }
281    }
282    return JSONSL_ERROR_SUCCESS;
283}
284
285Path::Path() : PathComponentInfo(components_s, 0) {
286    has_negix = false;
287    memset(components_s, 0, sizeof components_s);
288}
289
290Path::~Path() {
291    clear();
292    for (auto ii : m_cached) {
293        delete ii;
294    }
295}
296
297void
298Path::clear() {
299    unsigned ii;
300    for (ii = 1; ii < size(); ii++) {
301        Component& comp = get_component(ii);
302        comp.pstr = NULL;
303        comp.ptype = JSONSL_PATH_NONE;
304        comp.is_neg = 0;
305    }
306
307    m_cached.insert(m_cached.end(), m_used.begin(), m_used.end());
308    m_used.clear();
309}
310