1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3 *     Copyright 2015 Couchbase, Inc
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *       http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17 
#define INCLUDE_JSONSL_SRC
#include "subdoc-api.h"
#include "path.h"

#include <limits>
21 
22 using namespace Subdoc;
23 
24 const char *
convert_escaped(const char *src, size_t& len)25 Path::convert_escaped(const char *src, size_t& len)
26 {
27     if (m_cached.empty()) {
28         m_used.push_back(new std::string());
29     } else {
30         m_used.push_back(m_cached.back());
31         m_cached.pop_back();
32     }
33     std::string& s = *m_used.back();
34 
35     for (size_t ii = 0; ii < len; ii++) {
36         if (src[ii] != '`') {
37             s += src[ii];
38         } else if(src[ii] == '`' && ii+1 < len && src[ii+1] == '`') {
39             s += src[ii++];
40         }
41     }
42     len = s.size();
43     return s.c_str();
44 }
45 
46 /* Adds a numeric component */
47 int
add_num_component(const char *component, size_t len)48 Path::add_num_component(const char *component, size_t len)
49 {
50     unsigned ii;
51     size_t numval = 0;
52 
53     if (component[0] == '-') {
54         if (len != 2 || component[1] != '1') {
55             return JSONSL_ERROR_INVALID_NUMBER;
56         } else {
57             return add_array_index(-1);
58         }
59     }
60 
61     for (ii = 0; ii < len; ii++) {
62         const char *c = &component[ii];
63         if (*c < 0x30 || *c > 0x39) {
64             return JSONSL_ERROR_INVALID_NUMBER;
65         } else {
66             size_t tmpval = numval;
67             tmpval *= 10;
68             tmpval += *c - 0x30;
69 
70             /* check for overflow */
71             if (tmpval < numval) {
72                 return JSONSL_ERROR_INVALID_NUMBER;
73             } else {
74                 numval = tmpval;
75             }
76         }
77     }
78     return add_array_index(numval);
79 }
80 
81 int
add_str_component(const char *component, size_t len, int n_backtick)82 Path::add_str_component(const char *component, size_t len, int n_backtick)
83 {
84     /* Allocate first component: */
85     if (len > 1 && component[0] == '`' && component[len-1] == '`') {
86         component++;
87         n_backtick -= 2;
88         len -= 2;
89     }
90 
91     if (size() == Limits::MAX_COMPONENTS) {
92         return JSONSL_ERROR_LEVELS_EXCEEDED;
93     }
94     if (len == 0) {
95         return JSONSL_ERROR_JPR_BADPATH;
96     }
97 
98     if (n_backtick) {
99         /* OHNOEZ! Slow path */
100         component = convert_escaped(component, len);
101     }
102 
103     Component& jpr_comp = add(JSONSL_PATH_STRING);
104     jpr_comp.pstr = const_cast<char*>(component);
105     jpr_comp.len = len;
106     jpr_comp.is_neg = 0;
107     return 0;
108 }
109 
110 jsonsl_error_t
add_array_index(long ixnum)111 Path::add_array_index(long ixnum)
112 {
113     if (size() == Limits::MAX_COMPONENTS) {
114         return JSONSL_ERROR_LEVELS_EXCEEDED;
115     }
116 
117     Component& comp = add(JSONSL_PATH_NUMERIC);
118     comp.len = 0;
119     comp.idx = ixnum;
120     comp.pstr = NULL;
121     if (ixnum == -1) {
122         has_negix = true;
123         comp.is_neg = 1;
124     } else {
125         comp.is_neg = 0;
126     }
127     return JSONSL_ERROR_SUCCESS;
128 }
129 
/*
 * Lookup table (copied over from jsonsl) indexed by byte value: non-zero
 * when the byte may follow a backslash in a JSON string.
 * The non-zero entries are  "  /  \  b  f  n  r  t  u.
 * Rows are 16 entries wide; everything from 0x80 upwards is left to
 * zero-initialization.
 */
static const int allowed_json_escapes[0x100] = {
        /*          0 1 2 3 4 5 6 7 8 9 a b c d e f */
        /* 0x00 */  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /* 0x10 */  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /* 0x20 */  0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, /* 0x22 <">, 0x2f </> */
        /* 0x30 */  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /* 0x40 */  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /* 0x50 */  0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, /* 0x5c <\> */
        /* 0x60 */  0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0, /* 0x62 <b>, 0x66 <f>, 0x6e <n> */
        /* 0x70 */  0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0  /* 0x72 <r>, 0x74 <t>, 0x75 <u> */
};
157 
158 int
parse_bracket(const char *path, size_t len, size_t *n_consumed)159 Path::parse_bracket(const char *path, size_t len, size_t *n_consumed)
160 {
161     // Check if 0 before decreasing! */
162     if (len == 0) {
163         return JSONSL_ERROR_JPR_BADPATH;
164     }
165 
166     // Adjust positions so we don't parse the first '[':
167     len--, path++, *n_consumed = 1;
168 
169     for (size_t ii = 0; ii < len; ii++) {
170         if (path[ii] == ']') {
171             *n_consumed += (ii + 1);
172             return add_num_component(path, ii);
173         }
174     }
175 
176     // Didn't find the closing ']'
177     return JSONSL_ERROR_JPR_BADPATH;
178 }
179 
180 int
parse_string(const char *path, size_t len, size_t *n_consumed)181 Path::parse_string(const char *path, size_t len, size_t *n_consumed)
182 {
183     bool in_n1ql_escape = false;
184     bool in_json_escape = false;
185     int n_backticks = 0;
186 
187     if (len == 0) {
188         return JSONSL_ERROR_JPR_BADPATH;
189     }
190 
191     for (size_t ii = 0; ii < len; ii++) {
192         const uint8_t cur_c = static_cast<uint8_t>(path[ii]);
193         // Escape handling
194         int can_jescape = allowed_json_escapes[static_cast<int>(cur_c)];
195         if (in_json_escape) {
196             if (!can_jescape) {
197                 return JSONSL_ERROR_JPR_BADPATH;
198             } else if (cur_c == 'u') {
199                 /* We can't handle \u-escapes in paths now! */
200                 return JSONSL_ERROR_JPR_BADPATH;
201             }
202             in_json_escape = false;
203         } else if (cur_c == '\\') {
204             in_json_escape = true;
205         } else if (cur_c == '"' || cur_c < 0x1F) {
206             // Needs escape!
207             return JSONSL_ERROR_JPR_BADPATH;
208         }
209 
210         if (cur_c == '`') {
211             n_backticks++;
212             in_n1ql_escape = !in_n1ql_escape;
213         }
214         if (in_n1ql_escape) {
215             continue;
216         }
217 
218         // Token handling
219         if (cur_c == ']') {
220             return JSONSL_ERROR_JPR_BADPATH;
221         } else if (cur_c == '[' || cur_c == '.') {
222             *n_consumed = ii;
223             if (cur_c == '.') {
224                 *n_consumed += 1;
225             }
226 
227             if (in_n1ql_escape || in_json_escape) {
228                 return JSONSL_ERROR_JPR_BADPATH;
229             }
230             return add_str_component(path, ii, n_backticks);
231         }
232     }
233 
234     if (in_n1ql_escape || in_json_escape) {
235         return JSONSL_ERROR_JPR_BADPATH;
236     }
237 
238     *n_consumed = len;
239     return add_str_component(path, len, n_backticks);
240 }
241 
242 /* So this should somehow give us a 'JPR' object.. */
243 int
parse(const char *path, size_t len)244 Path::parse(const char *path, size_t len)
245 {
246     /* Path's buffers cannot change */
247     ncomponents = 0;
248     has_negix = false;
249     add(JSONSL_PATH_ROOT);
250 
251     size_t ii = 0;
252 
253     while (ii < len) {
254         size_t to_adv = 0;
255         int rv;
256 
257         if (path[ii] == '[') {
258             rv = parse_bracket(path + ii, len-ii, &to_adv);
259             if (rv == 0) {
260                 ii += to_adv;
261                 if (ii == len) {
262                     // Last character. Will implicitly break
263 
264                 } else if (path[ii] == '[') {
265                     // Parse it on the next iteration
266 
267                 } else if (path[ii] == '.') {
268                     // Skip another character. Ignore the '.'
269                     ii++;
270                 } else {
271                     return JSONSL_ERROR_JPR_BADPATH;
272                 }
273             }
274         } else {
275             rv = parse_string(path + ii, len - ii, &to_adv);
276             ii += to_adv;
277         }
278 
279         if (rv != 0) {
280             return rv;
281         }
282     }
283     return JSONSL_ERROR_SUCCESS;
284 }
285 
Path()286 Path::Path() : PathComponentInfo(components_s, 0) {
287     has_negix = false;
288     memset(components_s, 0, sizeof components_s);
289 }
290 
~Path()291 Path::~Path() {
292     clear();
293     for (auto ii : m_cached) {
294         delete ii;
295     }
296 }
297 
298 void
clear()299 Path::clear() {
300     unsigned ii;
301     for (ii = 1; ii < size(); ii++) {
302         Component& comp = get_component(ii);
303         comp.pstr = NULL;
304         comp.ptype = JSONSL_PATH_NONE;
305         comp.is_neg = 0;
306     }
307 
308     // Reset all used components back to default state (ready for re-use); and
309     // transfer to head of cached list.
310     for (auto& component : m_used) {
311         component->clear();
312     }
313     m_cached.splice(m_cached.begin(), m_used);
314 }
315