1 /*
2  * Copyright 2010, Lloyd Hilaiel.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *  1. Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in
13  *     the documentation and/or other materials provided with the
14  *     distribution.
15  *
16  *  3. Neither the name of Lloyd Hilaiel nor the names of its
17  *     contributors may be used to endorse or promote products derived
18  *     from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
29  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include "yajl_encode.h"
34 
35 #include <assert.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 
/*
 * Write the two-character uppercase hexadecimal form of byte `c` into
 * hexBuf[0] (high nibble) and hexBuf[1] (low nibble).  No terminator is
 * written; the caller owns the rest of the buffer.
 */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int upper = c / 16u;
    const unsigned int lower = c % 16u;

    hexBuf[0] = digits[upper];
    hexBuf[1] = digits[lower];
}
46 
47 void
yajl_string_encode(yajl_buf buf, const unsigned char * str, unsigned int len)48 yajl_string_encode(yajl_buf buf, const unsigned char * str,
49                    unsigned int len)
50 {
51     yajl_string_encode2((const yajl_print_t) &yajl_buf_append, buf, str, len);
52 }
53 
54 void
yajl_string_encode2(const yajl_print_t print, void * ctx, const unsigned char * str, unsigned int len)55 yajl_string_encode2(const yajl_print_t print,
56                     void * ctx,
57                     const unsigned char * str,
58                     unsigned int len)
59 {
60     unsigned int beg = 0;
61     unsigned int end = 0;
62     char hexBuf[7];
63     hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
64     hexBuf[6] = 0;
65 
66     while (end < len) {
67         const char * escaped = NULL;
68         switch (str[end]) {
69             case '\r': escaped = "\\r"; break;
70             case '\n': escaped = "\\n"; break;
71             case '\\': escaped = "\\\\"; break;
72             /* case '/': escaped = "\\/"; break; */
73             case '"': escaped = "\\\""; break;
74             case '\f': escaped = "\\f"; break;
75             case '\b': escaped = "\\b"; break;
76             case '\t': escaped = "\\t"; break;
77             default:
78                 if ((unsigned char) str[end] < 32) {
79                     CharToHex(str[end], hexBuf + 4);
80                     escaped = hexBuf;
81                 }
82                 break;
83         }
84         if (escaped != NULL) {
85             print(ctx, (const char *) (str + beg), end - beg);
86             print(ctx, escaped, strlen(escaped));
87             beg = ++end;
88         } else {
89             ++end;
90         }
91     }
92     print(ctx, (const char *) (str + beg), end - beg);
93 }
94 
/*
 * Shift four hexadecimal digits read from `hex` into `*val`, most
 * significant digit first.  `*val` is not cleared first: each digit is
 * OR-ed in after shifting the existing value left by four bits.
 *
 * Upper- and lower-case letters are accepted.  A character that does not
 * reduce to a value in 0..15 trips an assert.
 */
static void hexToDigit(unsigned int * val, const unsigned char * hex)
{
    unsigned int remaining = 4;

    while (remaining-- > 0) {
        unsigned char nibble = *hex++;

        if (nibble >= 'A') {
            /* fold lower case to upper, then close the gap between '9' and 'A' */
            nibble = (unsigned char) ((nibble & ~0x20) - 7);
        }
        nibble = (unsigned char) (nibble - '0');
        assert((nibble & 0xF0) == 0);
        *val = (*val << 4) | nibble;
    }
}
106 
/*
 * Encode `codepoint` as a NUL-terminated UTF-8 sequence in `utf8Buf`.
 *
 * Values below 0x80, 0x800, 0x10000 and 0x200000 produce 1, 2, 3 and 4
 * bytes respectively; anything larger is replaced by the single character
 * '?'.  `utf8Buf` must have room for at least 5 bytes.
 */
static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
{
    unsigned int nbytes;
    unsigned int leadMarker;
    unsigned int idx;

    if (codepoint < 0x80) {
        nbytes = 1;
        leadMarker = 0x00;
    } else if (codepoint < 0x0800) {
        nbytes = 2;
        leadMarker = 0xC0;
    } else if (codepoint < 0x10000) {
        nbytes = 3;
        leadMarker = 0xE0;
    } else if (codepoint < 0x200000) {
        nbytes = 4;
        leadMarker = 0xF0;
    } else {
        utf8Buf[0] = '?';
        utf8Buf[1] = 0;
        return;
    }

    /* continuation bytes carry six payload bits each, filled last-to-first */
    for (idx = nbytes - 1; idx > 0; idx--) {
        utf8Buf[idx] = (char) ((codepoint & 0x3F) | 0x80);
        codepoint >>= 6;
    }
    /* whatever bits remain belong in the lead byte */
    utf8Buf[0] = (char) (codepoint | leadMarker);
    utf8Buf[nbytes] = 0;
}
132 
/*
 * Decode the backslash escapes in `len` bytes of `str` and append the
 * result to `buf`.
 *
 *   buf  destination buffer; literal runs and decoded characters are
 *        appended with yajl_buf_append()
 *   str  input bytes (need not be NUL-terminated)
 *   len  number of bytes in `str`
 *
 * Recognised escapes are \r \n \\ \/ \" \f \b \t and \uXXXX.  A \uXXXX
 * high surrogate immediately followed by a \uXXXX low surrogate is
 * combined into a single code point.  A high surrogate without a valid
 * partner is replaced by '?', and decoding resumes with the byte right
 * after it, so nothing that follows is lost.  \u0000 appends one NUL byte.
 *
 * NOTE(review): this routine appears to assume the input was already
 * validated (every backslash is followed by a known escape character and
 * every \u by four hex digits); an unknown escape trips an assert.
 */
void yajl_string_decode(yajl_buf buf, const unsigned char * str,
                        unsigned int len)
{
    unsigned int beg = 0;
    unsigned int end = 0;

    while (end < len) {
        if (str[end] == '\\') {
            char utf8Buf[5];
            const char * unescaped = "?";
            /* tracked explicitly so that a decoded NUL is not lost to strlen */
            unsigned int unescapedLen = 1;
            yajl_buf_append(buf, str + beg, end - beg);
            switch (str[++end]) {
                case 'r': unescaped = "\r"; break;
                case 'n': unescaped = "\n"; break;
                case '\\': unescaped = "\\"; break;
                case '/': unescaped = "/"; break;
                case '"': unescaped = "\""; break;
                case 'f': unescaped = "\f"; break;
                case 'b': unescaped = "\b"; break;
                case 't': unescaped = "\t"; break;
                case 'u': {
                    unsigned int codepoint = 0;
                    hexToDigit(&codepoint, str + ++end);
                    /* leave `end` on the last hex digit of this escape */
                    end += 3;
                    /* check if this is a (high) surrogate */
                    if ((codepoint & 0xFC00) == 0xD800) {
                        unsigned int surrogate = 0;
                        int paired = 0;
                        /* a partner needs six more bytes: '\\' 'u' + 4 hex */
                        if (end < len && len - end > 6 &&
                            str[end + 1] == '\\' && str[end + 2] == 'u')
                        {
                            hexToDigit(&surrogate, str + end + 3);
                            paired = ((surrogate & 0xFC00) == 0xDC00);
                        }
                        if (!paired) {
                            /* lone high surrogate: emit '?' and consume
                             * nothing beyond this escape */
                            unescaped = "?";
                            break;
                        }
                        codepoint =
                            (((codepoint & 0x3F) << 10) |
                             ((((codepoint >> 6) & 0xF) + 1) << 16) |
                             (surrogate & 0x3FF));
                        /* step onto the last hex digit of the low surrogate */
                        end += 6;
                    }

                    Utf32toUtf8(codepoint, utf8Buf);
                    unescaped = utf8Buf;
                    /* U+0000 encodes as a single zero byte, which strlen
                     * would report as an empty string */
                    unescapedLen = (codepoint == 0)
                        ? 1 : (unsigned int) strlen(utf8Buf);
                    break;
                }
                default:
                    assert("this should never happen" == NULL);
            }
            yajl_buf_append(buf, unescaped, unescapedLen);
            beg = ++end;
        } else {
            end++;
        }
    }
    yajl_buf_append(buf, str + beg, end - beg);
}
189