1%% @author Bob Ippolito <bob@mochimedia.com>
2%% @copyright 2007 Mochi Media, Inc.
3
4%% @doc Utilities for parsing and quoting.
5
6-module(mochiweb_util).
7-author('bob@mochimedia.com').
8-export([join/2, quote_plus/1, urlencode/1, parse_qs/1, unquote/1]).
9-export([path_split/1]).
10-export([urlsplit/1, urlsplit_path/1, urlunsplit/1, urlunsplit_path/1]).
11-export([guess_mime/1, parse_header/1]).
12-export([shell_quote/1, cmd/1, cmd_string/1, cmd_port/2]).
13-export([record_to_proplist/2, record_to_proplist/3]).
14-export([test/0]).
15
16-define(PERCENT, 37).  % $\%
17-define(FULLSTOP, 46). % $\.
18-define(IS_HEX(C), ((C >= $0 andalso C =< $9) orelse
19                    (C >= $a andalso C =< $f) orelse
20                    (C >= $A andalso C =< $F))).
21-define(QS_SAFE(C), ((C >= $a andalso C =< $z) orelse
22                     (C >= $A andalso C =< $Z) orelse
23                     (C >= $0 andalso C =< $9) orelse
24                     (C =:= ?FULLSTOP orelse C =:= $- orelse C =:= $~ orelse
25                      C =:= $_))).
26
27hexdigit(C) when C < 10 -> $0 + C;
28hexdigit(C) when C < 16 -> $A + (C - 10).
29
30unhexdigit(C) when C >= $0, C =< $9 -> C - $0;
31unhexdigit(C) when C >= $a, C =< $f -> C - $a + 10;
32unhexdigit(C) when C >= $A, C =< $F -> C - $A + 10.
33
34%% @spec shell_quote(string()) -> string()
35%% @doc Quote a string according to UNIX shell quoting rules, returns a string
36%%      surrounded by double quotes.
37shell_quote(L) ->
38    shell_quote(L, [$\"]).
39
40%% @spec cmd_port([string()], Options) -> port()
41%% @doc open_port({spawn, mochiweb_util:cmd_string(Argv)}, Options).
42cmd_port(Argv, Options) ->
43    open_port({spawn, cmd_string(Argv)}, Options).
44
45%% @spec cmd([string()]) -> string()
46%% @doc os:cmd(cmd_string(Argv)).
47cmd(Argv) ->
48    os:cmd(cmd_string(Argv)).
49
50%% @spec cmd_string([string()]) -> string()
51%% @doc Create a shell quoted command string from a list of arguments.
52cmd_string(Argv) ->
53    join([shell_quote(X) || X <- Argv], " ").
54
55%% @spec join([string()], Separator) -> string()
56%% @doc Join a list of strings together with the given separator
57%%      string or char.
58join([], _Separator) ->
59    [];
60join([S], _Separator) ->
61    lists:flatten(S);
62join(Strings, Separator) ->
63    lists:flatten(revjoin(lists:reverse(Strings), Separator, [])).
64
65revjoin([], _Separator, Acc) ->
66    Acc;
67revjoin([S | Rest], Separator, []) ->
68    revjoin(Rest, Separator, [S]);
69revjoin([S | Rest], Separator, Acc) ->
70    revjoin(Rest, Separator, [S, Separator | Acc]).
71
72%% @spec quote_plus(atom() | integer() | string()) -> string()
73%% @doc URL safe encoding of the given term.
74quote_plus(Atom) when is_atom(Atom) ->
75    quote_plus(atom_to_list(Atom));
76quote_plus(Int) when is_integer(Int) ->
77    quote_plus(integer_to_list(Int));
78quote_plus(String) ->
79    quote_plus(String, []).
80
81quote_plus([], Acc) ->
82    lists:reverse(Acc);
83quote_plus([C | Rest], Acc) when ?QS_SAFE(C) ->
84    quote_plus(Rest, [C | Acc]);
85quote_plus([$\s | Rest], Acc) ->
86    quote_plus(Rest, [$+ | Acc]);
87quote_plus([C | Rest], Acc) ->
88    <<Hi:4, Lo:4>> = <<C>>,
89    quote_plus(Rest, [hexdigit(Lo), hexdigit(Hi), ?PERCENT | Acc]).
90
91%% @spec urlencode([{Key, Value}]) -> string()
92%% @doc URL encode the property list.
93urlencode(Props) ->
94    RevPairs = lists:foldl(fun ({K, V}, Acc) ->
95                                   [[quote_plus(K), $=, quote_plus(V)] | Acc]
96                           end, [], Props),
97    lists:flatten(revjoin(RevPairs, $&, [])).
98
99%% @spec parse_qs(string() | binary()) -> [{Key, Value}]
100%% @doc Parse a query string or application/x-www-form-urlencoded.
101parse_qs(Binary) when is_binary(Binary) ->
102    parse_qs(binary_to_list(Binary));
103parse_qs(String) ->
104    parse_qs(String, []).
105
106parse_qs([], Acc) ->
107    lists:reverse(Acc);
108parse_qs(String, Acc) ->
109    {Key, Rest} = parse_qs_key(String),
110    {Value, Rest1} = parse_qs_value(Rest),
111    parse_qs(Rest1, [{Key, Value} | Acc]).
112
113parse_qs_key(String) ->
114    parse_qs_key(String, []).
115
116parse_qs_key([], Acc) ->
117    {qs_revdecode(Acc), ""};
118parse_qs_key([$= | Rest], Acc) ->
119    {qs_revdecode(Acc), Rest};
120parse_qs_key(Rest=[$; | _], Acc) ->
121    {qs_revdecode(Acc), Rest};
122parse_qs_key(Rest=[$& | _], Acc) ->
123    {qs_revdecode(Acc), Rest};
124parse_qs_key([C | Rest], Acc) ->
125    parse_qs_key(Rest, [C | Acc]).
126
127parse_qs_value(String) ->
128    parse_qs_value(String, []).
129
130parse_qs_value([], Acc) ->
131    {qs_revdecode(Acc), ""};
132parse_qs_value([$; | Rest], Acc) ->
133    {qs_revdecode(Acc), Rest};
134parse_qs_value([$& | Rest], Acc) ->
135    {qs_revdecode(Acc), Rest};
136parse_qs_value([C | Rest], Acc) ->
137    parse_qs_value(Rest, [C | Acc]).
138
139%% @spec unquote(string() | binary()) -> string()
140%% @doc Unquote a URL encoded string.
141unquote(Binary) when is_binary(Binary) ->
142    unquote(binary_to_list(Binary));
143unquote(String) ->
144    qs_revdecode(lists:reverse(String)).
145
146qs_revdecode(S) ->
147    qs_revdecode(S, []).
148
149qs_revdecode([], Acc) ->
150    Acc;
151qs_revdecode([$+ | Rest], Acc) ->
152    qs_revdecode(Rest, [$\s | Acc]);
153qs_revdecode([Lo, Hi, ?PERCENT | Rest], Acc) when ?IS_HEX(Lo), ?IS_HEX(Hi) ->
154    qs_revdecode(Rest, [(unhexdigit(Lo) bor (unhexdigit(Hi) bsl 4)) | Acc]);
155qs_revdecode([C | Rest], Acc) ->
156    qs_revdecode(Rest, [C | Acc]).
157
158%% @spec urlsplit(Url) -> {Scheme, Netloc, Path, Query, Fragment}
159%% @doc Return a 5-tuple, does not expand % escapes. Only supports HTTP style
160%%      URLs.
161urlsplit(Url) ->
162    {Scheme, Url1} = urlsplit_scheme(Url),
163    {Netloc, Url2} = urlsplit_netloc(Url1),
164    {Path, Query, Fragment} = urlsplit_path(Url2),
165    {Scheme, Netloc, Path, Query, Fragment}.
166
167urlsplit_scheme(Url) ->
168    urlsplit_scheme(Url, []).
169
170urlsplit_scheme([], Acc) ->
171    {"", lists:reverse(Acc)};
172urlsplit_scheme(":" ++ Rest, Acc) ->
173    {string:to_lower(lists:reverse(Acc)), Rest};
174urlsplit_scheme([C | Rest], Acc) ->
175    urlsplit_scheme(Rest, [C | Acc]).
176
177urlsplit_netloc("//" ++ Rest) ->
178    urlsplit_netloc(Rest, []);
179urlsplit_netloc(Path) ->
180    {"", Path}.
181
182urlsplit_netloc(Rest=[C | _], Acc) when C =:= $/; C =:= $?; C =:= $# ->
183    {lists:reverse(Acc), Rest};
184urlsplit_netloc([C | Rest], Acc) ->
185    urlsplit_netloc(Rest, [C | Acc]).
186
187
188%% @spec path_split(string()) -> {Part, Rest}
189%% @doc Split a path starting from the left, as in URL traversal.
190%%      path_split("foo/bar") = {"foo", "bar"},
191%%      path_split("/foo/bar") = {"", "foo/bar"}.
192path_split(S) ->
193    path_split(S, []).
194
195path_split("", Acc) ->
196    {lists:reverse(Acc), ""};
197path_split("/" ++ Rest, Acc) ->
198    {lists:reverse(Acc), Rest};
199path_split([C | Rest], Acc) ->
200    path_split(Rest, [C | Acc]).
201
202
203%% @spec urlunsplit({Scheme, Netloc, Path, Query, Fragment}) -> string()
204%% @doc Assemble a URL from the 5-tuple. Path must be absolute.
205urlunsplit({Scheme, Netloc, Path, Query, Fragment}) ->
206    lists:flatten([case Scheme of "" -> "";  _ -> [Scheme, "://"] end,
207                   Netloc,
208                   urlunsplit_path({Path, Query, Fragment})]).
209
210%% @spec urlunsplit_path({Path, Query, Fragment}) -> string()
211%% @doc Assemble a URL path from the 3-tuple.
212urlunsplit_path({Path, Query, Fragment}) ->
213    lists:flatten([Path,
214                   case Query of "" -> ""; _ -> [$? | Query] end,
215                   case Fragment of "" -> ""; _ -> [$# | Fragment] end]).
216
217%% @spec urlsplit_path(Url) -> {Path, Query, Fragment}
218%% @doc Return a 3-tuple, does not expand % escapes. Only supports HTTP style
219%%      paths.
220urlsplit_path(Path) ->
221    urlsplit_path(Path, []).
222
223urlsplit_path("", Acc) ->
224    {lists:reverse(Acc), "", ""};
225urlsplit_path("?" ++ Rest, Acc) ->
226    {Query, Fragment} = urlsplit_query(Rest),
227    {lists:reverse(Acc), Query, Fragment};
228urlsplit_path("#" ++ Rest, Acc) ->
229    {lists:reverse(Acc), "", Rest};
230urlsplit_path([C | Rest], Acc) ->
231    urlsplit_path(Rest, [C | Acc]).
232
233urlsplit_query(Query) ->
234    urlsplit_query(Query, []).
235
236urlsplit_query("", Acc) ->
237    {lists:reverse(Acc), ""};
238urlsplit_query("#" ++ Rest, Acc) ->
239    {lists:reverse(Acc), Rest};
240urlsplit_query([C | Rest], Acc) ->
241    urlsplit_query(Rest, [C | Acc]).
242
243%% @spec guess_mime(string()) -> string()
244%% @doc  Guess the mime type of a file by the extension of its filename.
245guess_mime(File) ->
246    case filename:extension(File) of
247        ".html" ->
248            "text/html";
249        ".xhtml" ->
250            "application/xhtml+xml";
251        ".xml" ->
252            "application/xml";
253        ".css" ->
254            "text/css";
255        ".js" ->
256            "application/x-javascript";
257        ".jpg" ->
258            "image/jpeg";
259        ".gif" ->
260            "image/gif";
261        ".png" ->
262            "image/png";
263        ".swf" ->
264            "application/x-shockwave-flash";
265        ".zip" ->
266            "application/zip";
267        ".bz2" ->
268            "application/x-bzip2";
269        ".gz" ->
270            "application/x-gzip";
271        ".tar" ->
272            "application/x-tar";
273        ".tgz" ->
274            "application/x-gzip";
275        ".txt" ->
276            "text/plain";
277        ".doc" ->
278            "application/msword";
279        ".pdf" ->
280            "application/pdf";
281        ".xls" ->
282            "application/vnd.ms-excel";
283        ".rtf" ->
284            "application/rtf";
285        ".mov" ->
286            "video/quicktime";
287        ".mp3" ->
288            "audio/mpeg";
289        ".z" ->
290            "application/x-compress";
291        ".wav" ->
292            "audio/x-wav";
293        ".ico" ->
294            "image/x-icon";
295        ".bmp" ->
296            "image/bmp";
297        ".m4a" ->
298            "audio/mpeg";
299        ".m3u" ->
300            "audio/x-mpegurl";
301        ".exe" ->
302            "application/octet-stream";
303        ".csv" ->
304            "text/csv";
305        _ ->
306            "text/plain"
307    end.
308
309%% @spec parse_header(string()) -> {Type, [{K, V}]}
310%% @doc  Parse a Content-Type like header, return the main Content-Type
311%%       and a property list of options.
312parse_header(String) ->
313    %% TODO: This is exactly as broken as Python's cgi module.
314    %%       Should parse properly like mochiweb_cookies.
315    [Type | Parts] = [string:strip(S) || S <- string:tokens(String, ";")],
316    F = fun (S, Acc) ->
317                case lists:splitwith(fun (C) -> C =/= $= end, S) of
318                    {"", _} ->
319                        %% Skip anything with no name
320                        Acc;
321                    {_, ""} ->
322                        %% Skip anything with no value
323                        Acc;
324                    {Name, [$\= | Value]} ->
325                        [{string:to_lower(string:strip(Name)),
326                          unquote_header(string:strip(Value))} | Acc]
327                end
328        end,
329    {string:to_lower(Type),
330     lists:foldr(F, [], Parts)}.
331
332unquote_header("\"" ++ Rest) ->
333    unquote_header(Rest, []);
334unquote_header(S) ->
335    S.
336
337unquote_header("", Acc) ->
338    lists:reverse(Acc);
339unquote_header("\"", Acc) ->
340    lists:reverse(Acc);
341unquote_header([$\\, C | Rest], Acc) ->
342    unquote_header(Rest, [C | Acc]);
343unquote_header([C | Rest], Acc) ->
344    unquote_header(Rest, [C | Acc]).
345
346%% @spec record_to_proplist(Record, Fields) -> proplist()
347%% @doc calls record_to_proplist/3 with a default TypeKey of '__record'
348record_to_proplist(Record, Fields) ->
349    record_to_proplist(Record, Fields, '__record').
350
351%% @spec record_to_proplist(Record, Fields, TypeKey) -> proplist()
352%% @doc Return a proplist of the given Record with each field in the
353%%      Fields list set as a key with the corresponding value in the Record.
354%%      TypeKey is the key that is used to store the record type
355%%      Fields should be obtained by calling record_info(fields, record_type)
356%%      where record_type is the record type of Record
357record_to_proplist(Record, Fields, TypeKey)
358  when is_tuple(Record),
359       is_list(Fields),
360       size(Record) - 1 =:= length(Fields) ->
361    lists:zip([TypeKey | Fields], tuple_to_list(Record)).
362
363
364shell_quote([], Acc) ->
365    lists:reverse([$\" | Acc]);
366shell_quote([C | Rest], Acc) when C =:= $\" orelse C =:= $\` orelse
367                                  C =:= $\\ orelse C =:= $\$ ->
368    shell_quote(Rest, [C, $\\ | Acc]);
369shell_quote([C | Rest], Acc) ->
370    shell_quote(Rest, [C | Acc]).
371
372test() ->
373    test_join(),
374    test_quote_plus(),
375    test_unquote(),
376    test_urlencode(),
377    test_parse_qs(),
378    test_urlsplit_path(),
379    test_urlunsplit_path(),
380    test_urlsplit(),
381    test_urlunsplit(),
382    test_path_split(),
383    test_guess_mime(),
384    test_parse_header(),
385    test_shell_quote(),
386    test_cmd(),
387    test_cmd_string(),
388    ok.
389
390test_shell_quote() ->
391    "\"foo \\$bar\\\"\\`' baz\"" = shell_quote("foo $bar\"`' baz"),
392    ok.
393
394test_cmd() ->
395    "$bling$ `word`!\n" = cmd(["echo", "$bling$ `word`!"]),
396    ok.
397
398test_cmd_string() ->
399    "\"echo\" \"\\$bling\\$ \\`word\\`!\"" = cmd_string(["echo", "$bling$ `word`!"]),
400    ok.
401
402test_parse_header() ->
403    {"multipart/form-data", [{"boundary", "AaB03x"}]} =
404        parse_header("multipart/form-data; boundary=AaB03x"),
405    ok.
406
407test_guess_mime() ->
408    "text/plain" = guess_mime(""),
409    "text/plain" = guess_mime(".text"),
410    "application/zip" = guess_mime(".zip"),
411    "application/zip" = guess_mime("x.zip"),
412    "text/html" = guess_mime("x.html"),
413    "application/xhtml+xml" = guess_mime("x.xhtml"),
414    ok.
415
416test_path_split() ->
417    {"", "foo/bar"} = path_split("/foo/bar"),
418    {"foo", "bar"} = path_split("foo/bar"),
419    {"bar", ""} = path_split("bar"),
420    ok.
421
422test_urlsplit() ->
423    {"", "", "/foo", "", "bar?baz"} = urlsplit("/foo#bar?baz"),
424    {"http", "host:port", "/foo", "", "bar?baz"} =
425        urlsplit("http://host:port/foo#bar?baz"),
426    ok.
427
428test_urlsplit_path() ->
429    {"/foo/bar", "", ""} = urlsplit_path("/foo/bar"),
430    {"/foo", "baz", ""} = urlsplit_path("/foo?baz"),
431    {"/foo", "", "bar?baz"} = urlsplit_path("/foo#bar?baz"),
432    {"/foo", "", "bar?baz#wibble"} = urlsplit_path("/foo#bar?baz#wibble"),
433    {"/foo", "bar", "baz"} = urlsplit_path("/foo?bar#baz"),
434    {"/foo", "bar?baz", "baz"} = urlsplit_path("/foo?bar?baz#baz"),
435    ok.
436
437test_urlunsplit() ->
438    "/foo#bar?baz" = urlunsplit({"", "", "/foo", "", "bar?baz"}),
439    "http://host:port/foo#bar?baz" =
440        urlunsplit({"http", "host:port", "/foo", "", "bar?baz"}),
441    ok.
442
443test_urlunsplit_path() ->
444    "/foo/bar" = urlunsplit_path({"/foo/bar", "", ""}),
445    "/foo?baz" = urlunsplit_path({"/foo", "baz", ""}),
446    "/foo#bar?baz" = urlunsplit_path({"/foo", "", "bar?baz"}),
447    "/foo#bar?baz#wibble" = urlunsplit_path({"/foo", "", "bar?baz#wibble"}),
448    "/foo?bar#baz" = urlunsplit_path({"/foo", "bar", "baz"}),
449    "/foo?bar?baz#baz" = urlunsplit_path({"/foo", "bar?baz", "baz"}),
450    ok.
451
452test_join() ->
453    "foo,bar,baz" = join(["foo", "bar", "baz"], $,),
454    "foo,bar,baz" = join(["foo", "bar", "baz"], ","),
455    "foo bar" = join([["foo", " bar"]], ","),
456    "foo bar,baz" = join([["foo", " bar"], "baz"], ","),
457    "foo" = join(["foo"], ","),
458    "foobarbaz" = join(["foo", "bar", "baz"], ""),
459    ok.
460
461test_quote_plus() ->
462    "foo" = quote_plus(foo),
463    "1" = quote_plus(1),
464    "foo" = quote_plus("foo"),
465    "foo+bar" = quote_plus("foo bar"),
466    "foo%0A" = quote_plus("foo\n"),
467    "foo%0A" = quote_plus("foo\n"),
468    "foo%3B%26%3D" = quote_plus("foo;&="),
469    ok.
470
471test_unquote() ->
472    "foo bar" = unquote("foo+bar"),
473    "foo bar" = unquote("foo%20bar"),
474    "foo\r\n" = unquote("foo%0D%0A"),
475    ok.
476
477test_urlencode() ->
478    "foo=bar&baz=wibble+%0D%0A&z=1" = urlencode([{foo, "bar"},
479                                                 {"baz", "wibble \r\n"},
480                                                 {z, 1}]),
481    ok.
482
483test_parse_qs() ->
484    [{"foo", "bar"}, {"baz", "wibble \r\n"}, {"z", "1"}] =
485        parse_qs("foo=bar&baz=wibble+%0D%0A&z=1"),
486    ok.
487