xref: /6.6.0/couchdb/src/couchdb/couch_util.erl (revision 1ffe7550)
1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5%   http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13-module(couch_util).
14
15-export([priv_dir/0, normpath/1]).
16-export([should_flush/0, should_flush/1, to_existing_atom/1]).
17-export([rand32/0, implode/2]).
18-export([abs_pathname/1,abs_pathname/2, trim/1]).
19-export([encodeBase64Url/1, decodeBase64Url/1]).
20-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
21-export([get_nested_json_value/2, json_user_ctx/1]).
22-export([proplist_apply_field/2, json_apply_field/2]).
23-export([to_binary/1, to_integer/1, to_list/1, url_encode/1]).
24-export([verify/2,simple_call/2,shutdown_sync/1]).
25-export([get_value/2, get_value/3]).
26-export([md5/1, md5_init/0, md5_update/2, md5_final/1]).
27-export([reorder_results/2]).
28-export([url_strip_password/1]).
29-export([encode_doc_id/1]).
30-export([brace/1, debrace/1]).
31-export([split_iolist/2]).
32-export([log_data/2]).
33-export([strong_rand_bytes/1]).
34-export([parse_view_name/1, log_parse_post/1]).
35-export([log_do_parse/1]).
36-export([get_view_list/1, find_match/3]).
37
38-include("couch_db.hrl").
39
40% arbitrarily chosen amount of memory to use before flushing to disk
41-define(FLUSH_MAX_MEM, 10000000).
42
43priv_dir() ->
44    case code:priv_dir(couch) of
45        {error, bad_name} ->
46            % small hack, in dev mode "app" is couchdb. Fixing requires
47            % renaming src/couch to src/couch. Not really worth the hassle.
48            % -Damien
49            code:priv_dir(couchdb);
50        Dir -> Dir
51    end.
52
53% Normalize a pathname by removing .. and . components.
54normpath(Path) ->
55    normparts(filename:split(Path), []).
56
57normparts([], Acc) ->
58    filename:join(lists:reverse(Acc));
59normparts([".." | RestParts], [_Drop | RestAcc]) ->
60    normparts(RestParts, RestAcc);
61normparts(["." | RestParts], Acc) ->
62    normparts(RestParts, Acc);
63normparts([Part | RestParts], Acc) ->
64    normparts(RestParts, [Part | Acc]).
65
66% works like list_to_existing_atom, except can be list or binary and it
67% gives you the original value instead of an error if no existing atom.
68to_existing_atom(V) when is_list(V) ->
69    try list_to_existing_atom(V) catch _:_ -> V end;
70to_existing_atom(V) when is_binary(V) ->
71    try list_to_existing_atom(?b2l(V)) catch _:_ -> V end;
72to_existing_atom(V) when is_atom(V) ->
73    V.
74
75shutdown_sync(Pid) when not is_pid(Pid)->
76    ok;
77shutdown_sync(Pid) ->
78    MRef = erlang:monitor(process, Pid),
79    try
80        catch unlink(Pid),
81        catch exit(Pid, shutdown),
82        receive
83        {'DOWN', MRef, _, _, _} ->
84            receive
85            {'EXIT', Pid, _} ->
86                ok
87            after 0 ->
88                ok
89            end
90        end
91    after
92        erlang:demonitor(MRef, [flush])
93    end.
94
95
96simple_call(Pid, Message) ->
97    MRef = erlang:monitor(process, Pid),
98    try
99        Pid ! {self(), Message},
100        receive
101        {Pid, Result} ->
102            Result;
103        {'DOWN', MRef, _, _, Reason} ->
104            exit(Reason)
105        end
106    after
107        erlang:demonitor(MRef, [flush])
108    end.
109
110validate_utf8(Val) when is_binary(Val) ->
111    case unicode:characters_to_binary(Val, utf8, utf8) of
112    Bin when Val =:= Bin ->
113        true;
114    _ ->
115        false
116    end;
117validate_utf8(Val) when is_list(Val) ->
118    validate_utf8(?l2b(Val)).
119
120to_hex([]) ->
121    [];
122to_hex(Bin) when is_binary(Bin) ->
123    to_hex(binary_to_list(Bin));
124to_hex([H|T]) ->
125    [to_digit(H div 16), to_digit(H rem 16) | to_hex(T)].
126
127to_digit(N) when N < 10 -> $0 + N;
128to_digit(N)             -> $a + N-10.
129
130
131parse_term(Bin) when is_binary(Bin) ->
132    parse_term(binary_to_list(Bin));
133parse_term(List) ->
134    {ok, Tokens, _} = erl_scan:string(List ++ "."),
135    erl_parse:parse_term(Tokens).
136
137get_value(Key, List) ->
138    get_value(Key, List, undefined).
139
140get_value(Key, List, Default) ->
141    case lists:keysearch(Key, 1, List) of
142    {value, {Key,Value}} ->
143        Value;
144    false ->
145        Default
146    end.
147
148get_nested_json_value({Props}, [Key|Keys]) ->
149    case get_value(Key, Props, nil) of
150    nil -> throw({not_found, <<"missing json key: ", Key/binary>>});
151    Value -> get_nested_json_value(Value, Keys)
152    end;
153get_nested_json_value(Value, []) ->
154    Value;
155get_nested_json_value(_NotJSONObj, _) ->
156    throw({not_found, json_mismatch}).
157
158proplist_apply_field(H, L) ->
159    {R} = json_apply_field(H, {L}),
160    R.
161
162json_apply_field(H, {L}) ->
163    json_apply_field(H, L, []).
164json_apply_field({Key, NewValue}, [{Key, _OldVal} | Headers], Acc) ->
165    json_apply_field({Key, NewValue}, Headers, Acc);
166json_apply_field({Key, NewValue}, [{OtherKey, OtherVal} | Headers], Acc) ->
167    json_apply_field({Key, NewValue}, Headers, [{OtherKey, OtherVal} | Acc]);
168json_apply_field({Key, NewValue}, [], Acc) ->
169    {[{Key, NewValue}|Acc]}.
170
171json_user_ctx(#db{name=DbName, user_ctx=Ctx}) ->
172    {[{<<"db">>, DbName},
173            {<<"name">>,Ctx#user_ctx.name},
174            {<<"roles">>,Ctx#user_ctx.roles}]}.
175
176
177% returns a random integer
178rand32() ->
179    crypto:rand_uniform(0, 16#100000000).
180
181% given a pathname "../foo/bar/" it gives back the fully qualified
182% absolute pathname.
183abs_pathname(" " ++ Filename) ->
184    % strip leading whitspace
185    abs_pathname(Filename);
186abs_pathname([$/ |_]=Filename) ->
187    Filename;
188abs_pathname(Filename) ->
189    {ok, Cwd} = file:get_cwd(),
190    {Filename2, Args} = separate_cmd_args(Filename, ""),
191    abs_pathname(Filename2, Cwd) ++ Args.
192
193abs_pathname(Filename, Dir) ->
194    Name = filename:absname(Filename, Dir ++ "/"),
195    OutFilename = filename:join(fix_path_list(filename:split(Name), [])),
196    % If the filename is a dir (last char slash, put back end slash
197    case string:right(Filename,1) of
198    "/" ->
199        OutFilename ++ "/";
200    "\\" ->
201        OutFilename ++ "/";
202    _Else->
203        OutFilename
204    end.
205
206% if this as an executable with arguments, seperate out the arguments
207% ""./foo\ bar.sh -baz=blah" -> {"./foo\ bar.sh", " -baz=blah"}
208separate_cmd_args("", CmdAcc) ->
209    {lists:reverse(CmdAcc), ""};
210separate_cmd_args("\\ " ++ Rest, CmdAcc) -> % handle skipped value
211    separate_cmd_args(Rest, " \\" ++ CmdAcc);
212separate_cmd_args(" " ++ Rest, CmdAcc) ->
213    {lists:reverse(CmdAcc), " " ++ Rest};
214separate_cmd_args([Char|Rest], CmdAcc) ->
215    separate_cmd_args(Rest, [Char | CmdAcc]).
216
217% Is a character whitespace?
218is_whitespace($\s) -> true;
219is_whitespace($\t) -> true;
220is_whitespace($\n) -> true;
221is_whitespace($\r) -> true;
222is_whitespace(_Else) -> false.
223
224
225% removes leading and trailing whitespace from a string
226trim(String) ->
227    String2 = lists:dropwhile(fun is_whitespace/1, String),
228    lists:reverse(lists:dropwhile(fun is_whitespace/1, lists:reverse(String2))).
229
230% takes a heirarchical list of dirs and removes the dots ".", double dots
231% ".." and the corresponding parent dirs.
232fix_path_list([], Acc) ->
233    lists:reverse(Acc);
234fix_path_list([".."|Rest], [_PrevAcc|RestAcc]) ->
235    fix_path_list(Rest, RestAcc);
236fix_path_list(["."|Rest], Acc) ->
237    fix_path_list(Rest, Acc);
238fix_path_list([Dir | Rest], Acc) ->
239    fix_path_list(Rest, [Dir | Acc]).
240
241
242implode(List, Sep) ->
243    implode(List, Sep, []).
244
245implode([], _Sep, Acc) ->
246    lists:flatten(lists:reverse(Acc));
247implode([H], Sep, Acc) ->
248    implode([], Sep, [H|Acc]);
249implode([H|T], Sep, Acc) ->
250    implode(T, Sep, [Sep,H|Acc]).
251
252
253should_flush() ->
254    should_flush(?FLUSH_MAX_MEM).
255
256should_flush(MemThreshHold) ->
257    {memory, ProcMem} = process_info(self(), memory),
258    BinMem = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
259        0, element(2,process_info(self(), binary))),
260    if ProcMem+BinMem > 2*MemThreshHold ->
261        garbage_collect(),
262        {memory, ProcMem2} = process_info(self(), memory),
263        BinMem2 = lists:foldl(fun({_Id, Size, _NRefs}, Acc) -> Size+Acc end,
264            0, element(2,process_info(self(), binary))),
265        ProcMem2+BinMem2 > MemThreshHold;
266    true -> false end.
267
268encodeBase64Url(Url) ->
269    Url1 = re:replace(base64:encode(Url), ["=+", $$], ""),
270    Url2 = re:replace(Url1, "/", "_", [global]),
271    re:replace(Url2, "\\+", "-", [global, {return, binary}]).
272
273decodeBase64Url(Url64) ->
274    Url1 = re:replace(Url64, "-", "+", [global]),
275    Url2 = re:replace(Url1, "_", "/", [global]),
276    Padding = lists:duplicate((4 - iolist_size(Url2) rem 4) rem 4, $=),
277    base64:decode(iolist_to_binary([Url2, Padding])).
278
279dict_find(Key, Dict, DefaultValue) ->
280    case dict:find(Key, Dict) of
281    {ok, Value} ->
282        Value;
283    error ->
284        DefaultValue
285    end.
286
287to_binary(V) when is_binary(V) ->
288    V;
289to_binary(V) when is_list(V) ->
290    try
291        list_to_binary(V)
292    catch
293        _:_ ->
294            list_to_binary(io_lib:format("~p", [V]))
295    end;
296to_binary(V) when is_atom(V) ->
297    list_to_binary(atom_to_list(V));
298to_binary(V) ->
299    list_to_binary(io_lib:format("~p", [V])).
300
301to_integer(V) when is_integer(V) ->
302    V;
303to_integer(V) when is_list(V) ->
304    erlang:list_to_integer(V);
305to_integer(V) when is_binary(V) ->
306    erlang:list_to_integer(binary_to_list(V)).
307
308to_list(V) when is_list(V) ->
309    V;
310to_list(V) when is_binary(V) ->
311    binary_to_list(V);
312to_list(V) when is_atom(V) ->
313    atom_to_list(V);
314to_list(V) ->
315    lists:flatten(io_lib:format("~p", [V])).
316
317url_encode(Bin) when is_binary(Bin) ->
318    url_encode(binary_to_list(Bin));
319url_encode([H|T]) ->
320    if
321    H >= $a, $z >= H ->
322        [H|url_encode(T)];
323    H >= $A, $Z >= H ->
324        [H|url_encode(T)];
325    H >= $0, $9 >= H ->
326        [H|url_encode(T)];
327    H == $_; H == $.; H == $-; H == $: ->
328        [H|url_encode(T)];
329    true ->
330        case lists:flatten(io_lib:format("~.16.0B", [H])) of
331        [X, Y] ->
332            [$%, X, Y | url_encode(T)];
333        [X] ->
334            [$%, $0, X | url_encode(T)]
335        end
336    end;
337url_encode([]) ->
338    [].
339
340verify([X|RestX], [Y|RestY], Result) ->
341    verify(RestX, RestY, (X bxor Y) bor Result);
342verify([], [], Result) ->
343    Result == 0.
344
345verify(<<X/binary>>, <<Y/binary>>) ->
346    verify(?b2l(X), ?b2l(Y));
347verify(X, Y) when is_list(X) and is_list(Y) ->
348    case length(X) == length(Y) of
349        true ->
350            verify(X, Y, 0);
351        false ->
352            false
353    end;
354verify(_X, _Y) -> false.
355
356-spec md5(Data::(iolist() | binary())) -> Digest::binary().
357md5(Data) ->
358    try crypto:hash(md5, Data) catch error:_ -> erlang:md5(Data) end.
359
360-spec md5_init() -> Context::term().
361md5_init() ->
362    try crypto:hash_init(md5) catch error:_ -> erlang:md5_init() end.
363
364-spec md5_update(Context::term(), Data::(iolist() | binary())) ->
365    NewContext::term().
366md5_update(Ctx, D) ->
367    try crypto:hash_update(Ctx,D) catch error:_ -> erlang:md5_update(Ctx,D) end.
368
369-spec md5_final(Context::term()) -> Digest::binary().
370md5_final(Ctx) ->
371    try crypto:hash_final(Ctx) catch error:_ -> erlang:md5_final(Ctx) end.
372
373% linear search is faster for small lists, length() is 0.5 ms for 100k list
374reorder_results(Keys, SortedResults) when length(Keys) < 100 ->
375    [couch_util:get_value(Key, SortedResults) || Key <- Keys];
376reorder_results(Keys, SortedResults) ->
377    KeyDict = dict:from_list(SortedResults),
378    [dict:fetch(Key, KeyDict) || Key <- Keys].
379
380url_strip_password(Url) ->
381    re:replace(Url,
382        "http(s)?://([^:]+):[^@]+@(.*)$",
383        "http\\1://\\2:*****@\\3",
384        [{return, list}]).
385
386encode_doc_id(#doc{id = Id}) ->
387    encode_doc_id(Id);
388encode_doc_id(Id) when is_list(Id) ->
389    encode_doc_id(?l2b(Id));
390encode_doc_id(<<"_design/", Rest/binary>>) ->
391    "_design/" ++ url_encode(Rest);
392encode_doc_id(<<"_local/", Rest/binary>>) ->
393    "_local/" ++ url_encode(Rest);
394encode_doc_id(Id) ->
395    url_encode(Id).
396
397debrace(IoList) ->
398    JsonBinary = iolist_to_binary(IoList),
399    InnerSize = size(JsonBinary) - 2,
400    <<"{", Inner:InnerSize/binary, "}">> = JsonBinary,
401    Inner.
402
403brace(JsonBinary) ->
404    iolist_to_binary(["{", JsonBinary, "}"]).
405
406
407%% @doc Returns a tuple where the first element contains the leading SplitAt
408%% bytes of the original iolist, and the 2nd element is the tail. If SplitAt
409%% is larger than byte_size(IoList), return the difference.
410-spec split_iolist(IoList::iolist(), SplitAt::non_neg_integer()) ->
411    {iolist(), iolist()} | non_neg_integer().
412
413split_iolist(List, SplitAt) ->
414    split_iolist(List, SplitAt, []).
415
416split_iolist(List, 0, BeginAcc) ->
417    {lists:reverse(BeginAcc), List};
418split_iolist([], SplitAt, _BeginAcc) ->
419    SplitAt;
420split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin), SplitAt > byte_size(Bin) ->
421    split_iolist(Rest, SplitAt - byte_size(Bin), [Bin | BeginAcc]);
422split_iolist([Bin | Rest], SplitAt, BeginAcc) when is_binary(Bin) ->
423    <<Begin:SplitAt/binary,End/binary>> = Bin,
424    split_iolist([End | Rest], 0, [Begin | BeginAcc]);
425split_iolist([Sublist| Rest], SplitAt, BeginAcc) when is_list(Sublist) ->
426    case split_iolist(Sublist, SplitAt, BeginAcc) of
427    {Begin, End} ->
428        {Begin, [End | Rest]};
429    SplitRemaining ->
430        split_iolist(Rest, SplitAt - (SplitAt - SplitRemaining), [Sublist | BeginAcc])
431    end;
432split_iolist([Byte | Rest], SplitAt, BeginAcc) when is_integer(Byte) ->
433    split_iolist(Rest, SplitAt - 1, [Byte | BeginAcc]).
434
435log_data(Tag, Arg) when is_binary(Arg) ->
436    io_lib:format("<~p>~s</~p>", [Tag, Arg, Tag]);
437log_data(Tag, Arg) ->
438    io_lib:format("<~p>~p</~p>", [Tag, Arg, Tag]).
439
440strong_rand_bytes(N) ->
441    strong_rand_bytes(N, 10).
442
443strong_rand_bytes(N, Retries) ->
444    try
445        crypto:strong_rand_bytes(N)
446    catch
447        error:low_entropy when N > 1 ->
448            SeedTerm = {erlang:monotonic_time(), erlang:unique_integer()},
449            SeedBin = crypto:hash(sha256, term_to_binary(SeedTerm)),
450            crypto:rand_seed(SeedBin),
451            strong_rand_bytes(N, Retries - 1)
452    end.
453
454parse_view_name(Name) ->
455    Tokens = string:tokens(couch_util:trim(?b2l(Name)), "/"),
456    case [?l2b(couch_httpd:unquote(Token)) || Token <- Tokens] of
457        [DDocName, ViewName] ->
458            {<<"_design/", DDocName/binary>>, ViewName};
459        [<<"_design">>, DDocName, ViewName] ->
460            {<<"_design/", DDocName/binary>>, ViewName};
461        _ ->
462            throw({bad_request, "A `view` property must have the shape"
463            " `ddoc_name/view_name`."})
464    end.
465
466log_parse_post(Req) ->
467    try log_do_parse(Req) of Str -> ?l2b(Str)
468    catch _:_ -> "" end.
469
470log_do_parse(#httpd{method='POST'} = Req) ->
471    {[{Bucket, {Props}}]} = get_nested_json_value(
472        couch_httpd:json_body_obj(Req), [<<"views">>, <<"sets">>]),
473    ViewName = get_value(<<"view">>, Props),
474    {DDoc, View} = parse_view_name(ViewName),
475    [<<"/">>, Bucket, <<"/">>, DDoc, <<"/_view/">>, View].
476
477get_view_list(undefined) ->
478    [];
479get_view_list({Views}) ->
480    case couch_util:get_value(<<"views">>, Views) of
481    undefined -> [];
482    {ViewDef} -> ViewDef
483    end;
484get_view_list(_) ->
485    [].
486
487find_match([], _, Default) ->
488    Default;
489find_match([Head|Tail], Prefix, Default) ->
490    case string:prefix(Head, Prefix) of
491    nomatch ->
492        find_match(Tail, Prefix, Default);
493    Matched ->
494        Matched
495    end.
496