xref: /6.6.0/geocouch/vtree/src/vtree_io.erl (revision 263b2ce8)
1% Licensed under the Apache License, Version 2.0 (the "License"); you may not
2% use this file except in compliance with the License. You may obtain a copy of
3% the License at
4%
5%   http://www.apache.org/licenses/LICENSE-2.0
6%
7% Unless required by applicable law or agreed to in writing, software
8% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10% License for the specific language governing permissions and limitations under
11% the License.
12
13% This module implements the insertion into the vtree. It follows the normal
14% R-tree rules and is implementation independent. It just calls out to
15% modules for the choosing the correct subtree and splitting the nodes.
16
17% NOTE vmx 2012-09-04: It might make sense to rename this module to
18% geocouch_io, as it is more of storage specific to the backend (Apache
19% CouchDB vs. Couchbase). I'm not really sure about it, as geocouch_file
20% should abstract away from the differences between the backends.
21-module(vtree_io).
22
23-include("vtree.hrl").
24-include("couch_db.hrl").
25
26-export([write_node/3, read_node/2]).
27-export([encode_mbb/1, decode_mbb/1]).
28-export([treesize/1]).
29-export([decode_dups/1]).
30
31-ifdef(makecheck).
32-compile(nowarn_export_all).
33-compile(export_all).
34-endif.
35
36% keys and docIds have the same size
37-define(KEY_BITS,       12).
38-define(VALUE_BITS,     28).
39-define(POINTER_BITS,   48).
40-define(TREE_SIZE_BITS, 48).
41-define(RED_BITS,       16).
42% 12 bits would be enough for MbbO, but we like to have it padded to full bytes
43-define(MBBO_BITS,      16).
44
45-define(MAX_KEY_SIZE,     ((1 bsl ?KEY_BITS) - 1)).
46-define(MAX_VALUE_SIZE,   ((1 bsl ?VALUE_BITS) - 1)).
47
48-define(BLOB_SIZE, 24).
49
50
51% Writes a node of the tree (which is a list of KV- or KV-nodes) to disk and
52% return a KP-node with the corresponding information. No checks on the number
53% of nodes is performed, they are just written do disk as given. Potential
54% node splitting needs to happen before.
55-spec write_node(Fd :: file:io_device(), Nodes :: [#kv_node{} | #kp_node{}],
56                 Less :: lessfun()) -> {ok, #kp_node{}}.
57write_node(Fd, Nodes, Less) ->
58    {Bin, TreeSize} = encode_node(Nodes),
59    {ok, Pointer, Size} = geocouch_file:append_chunk(Fd, Bin),
60    geocouch_file:flush(Fd),
61    % The enclosing bounding box for all children
62    Mbb = vtree_util:nodes_mbb(Nodes, Less),
63    KpNode = #kp_node{
64      key = Mbb,
65      childpointer = Pointer,
66      treesize = Size+TreeSize,
67      % The original MBB is set to the MBB when the node is created
68      mbb_orig = Mbb
69     },
70    {ok, KpNode}.
71
72
73-spec read_node(Fd :: file:io_device(), Pointer :: non_neg_integer()) ->
74                       [#kp_node{} | #kv_node{}].
75read_node(Fd, Pointer) ->
76    {ok, BinNode} = geocouch_file:pread_chunk(Fd, Pointer),
77    decode_node(BinNode).
78
79
80% Returns the binary that will be stored, and the number size of the subtree
81% (resp. in case of a KV node, the number of bytes that were written during
82% encoding), to make sure the calculation of the subtree size is correct.
83-spec encode_node(Nodes :: [#kp_node{} | #kv_node{}]) ->
84                         {binary(), non_neg_integer()}.
85encode_node([#kv_node{}|_]=Nodes) ->
86    encode_node(Nodes, {<<?KV_NODE:8>>, 0});
87encode_node([#kp_node{}|_]=Nodes) ->
88    encode_node(Nodes, {<<?KP_NODE:8>>, 0}).
89-spec encode_node(Nodes :: [#kp_node{} | #kv_node{}],
90                  Acc :: {binary(), non_neg_integer()}) ->
91                         {binary(), non_neg_integer()}.
92encode_node([], Acc) ->
93    Acc;
94encode_node([Node|T], {BinAcc, TreeSizeAcc}) ->
95    BinK = encode_key(Node),
96    SizeK = erlang:size(BinK),
97    case SizeK < ?MAX_KEY_SIZE of
98        true -> ok;
99        false -> throw({error, key_too_long})
100    end,
101
102    {BinV, TreeSize} = encode_value(Node),
103    SizeV = erlang:iolist_size(BinV),
104    case SizeV < ?MAX_VALUE_SIZE of
105        true -> ok;
106        false -> throw({error, value_too_big})
107    end,
108
109    Bin = <<SizeK:?KEY_BITS, SizeV:?VALUE_BITS, BinK/binary, BinV/binary>>,
110    encode_node(T, {<<BinAcc/binary, Bin/binary>>, TreeSize + TreeSizeAcc}).
111
112
113encode_key(#kv_node{key = Key, docid = DocId}) ->
114    encode_key_docid(Key, DocId);
115encode_key(#kp_node{key = Mbb}) ->
116    BinMbb = encode_mbb(Mbb),
117    <<(length(Mbb) * 2):16, BinMbb/binary>>.
118
119
120% Encode the value of a Key-Value pair. It returns the encoded value and the
121% size of the subtree (in case of a KV node, the number of bytes that were
122% written during encoding). The treesize is used to calculate the disk usage
123% of the data in the tree.
124-spec encode_value(Node :: #kv_node{} | #kp_node{}) ->
125                          {Bin :: binary(), Size :: non_neg_integer()}.
126encode_value(#kv_node{}=Node) ->
127    #kv_node{
128       body = Body,
129       partition = PartId,
130       geometry = Geom
131      } = Node,
132    Value = case Body of
133                {dups, BodyDups} ->
134                    {dups, GeomDups} = Geom,
135                    lists:foldl(fun(BodyGeom, Acc) ->
136                                        Bin = encode_body_geom(BodyGeom),
137                                        <<Acc/binary, Bin/binary>>
138                                end, <<>>, lists:zip(BodyDups, GeomDups));
139                _ ->
140                    encode_body_geom({Body, Geom})
141            end,
142    Value2 = <<PartId:16, Value/binary>>,
143    {Value2, byte_size(Value2)};
144encode_value(#kp_node{}=Node) ->
145    #kp_node{
146              childpointer = PointerNode,
147              treesize = TreeSize,
148              mbb_orig = MbbO
149            } = Node,
150    BinMbbO = encode_mbb(MbbO),
151    NumMbbO = length(MbbO) * 2,
152    BinReduce = <<NumMbbO:16, BinMbbO/binary>>,
153    SizeReduce = byte_size(BinReduce),
154    BinValue = <<PointerNode:?POINTER_BITS, TreeSize:?TREE_SIZE_BITS,
155                 SizeReduce:?RED_BITS, BinReduce:SizeReduce/binary>>,
156    % Return `0` as no additional bytes are written. The bytes that will
157    % be written are accounted when the whole chunk gets written.
158    {BinValue, 0}.
159
160
161-spec encode_body_geom({binary(), binary()}) -> binary().
162encode_body_geom({Body, Geom}) ->
163    <<(byte_size(Body)):?BLOB_SIZE, (byte_size(Geom)):?BLOB_SIZE,
164      Body/binary, Geom/binary>>.
165
166
167-spec decode_dups(binary()) -> [{binary(), binary()}].
168decode_dups(Dups) ->
169    [{Body, Geom} ||
170        <<BodySize:?BLOB_SIZE, GeomSize:?BLOB_SIZE,
171          Body:BodySize/binary, Geom:GeomSize/binary>> <= Dups].
172
173
174-spec decode_kvnode_value(binary()) -> #kv_node{}.
175decode_kvnode_value(<<PartId:16, BodySize:?BLOB_SIZE, GeomSize:?BLOB_SIZE,
176                      Body:BodySize/binary, Geom:GeomSize/binary,
177                      Dups/binary>>) ->
178    case Dups of
179        <<>> ->
180            #kv_node{
181               body = Body,
182               partition = PartId,
183               geometry = Geom,
184               % XXX vmx 2014-07-20: What is the size used for?
185               size = 0
186              };
187        _ ->
188            {Bodies, Geoms} = lists:unzip(decode_dups(Dups)),
189            #kv_node{
190               body = {dups, [Body | Bodies]},
191               partition = PartId,
192               geometry = {dups, [Geom | Geoms]},
193               % XXX vmx 2014-07-20: What is the size used for?
194               size = 0
195              }
196    end.
197
198
199% Decode the value of a KP-node pair
200-spec decode_kpnode_value(BinValue :: binary()) -> #kp_node{}.
201decode_kpnode_value(BinValue) ->
202    <<PointerNode:?POINTER_BITS, TreeSize:?TREE_SIZE_BITS,
203      SizeReduce:?RED_BITS, Reduce:SizeReduce/binary>> = BinValue,
204    <<_NumMbb:16, BinMbbO/binary>> = Reduce,
205    MbbO = decode_mbb(BinMbbO),
206    #kp_node{
207              childpointer = PointerNode,
208              treesize = TreeSize,
209              mbb_orig = MbbO
210            }.
211
212
213% Decode the value of the KP nodes to Erlang terms
214-spec decode_node(BinValue :: binary()) -> [#kp_node{} | #kv_node{}].
215decode_node(<<?KV_NODE:8, Rest/binary>>) ->
216    decode_kvnode_pairs(Rest, []);
217decode_node(<<?KP_NODE:8, Rest/binary>>) ->
218    decode_kpnode_pairs(Rest, []).
219
220
221% Decode KV-nodes key value pairs to an Erlang record
222-spec decode_kvnode_pairs(BinValue :: binary(), Acc :: [#kv_node{}]) ->
223                                 [#kv_node{}].
224decode_kvnode_pairs(<<>>, Acc) ->
225    lists:reverse(Acc);
226% Matchning the binary in the function (and not the body) is an optimization
227decode_kvnode_pairs(<<SizeK:?KEY_BITS, SizeV:?VALUE_BITS,
228                      BinK:SizeK/binary, BinV:SizeV/binary,
229                      Rest/binary>>, Acc) ->
230    {Mbb, DocId} = decode_key_docid(BinK),
231    Node0 = decode_kvnode_value(BinV),
232    Node = Node0#kv_node{key = Mbb, docid = DocId},
233    decode_kvnode_pairs(Rest, [Node|Acc]).
234
235
236% Decode KP-nodes key value pairs to an Erlang record
237-spec decode_kpnode_pairs(BinValue :: binary(), Acc :: [#kp_node{}]) ->
238                                 [#kp_node{}].
239decode_kpnode_pairs(<<>>, Acc) ->
240    lists:reverse(Acc);
241% Matchning the binary in the function (and not the body) is an optimization
242decode_kpnode_pairs(<<SizeK:?KEY_BITS, SizeV:?VALUE_BITS,
243                      BinK:SizeK/binary, BinV:SizeV/binary,
244                      Rest/binary>>, Acc) ->
245    <<_NumMbb:16, BinMbb/binary>> = BinK,
246    Mbb = decode_mbb(BinMbb),
247    Node0 = decode_kpnode_value(BinV),
248    Node = Node0#kp_node{key = Mbb},
249    decode_kpnode_pairs(Rest, [Node|Acc]).
250
251
252-spec encode_mbb(Mbb :: mbb()) -> binary().
253encode_mbb(Mbb) ->
254    << <<Min:64/native-float, Max:64/native-float>> || {Min, Max} <- Mbb>>.
255
256
257-spec encode_key_docid(Mbb :: mbb(), DocId :: binary()) -> binary().
258encode_key_docid(Mbb, DocId) ->
259    BinMbb = encode_mbb(Mbb),
260    % Number of numbers is two times the dimension
261    <<(length(Mbb) * 2):16, BinMbb/binary, DocId/binary>>.
262
263-spec decode_mbb(BinMbb :: binary()) -> mbb().
264decode_mbb(<<BinMbb/binary>>) ->
265    [{Min, Max} || <<Min:64/native-float, Max:64/native-float>> <= BinMbb].
266
267
268-spec decode_key_docid(Key :: binary()) -> {mbb(), binary()}.
269decode_key_docid(<<Num:16, KeyDocId/binary>>) ->
270    KeySize = Num * 8,
271    <<BinMbb:KeySize/binary, DocId/binary>> = KeyDocId,
272    {decode_mbb(BinMbb), DocId}.
273
274
275-spec treesize(KpNode :: #kp_node{} | nil) -> non_neg_integer().
276treesize(nil) ->
277    0;
278treesize(#kp_node{treesize = Size}) ->
279    Size.
280