1% -*- Mode: Erlang; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3% Licensed under the Apache License, Version 2.0 (the "License"); you may not
4% use this file except in compliance with the License. You may obtain a copy of
5% the License at
6%
7%   http://www.apache.org/licenses/LICENSE-2.0
8%
9% Unless required by applicable law or agreed to in writing, software
10% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12% License for the specific language governing permissions and limitations under
13% the License.
14
% Partition-count constant for set views; expands to the integer 1024.
% NOTE(review): presumably the upper bound on partitions per set view --
% confirm against the modules that use it.
-define(MAX_NUM_PARTITIONS, 1024).
16
% Expands to the 4-tuple {SetName, Name, Sig, Type} read from the fields of a
% #set_view_group{} record. The #set_view_group_stats.ets_key field is
% documented as being generated by this macro.
% The argument is expanded four times, so pass a bound variable.
-define(set_view_group_stats_key(Grp),
    {Grp#set_view_group.set_name, Grp#set_view_group.name,
     Grp#set_view_group.sig, Grp#set_view_group.type}
).
25
% A bit width (16) and the largest unsigned value representable in that many
% bits: (1 bsl 16) - 1 = 65535.
% NOTE(review): presumably the width of the field holding a user reduction's
% size -- confirm against the code that encodes reductions.
-define(USER_REDUCTION_SIZE_BITS, 16).
-define(MAX_USER_REDUCTION_SIZE, ((1 bsl ?USER_REDUCTION_SIZE_BITS) - 1)).
28
% Integer tags for the two kinds of view: 0 for mapreduce, 1 for spatial.
% NOTE(review): presumably these must match the values expected by the
% couchstore library -- confirm before changing them.
-define(COUCHSTORE_VIEW_TYPE_MAPREDUCE, 0).
-define(COUCHSTORE_VIEW_TYPE_SPATIAL, 1).
31
% Expands to a binary made of the set name, a "/" and the partition id
% rendered through integer_to_list/1 and ?l2b.
% ?l2b is not defined in this header; it must be in scope where this macro is
% used (presumably it is list_to_binary/1 -- confirm in the including header).
-define(dbname(Set, Part),
    <<Set/binary, $/, (?l2b(integer_to_list(Part)))/binary>>).
34
% Expands to the set name (a binary) with "/master" appended.
-define(master_dbname(Set), <<Set/binary, "/master">>).
36
% Reads num_partitions from the #set_view_index_header{} held in the
% index_header field of a #set_view_group{}.
-define(set_num_partitions(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.num_partitions).
39
% Reads abitmask (the active partitions bitmap) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_abitmask(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.abitmask).
42
% Reads pbitmask (the passive partitions bitmap) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_pbitmask(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.pbitmask).
45
% Reads cbitmask (the cleanup partitions bitmap) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_cbitmask(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.cbitmask).
48
% Reads seqs (a partition_seqs() list) from the #set_view_index_header{}
% held in the index_header field of a #set_view_group{}.
-define(set_seqs(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.seqs).
51
% Reads replicas_on_transfer (an ordset of partition ids) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_replicas_on_transfer(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.replicas_on_transfer).
54
% Reads pending_transition (nil or a #set_view_transition{}) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_pending_transition(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.pending_transition).
57
% Reads unindexable_seqs (a partition_seqs() list) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_unindexable_seqs(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.unindexable_seqs).
60
% Reads partition_versions (nil or a partition_versions() list) from the
% #set_view_index_header{} held in the index_header field of a
% #set_view_group{}.
-define(set_partition_versions(Grp),
    (Grp#set_view_group.index_header)#set_view_index_header.partition_versions).
63
% Expands to [] when the argument is nil, otherwise to the `active` field of
% the #set_view_transition{} record it holds.
% The argument appears twice in the expansion, so pass a bound variable
% rather than an expression with side effects.
-define(pending_transition_active(Transition),
    (case Transition of
         nil -> [];
         _ -> Transition#set_view_transition.active
     end)).
% Expands to [] when the argument is nil, otherwise to the `passive` field of
% the #set_view_transition{} record it holds.
% The argument appears twice in the expansion, so pass a bound variable
% rather than an expression with side effects.
-define(pending_transition_passive(Transition),
    (case Transition of
         nil -> [];
         _ -> Transition#set_view_transition.passive
     end)).
% Expands to [] when the argument is nil, otherwise to the `unindexable`
% field of the #set_view_transition{} record it holds.
% The argument appears twice in the expansion, so pass a bound variable
% rather than an expression with side effects.
-define(pending_transition_unindexable(Transition),
    (case Transition of
         nil -> [];
         _ -> Transition#set_view_transition.unindexable
     end)).
85
86
% Unsigned 64-bit integer: 0 .. 2^64 - 1.
-type uint64() :: 0..18446744073709551615.
-type partition_id() :: non_neg_integer().
% Values accepted by the `stale` field of #set_view_group_req{}.
-type staleness() :: update_after | ok | false.
-type bitmask() :: non_neg_integer().
-type bitmap() :: non_neg_integer().
-type update_seq() :: non_neg_integer().
% Either a binary or the atom nil.
-type btree_state() :: nil | binary().
-type uuid() :: uint64().
-type partition_seq() :: {partition_id(), update_seq()}.
% Kept ordered by partition id; manipulate only via ordsets or orddict.
-type partition_seqs() :: ordsets:ordset(partition_seq()).
-type partition_version() :: [{uuid(), update_seq()}].
% Kept ordered by partition id; manipulate only via ordsets or orddict.
-type partition_versions() :: ordsets:ordset({partition_id(), partition_version()}).
-type view_state() :: btree_state().
-type set_view_group_type() :: main | replica.
% Type of the #set_view_group_stats.ets_key field.
-type set_view_ets_stats_key() :: {binary(), binary(), binary(), set_view_group_type()}.
% A 1-tuple wrapping a list of {Key, Value} pairs.
-type ejson_object() :: {[{binary() | atom(), term()}]}.
-type set_view_updater_state() :: updating_active | updating_passive.
106
% A view key is the emitted key paired with the id of the document.
-type set_view_key() :: {Key :: term(), DocId :: binary()}.
% A view value is tagged with the partition id it belongs to.
-type set_view_value() :: {partition_id(), Value :: term()}.
-type set_view_key_value() :: {set_view_key(), set_view_value()}.
% A reduction is either {Count, Bitmask} or
% {Count, UserReductions, Bitmask}.
-type set_view_reduction() ::
    {Count :: non_neg_integer(), bitmask()} |
    {Count :: non_neg_integer(), UserReductions :: [term()], bitmask()}.
112
% Callback type for purging a set view btree.
% Arguments: the item kind (branch or value), the item itself (a reduction
% or a key/value pair) and an accumulator of the form {go, Term}.
% Result: {purge | keep | partial_purge, {go | stop, Term}} or
% {stop, {stop, Term}}.
-type set_view_btree_purge_fun() ::
    fun((branch | value,
         set_view_reduction() | set_view_key_value(),
         Acc :: {go, term()}) ->
            {purge | keep | partial_purge, FinalAcc :: {go | stop, term()}} |
            {stop, FinalAcc :: {stop, term()}}).
120
% Fold callback over key/value pairs: takes the pair, an offset and the
% accumulator, and returns {ok | stop, NewAcc}.
-type set_view_fold_fun() ::
    fun((set_view_key_value(), Offset :: term(), Acc :: term()) ->
            {ok | stop, FinalAcc :: term()}).
% Fold callback over reductions: takes a view key, a reduction and the
% accumulator, and returns {ok | stop, NewAcc}.
-type set_view_fold_reduce_fun() ::
    fun((set_view_key(), Reduction :: term(), Acc :: term()) ->
            {ok | stop, FinalAcc :: term()}).
125
% A view operation: insert a binary key/value pair, or remove a key (the
% value slot of a remove is always nil).
-type view_op() ::
    {insert, Key :: binary(), Value :: binary()} |
    {remove, Key :: binary(), nil}.

% Comparison callback over two binaries, returning a boolean.
-type view_btree_less_fun() :: fun((binary(), binary()) -> boolean()).
130
131
% Parameters used to configure a new set view.
-record(set_view_params, {
    max_partitions = 0 :: non_neg_integer(),
    % Partition ids to start as active / passive.
    active_partitions = [] :: [partition_id()],
    passive_partitions = [] :: [partition_id()],
    use_replica_index = false :: boolean()
}).
139
% Options carried by a request for a set view group.
-record(set_view_group_req, {
    % One of the staleness() values (update_after | ok | false). The default
    % used to be the misspelled atom `updater_after`, which is not a member
    % of staleness(), so a request built with defaults violated its own type.
    stale = update_after    :: staleness(),
    update_stats = false    :: boolean(),
    wanted_partitions = []  :: [partition_id()],
    debug = false           :: boolean(),
    type = main             :: set_view_group_type(),
    category = prod         :: 'prod' | 'dev'
}).
148
% Three ordsets of partition ids (active, passive, unindexable), each empty
% by default. The #set_view_index_header.pending_transition field holds
% either nil or one of these records.
-record(set_view_transition, {
    active = [] :: ordsets:ordset(partition_id()),
    passive = [] :: ordsets:ordset(partition_id()),
    unindexable = [] :: ordsets:ordset(partition_id())
}).
154
% Default value of the `version` field of #set_view_index_header{}.
-define(LATEST_COUCH_SET_VIEW_HEADER_VERSION, 2).
156
% Header of a set view index. It is stored in the index_header field of
% #set_view_group{} and read through the ?set_* accessor macros.
-record(set_view_index_header, {
    version = ?LATEST_COUCH_SET_VIEW_HEADER_VERSION :: non_neg_integer(),
    % Maximum number of partitions this set view supports. The default is 0
    % and the type does not admit nil.
    % NOTE(review): presumably 0 means "not yet defined" -- confirm.
    num_partitions = 0 :: non_neg_integer(),
    % Bitmap of the active partitions.
    abitmask = 0 :: bitmask(),
    % Bitmap of the passive partitions.
    pbitmask = 0 :: bitmask(),
    % Bitmap of the partitions to clean up.
    cbitmask = 0 :: bitmask(),
    seqs = [] :: partition_seqs(),
    id_btree_state = nil :: btree_state(),
    view_states = [] :: [view_state()],
    has_replica = false :: boolean(),
    replicas_on_transfer = [] :: ordsets:ordset(partition_id()),
    % Pending transition of partition states, or nil when there is none.
    pending_transition = nil :: nil | #set_view_transition{},
    unindexable_seqs = [] :: partition_seqs(),
    partition_versions = [] :: nil | partition_versions()
}).
177
% Keep all stats values as valid EJSON (except ets key).
-record(set_view_group_stats, {
    % As generated by ?set_view_group_stats_key(#set_view_group{}).
    % The field has no default, so a freshly built record (for example the
    % default of #set_view_debug_info.stats) holds `undefined` here; the
    % type says so explicitly because records no longer add `undefined`
    % implicitly to fields without an initial value.
    ets_key                 :: 'undefined' | set_view_ets_stats_key(),
    % # accesses for view streaming
    accesses = 0            :: non_neg_integer(),
    full_updates = 0        :: non_neg_integer(),
    % # of updates that only finished updating the active partitions
    % (in the phase of updating passive partitions). Normally its value
    % is full_updates - 1.
    partial_updates = 0     :: non_neg_integer(),
    % # of times the updater was forced to stop (because partition states
    % were updated) while it was still indexing the active partitions.
    stopped_updates = 0     :: non_neg_integer(),
    compactions = 0         :: non_neg_integer(),
    % # of interrupted cleanups. Cleanups which were stopped (in order to do
    % higher priority tasks) and left the index in a not yet clean state (but
    % hopefully closer to a clean state).
    cleanup_stops = 0       :: non_neg_integer(),
    cleanups = 0            :: non_neg_integer(),
    updater_cleanups = 0    :: non_neg_integer(),
    update_errors = 0       :: non_neg_integer(),
    update_history = []     :: [ejson_object()],
    compaction_history = [] :: [ejson_object()],
    cleanup_history = []    :: [ejson_object()],
    dup_partitions_counter = 0 :: non_neg_integer()
}).
205
% Debug information; the debug_info field of #set_view_group{} holds either
% nil or one of these records.
-record(set_view_debug_info, {
    original_abitmask = 0 :: bitmask(),
    original_pbitmask = 0 :: bitmask(),
    % Defaults to a stats record with all-default fields.
    stats = #set_view_group_stats{} :: #set_view_group_stats{},
    replica_partitions = [] :: ordsets:ordset(partition_id()),
    wanted_seqs = [] :: partition_seqs()
}).
213
% A single view; #set_view_group.views is a list of these records.
-record(set_view, {
    id_num = 0        :: non_neg_integer(),
    def = <<>>        :: binary(),
    % No default, so the field is `undefined` until a reference is stored;
    % the type admits that value explicitly because records no longer add
    % `undefined` implicitly to fields without an initial value.
    ref               :: 'undefined' | reference(),
    % NOTE(review): presumably holds an indexer-specific record such as
    % #mapreduce_view{} or #spatial_view{} -- confirm against callers.
    indexer = {}      :: tuple()
}).
220
% XXX vmx 2012-12-21: This should go into the mapreduce specific header file
% The #btree{} record used below is not defined in this header.
-record(mapreduce_view, {
    map_names = [] :: [binary()],
    btree = nil :: nil | #btree{},
    % List of pairs of binaries.
    reduce_funs = [] :: [{binary(), binary()}],
    options = [] :: [term()]
}).
228
% XXX vmx 2012-12-21: This should go into the spatial specific header file
-record(spatial_view, {
    map_names = [] :: [binary()],
    % The bitmask for the partitions. It is stored in the #spatial_view{}
    % record because spatial indexes don't have a reduce.
    % XXX vmx 2013-02-21: When is this bitmap set? It also needs to be
    %     persisted somehow with the tree.
    % An earlier variant declared this field as a binary defaulting to <<>>;
    % it is now a non-negative integer.
    bitmap = 0 :: non_neg_integer(),
    % Left untyped; the intended type is 'nil' | #vtree{}, but #vtree{} is
    % not defined in this header.
    vtree = nil
}).
240
241
% A set view group. Its first four key fields (set_name, name, sig, type)
% form the stats key built by ?set_view_group_stats_key/1, and index_header
% is what the ?set_* accessor macros read from.
-record(set_view_group, {
    % 16 zero bytes by default.
    sig = <<0:128>> :: <<_:128>>,
    fd = nil :: nil | pid(),
    set_name = <<>> :: binary(),
    name = <<>> :: binary(),
    design_options = [] :: [any()],
    views = [] :: [#set_view{}],
    id_btree = nil :: nil | #btree{},
    ref_counter = nil :: nil | pid(),
    index_header = #set_view_index_header{} :: #set_view_index_header{},
    type = main :: set_view_group_type(),
    replica_group = nil :: nil | #set_view_group{},
    replica_pid = nil :: nil | pid(),
    debug_info = nil :: nil | #set_view_debug_info{},
    filepath = "" :: string(),
    % The module that populated the set view: either the module for the
    % MapReduce index or the one for the spatial index.
    mod = nil :: nil | mapreduce_view | spatial_view,
    % File extension used for the index files (".view" or ".spatial").
    extension = "" :: string(),
    % Distinguishes production from development set view groups.
    category = nil :: nil | prod | dev,
    stats_ets = nil :: atom(),
    header_pos = 0 :: non_neg_integer(),
    dcp_pid = nil :: nil | pid(),
    index_xattr_on_deleted_docs = false :: boolean()
}).
270
% Counters and timings reported by the updater. All *_time fields are
% floats measured in seconds; everything else is a non-negative count.
-record(set_view_updater_stats, {
    indexing_time = 0.0 :: float(),
    blocked_time = 0.0 :: float(),
    cleanup_kv_count = 0 :: non_neg_integer(),
    cleanup_time = 0.0 :: float(),
    inserted_ids = 0 :: non_neg_integer(),
    deleted_ids = 0 :: non_neg_integer(),
    inserted_kvs = 0 :: non_neg_integer(),
    deleted_kvs = 0 :: non_neg_integer(),
    seqs = 0 :: non_neg_integer()
}).
282
% Result of an updater run: the group, the updater state, the run's
% statistics and an optional temporary file pid (nil when absent).
-record(set_view_updater_result, {
    group = #set_view_group{} :: #set_view_group{},
    state = updating_active :: set_view_updater_state(),
    stats = #set_view_updater_stats{} :: #set_view_updater_stats{},
    tmp_file = nil :: nil | pid()
}).
289
% Result of a compaction: the group, the time the compaction took (a float,
% in seconds) and a cleanup key/value count.
-record(set_view_compactor_result, {
    group = #set_view_group{} :: #set_view_group{},
    compact_time = 0.0 :: float(),
    cleanup_kv_count = 0 :: non_neg_integer()
}).
295
% Description of a temporary file. None of the fields carry a type.
-record(set_view_tmp_file_info, {
    name = nil,
    fd = nil,
    size = 0,
    % Additional meta information about the file, which the indexer may set.
    % The spatial indexer stores here the enclosing bounding box of the data
    % within the file.
    extra = nil
}).
305