/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
 *     Copyright 2010 Couchbase, Inc
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */

#include "config.h"

#include <cstdlib>
#include <iostream>
#include <limits>
#include <list>
#include <sstream>
#include <string>
#include <utility>

#include "common.h"
#include "ep.h"
#include "ep_engine.h"
#include "item_pager.h"
#include "connmap.h"

/**
 * Disk queue size at or above which a pausable PagingVisitor asks to be
 * paused (compared against stats.diskQueueSize in
 * PagingVisitor::pauseVisitor()).
 */
static const size_t MAX_PERSISTENCE_QUEUE_SIZE = 1000000;

35c16cc059SDustin Sallings/**
36c16cc059SDustin Sallings * As part of the ItemPager, visit all of the objects in memory and
37c16cc059SDustin Sallings * eject some within a constrained probability
38c16cc059SDustin Sallings */
39c16cc059SDustin Sallingsclass PagingVisitor : public VBucketVisitor {
40c16cc059SDustin Sallingspublic:
41c16cc059SDustin Sallings
42c16cc059SDustin Sallings    /**
43c16cc059SDustin Sallings     * Construct a PagingVisitor that will attempt to evict the given
44c16cc059SDustin Sallings     * percentage of objects.
45c16cc059SDustin Sallings     *
4699f9ce07SDustin Sallings     * @param s the store that will handle the bulk removal
4799f9ce07SDustin Sallings     * @param st the stats where we'll track what we've done
4836f2bf15SDustin Sallings     * @param pcnt percentage of objects to attempt to evict (0-1)
4999f9ce07SDustin Sallings     * @param sfin pointer to a bool to be set to true after run completes
5018770839SSundar Sridharan     * @param pause flag indicating if PagingVisitor can pause between vbucket
5118770839SSundar Sridharan     *              visits
525691d832SChiyoung Seo     * @param bias active vbuckets eviction probability bias multiplier (0-1)
535691d832SChiyoung Seo     * @param phase pointer to an item_pager_phase to be set
54c16cc059SDustin Sallings     */
550e182a8bSLiang Guo    PagingVisitor(EventuallyPersistentStore &s, EPStats &st, double pcnt,
565691d832SChiyoung Seo                  bool *sfin, bool pause = false,
575691d832SChiyoung Seo                  double bias = 1, item_pager_phase *phase = NULL)
585691d832SChiyoung Seo      : store(s), stats(st), percent(pcnt),
59b95fb6d8Sabhinavdangeti        activeBias(bias), ejected(0),
600e182a8bSLiang Guo        startTime(ep_real_time()), stateFinalizer(sfin), canPause(pause),
61636dec5aSabhinavdangeti        completePhase(true), wasHighMemoryUsage(s.isMemoryUsageTooHigh()),
62636dec5aSabhinavdangeti        pager_phase(phase) {}
63c16cc059SDustin Sallings
64c16cc059SDustin Sallings    void visit(StoredValue *v) {
653ff6d651SChiyoung Seo        // Delete expired items for an active vbucket.
663ff6d651SChiyoung Seo        bool isExpired = (currentBucket->getState() == vbucket_state_active) &&
673ff6d651SChiyoung Seo            v->isExpired(startTime) && !v->isDeleted();
6800dea4cfSChiyoung Seo        if (isExpired || v->isTempNonExistentItem() || v->isTempDeletedItem()) {
6918770839SSundar Sridharan            expired.push_back(std::make_pair(currentBucket->getId(),
7018770839SSundar Sridharan                                             v->getKey()));
7153b0aa40SDustin Sallings            return;
7253b0aa40SDustin Sallings        }
7353b0aa40SDustin Sallings
7404a9436eSLiang Guo        // return if not ItemPager, which uses valid eviction percentage
755691d832SChiyoung Seo        if (percent <= 0 || !pager_phase) {
76fa288713SLiang Guo            return;
77fa288713SLiang Guo        }
78fa288713SLiang Guo
79fa288713SLiang Guo        // always evict unreferenced items, or randomly evict referenced item
805691d832SChiyoung Seo        double r = *pager_phase == PAGING_UNREFERENCED ?
8118770839SSundar Sridharan            1 :
8218770839SSundar Sridharan            static_cast<double>(std::rand()) / static_cast<double>(RAND_MAX);
835691d832SChiyoung Seo
8418770839SSundar Sridharan        if (*pager_phase == PAGING_UNREFERENCED &&
8518770839SSundar Sridharan            v->getNRUValue() == MAX_NRU_VALUE) {
865691d832SChiyoung Seo            doEviction(v);
8718770839SSundar Sridharan        } else if (*pager_phase == PAGING_RANDOM &&
8818770839SSundar Sridharan                   v->incrNRUValue() == MAX_NRU_VALUE &&
895691d832SChiyoung Seo                   r <= percent) {
905691d832SChiyoung Seo            doEviction(v);
91c16cc059SDustin Sallings        }
92c16cc059SDustin Sallings    }
93c16cc059SDustin Sallings
944a73f0e1STrond Norbye    bool visitBucket(RCPtr<VBucket> &vb) {
95fa288713SLiang Guo        update();
96fa288713SLiang Guo
9745686f15SChiyoung Seo        bool newCheckpointCreated = false;
9845686f15SChiyoung Seo        size_t removed = vb->checkpointManager.removeClosedUnrefCheckpoints(vb,
9945686f15SChiyoung Seo                                                         newCheckpointCreated);
10045686f15SChiyoung Seo        stats.itemsRemovedFromCheckpoints.fetch_add(removed);
10145686f15SChiyoung Seo        // If the new checkpoint is created, notify this event to the
10290defd6dSMike Wiederhold        // corresponding paused TAP & DCP connections.
10345686f15SChiyoung Seo        if (newCheckpointCreated) {
10445686f15SChiyoung Seo            store.getEPEngine().getTapConnMap().notifyVBConnections(
10545686f15SChiyoung Seo                                                                   vb->getId());
10690defd6dSMike Wiederhold            store.getEPEngine().getDcpConnMap().notifyVBConnections(
107f12b299dSabhinavdangeti                                        vb->getId(),
108f12b299dSabhinavdangeti                                        vb->checkpointManager.getHighSeqno());
10945686f15SChiyoung Seo        }
11045686f15SChiyoung Seo
111fa288713SLiang Guo        // fast path for expiry item pager
1125691d832SChiyoung Seo        if (percent <= 0 || !pager_phase) {
1135691d832SChiyoung Seo            return VBucketVisitor::visitBucket(vb);
114fa288713SLiang Guo        }
115fa288713SLiang Guo
11604a9436eSLiang Guo        // skip active vbuckets if active resident ratio is lower than replica
117fa288713SLiang Guo        double current = static_cast<double>(stats.getTotalMemoryUsed());
118fa288713SLiang Guo        double lower = static_cast<double>(stats.mem_low_wat);
11904a9436eSLiang Guo        double high = static_cast<double>(stats.mem_high_wat);
12004a9436eSLiang Guo        if (vb->getState() == vbucket_state_active && current < high &&
12118770839SSundar Sridharan            store.cachedResidentRatio.activeRatio <
12218770839SSundar Sridharan            store.cachedResidentRatio.replicaRatio)
123fa288713SLiang Guo        {
12404a9436eSLiang Guo            return false;
12504a9436eSLiang Guo        }
12604a9436eSLiang Guo
12704a9436eSLiang Guo        if (current > lower) {
128fa288713SLiang Guo            double p = (current - static_cast<double>(lower)) / current;
12904a9436eSLiang Guo            adjustPercent(p, vb->getState());
130fa288713SLiang Guo            return VBucketVisitor::visitBucket(vb);
1315691d832SChiyoung Seo        } else { // stop eviction whenever memory usage is below low watermark
1325691d832SChiyoung Seo            completePhase = false;
1335691d832SChiyoung Seo            return false;
134fa288713SLiang Guo        }
13599f9ce07SDustin Sallings    }
13699f9ce07SDustin Sallings
13799f9ce07SDustin Sallings    void update() {
1380e182a8bSLiang Guo        store.deleteExpiredItems(expired);
13999f9ce07SDustin Sallings
14099f9ce07SDustin Sallings        if (numEjected() > 0) {
141a1213f50SMike Wiederhold            LOG(EXTENSION_LOG_INFO, "Paged out %ld values", numEjected());
14299f9ce07SDustin Sallings        }
14399f9ce07SDustin Sallings
144aa1e2970SChiyoung Seo        size_t num_expired = expired.size();
145aa1e2970SChiyoung Seo        if (num_expired > 0) {
146a1213f50SMike Wiederhold            LOG(EXTENSION_LOG_INFO, "Purged %ld expired items", num_expired);
14799f9ce07SDustin Sallings        }
148aa1e2970SChiyoung Seo
14999f9ce07SDustin Sallings        ejected = 0;
15099f9ce07SDustin Sallings        expired.clear();
15199f9ce07SDustin Sallings    }
15299f9ce07SDustin Sallings
153777edee8SChiyoung Seo    bool pauseVisitor() {
1547c6809d2STrond Norbye        size_t queueSize = stats.diskQueueSize.load();
155777edee8SChiyoung Seo        return canPause && queueSize >= MAX_PERSISTENCE_QUEUE_SIZE;
156777edee8SChiyoung Seo    }
157777edee8SChiyoung Seo
15899f9ce07SDustin Sallings    void complete() {
15999f9ce07SDustin Sallings        update();
16099f9ce07SDustin Sallings        if (stateFinalizer) {
16199f9ce07SDustin Sallings            *stateFinalizer = true;
16299f9ce07SDustin Sallings        }
1635691d832SChiyoung Seo
1645691d832SChiyoung Seo        if (pager_phase && completePhase) {
1655691d832SChiyoung Seo            if (*pager_phase == PAGING_UNREFERENCED) {
1665691d832SChiyoung Seo                *pager_phase = PAGING_RANDOM;
1675691d832SChiyoung Seo            } else {
1685691d832SChiyoung Seo                *pager_phase = PAGING_UNREFERENCED;
1695691d832SChiyoung Seo            }
1705691d832SChiyoung Seo        }
172636dec5aSabhinavdangeti        // Wake up any sleeping backfill tasks if the memory usage is lowered
173636dec5aSabhinavdangeti        // below the high watermark as a result of checkpoint removal.
174636dec5aSabhinavdangeti        if (wasHighMemoryUsage && !store.isMemoryUsageTooHigh()) {
175636dec5aSabhinavdangeti            store.getEPEngine().getDcpConnMap().notifyBackfillManagerTasks();
176636dec5aSabhinavdangeti        }
17799f9ce07SDustin Sallings    }
17899f9ce07SDustin Sallings
179c16cc059SDustin Sallings    /**
180c16cc059SDustin Sallings     * Get the number of items ejected during the visit.
181c16cc059SDustin Sallings     */
182c16cc059SDustin Sallings    size_t numEjected() { return ejected; }
183c16cc059SDustin Sallings
18499f9ce07SDustin Sallingsprivate:
18504a9436eSLiang Guo    void adjustPercent(double prob, vbucket_state_t state) {
18604a9436eSLiang Guo        if (state == vbucket_state_replica ||
18704a9436eSLiang Guo            state == vbucket_state_dead)
18804a9436eSLiang Guo        {
189fa288713SLiang Guo            // replica items should have higher eviction probability
1906424600aSLiang Guo            double p = prob*(2 - activeBias);
191fa288713SLiang Guo            percent = p < 0.9 ? p : 0.9;
19204a9436eSLiang Guo        } else {
193fa288713SLiang Guo            // active items have lower eviction probability
1946424600aSLiang Guo            percent = prob*activeBias;
195fa288713SLiang Guo        }
196fa288713SLiang Guo    }
197fa288713SLiang Guo
1985691d832SChiyoung Seo    void doEviction(StoredValue *v) {
1999a8679e2SChiyoung Seo        item_eviction_policy_t policy = store.getItemEvictionPolicy();
200b6a97f8eSabhinavdangeti        std::string key = v->getKey();
202d621da3fSChiyoung Seo        if (currentBucket->ht.unlocked_ejectItem(v, policy)) {
2035691d832SChiyoung Seo            ++ejected;
205b6a97f8eSabhinavdangeti            /**
206b6a97f8eSabhinavdangeti             * For FULL EVICTION MODE, add all items that are being
207b6a97f8eSabhinavdangeti             * evicted to the corresponding bloomfilter.
208b6a97f8eSabhinavdangeti             */
209b6a97f8eSabhinavdangeti            if (policy == FULL_EVICTION) {
210b6a97f8eSabhinavdangeti                currentBucket->addToFilter(key);
211b6a97f8eSabhinavdangeti            }
2125691d832SChiyoung Seo        }
2135691d832SChiyoung Seo    }
2145691d832SChiyoung Seo
21553b0aa40SDustin Sallings    std::list<std::pair<uint16_t, std::string> > expired;
21653b0aa40SDustin Sallings
2170e182a8bSLiang Guo    EventuallyPersistentStore &store;
2185691d832SChiyoung Seo    EPStats &stats;
2195691d832SChiyoung Seo    double percent;
2205691d832SChiyoung Seo    double activeBias;
2215691d832SChiyoung Seo    size_t ejected;
2225691d832SChiyoung Seo    time_t startTime;
2235691d832SChiyoung Seo    bool *stateFinalizer;
2245691d832SChiyoung Seo    bool canPause;
2255691d832SChiyoung Seo    bool completePhase;
226636dec5aSabhinavdangeti    bool wasHighMemoryUsage;
2275691d832SChiyoung Seo    item_pager_phase *pager_phase;
228c16cc059SDustin Sallings};
229c16cc059SDustin Sallings
230ac3ea552SSundar Sridharanbool ItemPager::run(void) {
231ac3ea552SSundar Sridharan    EventuallyPersistentStore *store = engine->getEpStore();
2325fd7ba2cSMike Wiederhold    double current = static_cast<double>(stats.getTotalMemoryUsed());
23332d9f64fSDustin Sallings    double upper = static_cast<double>(stats.mem_high_wat);
23432d9f64fSDustin Sallings    double lower = static_cast<double>(stats.mem_low_wat);
2355691d832SChiyoung Seo    double sleepTime = 5;
2372db030dbSabhinavdangeti    if (current <= lower) {
2382db030dbSabhinavdangeti        doEvict = false;
2392db030dbSabhinavdangeti    }
2412db030dbSabhinavdangeti    if (available && ((current > upper) || doEvict)) {
2422db030dbSabhinavdangeti        if (store->getItemEvictionPolicy() == VALUE_ONLY) {
2432db030dbSabhinavdangeti            doEvict = true;
2442db030dbSabhinavdangeti        }
24691f1052aSDustin Sallings        ++stats.pagerRuns;
24791f1052aSDustin Sallings
24836f2bf15SDustin Sallings        double toKill = (current - static_cast<double>(lower)) / current;
249c16cc059SDustin Sallings
250ed6be743STrond Norbye        std::stringstream ss;
2515fd7ba2cSMike Wiederhold        ss << "Using " << stats.getTotalMemoryUsed()
252ed6be743STrond Norbye           << " bytes of memory, paging out %0f%% of items." << std::endl;
253a1213f50SMike Wiederhold        LOG(EXTENSION_LOG_INFO, ss.str().c_str(), (toKill*100.0));
254c16cc059SDustin Sallings
2556424600aSLiang Guo        // compute active vbuckets evicition bias factor
256ac3ea552SSundar Sridharan        Configuration &cfg = engine->getConfiguration();
25775f421f6SLiang Guo        size_t activeEvictPerc = cfg.getPagerActiveVbPcnt();
2586424600aSLiang Guo        double bias = static_cast<double>(activeEvictPerc) / 50;
2596424600aSLiang Guo
26099f9ce07SDustin Sallings        available = false;
261ac3ea552SSundar Sridharan        shared_ptr<PagingVisitor> pv(new PagingVisitor(*store, stats, toKill,
2625691d832SChiyoung Seo                                                       &available,
2635691d832SChiyoung Seo                                                       false, bias, &phase));
264ac3ea552SSundar Sridharan        store->visit(pv, "Item pager", NONIO_TASK_IDX,
265ac3ea552SSundar Sridharan                    Priority::ItemPagerPriority);
266c16cc059SDustin Sallings    }
267c16cc059SDustin Sallings
268cee09357SSundar Sridharan    snooze(sleepTime);
269c16cc059SDustin Sallings    return true;
270c16cc059SDustin Sallings}
271dbef58b9SChiyoung Seo
272ac3ea552SSundar Sridharanbool ExpiredItemPager::run(void) {
273ac3ea552SSundar Sridharan    EventuallyPersistentStore *store = engine->getEpStore();
27499f9ce07SDustin Sallings    if (available) {
27599f9ce07SDustin Sallings        ++stats.expiryPagerRuns;
276dbef58b9SChiyoung Seo
27799f9ce07SDustin Sallings        available = false;
278ac3ea552SSundar Sridharan        shared_ptr<PagingVisitor> pv(new PagingVisitor(*store, stats, -1,
2795691d832SChiyoung Seo                                                       &available,
2805691d832SChiyoung Seo                                                       true, 1, NULL));
281ac3ea552SSundar Sridharan        // track spawned tasks for shutdown..
282ac3ea552SSundar Sridharan        store->visit(pv, "Expired item remover", NONIO_TASK_IDX,
283ac3ea552SSundar Sridharan                Priority::ItemPagerPriority, 10);
28499f9ce07SDustin Sallings    }
285cee09357SSundar Sridharan    snooze(sleepTime);
286dbef58b9SChiyoung Seo    return true;
287dbef58b9SChiyoung Seo}