1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package scorch 16 17import ( 18 "bytes" 19 "fmt" 20 "reflect" 21 "sync/atomic" 22 23 "github.com/blevesearch/bleve/index" 24 "github.com/blevesearch/bleve/index/scorch/segment" 25 "github.com/blevesearch/bleve/size" 26) 27 28var reflectStaticSizeIndexSnapshotTermFieldReader int 29 30func init() { 31 var istfr IndexSnapshotTermFieldReader 32 reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size()) 33} 34 35type IndexSnapshotTermFieldReader struct { 36 term []byte 37 field string 38 snapshot *IndexSnapshot 39 dicts []segment.TermDictionary 40 postings []segment.PostingsList 41 iterators []segment.PostingsIterator 42 segmentOffset int 43 includeFreq bool 44 includeNorm bool 45 includeTermVectors bool 46 currPosting segment.Posting 47 currID index.IndexInternalID 48} 49 50func (i *IndexSnapshotTermFieldReader) Size() int { 51 sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr + 52 len(i.term) + 53 len(i.field) + 54 len(i.currID) 55 56 for _, entry := range i.postings { 57 sizeInBytes += entry.Size() 58 } 59 60 for _, entry := range i.iterators { 61 sizeInBytes += entry.Size() 62 } 63 64 if i.currPosting != nil { 65 sizeInBytes += i.currPosting.Size() 66 } 67 68 return sizeInBytes 69} 70 71func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { 72 rv := preAlloced 73 if rv == nil { 74 rv = &index.TermFieldDoc{} 75 } 76 // find the next hit 77 for i.segmentOffset < len(i.postings) { 78 next, err := i.iterators[i.segmentOffset].Next() 79 if err != nil { 80 return nil, err 81 } 82 if next != nil { 83 // make segment number into global number by adding offset 84 globalOffset := i.snapshot.offsets[i.segmentOffset] 85 nnum := next.Number() 86 rv.ID = docNumberToBytes(rv.ID, nnum+globalOffset) 87 i.postingToTermFieldDoc(next, rv) 88 89 i.currID = rv.ID 90 i.currPosting = next 91 return rv, nil 92 } 93 i.segmentOffset++ 94 } 95 return nil, nil 96} 97 98func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) { 99 if i.includeFreq { 100 rv.Freq = next.Frequency() 101 } 102 if i.includeNorm { 103 rv.Norm = next.Norm() 104 } 105 if i.includeTermVectors { 106 locs := next.Locations() 107 if cap(rv.Vectors) < len(locs) { 108 rv.Vectors = make([]*index.TermFieldVector, len(locs)) 109 backing := make([]index.TermFieldVector, len(locs)) 110 for i := range backing { 111 rv.Vectors[i] = &backing[i] 112 } 113 } 114 rv.Vectors = rv.Vectors[:len(locs)] 115 for i, loc := range locs { 116 *rv.Vectors[i] = index.TermFieldVector{ 117 Start: loc.Start(), 118 End: loc.End(), 119 Pos: loc.Pos(), 120 ArrayPositions: loc.ArrayPositions(), 121 Field: loc.Field(), 122 } 123 } 124 } 125} 126 127func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { 128 // FIXME do something better 129 // for now, if we need to seek backwards, then restart from the beginning 130 if i.currPosting != nil && bytes.Compare(i.currID, ID) >= 0 { 131 i2, err := i.snapshot.TermFieldReader(i.term, i.field, 132 i.includeFreq, i.includeNorm, i.includeTermVectors) 133 if err != nil { 134 return nil, err 135 } 136 *i = *(i2.(*IndexSnapshotTermFieldReader)) 137 } 138 num, err := docInternalToNumber(ID) 139 if err != nil { 140 return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err) 141 } 142 segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num) 143 if segIndex >= len(i.snapshot.segment) { 144 return nil, fmt.Errorf("computed segment index %d out of bounds %d", 145 segIndex, len(i.snapshot.segment)) 146 } 147 // skip directly to the target segment 148 i.segmentOffset = segIndex 149 next, err := i.iterators[i.segmentOffset].Advance(ldocNum) 150 if err != nil { 151 return nil, err 152 } 153 if next == nil { 154 // we jumped directly to the segment that should have contained it 155 // but it wasn't there, so reuse Next() which should correctly 156 // get the next hit after it (we moved i.segmentOffset) 157 return i.Next(preAlloced) 158 } 159 160 if preAlloced == nil { 161 preAlloced = &index.TermFieldDoc{} 162 } 163 preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ 164 i.snapshot.offsets[segIndex]) 165 i.postingToTermFieldDoc(next, preAlloced) 166 i.currID = preAlloced.ID 167 i.currPosting = next 168 return preAlloced, nil 169} 170 171func (i *IndexSnapshotTermFieldReader) Count() uint64 { 172 var rv uint64 173 for _, posting := range i.postings { 174 rv += posting.Count() 175 } 176 return rv 177} 178 179func (i *IndexSnapshotTermFieldReader) Close() error { 180 if i.snapshot != nil { 181 atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) 182 i.snapshot.recycleTermFieldReader(i) 183 } 184 return nil 185} 186