1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package vellum
16
17import (
18	"bytes"
19	"reflect"
20	"testing"
21
22	"github.com/couchbase/vellum/levenshtein"
23	"github.com/couchbase/vellum/regexp"
24)
25
26func TestIterator(t *testing.T) {
27	var buf bytes.Buffer
28	b, err := New(&buf, nil)
29	if err != nil {
30		t.Fatalf("error creating builder: %v", err)
31	}
32
33	err = insertStringMap(b, smallSample)
34	if err != nil {
35		t.Fatalf("error building: %v", err)
36	}
37
38	err = b.Close()
39	if err != nil {
40		t.Fatalf("error closing: %v", err)
41	}
42
43	fst, err := Load(buf.Bytes())
44	if err != nil {
45		t.Fatalf("error loading set: %v", err)
46	}
47
48	got := map[string]uint64{}
49	itr, err := fst.Iterator(nil, nil)
50	for err == nil {
51		key, val := itr.Current()
52		got[string(key)] = val
53		err = itr.Next()
54	}
55	if err != ErrIteratorDone {
56		t.Errorf("iterator error: %v", err)
57	}
58	if !reflect.DeepEqual(smallSample, got) {
59		t.Errorf("expected %v, got: %v", smallSample, got)
60	}
61}
62
63func TestIteratorReset(t *testing.T) {
64	var buf bytes.Buffer
65	b, err := New(&buf, nil)
66	if err != nil {
67		t.Fatalf("error creating builder: %v", err)
68	}
69
70	err = insertStringMap(b, smallSample)
71	if err != nil {
72		t.Fatalf("error building: %v", err)
73	}
74
75	err = b.Close()
76	if err != nil {
77		t.Fatalf("error closing: %v", err)
78	}
79
80	fst, err := Load(buf.Bytes())
81	if err != nil {
82		t.Fatalf("error loading set: %v", err)
83	}
84
85	itr, err := fst.Iterator(nil, nil)
86	if err != nil {
87		t.Fatalf("error creating an iterator: %v", err)
88	}
89
90	buf.Reset()
91	b, err = New(&buf, nil)
92	if err != nil {
93		t.Fatalf("error creating builder: %v", err)
94	}
95
96	smallSample2 := map[string]uint64{
97		"bold": 25,
98		"last": 1,
99		"next": 500,
100		"tank": 0,
101	}
102	err = insertStringMap(b, smallSample2)
103	if err != nil {
104		t.Fatalf("error building: %v", err)
105	}
106
107	err = b.Close()
108	if err != nil {
109		t.Fatalf("error closing: %v", err)
110	}
111
112	fst, err = Load(buf.Bytes())
113	if err != nil {
114		t.Fatalf("error loading set: %v", err)
115	}
116
117	got := map[string]uint64{}
118	err = itr.Reset(fst, nil, nil, nil)
119	for err == nil {
120		key, val := itr.Current()
121		got[string(key)] = val
122		err = itr.Next()
123	}
124	if err != ErrIteratorDone {
125		t.Errorf("iterator error: %v", err)
126	}
127	if !reflect.DeepEqual(smallSample2, got) {
128		t.Errorf("expected %v, got: %v", smallSample2, got)
129	}
130
131}
132
133func TestIteratorStartKey(t *testing.T) {
134	var buf bytes.Buffer
135	b, err := New(&buf, nil)
136	if err != nil {
137		t.Fatalf("error creating builder: %v", err)
138	}
139
140	err = insertStringMap(b, smallSample)
141	if err != nil {
142		t.Fatalf("error building: %v", err)
143	}
144
145	err = b.Close()
146	if err != nil {
147		t.Fatalf("error closing: %v", err)
148	}
149
150	fst, err := Load(buf.Bytes())
151	if err != nil {
152		t.Fatalf("error loading set: %v", err)
153	}
154
155	// with start key < "mon", we should still get it
156	got := map[string]uint64{}
157	itr, err := fst.Iterator([]byte("a"), nil)
158	for err == nil {
159		key, val := itr.Current()
160		got[string(key)] = val
161		err = itr.Next()
162	}
163	if err != ErrIteratorDone {
164		t.Errorf("iterator error: %v", err)
165	}
166	if !reflect.DeepEqual(smallSample, got) {
167		t.Errorf("expected %v, got: %v", smallSample, got)
168	}
169
170	// with start key = "mon", we should still get it
171	got = map[string]uint64{}
172	itr, err = fst.Iterator([]byte("mon"), nil)
173	for err == nil {
174		key, val := itr.Current()
175		got[string(key)] = val
176		err = itr.Next()
177	}
178	if err != ErrIteratorDone {
179		t.Errorf("iterator error: %v", err)
180	}
181	if !reflect.DeepEqual(smallSample, got) {
182		t.Errorf("expected %v, got: %v", smallSample, got)
183	}
184
185	// with start key > "mon", we don't expect to get it
186	expect := map[string]uint64{
187		"tues":  smallSample["tues"],
188		"thurs": smallSample["thurs"],
189		"tye":   smallSample["tye"],
190	}
191	got = map[string]uint64{}
192	itr, err = fst.Iterator([]byte("mona"), nil)
193	for err == nil {
194		key, val := itr.Current()
195		got[string(key)] = val
196		err = itr.Next()
197	}
198	if err != ErrIteratorDone {
199		t.Errorf("iterator error: %v", err)
200	}
201	if !reflect.DeepEqual(expect, got) {
202		t.Errorf("expected %v, got: %v", expect, got)
203	}
204
205	// with start key > "mon", we don't expect to get it
206	expect = map[string]uint64{
207		"tues":  smallSample["tues"],
208		"thurs": smallSample["thurs"],
209		"tye":   smallSample["tye"],
210	}
211	got = map[string]uint64{}
212	itr, err = fst.Iterator([]byte("my"), nil)
213	for err == nil {
214		key, val := itr.Current()
215		got[string(key)] = val
216		err = itr.Next()
217	}
218	if err != ErrIteratorDone {
219		t.Errorf("iterator error: %v", err)
220	}
221	if !reflect.DeepEqual(expect, got) {
222		t.Errorf("expected %v, got: %v", expect, got)
223	}
224}
225
226func TestIteratorEndKey(t *testing.T) {
227	var buf bytes.Buffer
228	b, err := New(&buf, nil)
229	if err != nil {
230		t.Fatalf("error creating builder: %v", err)
231	}
232
233	err = insertStringMap(b, smallSample)
234	if err != nil {
235		t.Fatalf("error building: %v", err)
236	}
237
238	err = b.Close()
239	if err != nil {
240		t.Fatalf("error closing: %v", err)
241	}
242
243	fst, err := Load(buf.Bytes())
244	if err != nil {
245		t.Fatalf("error loading set: %v", err)
246	}
247
248	// with end key > "tye", we should still get it
249	got := map[string]uint64{}
250	itr, err := fst.Iterator(nil, []byte("zeus"))
251	for err == nil {
252		key, val := itr.Current()
253		got[string(key)] = val
254		err = itr.Next()
255	}
256	if err != ErrIteratorDone {
257		t.Errorf("iterator error: %v", err)
258	}
259	if !reflect.DeepEqual(smallSample, got) {
260		t.Errorf("expected %v, got: %v", smallSample, got)
261	}
262
263	// with end key = "tye", we should NOT get it (end key exclusive)
264	expect := map[string]uint64{
265		"mon":   smallSample["mon"],
266		"tues":  smallSample["tues"],
267		"thurs": smallSample["thurs"],
268	}
269	got = map[string]uint64{}
270	itr, err = fst.Iterator(nil, []byte("tye"))
271	for err == nil {
272		key, val := itr.Current()
273		got[string(key)] = val
274		err = itr.Next()
275	}
276	if err != ErrIteratorDone {
277		t.Errorf("iterator error: %v", err)
278	}
279	if !reflect.DeepEqual(expect, got) {
280		t.Errorf("expected %v, got: %v", expect, got)
281	}
282
283	// with start key < "tye", we don't expect to get it
284	got = map[string]uint64{}
285	itr, err = fst.Iterator(nil, []byte("tv"))
286	for err == nil {
287		key, val := itr.Current()
288		got[string(key)] = val
289		err = itr.Next()
290	}
291	if err != ErrIteratorDone {
292		t.Errorf("iterator error: %v", err)
293	}
294	if !reflect.DeepEqual(expect, got) {
295		t.Errorf("expected %v, got: %v", expect, got)
296	}
297}
298
299func TestIteratorSeek(t *testing.T) {
300	var buf bytes.Buffer
301	b, err := New(&buf, nil)
302	if err != nil {
303		t.Fatalf("error creating builder: %v", err)
304	}
305
306	err = insertStringMap(b, smallSample)
307	if err != nil {
308		t.Fatalf("error building: %v", err)
309	}
310
311	err = b.Close()
312	if err != nil {
313		t.Fatalf("error closing: %v", err)
314	}
315
316	fst, err := Load(buf.Bytes())
317	if err != nil {
318		t.Fatalf("error loading set: %v", err)
319	}
320
321	// seek past thurs (exactly to tues)
322	expect := map[string]uint64{
323		"mon":  smallSample["mon"],
324		"tues": smallSample["tues"],
325		"tye":  smallSample["tye"],
326	}
327	got := map[string]uint64{}
328	itr, err := fst.Iterator(nil, nil)
329	for err == nil {
330		key, val := itr.Current()
331		got[string(key)] = val
332
333		if string(key) == "mon" {
334			err = itr.Seek([]byte("tue"))
335		} else {
336			err = itr.Next()
337		}
338	}
339	if err != ErrIteratorDone {
340		t.Errorf("iterator error: %v", err)
341	}
342	if !reflect.DeepEqual(expect, got) {
343		t.Errorf("expected %v, got: %v", expect, got)
344	}
345
346	// similar but seek to something after thurs before tues
347	got = map[string]uint64{}
348	itr, err = fst.Iterator(nil, nil)
349	for err == nil {
350		key, val := itr.Current()
351		got[string(key)] = val
352
353		if string(key) == "mon" {
354			err = itr.Seek([]byte("thv"))
355		} else {
356			err = itr.Next()
357		}
358	}
359	if err != ErrIteratorDone {
360		t.Errorf("iterator error: %v", err)
361	}
362	if !reflect.DeepEqual(expect, got) {
363		t.Errorf("expected %v, got: %v", expect, got)
364	}
365
366	// similar but seek to thurs+suffix
367	got = map[string]uint64{}
368	itr, err = fst.Iterator(nil, nil)
369	for err == nil {
370		key, val := itr.Current()
371		got[string(key)] = val
372
373		if string(key) == "mon" {
374			err = itr.Seek([]byte("thursday"))
375		} else {
376			err = itr.Next()
377		}
378	}
379	if err != ErrIteratorDone {
380		t.Errorf("iterator error: %v", err)
381	}
382	if !reflect.DeepEqual(expect, got) {
383		t.Errorf("expected %v, got: %v", expect, got)
384	}
385
386	// seek past last key (still inside iterator boundaries)
387	expect = map[string]uint64{
388		"mon": smallSample["mon"],
389	}
390	got = map[string]uint64{}
391	itr, err = fst.Iterator(nil, nil)
392	for err == nil {
393		key, val := itr.Current()
394		got[string(key)] = val
395
396		if string(key) == "mon" {
397			err = itr.Seek([]byte("zzz"))
398		} else {
399			err = itr.Next()
400		}
401	}
402	if err != ErrIteratorDone {
403		t.Errorf("iterator error: %v", err)
404	}
405	if !reflect.DeepEqual(expect, got) {
406		t.Errorf("expected %v, got: %v", expect, got)
407	}
408}
409
410func TestIteratorSeekOutsideBoundaries(t *testing.T) {
411	var buf bytes.Buffer
412	b, err := New(&buf, nil)
413	if err != nil {
414		t.Fatalf("error creating builder: %v", err)
415	}
416
417	err = insertStringMap(b, smallSample)
418	if err != nil {
419		t.Fatalf("error building: %v", err)
420	}
421
422	err = b.Close()
423	if err != nil {
424		t.Fatalf("error closing: %v", err)
425	}
426
427	fst, err := Load(buf.Bytes())
428	if err != nil {
429		t.Fatalf("error loading set: %v", err)
430	}
431
432	// first test with boundaries should just see thurs/tues
433	expect := map[string]uint64{
434		"thurs": smallSample["thurs"],
435		"tues":  smallSample["tues"],
436	}
437	got := map[string]uint64{}
438	itr, err := fst.Iterator([]byte("th"), []byte("tuesd"))
439	for err == nil {
440		key, val := itr.Current()
441		got[string(key)] = val
442		err = itr.Next()
443	}
444	if err != ErrIteratorDone {
445		t.Errorf("iterator error: %v", err)
446	}
447	if !reflect.DeepEqual(expect, got) {
448		t.Errorf("expected %v, got: %v", expect, got)
449	}
450
451	// this time try to seek before the start,
452	// still shouldn't see mon
453	got = map[string]uint64{}
454	itr, err = fst.Iterator([]byte("th"), []byte("tuesd"))
455	if err != nil {
456		t.Fatalf("error before seeking: %v", err)
457	}
458	err = itr.Seek([]byte("cat"))
459	for err == nil {
460		key, val := itr.Current()
461		got[string(key)] = val
462		err = itr.Next()
463	}
464	if err != ErrIteratorDone {
465		t.Errorf("iterator error: %v", err)
466	}
467	if !reflect.DeepEqual(expect, got) {
468		t.Errorf("expected %v, got: %v", expect, got)
469	}
470
471	// this time try to seek past the end
472	// should see nothing
473
474	itr, err = fst.Iterator([]byte("th"), []byte("tuesd"))
475	if err != nil {
476		t.Fatalf("error before seeking: %v", err)
477	}
478	err = itr.Seek([]byte("ty"))
479	if err != ErrIteratorDone {
480		t.Fatalf("expected ErrIteratorDone, got %v", err)
481	}
482}
483
484var key []byte
485var val uint64
486
487func BenchmarkFSTIteratorAllInMem(b *testing.B) {
488	// first build the FST once
489	dataset := thousandTestWords
490	randomThousandVals := randomValues(dataset)
491	var buf bytes.Buffer
492	builder, err := New(&buf, nil)
493	if err != nil {
494		b.Fatalf("error creating builder: %v", err)
495	}
496	err = insertStrings(builder, dataset, randomThousandVals)
497	if err != nil {
498		b.Fatalf("error inserting thousand words: %v", err)
499	}
500	err = builder.Close()
501	if err != nil {
502		b.Fatalf("error closing builder: %v", err)
503	}
504
505	b.ResetTimer()
506
507	for i := 0; i < b.N; i++ {
508
509		fst, err := Load(buf.Bytes())
510		if err != nil {
511			b.Fatalf("error loading FST: %v", err)
512		}
513
514		itr, err := fst.Iterator(nil, nil)
515		for err == nil {
516			key, val = itr.Current()
517			err = itr.Next()
518		}
519		if err != ErrIteratorDone {
520			b.Fatalf("iterator error: %v", err)
521		}
522
523		err = fst.Close()
524		if err != nil {
525			b.Fatalf("error closing FST: %v", err)
526		}
527
528	}
529}
530
531func TestFuzzySearch(t *testing.T) {
532	var buf bytes.Buffer
533	b, err := New(&buf, nil)
534	if err != nil {
535		t.Fatalf("error creating builder: %v", err)
536	}
537
538	err = insertStringMap(b, smallSample)
539	if err != nil {
540		t.Fatalf("error building: %v", err)
541	}
542
543	err = b.Close()
544	if err != nil {
545		t.Fatalf("error closing: %v", err)
546	}
547
548	fst, err := Load(buf.Bytes())
549	if err != nil {
550		t.Fatalf("error loading set: %v", err)
551	}
552
553	lb, err := levenshtein.NewLevenshteinAutomatonBuilder(uint8(1), false)
554	if err != nil {
555		t.Fatalf("error loading set: %v", err)
556	}
557
558	fuzzy, err := lb.BuildDfa("tue", 1)
559	if err != nil {
560		t.Fatalf("error building levenshtein automaton: %v", err)
561	}
562
563	want := map[string]uint64{
564		"tues": 3,
565		"tye":  99,
566	}
567	got := map[string]uint64{}
568	itr, err := fst.Search(fuzzy, nil, nil)
569	for err == nil {
570		key, val := itr.Current()
571		got[string(key)] = val
572		err = itr.Next()
573	}
574	if err != ErrIteratorDone {
575		t.Errorf("iterator error: %v", err)
576	}
577	if !reflect.DeepEqual(want, got) {
578		t.Errorf("expected %v, got: %v", want, got)
579	}
580}
581
582func TestRegexpSearch(t *testing.T) {
583	var buf bytes.Buffer
584	b, err := New(&buf, nil)
585	if err != nil {
586		t.Fatalf("error creating builder: %v", err)
587	}
588
589	err = insertStringMap(b, smallSample)
590	if err != nil {
591		t.Fatalf("error building: %v", err)
592	}
593
594	err = b.Close()
595	if err != nil {
596		t.Fatalf("error closing: %v", err)
597	}
598
599	fst, err := Load(buf.Bytes())
600	if err != nil {
601		t.Fatalf("error loading set: %v", err)
602	}
603
604	r, err := regexp.New(`t.*s`)
605	if err != nil {
606		t.Fatalf("error building regexp automaton: %v", err)
607	}
608
609	want := map[string]uint64{
610		"thurs": 5,
611		"tues":  3,
612	}
613
614	got := map[string]uint64{}
615	itr, err := fst.Search(r, nil, nil)
616	for err == nil {
617		key, val := itr.Current()
618		got[string(key)] = val
619		err = itr.Next()
620	}
621	if err != ErrIteratorDone {
622		t.Errorf("iterator error: %v", err)
623	}
624	if !reflect.DeepEqual(want, got) {
625		t.Errorf("expected %v, got: %v", want, got)
626	}
627
628	got = map[string]uint64{}
629	itr, err = fst.Search(r, []byte("t"), nil)
630	for err == nil {
631		key, val := itr.Current()
632		got[string(key)] = val
633		err = itr.Next()
634	}
635	if err != ErrIteratorDone {
636		t.Errorf("iterator error: %v", err)
637	}
638	if !reflect.DeepEqual(want, got) {
639		t.Errorf("with start key t, expected %v, got: %v", want, got)
640	}
641
642	got = map[string]uint64{}
643	itr, err = fst.Search(r, nil, []byte("u"))
644	for err == nil {
645		key, val := itr.Current()
646		got[string(key)] = val
647		err = itr.Next()
648	}
649	if err != ErrIteratorDone {
650		t.Errorf("iterator error: %v", err)
651	}
652	if !reflect.DeepEqual(want, got) {
653		t.Errorf("with end key u, expected %v, got: %v", want, got)
654	}
655
656	got = map[string]uint64{}
657	itr, err = fst.Search(r, []byte("t"), []byte("u"))
658	for err == nil {
659		key, val := itr.Current()
660		got[string(key)] = val
661		err = itr.Next()
662	}
663	if err != ErrIteratorDone {
664		t.Errorf("iterator error: %v", err)
665	}
666	if !reflect.DeepEqual(want, got) {
667		t.Errorf("with start key t, end key u, expected %v, got: %v", want, got)
668	}
669}
670