1//  Copyright (c) 2014 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package snowball
16
17import (
18	"fmt"
19
20	"github.com/blevesearch/bleve/analysis"
21	"github.com/blevesearch/bleve/registry"
22
23	"github.com/kljensen/snowball"
24)
25
// Name is the identifier under which this package registers its token
// filter constructor with the registry.
const Name = "stemmer_snowball"
27
// SnowballStemmer is a token filter that stems token terms with the
// snowball library for a single configured language.
type SnowballStemmer struct {
	// langauge is the language name handed to snowball.Stem.
	// NOTE(review): the identifier is a misspelling of "language"; it is kept
	// as-is because the constructor and Filter refer to it by this name.
	langauge string
}
31
32func NewSnowballStemmer(language string) *SnowballStemmer {
33	return &SnowballStemmer{
34		langauge: language,
35	}
36}
37
38func (s *SnowballStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
39	for _, token := range input {
40		// if it is not a protected keyword, stem it
41		if !token.KeyWord {
42			stemmed, _ := snowball.Stem(string(token.Term), s.langauge, true)
43			token.Term = []byte(stemmed)
44		}
45	}
46	return input
47}
48
49func SnowballStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
50	language, ok := config["language"].(string)
51	if !ok {
52		return nil, fmt.Errorf("must specify language")
53	}
54	return NewSnowballStemmer(language), nil
55}
56
// init registers SnowballStemmerConstructor with the registry as a token
// filter under Name when the package is loaded.
func init() {
	registry.RegisterTokenFilter(Name, SnowballStemmerConstructor)
}
60