1// Copyright © 2016 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package cmd
16
import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"os"

	"github.com/spf13/cobra"
)
26
// batchSize is bound to the bulk command's --batch/-b flag (default 1000). It
// is the number of documents the bulk command accumulates between batch
// submissions to the index.
var batchSize int
28
29// bulkCmd represents the bulk command
30var bulkCmd = &cobra.Command{
31	Use:   "bulk [index path] [data paths ...]",
32	Short: "bulk loads from newline delimited JSON files",
33	Long:  `The bulk command will perform batch loading of documents in one or more newline delimited JSON files.`,
34	Annotations: map[string]string{
35		canMutateBleveIndex: "true",
36	},
37	RunE: func(cmd *cobra.Command, args []string) error {
38		if len(args) < 2 {
39			return fmt.Errorf("must specify at least one path")
40		}
41
42		i := 0
43		batch := idx.NewBatch()
44
45		for _, file := range args[1:] {
46
47			file, err := os.Open(file)
48			if err != nil {
49				return err
50			}
51
52			fmt.Printf("Indexing: %s\n", file.Name())
53			r := bufio.NewReader(file)
54
55			for {
56				if i%batchSize == 0 {
57					fmt.Printf("Indexing batch (%d docs)...\n", i)
58					err := idx.Batch(batch)
59					if err != nil {
60						return err
61					}
62					batch = idx.NewBatch()
63				}
64
65				b, _ := r.ReadBytes('\n')
66				if len(b) == 0 {
67					break
68				}
69
70				var doc interface{}
71				doc = b
72				var err error
73				if parseJSON {
74					err = json.Unmarshal(b, &doc)
75					if err != nil {
76						return fmt.Errorf("error parsing JSON: %v", err)
77					}
78				}
79
80				docID := randomString(5)
81				err = batch.Index(docID, doc)
82				if err != nil {
83					return err
84				}
85				i++
86			}
87			err = idx.Batch(batch)
88			if err != nil {
89				return err
90			}
91
92			err = file.Close()
93			if err != nil {
94				return err
95			}
96
97		}
98		return nil
99	},
100}
101
// letters is the alphabet randomString draws from: the 26 lowercase ASCII
// letters followed by the 26 uppercase ones.
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// randomString returns a string of n characters, each chosen from letters
// with the package-level math/rand generator.
func randomString(n int) string {
	out := make([]rune, n)
	for pos := 0; pos < len(out); pos++ {
		pick := rand.Intn(len(letters))
		out[pos] = letters[pick]
	}
	return string(out)
}
111
112func init() {
113	RootCmd.AddCommand(bulkCmd)
114
115	bulkCmd.Flags().IntVarP(&batchSize, "batch", "b", 1000, "Batch size for loading, default 1000.")
116	bulkCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON, defaults true.")
117}
118