1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3/**
4 * @copyright 2014 Couchbase, Inc.
5 *
6 * @author Sarath Lakshman  <sarath@couchbase.com>
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
9 * use this file except in compliance with the License. You may obtain a copy of
10 * the License at
11 *
12 *  http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
17 * License for the specific language governing permissions and limitations under
18 * the License.
19 **/
20
21#include "config.h"
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include "macros.h"
26#include "../src/file_merger.h"
27#include "file_tests.h"
28
/* Number of sorted source files generated and merged by the test. */
#define N_FILES 4

/* Number of records written into each generated source file. */
#define MAX_RECORDS_PER_FILE 10000
31
/*
 * Fixed-size record used by the test. Records are written to and read from
 * the temporary files as raw bytes of this struct.
 */
typedef struct {
    int key;     /* value the records are ordered and compared by */
    int fileno;  /* 1-based number of the source file that produced the record */
} test_record_t;
36
37static int read_record(FILE *f, void **buffer, void *ctx)
38{
39    test_record_t *rec = (test_record_t *) malloc(sizeof(test_record_t));
40    (void) ctx;
41
42    if (rec == NULL) {
43        return FILE_MERGER_ERROR_ALLOC;
44    }
45
46    if (fread(rec, sizeof(test_record_t), 1, f) != 1) {
47        free(rec);
48        if (feof(f)) {
49            return 0;
50        } else {
51            return FILE_MERGER_ERROR_FILE_READ;
52        }
53    }
54
55    *buffer = rec;
56
57    return sizeof(test_record_t);
58}
59
60static file_merger_error_t write_record(FILE *f, void *buffer, void *ctx)
61{
62    (void) ctx;
63
64    if (fwrite(buffer, sizeof(test_record_t), 1, f) != 1) {
65        return FILE_MERGER_ERROR_FILE_WRITE;
66    }
67
68    return FILE_MERGER_SUCCESS;
69}
70
71static int compare_records(const void *rec1, const void *rec2, void *ctx)
72{
73    int ret;
74    test_record_t *a, *b;
75    (void) ctx;
76
77    a = (test_record_t *) rec1;
78    b = (test_record_t *) rec2;
79
80    return a->key - b->key;
81
82    return ret;
83}
84
/*
 * Record release callback: frees a record buffer with free(), matching the
 * malloc() done by read_record. ctx is unused.
 */
static void free_record(void *rec, void *ctx)
{
    free(rec);

    (void) ctx;
}
91
92static size_t dedup_records(file_merger_record_t **records, size_t n, void *ctx)
93{
94    size_t max = 0;
95    size_t i;
96    (void) ctx;
97
98    for (i = 1; i < n; i++) {
99        if (records[max]->filenum < records[i]->filenum) {
100            max = i;
101        }
102    }
103
104    return max;
105}
106
107static void check_deduped_file(const char *file_path, int *expected_set, int len)
108{
109    FILE *f;
110    test_record_t *rec;
111    int record_size = 1;
112    size_t i;
113    unsigned long num_records = 0;
114
115    f = fopen(file_path, "rb");
116    assert(f != NULL);
117
118    while (record_size > 0) {
119        record_size = read_record(f, (void **) &rec, NULL);
120        assert(record_size >= 0);
121
122        if (record_size > 0) {
123            if (rec->key % 40 == 0) {
124                assert(rec->fileno == 4);
125            } else if (rec->key % 20 == 0) {
126                assert(rec->fileno == 3);
127            } else if (rec->key % 10 == 0) {
128               assert(rec->fileno == 2);
129            } else {
130                assert(rec->fileno == 1);
131            }
132
133            assert(expected_set[rec->key]);
134            num_records++;
135            free_record((void *) rec, NULL);
136        }
137    }
138
139    /* Verify count */
140    for (i = 0; i < len; i++) {
141        if (expected_set[i]) {
142            num_records--;
143        }
144    }
145
146    assert(num_records == 0);
147
148    fclose(f);
149}
150
151
152void file_deduper_tests(void)
153{
154    const char *source_files[N_FILES] = {
155        "sorted_file_1.tmp",
156        "sorted_file_2.tmp",
157        "sorted_file_3.tmp",
158        "sorted_file_4.tmp"
159    };
160    const char *dest_file = "merged_file.tmp";
161    unsigned i, j;
162    file_merger_error_t ret;
163    test_record_t rec;
164    int key;
165    int multiples[] = {5, 10, 20, 40};
166    int max_arr_size = 40 * MAX_RECORDS_PER_FILE + 1;
167    int *expected_result = calloc(40 * MAX_RECORDS_PER_FILE + 1, sizeof(int));
168    assert(expected_result != NULL);
169
170    fprintf(stderr, "\nRunning file deduper tests...\n");
171
172    for (i = 0; i < N_FILES; ++i) {
173        FILE *f;
174
175        remove(source_files[i]);
176        f = fopen(source_files[i], "ab");
177        assert(f != NULL);
178
179        for (j = 0; j < MAX_RECORDS_PER_FILE; ++j) {
180            key = multiples[i] * (j + 1);
181            rec.key = key;
182            rec.fileno = i + 1;
183            assert(fwrite(&rec, sizeof(test_record_t), 1, f) == 1);
184            expected_result[key] = 1;
185        }
186
187        fclose(f);
188    }
189
190    remove(dest_file);
191    ret = merge_files(source_files, N_FILES,
192                      dest_file,
193                      read_record, write_record, NULL, compare_records,
194                      dedup_records, free_record, 0, NULL);
195
196    assert(ret == FILE_MERGER_SUCCESS);
197    check_deduped_file(dest_file, expected_result, max_arr_size);
198
199    for (i = 0; i < N_FILES; ++i) {
200        remove(source_files[i]);
201    }
202    remove(dest_file);
203
204    fprintf(stderr, "Running file deduper tests passed\n\n");
205    free(expected_result);
206}
207