1/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3/**
4 * @copyright 2013 Couchbase, Inc.
5 *
6 * @author Filipe Manana  <filipe@couchbase.com>
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
9 * use this file except in compliance with the License. You may obtain a copy of
10 * the License at
11 *
12 *  http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
16 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
17 * License for the specific language governing permissions and limitations under
18 * the License.
19 **/
20
21#include "config.h"
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include "macros.h"
26#include "../src/file_sorter.h"
27#include "file_tests.h"
28
29#define UNSORTED_FILE_PATH "unsorted_file.data"
30#define SORT_TMP_DIR       "."
31
32static const int data[] = {
33    661, 900, 596, 881, 118, 494, 638, 814, 206, 1217, 160, 504, 259,
34    1038, 125, 849, 168, 216, 1002, 438, 726, 1156, 1162, 1210, 933, 320,
35    273, 1176, 516, 810, 957, 134, 348, 1173, 415, 266, 920, 392, 213, 958,
36    135, 892, 171, 293, 829, 447, 524, 520, 426, 866, 384, 562, 620, 1112,
37    220, 860, 446, 819, 1066, 508, 375, 232, 947, 846, 127, 165, 759, 1196,
38    937, 421, 1005, 963, 629, 1058, 35, 917, 636, 543, 938, 1021, 611, 582, 397,
39    329, 648, 84, 1200, 1143, 1076, 1192, 27, 274, 1123, 1006, 46, 836, 868,
40    1150, 718, 1061, 1171, 349, 450, 493, 1157, 970, 111, 668, 178, 359, 692, 663,
41    1064, 100, 678, 1023, 150, 783, 1015, 62, 1124, 506, 1168, 393, 1083, 894, 591,
42    529, 11, 51, 422, 475, 200, 1195, 519, 408, 16, 761, 950, 869, 746, 922, 271,
43    439, 433, 784, 141, 762, 749, 99, 1054, 872, 1087, 583, 828, 1216, 117, 850,
44    419, 991, 891, 627, 835, 381, 391, 297, 202, 73, 224, 1099, 223, 921, 733,
45    771, 197, 436, 1152, 727, 870, 357, 1114, 343, 7, 729, 145, 308, 941, 1053,
46    1161, 276, 724, 1069, 853, 734, 131, 987, 1081, 559, 1118, 91, 477, 976, 396,
47    1180, 366, 874, 251, 635, 852, 622, 988, 693, 1075, 1085, 208, 1137, 1154, 264,
48    647, 341, 962, 474, 56, 1197, 839, 1004, 268, 371, 270, 766, 364, 52, 705, 90,
49    428, 1129, 1108, 672, 24, 862, 1011, 96, 600, 986, 1145, 1194, 34, 104, 137, 69,
50    247, 218, 20, 187, 885, 63, 186, 755, 687, 575, 775, 282, 602, 299, 943, 53, 183,
51    978, 471, 122, 389, 40, 61, 1204, 592, 227, 345, 334, 617, 742, 676, 1191, 683,
52    1078, 700, 1159, 103, 431, 1105, 538, 456, 210, 21, 568, 350, 544, 710, 579,
53    1453, 1378, 1348, 1319, 1425, 1261, 1333, 1419, 1482, 1499, 1360, 1267, 1451,
54    1417, 1265, 1416, 1427, 1329, 1332, 1287, 1359, 1223, 1262, 1492, 1324, 1278,
55    1318, 1235, 1286, 1420, 1219, 1367, 1382, 1328, 1229, 1237, 1396, 1409, 1236,
56    1298, 1302, 1459, 1271, 1430, 1246, 1282, 1350, 1299, 1325, 1345, 1322, 1323,
57    1234, 1245, 1424, 1291, 1486, 1500, 1357, 1394, 1468, 1444, 1493, 1330, 1273,
58    1268, 1300, 1498, 1490, 1257, 1343, 1320, 1458, 1294, 1386, 1308, 1230, 1281,
59    1392, 1341, 1411, 1388, 1475, 1445, 1311, 1403, 1344, 1252, 1362, 1377, 1436,
60    1407, 1327, 1225, 1276, 1440, 1354, 1480, 1387, 1421, 1331, 1321, 1503, 1402,
61    1240, 1375, 1438, 1347, 1342, 1264, 1288, 1488, 1275, 1243, 1380, 1460, 1248,
62    1254, 1244, 1272, 1304, 1352, 1452, 1485, 1233, 1253, 1435, 1469, 1307, 1431,
63    1358, 1462, 1434, 1501, 1389, 1372, 1391, 1479, 1401, 1270, 1338, 1363, 1306,
64    1364, 1249, 1303, 1406, 1450, 1353, 1222, 1274, 1484, 1346, 1335, 1231, 1494,
65    1412, 1463, 1337, 1376, 1428, 1466, 1433, 1310, 1368, 1383, 1400, 1293, 1242,
66    1371, 1393, 1260, 1442, 1220, 1283, 1258, 1477, 1361, 1351, 1285, 1495, 1226,
67    1410, 1397, 1256, 1414, 1472, 1284, 1218, 1399, 1314, 1297, 1385, 1465, 1390,
68    1443, 1461, 1301, 1464, 1384, 1449, 1266, 1446, 1478, 1405, 1356, 1250, 1373,
69    1309, 1255, 1340, 1423, 1413, 1224, 1315, 1447, 1365, 1415, 1355, 1239, 1317,
70    1374, 1277, 1238, 1221, 1296, 1251, 1336, 1269, 1489, 1491, 1370, 1437, 1441,
71    1481, 1305, 1316, 1496, 1502, 1290, 1334, 1295, 1471, 1448, 1422, 1247, 1487,
72    1455, 1483, 1408, 1395, 1418, 1432, 1404, 1326, 1457, 1470, 1456, 1313, 1349,
73    1312, 1228, 1369, 1292, 1263, 1454, 1280, 1381, 1474, 1289, 1476, 1426, 1279,
74    1429, 1473, 1467, 1366, 1398, 1227, 1339, 1232, 1379, 1259, 1439, 1241, 1497,
75    157, 930, 702, 207, 1084, 625, 1100, 542, 879, 573, 425, 188, 1012, 55, 344, 77,
76    1107, 1207, 1103, 281, 123, 863, 847, 32, 1205, 806, 159, 757, 6, 307, 1027,
77    1032, 237, 162, 598, 241, 563, 521, 72, 97, 1203, 116, 696, 317, 453, 129, 969,
78    33, 777, 594, 115, 442, 953, 511, 682, 476, 369, 310, 468, 164, 373, 763, 1160,
79    555, 1174, 915, 532, 1048, 198, 1208, 557, 240, 1151, 858, 361, 1198, 283, 518,
80    64, 840, 245, 721, 876, 1136, 951, 1018, 788, 296, 838, 946, 925, 634, 539, 1127,
81    175, 483, 253, 1056, 86, 626, 540, 756, 142, 708, 689, 646, 764, 1122, 203, 679,
82    398, 211, 13, 531, 18, 1022, 66, 842, 1202, 400, 437, 1102, 816, 657, 944, 1116,
83    509, 336, 410, 76, 798, 912, 904, 278, 688, 1172, 1045, 979, 136, 934, 14, 968,
84    716, 738, 380, 588, 554, 399, 1178, 205, 902, 230, 707, 737, 472, 671, 808, 57,
85    382, 286, 666, 1164, 765, 871, 1098, 98, 536, 10, 413, 105, 1169, 313, 458, 821,
86    658, 650, 301, 440, 25, 1063, 1167, 1155, 525, 38, 945, 550, 774, 479, 1133, 723,
87    898, 703, 1109, 901, 172, 260, 972, 394, 1209, 653, 769, 505, 748, 1187, 1072,
88    1179, 457, 499, 1033, 466, 156, 1186, 526, 507, 189, 337, 940, 1094, 1029, 287,
89    138, 124, 785, 985, 865, 631, 610, 395, 667, 939, 980, 590, 1090, 368, 1214, 153,
90    409, 214, 1057, 95, 256, 451, 388, 948, 151, 1211, 121, 387, 642, 1020, 551, 414,
91    864, 1034, 811, 363, 226, 851, 444, 130, 492, 290, 435, 1039, 78, 1074, 107, 577,
92    452, 548, 386, 490, 616, 952, 1016, 318, 607, 50, 1170, 621, 706, 942, 751, 736,
93    155, 416, 424, 929, 877, 528, 574, 834, 252, 491, 699, 1013, 262, 17, 429, 719,
94    656, 79, 385, 832, 463, 924, 744, 1138, 996, 637, 911, 29, 147, 833, 1135, 725,
95    109, 1183, 179, 166, 728, 12, 47, 31, 1095, 402, 931, 897, 906, 9, 1163, 530,
96    244, 780, 500, 23, 903, 830, 354, 427, 83, 586, 60, 106, 778, 487, 352, 303,
97    1128, 502, 54, 246, 730, 571, 430, 709, 975, 1003, 152, 1043, 465, 560, 215,
98    861, 608, 434, 263, 81, 1082, 1071, 460, 895, 731, 643, 695, 423, 70, 997, 316,
99    527, 1017, 623, 248, 441, 404, 522, 101, 831, 907, 919, 180, 691, 181, 279, 817,
100    845, 965, 379, 722, 319, 793, 1014, 813, 886, 45, 752, 681, 698, 995, 1199, 584,
101    677, 743, 561, 803, 28, 1126, 71, 974, 143, 1091, 908, 298, 374, 792, 217, 645,
102    192, 501, 541, 510, 1077, 802, 254, 685, 82, 110, 630, 534, 768, 1026, 790, 291,
103    228, 503, 280, 173, 229, 328, 325, 690, 674, 149, 1131, 1041, 741, 85, 1111, 243,
104    1079, 873, 406, 139, 1080, 119, 401, 332, 67, 546, 411, 893, 652, 604, 300, 15, 512,
105    789, 641, 1134, 801, 1009, 796, 1132, 1067, 1215, 570, 261, 1184, 747, 704, 250,
106    1110, 890, 959, 701, 1030, 201, 883, 1093, 1042, 694, 495, 1181, 285, 322, 443,
107    480, 306, 498, 132, 781, 578, 960, 30, 887, 321, 859, 174, 432, 1141, 351, 8, 1140,
108    1130, 330, 448, 65, 484, 1146, 1125, 613, 1153, 489, 185, 720, 128, 662, 5, 370,
109    1088, 339, 715, 896, 454, 49, 234, 609, 212, 1001, 889, 204, 177, 779, 552, 684,
110    984, 880, 473, 935, 405, 258, 566, 1051, 1189, 338, 74, 462, 1182, 1044, 191, 993,
111    169, 805, 664, 1206, 1046, 221, 639, 44, 927, 88, 1055, 323, 140, 309, 576, 1148,
112    717, 114, 928, 225, 327, 824, 867, 606, 910, 844, 739, 2, 1201, 19, 269, 497, 797,
113    488, 265, 644, 469, 856, 587, 331, 87, 649, 68, 916, 360, 572, 1101, 376, 711,
114    1089, 955, 449, 564, 1139, 112, 353, 686, 818, 1190, 712, 556, 651, 595, 1193, 144,
115    913, 1025, 787, 158, 553, 558, 966, 346, 961, 1068, 753, 37, 983, 113, 120, 461,
116    899, 1060, 760, 567, 284, 994, 378, 231, 990, 1147, 514, 655, 800, 1121, 1028, 39,
117    619, 517, 654, 815, 754, 882, 614, 825, 1113, 827, 355, 342, 549, 1000, 884, 513,
118    673, 714, 956, 812, 182, 347, 1008, 804, 981, 1117, 1, 304, 547, 75, 184, 1024,
119    971, 496, 1073, 597, 170, 255, 272, 1188, 275, 1175, 914, 823, 445, 26, 1185,
120    219, 794, 242, 954, 92, 697, 633, 855, 470, 1104, 982, 640, 1144, 786, 1050, 407,
121    89, 417, 22, 59, 848, 277, 267, 669, 909, 412, 773, 888, 235, 176, 973, 964, 675,
122    843, 601, 791, 199, 545, 593, 133, 324, 589, 660, 315, 193, 467, 326, 875, 999,
123    167, 1019, 878, 1115, 478, 209, 1106, 126, 239, 148, 770, 822, 998, 485, 305, 580,
124    1007, 1047, 809, 841, 1177, 195, 826, 420, 356, 732, 455, 992, 758, 1086, 1120,
125    367, 390, 335, 776, 362, 585, 857, 340, 767, 1092, 624, 713, 333, 745, 1065, 196,
126    383, 1142, 807, 154, 236, 294, 257, 1035, 58, 222, 967, 233, 249, 535, 820, 599,
127    481, 612, 618, 1031, 782, 1166, 302, 837, 565, 605, 905, 94, 1037, 48, 108, 680,
128    1049, 670, 936, 932, 1062, 36, 615, 750, 1165, 795, 194, 358, 3, 163, 80, 288,
129    740, 1119, 311, 1040, 464, 1036, 581, 735, 372, 923, 482, 537, 628, 289, 977,
130    1096, 295, 1097, 418, 93, 926, 569, 949, 312, 772, 292, 41, 1149, 1070, 799, 4,
131    1212, 365, 918, 190, 632, 102, 459, 533, 1010, 989, 523, 1213, 1059, 377, 403,
132    515, 314, 1052, 665, 1158, 238, 43, 161, 486, 854, 659, 42, 603, 146
133};
134
135static int *sorted_data;
136
137
138static int read_record(FILE *f, void **buffer, void *ctx)
139{
140    int *rec = (int *) malloc(sizeof(int));
141    (void) ctx;
142
143    if (rec == NULL) {
144        return FILE_MERGER_ERROR_ALLOC;
145    }
146
147    if (fread(rec, sizeof(int), 1, f) != 1) {
148        free(rec);
149        if (feof(f)) {
150            return 0;
151        } else {
152            return FILE_MERGER_ERROR_FILE_READ;
153        }
154    }
155
156    *buffer = rec;
157
158    return sizeof(int);
159}
160
161static file_merger_error_t write_record(FILE *f, void *buffer, void *ctx)
162{
163    (void) ctx;
164
165    if (fwrite(buffer, sizeof(int), 1, f) != 1) {
166        return FILE_MERGER_ERROR_FILE_WRITE;
167    }
168
169    return FILE_MERGER_SUCCESS;
170}
171
172static int compare_records(const void *rec1, const void *rec2, void *ctx)
173{
174    (void) ctx;
175
176    return *((const int *) rec1) - *((const int *) rec2);
177}
178
179static void free_record(void *rec, void *ctx)
180{
181   (void) ctx;
182
183   free(rec);
184}
185
186static int check_file_sorted(const char *file_path)
187{
188    FILE *f;
189    void *record;
190    int record_size;
191    unsigned nrecords = (unsigned) (sizeof(data) / sizeof(int));
192    unsigned i;
193
194    f = fopen(file_path, "rb");
195    assert(f != NULL);
196
197    for (i = 0; i < nrecords; ++i) {
198        record_size = read_record(f, &record, NULL);
199        assert(record_size == sizeof(int));
200        if (*((int *) record) != sorted_data[i]) {
201            fclose(f);
202            free_record(record, NULL);
203            return 0;
204        }
205        free_record(record, NULL);
206    }
207
208    /* Check file has no extra (duplicated or garbage) records. */
209    assert(read_record(f, &record, NULL) == 0);
210
211    fclose(f);
212
213    return 1;
214}
215
216
217static void create_file()
218{
219    unsigned i;
220    FILE *f;
221
222    remove(UNSORTED_FILE_PATH);
223    f = fopen(UNSORTED_FILE_PATH, "ab");
224    assert(f != NULL);
225
226    for (i = 0; i < (sizeof(data) / sizeof(int)); ++i) {
227        assert(fwrite(&data[i], sizeof(int), 1, f) == 1);
228    }
229
230    fclose(f);
231}
232
233static file_merger_error_t check_sorted_callback(void *buf, void *ctx)
234{
235    int *rec = (int *) buf, *i = (int *) ctx;
236    assert(sorted_data[*i] == *rec);
237    (*i)++;
238
239    return FILE_MERGER_SUCCESS;
240}
241
242static void test_file_sort(unsigned buffer_size,
243                           unsigned temp_files,
244                           file_merger_feed_record_t callback,
245                           int skip_writeback)
246{
247    file_sorter_error_t ret;
248    int i = 0;
249    create_file();
250
251    ret = sort_file(UNSORTED_FILE_PATH,
252                    SORT_TMP_DIR,
253                    temp_files,
254                    buffer_size,
255                    read_record,
256                    write_record,
257                    callback,
258                    compare_records,
259                    free_record,
260                    skip_writeback,
261                    &i);
262
263    assert(ret == FILE_SORTER_SUCCESS);
264
265    if (!skip_writeback) {
266        assert(check_file_sorted(UNSORTED_FILE_PATH));
267    } else {
268        assert(check_file_sorted(UNSORTED_FILE_PATH) == 0);
269    }
270
271    remove(UNSORTED_FILE_PATH);
272}
273
274
275static int int_cmp(const void *a, const void *b)
276{
277    return *((const int *) a) - *((const int *) b);
278}
279
280
281void file_sorter_tests(void)
282{
283    const unsigned temp_files[] = {
284        2, 3, 4, 5, 6, 7, 8, 9, 10
285    };
286    const unsigned buffer_sizes[] = {
287        sizeof(int) * 2,
288        sizeof(int) * 3,
289        (sizeof(int) - 1) * 7,
290        sizeof(int) * 10,
291        (sizeof(int) - 1) * 19,
292        sizeof(int) * 33,
293        (sizeof(int) - 1) * 99,
294        sizeof(int) * 1000000
295    };
296
297    unsigned i, j;
298    unsigned long nrecords = (unsigned long) (sizeof(data) / sizeof(int));
299
300    fprintf(stderr, "Running file sorter tests...\n");
301
302    sorted_data = (int *) malloc(sizeof(data));
303    assert(sorted_data != NULL);
304    memcpy(sorted_data, data, sizeof(data));
305    qsort(sorted_data, nrecords, sizeof(int), int_cmp);
306
307    for (i = 0; i < (sizeof(buffer_sizes) / sizeof(unsigned)); ++i) {
308        for (j = 0; j < (sizeof(temp_files) / sizeof(unsigned)); ++j) {
309            fprintf(stderr,
310            "Testing file sort (%lu records) with buffer size of %u bytes"
311            " and %u temporary files\n",
312            nrecords, buffer_sizes[i], temp_files[j]);
313            test_file_sort(buffer_sizes[i], temp_files[j], NULL, 0);
314        }
315    }
316
317    fprintf(stderr,
318            "Testing file sort callback (%lu records) with buffer size of %lu bytes"
319            " and %u temporary files\n",
320            nrecords, sizeof(int) * 501, 3);
321    test_file_sort(sizeof(int) * 501, 3, check_sorted_callback, 0);
322
323    fprintf(stderr,
324            "Testing file sort callback (%lu records) with buffer size of %lu bytes"
325            " and %u temporary files\n",
326            nrecords, sizeof(int) * 50, 10);
327    test_file_sort(sizeof(int) * 50, 10, check_sorted_callback, 0);
328
329
330    fprintf(stderr,
331            "Testing file sort callback with skip writeback (%lu records)"
332            "with buffer size of %lu bytes and %u temporary files\n",
333            nrecords, sizeof(int) * 501, 3);
334    test_file_sort(sizeof(int) * 501, 3, check_sorted_callback, 1);
335
336    fprintf(stderr,
337            "Testing file sort callback with skip writeback (%lu records)"
338            "with buffer size of %lu bytes and %u temporary files\n",
339            nrecords, sizeof(int) * 50, 10);
340    test_file_sort(sizeof(int) * 50, 10, check_sorted_callback, 1);
341
342    fprintf(stderr, "File sorter tests passed\n\n");
343}
344