1 /* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  *     Copyright 2016 Couchbase, Inc
4  *
5  *   Licensed under the Apache License, Version 2.0 (the "License");
6  *   you may not use this file except in compliance with the License.
7  *   You may obtain a copy of the License at
8  *
9  *       http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *   Unless required by applicable law or agreed to in writing, software
12  *   distributed under the License is distributed on an "AS IS" BASIS,
13  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *   See the License for the specific language governing permissions and
15  *   limitations under the License.
16  */
17 #include "couchstore_config.h"
18 #include <fcntl.h>
19 #include <errno.h>
20 #include <phosphor/phosphor.h>
21 #include <platform/cbassert.h>
22 #include <sys/types.h>
23 #include "crc32.h"
24 
25 #include "internal.h"
26 
27 #undef LOG_IO
28 #ifdef LOG_IO
29 #include <stdio.h>
30 #endif
31 
save_errno(couchstore_error_info_t *errinfo)32 static void save_errno(couchstore_error_info_t *errinfo) {
33     if (errinfo) {
34         errinfo->error = errno;
35     }
36 }
37 
38 class PosixFileOps : public FileOpsInterface {
39 public:
PosixFileOps()40     PosixFileOps() {}
41 
42     couch_file_handle constructor(couchstore_error_info_t* errinfo) override ;
43     couchstore_error_t open(couchstore_error_info_t* errinfo,
44                             couch_file_handle* handle, const char* path,
45                             int oflag) override;
46     couchstore_error_t close(couchstore_error_info_t* errinfo,
47                              couch_file_handle handle) override;
48     couchstore_error_t set_periodic_sync(couch_file_handle handle,
49                                          uint64_t period_bytes) override;
50     couchstore_error_t set_tracing_enabled(couch_file_handle handle) override;
51     couchstore_error_t set_write_validation_enabled(
52             couch_file_handle handle) override;
53     couchstore_error_t set_mprotect_enabled(couch_file_handle handle) override;
54     ssize_t pread(couchstore_error_info_t* errinfo,
55                   couch_file_handle handle, void* buf, size_t nbytes,
56                   cs_off_t offset) override;
57     ssize_t pwrite(couchstore_error_info_t* errinfo,
58                    couch_file_handle handle, const void* buf,
59                    size_t nbytes, cs_off_t offset) override;
60     cs_off_t goto_eof(couchstore_error_info_t* errinfo,
61                       couch_file_handle handle) override;
62     couchstore_error_t sync(couchstore_error_info_t* errinfo,
63                             couch_file_handle handle) override;
64     couchstore_error_t advise(couchstore_error_info_t* errinfo,
65                               couch_file_handle handle, cs_off_t offset,
66                               cs_off_t len,
67                               couchstore_file_advice_t advice) override;
68     void destructor(couch_file_handle handle) override;
69 
70 private:
71     // State of a single file handle, as returned by open().
72     struct File {
FilePosixFileOps::File73         File(int fd = -1) : fd(fd) {
74         }
75 
76         /// File descriptor to operate on.
77         int fd;
78 
79         // If non-zero, specifies that sync() should automatically be called after
80         // every N bytes are written.
81         uint64_t periodic_sync_bytes = 0;
82 
83         // for tracing and verifying
84         bool tracing_enabled = false;
85         bool write_validation_enabled = false;
86         bool mprotect_enabled = false;
87 
88         // Count of how many bytes have been written since the last sync().
89         uint64_t bytes_written_since_last_sync = 0;
90     };
91 
to_file(couch_file_handle handle)92     static File* to_file(couch_file_handle handle)
93     {
94         return reinterpret_cast<File*>(handle);
95     }
96 };
97 
pread(couchstore_error_info_t* errinfo, couch_file_handle handle, void* buf, size_t nbyte, cs_off_t offset)98 ssize_t PosixFileOps::pread(couchstore_error_info_t* errinfo,
99                             couch_file_handle handle,
100                             void* buf,
101                             size_t nbyte,
102                             cs_off_t offset)
103 {
104 #ifdef LOG_IO
105     fprintf(stderr, "PREAD  %8llx -- %8llx  (%6.1f kbytes)\n", offset,
106             offset+nbyte, nbyte/1024.0);
107 #endif
108     auto* file = to_file(handle);
109     ssize_t rv;
110     do {
111         rv = ::pread(file->fd, buf, nbyte, offset);
112     } while (rv == -1 && errno == EINTR);
113 
114     if (rv < 0) {
115         save_errno(errinfo);
116         return (ssize_t) COUCHSTORE_ERROR_READ;
117     }
118     return rv;
119 }
120 
pwrite(couchstore_error_info_t* errinfo, couch_file_handle handle, const void* buf, size_t nbyte, cs_off_t offset)121 ssize_t PosixFileOps::pwrite(couchstore_error_info_t* errinfo,
122                              couch_file_handle handle,
123                              const void* buf,
124                              size_t nbyte,
125                              cs_off_t offset)
126 {
127 #ifdef LOG_IO
128     fprintf(stderr, "PWRITE %8llx -- %8llx  (%6.1f kbytes)\n", offset,
129             offset+nbyte, nbyte/1024.0);
130 #endif
131     auto* file = to_file(handle);
132     ssize_t rv;
133     do {
134         rv = ::pwrite(file->fd, buf, nbyte, offset);
135     } while (rv == -1 && errno == EINTR);
136 
137     if (rv < 0) {
138         if (file->tracing_enabled) {
139             TRACE_INSTANT2("couchstore_write",
140                            "pwrite_failure",
141                            "offset",
142                            offset,
143                            "nbyte",
144                            nbyte);
145         }
146         save_errno(errinfo);
147         return (ssize_t) COUCHSTORE_ERROR_WRITE;
148     }
149 
150     if (file->write_validation_enabled) {
151         /* read the rv bytes or 8K bytes written and compare to verify that
152            we read what we wrote */
153         size_t len = (rv < 8192) ? rv : 8192;
154         size_t len_to_read = len;
155         char tmp[8192];
156         char* read_buf = tmp;
157         cs_off_t read_offset = offset;
158         while (len > 0) {
159             ssize_t got_bytes = ::pread(file->fd, read_buf, len, read_offset);
160             if ((file->tracing_enabled) && (got_bytes <= 0)) {
161                 TRACE_INSTANT1("couchstore_write",
162                                "pwrite_verify_CRC_mismatch",
163                                "read_offset",
164                                read_offset);
165                 break;
166             }
167             len -= got_bytes;
168             read_offset += got_bytes;
169             read_buf = (char*)read_buf + got_bytes;
170         }
171 
172         uint32_t write_crc32 =
173                 get_checksum(reinterpret_cast<uint8_t*>(const_cast<void*>(buf)),
174                              len_to_read,
175                              CRC32C);
176         uint32_t read_crc32 = get_checksum(
177                 reinterpret_cast<uint8_t*>(tmp), len_to_read, CRC32C);
178 
179         if (write_crc32 != read_crc32) {
180             if (file->tracing_enabled) {
181                 TRACE_INSTANT2("couchstore_write",
182                                "pwrite",
183                                "read_crc",
184                                read_crc32,
185                                "write_crc",
186                                write_crc32);
187             }
188             PHOSPHOR_INSTANCE.stop();
189             return (ssize_t)COUCHSTORE_ERROR_CHECKSUM_FAIL;
190         }
191     }
192     file->bytes_written_since_last_sync += rv;
193     if ((file->periodic_sync_bytes > 0) &&
194         (file->bytes_written_since_last_sync >= file->periodic_sync_bytes)) {
195         couchstore_error_t sync_rv = sync(errinfo, handle);
196         file->bytes_written_since_last_sync = 0;
197         if (sync_rv != COUCHSTORE_SUCCESS) {
198             return sync_rv;
199         }
200     }
201 
202     return rv;
203 }
204 
open(couchstore_error_info_t* errinfo, couch_file_handle* handle, const char* path, int oflag)205 couchstore_error_t PosixFileOps::open(couchstore_error_info_t* errinfo,
206                                       couch_file_handle* handle,
207                                       const char* path,
208                                       int oflag)
209 {
210     auto* file = to_file(*handle);
211     if (file) {
212         cb_assert(file->fd == -1);
213         delete file;
214         *handle = nullptr;
215     }
216 
217     int fd;
218     do {
219         fd = ::open(path, oflag | O_LARGEFILE, 0666);
220     } while (fd == -1 && errno == EINTR);
221 
222     if (fd < 0) {
223         save_errno(errinfo);
224         if (errno == ENOENT) {
225             return COUCHSTORE_ERROR_NO_SUCH_FILE;
226         } else {
227             return COUCHSTORE_ERROR_OPEN_FILE;
228         }
229     }
230     /* Tell the caller about the new handle (file descriptor) */
231     file = new File(fd);
232     *handle = reinterpret_cast<couch_file_handle>(file);
233     return COUCHSTORE_SUCCESS;
234 }
235 
close(couchstore_error_info_t* errinfo, couch_file_handle handle)236 couchstore_error_t PosixFileOps::close(couchstore_error_info_t* errinfo,
237                                        couch_file_handle handle)
238 {
239     auto* file = to_file(handle);
240     int rv = 0;
241     couchstore_error_t error = COUCHSTORE_SUCCESS;
242 
243     if (file->fd != -1) {
244         do {
245             cb_assert(file->fd >= 3);
246             rv = ::close(file->fd);
247         } while (rv == -1 && errno == EINTR);
248     }
249     if (rv < 0) {
250         save_errno(errinfo);
251         error = COUCHSTORE_ERROR_FILE_CLOSE;
252     }
253     file->fd = -1;
254     return error;
255 }
256 
set_periodic_sync(couch_file_handle handle, uint64_t period_bytes)257 couchstore_error_t PosixFileOps::set_periodic_sync(couch_file_handle handle,
258                                                    uint64_t period_bytes) {
259     auto* file = to_file(handle);
260     file->periodic_sync_bytes = period_bytes;
261     return COUCHSTORE_SUCCESS;
262 }
263 
set_tracing_enabled(couch_file_handle handle)264 couchstore_error_t PosixFileOps::set_tracing_enabled(couch_file_handle handle) {
265     auto* file = to_file(handle);
266     file->tracing_enabled = true;
267     return COUCHSTORE_SUCCESS;
268 }
269 
set_write_validation_enabled( couch_file_handle handle)270 couchstore_error_t PosixFileOps::set_write_validation_enabled(
271         couch_file_handle handle) {
272     auto* file = to_file(handle);
273     file->write_validation_enabled = true;
274     return COUCHSTORE_SUCCESS;
275 }
276 
set_mprotect_enabled( couch_file_handle handle)277 couchstore_error_t PosixFileOps::set_mprotect_enabled(
278         couch_file_handle handle) {
279     auto* file = to_file(handle);
280     file->mprotect_enabled = true;
281     return COUCHSTORE_SUCCESS;
282 }
goto_eof(couchstore_error_info_t* errinfo, couch_file_handle handle)283 cs_off_t PosixFileOps::goto_eof(couchstore_error_info_t* errinfo,
284                                 couch_file_handle handle)
285 {
286     auto* file = to_file(handle);
287     cs_off_t rv = lseek(file->fd, 0, SEEK_END);
288     if (rv < 0) {
289         save_errno(errinfo);
290         rv = static_cast<cs_off_t>(COUCHSTORE_ERROR_READ);
291     }
292     return rv;
293 }
294 
295 
sync(couchstore_error_info_t* errinfo, couch_file_handle handle)296 couchstore_error_t PosixFileOps::sync(couchstore_error_info_t* errinfo,
297                                       couch_file_handle handle)
298 {
299     auto* file = to_file(handle);
300     int rv;
301     do {
302 #ifdef __FreeBSD__
303         rv = fsync(file->fd);
304 #else
305         rv = fdatasync(file->fd);
306 #endif
307     } while (rv == -1 && errno == EINTR);
308 
309     if (rv == -1) {
310         TRACE_INSTANT1("couchstore_write", "sync", "rv", rv);
311         save_errno(errinfo);
312         return COUCHSTORE_ERROR_WRITE;
313     }
314 
315     return COUCHSTORE_SUCCESS;
316 }
317 
constructor(couchstore_error_info_t* errinfo)318 couch_file_handle PosixFileOps::constructor(couchstore_error_info_t* errinfo)
319 {
320     (void)errinfo;
321     return reinterpret_cast<couch_file_handle>(new File());
322 }
323 
destructor(couch_file_handle handle)324 void PosixFileOps::destructor(couch_file_handle handle) {
325     auto* file = to_file(handle);
326     delete file;
327 }
328 
advise(couchstore_error_info_t* errinfo, couch_file_handle handle, cs_off_t offset, cs_off_t len, couchstore_file_advice_t advice)329 couchstore_error_t PosixFileOps::advise(couchstore_error_info_t* errinfo,
330                                         couch_file_handle handle,
331                                         cs_off_t offset,
332                                         cs_off_t len,
333                                         couchstore_file_advice_t advice)
334 {
335 #ifdef POSIX_FADV_NORMAL
336     auto* file = to_file(handle);
337     int error = posix_fadvise(file->fd, offset, len, (int) advice);
338     if (error != 0) {
339         save_errno(errinfo);
340     }
341     switch(error) {
342         case EINVAL:
343         case ESPIPE:
344             return COUCHSTORE_ERROR_INVALID_ARGUMENTS;
345             break;
346         case EBADF:
347             return COUCHSTORE_ERROR_OPEN_FILE;
348             break;
349     }
350 #else
351     (void) handle; (void)offset; (void)len; (void)advice;
352     (void)errinfo;
353 #endif
354     return COUCHSTORE_SUCCESS;
355 }
356 
357 PosixFileOps default_file_ops;
358 
couchstore_get_default_file_ops(void)359 FileOpsInterface* couchstore_get_default_file_ops(void)
360 {
361     return &default_file_ops;
362 }
363 
create_default_file_ops(void)364 FileOpsInterface* create_default_file_ops(void)
365 {
366     return new PosixFileOps();
367 }
368