00001 /* 00002 * libfid - Full-text Index Data structure library 00003 * Copyright (C) 2006, 2007, 2008 Robert Homann 00004 * 00005 * This library is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU Lesser General Public 00007 * License as published by the Free Software Foundation; either 00008 * version 2.1 of the License, or (at your option) any later version. 00009 * 00010 * This library is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * Lesser General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public 00016 * License along with this library; if not, write to the Free Software 00017 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00018 * MA 02110-1301 USA 00019 */ 00020 00021 #ifndef FILEUTILS_H 00022 #define FILEUTILS_H 00023 /*!\addtogroup files File handling routines 00024 * This group contains various low-level and high-level routines for 00025 * operating on files. 00026 */ 00027 /*@{*/ 00028 /*! 00029 * \brief Representation of a memory mapped file. 00030 * 00031 * Among others, this structure bundles the file name, a file descriptor, 00032 * and the pointer returned by mmap() to the file content. The protection 00033 * and flags argument passed to mmap() are also stored for re-mapping. 00034 * 00035 * Additionally, this structure allows the use of a memory mapped file as 00036 * a dynamically growing array. The current size of real data is stored in 00037 * fid_Mappedfile::occupied, whereas the allocated disk space is stored in 00038 * fid_Mappedfile::allocated. Whenever fid_Mappedfile::occupied exceeds 00039 * fid_Mappedfile::allocated, enlarge the file, re-map, and continue. 00040 * 00041 * To support the use of allocated dynamic arrays as mapped files, the concept 00042 * of fake mapped files has been implemented. Fake mapped files do not have a 00043 * valid file descriptor, but point to allocated memory. Most functions defined 00044 * on the fid_Mappedfile structure, however, do not work for fake mapped files, 00045 * so their use is somewhat limited. Their main purpose is to enable existing 00046 * code written for reading from memory mapped files to read from allocated 00047 * memory. A fake mapped file can be identified by using the macro 00048 * fid_MAPPEDFILE_IS_FAKED() on a fid_Mappedfile structure pointer. 00049 */ 00050 typedef struct 00051 { 00052 unsigned char *content; /*!<\brief Memory mapped file content. */ 00053 size_t occupied; /*!<\brief Number of bytes used in file. */ 00054 size_t allocated; /*!<\brief Allocated number of bytes in file. */ 00055 int fd; /*!<\brief File descriptor as returned by open(). */ 00056 char *filename; /*!<\brief Name of the file in file system, mainly used 00057 * for diagnostic messages. */ 00058 int mmap_prot; /*!<\brief Protection flags as passed to last call of 00059 * mmap() for this file; used or modified for mapping 00060 * the file again at some later point. */ 00061 int mmap_flags; /*!<\brief Other flags as passed to last call of mmap() 00062 * for this file. \see fid_Mappedfile::mmap_prot. */ 00063 } fid_Mappedfile; 00064 00065 /*! 00066 * \brief Buffer for creating filenames from basename and extension. 00067 * 00068 * The allocated buffer pointed to by fid_Filenamebuffer::buffer contains a 00069 * basename, like "/tmp/myindex", followed by a trailing dot, ".". The 00070 * buffer's size corresponds to the length of the basename plus 5, one for 00071 * the appended dot, 3 for the extension, and 1 for the terminator. Pointer 00072 * fid_Filenamebuffer::bufptr points two bytes after the end of the basename 00073 * and hence right after the dot, into fid_Filenamebuffer::buffer. 00074 * 00075 * This construction is meant to enable easy and fast filename construction. 00076 * Given a filename extension with a length of three characters, this 00077 * extension can simply be copied to fid_Filenamebuffer::bufptr to create a 00078 * full filename. 00079 */ 00080 typedef struct 00081 { 00082 char *buffer; /*!<\brief An allocated buffer that holds a filename. */ 00083 char *bufptr; /*!<\brief A pointer into fid_Filenamebuffer::buffer that 00084 * points right after the "." character to enable fast copying 00085 * of file name extensions to the end of the base name. */ 00086 } fid_Filenamebuffer; 00087 00088 /*! 00089 * \brief Number of bytes to append to a file when running out of space. 00090 */ 00091 #define fid_MAPPEDFILE_GROWSIZE ((size_t)524288) 00092 00093 /*! 00094 * \brief Cast pointer into pointer of different type. 00095 * 00096 * With gcc 3.1, a bare cast of unsigned chars to unsigned integers yields a 00097 * warning (cast increases required alignment of target type), hence we add 00098 * a prior cast to \c void to shut up the compiler in these cases. 00099 */ 00100 #define fid_CAST_POINTER(P,T) ((T *)((void *)(P))) 00101 00102 /*! 00103 * \brief Check if the mapped file is writable. 00104 * 00105 * The check is performed by checking the fid_Mappedfile::mmap_prot field. 00106 * 00107 * \param MF A pointer to a fid_Mappedfile structure. 00108 * 00109 * \returns True if the file is writable, false otherwise. 00110 */ 00111 #define fid_MAPPEDFILE_IS_WRITABLE(MF) (((MF)->mmap_prot&PROT_WRITE) != 0) 00112 00113 /*! 00114 * \brief Check if the mapped file is really a file or not. 00115 * 00116 * \param MF A pointer to a fid_Mappedfile structure. 00117 * 00118 * \returns True if the file is not a real file, but a faked one. This means, 00119 * the content pointer really points to allocated memory that has been obtained 00120 * via \c malloc(), not via \c mmap(). 00121 */ 00122 #define fid_MAPPEDFILE_IS_FAKED(MF) ((MF)->content != NULL && (MF)->fd == -1) 00123 00124 /*! 00125 * \brief Make sure that the mapped file has space to store \p N bytes. 00126 * 00127 * When running out of space, i.e., if the file is too short to store \p N 00128 * bytes, the file length will be increased by \p GROW bytes. 00129 * 00130 * \param MF A pointer to a fid_Mappedfile structure. 00131 * \param N Requested size. 00132 * \param GROW Number of bytes to add to mapped file if too short. 00133 * \param ERR Pointer to a fid_Error structure. 00134 * \param ERRCODE Code to be executed in case of an error condition. 00135 * 00136 * \see For this macro makes use of fid_file_grow_by_size(), you may consider 00137 * reading the notes attached to that function. 00138 */ 00139 #define fid_MAPPEDFILE_CHECKSPACE(MF,N,GROW,ERR,ERRCODE)\ 00140 if((MF)->occupied+(N) > (MF)->allocated)\ 00141 {\ 00142 if(fid_file_grow_by_size(MF,GROW,ERR) == -1)\ 00143 {\ 00144 ERRCODE;\ 00145 }\ 00146 } 00147 00148 /*! 00149 * \brief Append value of given type to end of file. 00150 * 00151 * Use this macro only if you are absolutely sure that the file's allocated 00152 * size is not exceeded by appending the value to it. Use 00153 * fid_MAPPEDFILE_APPEND_GROW() or fid_MAPPEDFILE_APPEND_TRY() otherwise. 00154 * 00155 * \param MF A pointer to a fid_Mappedfile structure. 00156 * \param TYPE Type of the value to be stored. This should be a basic C 00157 * type, no structure. 00158 * \param VAL The value to be stored. It will be casted to \p TYPE by this 00159 * macro. 00160 */ 00161 #define fid_MAPPEDFILE_APPEND_UNSAFE(MF,TYPE,VAL)\ 00162 *fid_CAST_POINTER(&(MF)->content[(MF)->occupied],TYPE)=(TYPE)(VAL);\ 00163 (MF)->occupied+=sizeof(TYPE) 00164 00165 /*! 00166 * \brief Append value of given type to end of file, increase size when needed. 00167 * 00168 * When running out of space, i.e., if the file is too short to store the 00169 * given value, the file length will be increased by #fid_MAPPEDFILE_GROWSIZE 00170 * bytes. 00171 * 00172 * \param MF A pointer to a fid_Mappedfile structure. 00173 * \param TYPE Type of the value to be stored. This should be a basic C 00174 * type, no structure. 00175 * \param VAL The value to be stored. It will be casted to \p TYPE by this 00176 * macro. 00177 * \param ERR Pointer to a fid_Error structure. 00178 * \param ERRCODE Code to be executed is case of an error condition. 00179 */ 00180 #define fid_MAPPEDFILE_APPEND_GROW(MF,TYPE,VAL,ERR,ERRCODE)\ 00181 fid_MAPPEDFILE_CHECKSPACE(MF,sizeof(TYPE),fid_MAPPEDFILE_GROWSIZE,ERR,ERRCODE)\ 00182 fid_MAPPEDFILE_APPEND_UNSAFE(MF,TYPE,VAL);\ 00183 00184 /*! 00185 * \brief Append value of given type to end of file, increase size when needed. 00186 * 00187 * When running out of space, i.e., if the file is too short to store the 00188 * given value, the file length will be increased by #fid_MAPPEDFILE_GROWSIZE 00189 * bytes. The macro does nothing if the file content is \c NULL, i.e., if no 00190 * open file is associated with \p MF, hence this macro is safe to be used at 00191 * any time. 00192 * 00193 * \param MF A pointer to a fid_Mappedfile structure. 00194 * \param TYPE Type of the value to be stored. This should be a basic C 00195 * type, no structure. 00196 * \param VAL The value to be stored. It will be casted to \p TYPE by this 00197 * macro. 00198 * \param ERR Pointer to a fid_Error structure. 00199 * \param ERRCODE Code to be executed is case of an error condition. 00200 */ 00201 #define fid_MAPPEDFILE_APPEND_TRY(MF,TYPE,VAL,ERR,ERRCODE)\ 00202 if((MF)->content != NULL)\ 00203 {\ 00204 fid_MAPPEDFILE_APPEND_GROW(MF,TYPE,VAL,ERR,ERRCODE);\ 00205 } 00206 00207 #ifdef __cplusplus 00208 extern "C" { 00209 #endif 00210 int fid_file_map(fid_Mappedfile *mfile, const char *filename, int writable, 00211 int may_prefetch, fid_Error *error); 00212 int fid_file_new(fid_Mappedfile *mfile, const char *filename, fid_Error *error); 00213 int fid_file_allocate(fid_Mappedfile *mfile, const char *filename, 00214 size_t size, fid_Error *error); 00215 void fid_file_fake(fid_Mappedfile *mfile, void *block, size_t size); 00216 int fid_file_grow_by_size(fid_Mappedfile *mfile, size_t size, 00217 fid_Error *error); 00218 int fid_file_ensure_free_space(fid_Mappedfile *mfile, size_t size, 00219 fid_Error *error); 00220 #ifdef __GNUC__ 00221 int fid_file_write(fid_Mappedfile *mfile, fid_Error *error, 00222 const char *fmt, ...) 00223 __attribute__ ((format (printf, 3, 4))); 00224 #else /* !__GNUC__ */ 00225 int fid_file_write(fid_Mappedfile *mfile, fid_Error *error, 00226 const char *fmt, ...); 00227 #endif /* __GNUC__ */ 00228 int fid_file_dump_to_file(const fid_Mappedfile *mfile, const char *filename, 00229 fid_Error *error); 00230 int fid_file_make_readonly(fid_Mappedfile *mfile, fid_Error *error); 00231 void fid_file_cleanup(fid_Mappedfile *mfile); 00232 void fid_file_prefetch(const fid_Mappedfile *mfile, int smart); 00233 void fid_file_unmap(fid_Mappedfile *mfile); 00234 int fid_filenamebuffer_init(fid_Filenamebuffer *fnamebuf, const char *filename, 00235 fid_Error *error); 00236 int fid_filenamebuffer_init_local(fid_Filenamebuffer *fnamebuf, 00237 fid_Filenamebuffer **extbuffer, 00238 const char *filename, fid_Error *error); 00239 void fid_filenamebuffer_free(fid_Filenamebuffer *fnamebuf); 00240 /*@null@*/ 00241 char *fid_filename_create(const char *basefilename, const char *fileext, 00242 fid_Error *error); 00243 #ifdef __cplusplus 00244 } 00245 #endif 00246 /*@}*/ 00247 00248 #endif /* !FILEUTILS_H */