00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 #include "config.h"
00023 #endif
00024
00025 #include <stdlib.h>
00026 #include <string.h>
00027 #include <assert.h>
00028
00029 #ifdef HAVE_MADVISE
00030 #include <sys/types.h>
00031 #include <sys/mman.h>
00032 #endif
00033
00034 #include "libdefs.h"
00035 #include "error.h"
00036 #include "arrays.h"
00037 #include "alphabet.h"
00038 #include "fileutils.h"
00039 #include "sequences.h"
00040 #include "dbfiles.h"
00041 #include "projectfile.h"
00042 #include "suffixarray.h"
00043 #include "verify.h"
00044 #include "touint.h"
00045
00046
/*
 * printf() conversion with a minimum field width of 6: "%6lu" when
 * fid_WORDSIZE is 64, "%6llu" for every other value.
 * NOTE(review): not used in the visible part of this file; presumably
 * consumed by the template code included further down -- confirm.
 */
#if fid_WORDSIZE != 64
#define TABVALFMT "%6llu"
#else
#define TABVALFMT "%6lu"
#endif
00052
00053
00054
00055
00056
00057
00058
00059
00060 void fid_suffixarray_init(fid_Suffixarray *esa, fid_Uintsize uisize)
00061 {
00062 assert(esa != NULL);
00063 fid_sequences_init(&esa->sequences,uisize,&esa->alpha);
00064 esa->uisize=uisize;
00065 fid_SWITCH48(uisize,
00066 {
00067 esa->suftab.v_uint32=NULL;
00068 esa->skiptab.v_uint32=NULL;
00069 esa->stitab.v_uint32=NULL;
00070 esa->num_of_large_lcps.v_uint32=0;
00071 },
00072 {
00073 esa->suftab.v_uint64=NULL;
00074 esa->skiptab.v_uint64=NULL;
00075 esa->stitab.v_uint64=NULL;
00076 esa->num_of_large_lcps.v_uint64=0;
00077 });
00078 esa->suffile.content=NULL;
00079 esa->lcpfile.content=NULL;
00080 esa->llvfile.content=NULL;
00081 esa->skpfile.content=NULL;
00082 esa->stifile.content=NULL;
00083 }
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 static int map_file(fid_Mappedfile *file, fid_Filenamebuffer *fnamebuf,
00104 const char *fileext, int may_prefetch, fid_Error *error)
00105 {
00106 memcpy(fnamebuf->bufptr,fileext,(size_t)4);
00107 return fid_file_map(file,fnamebuf->buffer,0,may_prefetch,error);
00108 }
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122 static int map_more_tables(fid_Suffixarray *esa, fid_Tablerequest tables,
00123 fid_Filenamebuffer *fnamebuf, fid_Error *error)
00124 {
00125 int retcode=0;
00126
00127 assert(esa != NULL);
00128 assert(esa->sequences.alpha != NULL);
00129 assert(fnamebuf != NULL);
00130 assert(fnamebuf->buffer != NULL);
00131
00132 if((tables&fid_TABLE_SUF) != 0)
00133 {
00134 retcode=map_file(&esa->suffile,fnamebuf,"suf",1,error);
00135 }
00136
00137 if(retcode == 0 && (tables&fid_TABLE_LCP) != 0)
00138 {
00139 if((retcode=map_file(&esa->lcpfile,fnamebuf,"lcp",1,error)) == 0)
00140 {
00141 (void)map_file(&esa->llvfile,fnamebuf,"llv",1,NULL);
00142 }
00143 }
00144
00145 if(retcode == 0 && (tables&fid_TABLE_SKP) != 0)
00146 {
00147 retcode=map_file(&esa->skpfile,fnamebuf,"skp",1,error);
00148 }
00149
00150 if(retcode == 0 && (tables&fid_TABLE_STI) != 0)
00151 {
00152 retcode=map_file(&esa->stifile,fnamebuf,"sti",1,error);
00153 }
00154
00155 if(retcode != 0)
00156 {
00157 fid_file_unmap(&esa->suffile);
00158 fid_file_unmap(&esa->lcpfile);
00159 fid_file_unmap(&esa->llvfile);
00160 fid_file_unmap(&esa->skpfile);
00161 fid_file_unmap(&esa->stifile);
00162 }
00163
00164 return retcode;
00165 }
00166
00167
00168
00169 #ifdef HAVE_MADVISE
00170
00171
00172
00173
00174
00175
00176
00177 static void tune_table(fid_Mappedfile *file, int advice)
00178 {
00179 #ifdef DEBUG
00180 int retval;
00181 #endif
00182
00183 if(file->content != NULL)
00184 {
00185
00186 #ifdef DEBUG
00187 retval=
00188 #else
00189 (void)
00190 #endif
00191 madvise((caddr_t)file->content,file->occupied,advice);
00192
00193 #ifdef DEBUG
00194 if(retval == -1)
00195 {
00196 abort();
00197 }
00198 #endif
00199 }
00200 }
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223 static void tune_tables(fid_Suffixarray *esa)
00224 {
00225 int onlinefiles, suffixarray;
00226
00227 onlinefiles=(esa->suffile.content == NULL)?MADV_SEQUENTIAL:MADV_RANDOM;
00228 suffixarray=(esa->skpfile.content == NULL)?MADV_RANDOM:MADV_SEQUENTIAL;
00229
00230 tune_table(&esa->sequences.tisfile,onlinefiles);
00231 tune_table(&esa->sequences.oisfile,onlinefiles);
00232 tune_table(&esa->sequences.sspfile,onlinefiles);
00233 tune_table(&esa->sequences.desfile,onlinefiles);
00234 tune_table(&esa->sequences.sdsfile,onlinefiles);
00235
00236 tune_table(&esa->suffile,suffixarray);
00237 tune_table(&esa->lcpfile,suffixarray);
00238 tune_table(&esa->llvfile,MADV_RANDOM);
00239 tune_table(&esa->skpfile,MADV_SEQUENTIAL);
00240 tune_table(&esa->stifile,MADV_RANDOM);
00241 }
00242 #endif
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262 int fid_suffixarray_load_from_files(fid_Suffixarray *esa,
00263 const char *basefilename,
00264 fid_Tablerequest tables,
00265 fid_Uintsize uisize, fid_Error *error)
00266 {
00267 fid_Filenamebuffer fnamebuf;
00268 int retcode;
00269
00270 assert(esa != NULL);
00271 assert(basefilename != NULL);
00272
00273 VERIFY_REQUEST(tables,error);
00274
00275 if((retcode=fid_filenamebuffer_init(&fnamebuf,basefilename,error)) == 0)
00276 {
00277 memcpy(fnamebuf.bufptr,"al1",(size_t)4);
00278 retcode=fid_alphabet_init_from_specfile(&esa->alpha,fnamebuf.buffer,error);
00279
00280
00281 if(retcode == 0)
00282 {
00283 fid_suffixarray_init(esa,uisize);
00284 if((retcode=fid_sequences_map(&esa->sequences,basefilename,
00285 tables&fid_TABLES_ONLINE_MASK,&fnamebuf,
00286 error)) == 0)
00287 {
00288 if((retcode=map_more_tables(esa,tables&fid_TABLES_OFFLINE_MASK,
00289 &fnamebuf,error)) == 0)
00290 {
00291 retcode=fid_suffixarray_realize(esa,tables,error);
00292 #ifdef HAVE_MADVISE
00293 if(retcode == 0)
00294 {
00295 tune_tables(esa);
00296 }
00297 #endif
00298 }
00299 else
00300 {
00301 fid_sequences_free(&esa->sequences);
00302 }
00303 }
00304 }
00305 fid_filenamebuffer_free(&fnamebuf);
00306 }
00307
00308 return retcode;
00309 }
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320 void fid_suffixarray_free(fid_Suffixarray *esa)
00321 {
00322 fid_sequences_free(&esa->sequences);
00323 fid_file_unmap(&esa->suffile);
00324 fid_file_unmap(&esa->lcpfile);
00325 fid_file_unmap(&esa->llvfile);
00326 fid_file_unmap(&esa->skpfile);
00327 fid_file_unmap(&esa->stifile);
00328 (void)fid_suffixarray_realize(esa,fid_TABLES_OFFLINE,NULL);
00329 }
00330
00331
00332
00333 #include "sequences.32"
00334 #include "suffixarray.32"
00335 #include "suffixarray.templ.c"
00336
00337 #include "sequences.64"
00338 #include "suffixarray.64"
00339 #include "suffixarray.templ.c"
00340
00341 #include "sequences.undef"
00342 #include "suffixarray.undef"
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366 int fid_suffixarray_realize(fid_Suffixarray *esa, fid_Tablerequest tables,
00367 fid_Error *error)
00368 {
00369 VERIFY_REQUEST(tables,error);
00370
00371 if(fid_sequences_realize(&esa->sequences,tables&fid_TABLES_ONLINE_MASK,
00372 error) == -1)
00373 {
00374 return -1;
00375 }
00376
00377 fid_SWITCH48(esa->uisize,
00378 return suffixarray_realize_32(esa,tables,error);,
00379 return suffixarray_realize_64(esa,tables,error););
00380 }
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391 void fid_suffixarray_dump(const fid_Suffixarray *esa, FILE *stream)
00392 {
00393 assert(esa != NULL);
00394
00395 if(stream == NULL)
00396 {
00397 return;
00398 }
00399
00400 fprintf(stream,"----------\n"
00401 "This index is a %s bit index.\n"
00402 "Number of sequences in enhanced suffix array: " fid_U64FMT "\n"
00403 "Number of alphabet symbols/characters: %hu/%hu\n",
00404 (esa->uisize == fid_UINTSIZE_32)?"32":"64",
00405 fid_PRINT48(esa->uisize,esa->sequences.num_of_sequences),
00406 esa->alpha.num_of_syms,esa->alpha.num_of_chars);
00407 if(&esa->alpha != esa->sequences.alpha)
00408 {
00409 fprintf(stream,"*** NOTE: alphabets of enhanced suffix array and "
00410 "sequences may differ.\n");
00411 }
00412 fprintf(stream,"Mapped online files :%s%s%s%s\n",
00413 esa->sequences.tisfile.content == NULL?"":" TIS",
00414 esa->sequences.oisfile.content == NULL?"":" OIS",
00415 esa->sequences.desfile.content == NULL?"":" DES",
00416 esa->sequences.sspfile.content == NULL?"":" SSP");
00417 fprintf(stream,"Mapped offline files:%s%s%s%s%s\n",
00418 esa->suffile.content == NULL?"":" SUF",
00419 esa->stifile.content == NULL?"":" STI",
00420 esa->lcpfile.content == NULL?"":" LCP",
00421 esa->llvfile.content == NULL?"":" LLV",
00422 esa->skpfile.content == NULL?"":" SKP");
00423
00424 fid_SWITCH48(esa->uisize,
00425 suffixarray_dump_32(esa,stream);,
00426 suffixarray_dump_64(esa,stream););
00427
00428 fprintf(stream,"----------\n");
00429 }
00430
00431
00432
00433
00434
00435
00436 static unsigned int number_of_tables(fid_Tablerequest request)
00437 {
00438 fid_Tablerequest tmp=request-((request >> 1)&033333333333)-
00439 ((request >> 2)&011111111111);
00440 return ((tmp+(tmp >> 3))&030707070707)%63;
00441 }
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460 static char *append_names(char *ptr, fid_Tablerequest request,
00461 fid_Tablerequest mask, fid_Tablerequest maxval,
00462 const char *sep, size_t seplen, int not_first,
00463 const char *names[])
00464 {
00465 int i;
00466
00467 for(i=0; mask < maxval; mask<<=1, ++i)
00468 {
00469 if((request&mask) != 0)
00470 {
00471 if(not_first)
00472 {
00473 memcpy(ptr,sep,seplen);
00474 ptr+=seplen;
00475 }
00476 else not_first=1;
00477 memcpy(ptr,names[i],3);
00478 ptr+=3;
00479 }
00480 }
00481
00482 return ptr;
00483 }
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499 char *fid_tablerequest_to_string(fid_Tablerequest request, const char *sep)
00500 {
00501 static const char *online_table_names[]={"TIS","OIS","DES"};
00502 static const char *offline_table_names[]={"SUF","LCP","SKP","STI"};
00503 unsigned int n=number_of_tables(request&fid_TABLES_ALL);
00504 size_t seplen=strlen(sep), len;
00505 char *string, *ptr;
00506
00507 if(n > 0) len=seplen*(n-1)+3*n+1;
00508 else len=1;
00509
00510 ptr=string=(char *)malloc(len);
00511 if(string == NULL) return NULL;
00512
00513 if((request&fid_TABLES_ONLINE) != 0)
00514 {
00515 ptr=append_names(ptr,request,fid_TABLE_TIS,fid_TABLES_ONLINE,
00516 sep,seplen,0,online_table_names);
00517 }
00518
00519 if((request&fid_TABLES_OFFLINE) != 0)
00520 {
00521 ptr=append_names(ptr,request,fid_TABLE_SUF,fid_TABLES_OFFLINE,
00522 sep,seplen,(ptr == string)?0:1,offline_table_names);
00523 }
00524
00525 *ptr='\0';
00526 assert(ptr == string+seplen*(n-1)+3*n);
00527
00528 return string;
00529 }
00530
00531