00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 #include "config.h"
00023 #endif
00024
00025 #include <stdlib.h>
00026 #include <string.h>
00027 #include <ctype.h>
00028 #include <assert.h>
00029
00030 #include "libdefs.h"
00031 #include "error.h"
00032 #include "fileutils.h"
00033 #include "arrays.h"
00034 #include "alphabet.h"
00035 #include "sequences.h"
00036 #include "createfiles.h"
00037 #include "filereader.h"
00038 #include "verify.h"
00039 #include "touint.h"
00040
/*
 * Advance PTR while it is below MAX and the element it points at differs
 * from UNDEF. PTR is modified in place and ends either at MAX or at the
 * first element equal to UNDEF. All arguments are evaluated repeatedly,
 * so they must be free of side effects.
 */
#define SKIP_CHARS(PTR,MAX,UNDEF)\
  for(; (PTR) < (MAX) && *(PTR) != (UNDEF); ++(PTR))\
  {\
  }
00046
/*
 * Advance PTR while it is below MAX and the element it points at equals
 * UNDEF. PTR is modified in place and ends either at MAX or at the first
 * element different from UNDEF. All arguments are evaluated repeatedly,
 * so they must be free of side effects.
 */
#define SKIP_GAP(PTR,MAX,UNDEF)\
  for(; (PTR) < (MAX) && *(PTR) == (UNDEF); ++(PTR))\
  {\
  }
00052
/*
 * Debug-only sanity checks: assert that the object S points to was set up
 * for the expected integer width (its uisize member is compared against
 * fid_UINTSIZE_32 or fid_UINTSIZE_64).
 */
#define WSIZECHECK_32(S) assert((S)->uisize == fid_UINTSIZE_32)

#define WSIZECHECK_64(S) assert((S)->uisize == fid_UINTSIZE_64)
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
/*
 * Hand the contents of dynamic array A (elements of type TYPE) over to the
 * mapped-file structure F.
 *
 * The array is first passed through fid_DYNARRAY_SHRINK (presumably trimming
 * unused capacity -- confirm against arrays.h). A non-empty array is then
 * given to fid_file_fake() together with its size in bytes; an empty array
 * leaves F with a NULL content pointer and zero sizes.
 *
 * NOTE(review): this expands to several statements and is not wrapped in
 * do { } while(0); do not use it as the unbraced body of an if/else.
 */
#define FAKEFILE_FROM_ARRAY(F,A,TYPE)\
  fid_DYNARRAY_SHRINK(&(A),TYPE,);\
  if((A).occupied > 0)\
  {\
    fid_file_fake(&(F),(A).dyndata,(A).occupied*sizeof(TYPE));\
  }\
  else\
  {\
    (F).content=NULL;\
    (F).allocated=0;\
    (F).occupied=0;\
  }
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097 typedef int (*Appendline)(const char *seq, size_t len,
00098 const fid_Alphabet *alpha,
00099 void *user_data, fid_Error *error);
00100
00101
00102
00103
00104
00105
00106 typedef union
00107 {
00108 fid_ArrayUint32 v_uint32;
00109 fid_ArrayUint64 v_uint64;
00110 } ArrayUint48;
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120 typedef struct
00121 {
00122 fid_ArraySymbol tis;
00123 fid_Arraychar ois;
00124 fid_Arraychar des;
00125 ArrayUint48 ssp;
00126 ArrayUint48 sds;
00127 const char *filename;
00128 } Sequencearrays;
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 static int append_seq_to_files(const char *seq, size_t len,
00144 const fid_Alphabet *alpha,
00145 void *user_data, fid_Error *error)
00146 {
00147 fid_Sequences *seqs=(fid_Sequences *)user_data;
00148
00149 if(seqs->tisfile.content != NULL)
00150 {
00151 fid_MAPPEDFILE_CHECKSPACE(&seqs->tisfile,len,fid_MAPPEDFILE_GROWSIZE,error,
00152 {
00153 fid_error_throw(error,"Could not write transformed input sequence "
00154 "file \"%s\".",seqs->tisfile.filename);
00155 return -1;
00156 });
00157 (void)fid_alphabet_transform_string(alpha,seq,len,
00158 &seqs->tisfile.content[seqs->tisfile.occupied],
00159 0);
00160 seqs->tisfile.occupied+=len;
00161 }
00162 if(seqs->oisfile.content != NULL)
00163 {
00164 fid_MAPPEDFILE_CHECKSPACE(&seqs->oisfile,len,fid_MAPPEDFILE_GROWSIZE,error,
00165 {
00166 fid_error_throw(error,"Could not write original input sequence "
00167 "file \"%s\".",seqs->oisfile.filename);
00168 return -1;
00169 });
00170 memcpy(&seqs->oisfile.content[seqs->oisfile.occupied],seq,len);
00171 seqs->oisfile.occupied+=len;
00172 }
00173 return 0;
00174 }
00175
00176
00177 #define SEQUENCES_32BIT_VERSION
00178 #include "check32in64.h"
00179 #undef SEQUENCES_32BIT_VERSION
00180
00181 static const char too_large_for_32bits_error[]=
00182 "Size of %s portion of file \"%s\" is too large to be represented by "
00183 "32 bits. Please consider constructing a 64 bit index.";
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197 static int append_seq_to_dynarrays(const char *seq, size_t len,
00198 const fid_Alphabet *alpha,
00199 void *user_data, fid_Error *error)
00200 {
00201 Sequencearrays *ptrs=(Sequencearrays *)user_data;
00202 #if fid_WORDSIZE == 64
00203 const char *what=NULL;
00204 #endif
00205
00206 if(ptrs->tis.dyndata != NULL)
00207 {
00208 fid_DYNARRAY_ENSURE_NFREE(&ptrs->tis,fid_Symbol,len,
00209 fid_OUTOFMEM(error);
00210 return -1;);
00211 (void)fid_alphabet_transform_string(alpha,seq,len,
00212 &ptrs->tis.dyndata[ptrs->tis.occupied],
00213 0);
00214 ptrs->tis.occupied+=len;
00215 CHECK_32_IN_64_ENV(ptrs->tis,"transformed sequences (tis)");
00216 }
00217 if(ptrs->ois.dyndata != NULL)
00218 {
00219 fid_DYNARRAY_ENSURE_NFREE(&ptrs->ois,char,len,
00220 fid_OUTOFMEM(error);
00221 return -1;);
00222 memcpy(&ptrs->ois.dyndata[ptrs->ois.occupied],seq,len);
00223 ptrs->ois.occupied+=len;
00224 CHECK_32_IN_64_ENV(ptrs->ois,"sequences (ois)");
00225 }
00226 return 0;
00227
00228 #if fid_WORDSIZE == 64
00229 bail_out:
00230 fid_error_throw(error,too_large_for_32bits_error,what,ptrs->filename);
00231 return -1;
00232 #endif
00233 }
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246 void fid_sequences_init(fid_Sequences *seqs, fid_Uintsize uisize,
00247 const fid_Alphabet *alpha)
00248 {
00249 assert(seqs != NULL);
00250
00251 seqs->uisize=uisize;
00252 fid_SWITCH48(uisize,
00253 {
00254 seqs->num_of_sequences.v_uint32=0;
00255 seqs->total_length.v_uint32=0;
00256 seqs->descriptions.v_uint32=NULL;
00257 seqs->separators.v_uint32=NULL;
00258 },
00259 {
00260 seqs->num_of_sequences.v_uint64=0;
00261 seqs->total_length.v_uint64=0;
00262 seqs->descriptions.v_uint64=NULL;
00263 seqs->separators.v_uint64=NULL;
00264 });
00265 seqs->alpha=alpha;
00266 seqs->tisfile.content=NULL;
00267 seqs->oisfile.content=NULL;
00268 seqs->sspfile.content=NULL;
00269 seqs->desfile.content=NULL;
00270 seqs->sdsfile.content=NULL;
00271 }
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291 static int map_file(fid_Mappedfile *file, fid_Filenamebuffer *fnamebuf,
00292 const char *fileext, int may_prefetch, fid_Error *error)
00293 {
00294 memcpy(fnamebuf->bufptr,fileext,(size_t)4);
00295 return fid_file_map(file,fnamebuf->buffer,0,may_prefetch,error);
00296 }
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313 int fid_sequences_map(fid_Sequences *seqs, const char *basefilename,
00314 fid_Tablerequest tables, fid_Filenamebuffer *fnamebuf,
00315 fid_Error *error)
00316 {
00317 int retcode=0, free_buffer;
00318 fid_Filenamebuffer local_buffer;
00319
00320 assert(seqs != NULL);
00321 assert(basefilename != NULL);
00322
00323 VERIFY_ONLINE_REQUEST(tables,error);
00324
00325 if((free_buffer=fid_filenamebuffer_init_local(&local_buffer,&fnamebuf,
00326 basefilename,error)) == -1)
00327 {
00328 return -1;
00329 }
00330
00331 if((tables&fid_TABLE_TIS) != 0)
00332 {
00333 if((retcode=map_file(&seqs->tisfile,fnamebuf,"tis",1,error)) == 0)
00334 {
00335 (void)map_file(&seqs->sspfile,fnamebuf,"ssp",1,NULL);
00336 }
00337 }
00338
00339 if(retcode == 0 && (tables&fid_TABLE_OIS) != 0)
00340 {
00341 if((retcode=map_file(&seqs->oisfile,fnamebuf,"ois",1,error)) == 0 &&
00342 seqs->sspfile.content == NULL)
00343 {
00344 (void)map_file(&seqs->sspfile,fnamebuf,"ssp",1,NULL);
00345 }
00346 }
00347
00348 if(retcode == 0 && (tables&fid_TABLE_DES) != 0)
00349 {
00350 if((retcode=map_file(&seqs->desfile,fnamebuf,"des",1,error)) == 0)
00351 {
00352 retcode=map_file(&seqs->sdsfile,fnamebuf,"sds",1,error);
00353 }
00354 }
00355
00356 if(retcode == 0)
00357 {
00358 retcode=fid_sequences_realize(seqs,tables,error);
00359 }
00360
00361 if(retcode != 0)
00362 {
00363 fid_file_unmap(&seqs->tisfile);
00364 fid_file_unmap(&seqs->oisfile);
00365 fid_file_unmap(&seqs->sspfile);
00366 fid_file_unmap(&seqs->desfile);
00367 fid_file_unmap(&seqs->sdsfile);
00368 }
00369
00370 if(free_buffer)
00371 {
00372 fid_filenamebuffer_free(fnamebuf);
00373 }
00374
00375 return retcode;
00376 }
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393 static int append_padding(Sequencearrays *arrays, size_t padding,
00394 fid_Error *error)
00395 {
00396 if(padding == 0)
00397 {
00398 return 0;
00399 }
00400
00401 if(arrays->tis.dyndata != NULL)
00402 {
00403 fid_DYNARRAY_ENSURE_NFREE(&arrays->tis,fid_Symbol,padding,
00404 fid_OUTOFMEM(error);
00405 return -1;);
00406 memset(&arrays->tis.dyndata[arrays->tis.occupied],fid_SEPARATOR,padding);
00407 arrays->tis.occupied+=padding;
00408 }
00409 if(arrays->ois.dyndata != NULL)
00410 {
00411 fid_DYNARRAY_ENSURE_NFREE(&arrays->ois,char,padding,
00412 fid_OUTOFMEM(error);
00413 return -1;);
00414 memset(&arrays->ois.dyndata[arrays->ois.occupied],fid_SEPARATOR,padding);
00415 arrays->ois.occupied+=padding;
00416 }
00417 return 0;
00418 }
00419
00420 #include "parserfuns.32"
00421 #include "sequences.32"
00422 #define SEQUENCES_32BIT_VERSION
00423 #include "parserfuns.templ.c"
00424 #include "sequences.templ.c"
00425 #undef SEQUENCES_32BIT_VERSION
00426
00427 #include "parserfuns.64"
00428 #include "sequences.64"
00429 #include "parserfuns.templ.c"
00430 #include "sequences.templ.c"
00431
00432 #include "sequences.undef"
00433 #include "parserfuns.undef"
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454 int fid_sequences_realize(fid_Sequences *seqs, fid_Tablerequest tables,
00455 fid_Error *error)
00456 {
00457 assert(seqs != NULL);
00458
00459 VERIFY_ONLINE_REQUEST(tables,error);
00460
00461 if((tables&(fid_TABLE_TIS|fid_TABLE_OIS)) == (fid_TABLE_TIS|fid_TABLE_OIS) &&
00462 seqs->tisfile.content != NULL && seqs->oisfile.content != NULL &&
00463 seqs->tisfile.occupied != seqs->oisfile.occupied)
00464 {
00465 fid_error_throw(error,"Sizes of transformed input sequence and "
00466 "original input sequences differ, %lu vs %lu.",
00467 (unsigned long)seqs->tisfile.occupied,
00468 (unsigned long)seqs->oisfile.occupied);
00469 return -1;
00470 }
00471
00472 fid_SWITCH48(seqs->uisize,
00473 return sequences_realize_32(seqs,tables,error);,
00474 return sequences_realize_64(seqs,tables,error););
00475 }
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489 static fid_Fileformat guess_file_type(const char *buffer,
00490 size_t bufsize)
00491 {
00492 fid_Fileformat format=fid_FORMAT_UNDEF;
00493
00494 if(bufsize > (size_t)2)
00495 {
00496 if(buffer[0] == '>')
00497 {
00498 format=fid_FORMAT_FASTA;
00499 }
00500 else if(memcmp(buffer,"ID ",3) == 0)
00501 {
00502 format=fid_FORMAT_UNIPROT;
00503 }
00504 }
00505
00506 return format;
00507 }
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529 void fid_sequences_compute_distribution(fid_Sequences *seqs)
00530 {
00531 const unsigned char *ptr, *maxptr;
00532 double dbsize;
00533 fid_Uint64 histo[UCHAR_MAX+1];
00534 fid_Symbol sym, wcsym;
00535
00536 assert(seqs != NULL);
00537 assert(seqs->alpha != NULL);
00538 assert(seqs->tisfile.content != NULL);
00539 assert(seqs->tisfile.occupied > 0);
00540
00541 memset(histo,0,sizeof(histo));
00542 ptr=seqs->tisfile.content;
00543 maxptr=ptr+seqs->tisfile.occupied;
00544 while(ptr < maxptr)
00545 {
00546 ++histo[(size_t)(*ptr++)];
00547 }
00548
00549 memset(seqs->distribution,0,sizeof(seqs->distribution));
00550 fid_SWITCH48(seqs->uisize,
00551 dbsize=(double)(seqs->tisfile.occupied+1-
00552 seqs->num_of_sequences.v_uint32);,
00553 dbsize=(double)(seqs->tisfile.occupied+1-
00554 seqs->num_of_sequences.v_uint64););
00555 wcsym=seqs->alpha->num_of_syms-1;
00556 for(sym=0; sym < wcsym; ++sym)
00557 {
00558 seqs->distribution[(size_t)sym]=(double)histo[(size_t)sym]/dbsize;
00559 }
00560 seqs->distribution[(size_t)wcsym]=(double)histo[(size_t)fid_WILDCARD]/dbsize;
00561 seqs->distribution[(size_t)fid_WILDCARD]=seqs->distribution[(size_t)wcsym];
00562 }
00563
00564
00565
00566
00567
00568
00569
00570 void fid_sequences_free(fid_Sequences *seqs)
00571 {
00572 fid_file_unmap(&seqs->tisfile);
00573 fid_file_unmap(&seqs->oisfile);
00574 fid_file_unmap(&seqs->sspfile);
00575 fid_file_unmap(&seqs->desfile);
00576 fid_file_unmap(&seqs->sdsfile);
00577 (void)fid_sequences_realize(seqs,fid_TABLES_ONLINE,NULL);
00578 }
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611 int fid_sequences_parse_from_file_to_file(const char *infilename,
00612 fid_Fileformat format,
00613 fid_Tablerequest tables,
00614 const char *basefilename,
00615 const fid_Alphabet *alpha,
00616 fid_Uintsize uisize,
00617 size_t *input_file_size,
00618 fid_Error *error)
00619 {
00620 Filereader infile;
00621 int retcode;
00622
00623 assert(infilename != NULL);
00624 assert(basefilename != NULL);
00625 assert(alpha != NULL);
00626
00627 VERIFY_ONLINE_REQUEST(tables,error);
00628
00629 if((retcode=filereader_open(&infile,infilename,error)) == 0)
00630 {
00631 if((retcode=filereader_read_complete(&infile,error)) == 0)
00632 {
00633 retcode=
00634 fid_sequences_parse_from_memory_to_file(infilename,
00635 (const char *)infile.buffer,
00636 infile.buffersize,format,
00637 tables,basefilename,alpha,
00638 uisize,error);
00639 if(retcode == 0 && input_file_size != NULL)
00640 {
00641 *input_file_size=infile.buffersize;
00642 }
00643 }
00644 filereader_close(&infile);
00645 }
00646 return retcode;
00647 }
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682
00683
00684
00685
00686 int fid_sequences_parse_from_file_to_memory(const char *infilename,
00687 fid_Fileformat format,
00688 fid_Tablerequest tables,
00689 fid_Sequences *seqs,
00690 const fid_Alphabet *alpha,
00691 size_t padding, fid_Uintsize uisize,
00692 size_t *input_file_size,
00693 fid_Error *error)
00694 {
00695 Filereader infile;
00696 int retcode;
00697
00698 assert(infilename != NULL);
00699 assert(seqs != NULL);
00700 assert(alpha != NULL);
00701
00702 VERIFY_ONLINE_REQUEST(tables,error);
00703
00704 if((retcode=filereader_open(&infile,infilename,error)) == 0)
00705 {
00706 if((retcode=filereader_read_complete(&infile,error)) == 0)
00707 {
00708 retcode=
00709 fid_sequences_parse_from_memory_to_memory(infilename,
00710 (const char *)infile.buffer,
00711 infile.buffersize,format,
00712 tables,seqs,alpha,
00713 padding,uisize,error);
00714 if(retcode == 0 && input_file_size != NULL)
00715 {
00716 *input_file_size=infile.buffersize;
00717 }
00718 }
00719 filereader_close(&infile);
00720 }
00721 return retcode;
00722 }
00723
00724
00725
00726
00727
00728
00729
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747 int fid_sequences_parse_from_file_to_lengths(const char *infilename,
00748 fid_Fileformat format,
00749 fid_Sequencefileinfo *seqinfo,
00750 const fid_Alphabet *alpha,
00751 fid_Error *error)
00752 {
00753 Filereader infile;
00754 int retcode;
00755
00756 assert(infilename != NULL);
00757 assert(alpha != NULL);
00758
00759 if((retcode=filereader_open(&infile,infilename,error)) == 0)
00760 {
00761 if((retcode=filereader_read_complete(&infile,error)) == 0)
00762 {
00763 retcode=
00764 fid_sequences_parse_from_memory_to_lengths(infilename,
00765 (const char *)infile.buffer,
00766 infile.buffersize,format,
00767 seqinfo,alpha,error);
00768 }
00769 filereader_close(&infile);
00770 }
00771 return retcode;
00772 }
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791 static int generic_parser_init(const char *infilename, const char *buffer,
00792 size_t bufsize, fid_Fileformat *format,
00793 fid_Tablerequest tables, fid_Error *error)
00794 {
00795 assert(buffer != NULL);
00796 assert(*format >= fid_FORMAT_UNDEF && *format <= fid_FORMAT_FASTA);
00797
00798 VERIFY_ONLINE_REQUEST(tables,error);
00799
00800 if((tables&fid_TABLES_ONLINE) == fid_TABLE_NONE || bufsize == 0)
00801 {
00802
00803 return -2;
00804 }
00805
00806 if(*format == fid_FORMAT_UNDEF)
00807 {
00808 *format=guess_file_type(buffer,bufsize);
00809 }
00810 if(*format == fid_FORMAT_UNDEF)
00811 {
00812 fid_error_throw(error,"Cannot read file \"%s\", unrecognized format.",
00813 infilename);
00814 return -1;
00815 }
00816
00817 return 0;
00818 }
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874 int fid_sequences_parse_from_memory_to_file(const char *infilename,
00875 const char *buffer, size_t bufsize,
00876 fid_Fileformat format,
00877 fid_Tablerequest tables,
00878 const char *basefilename,
00879 const fid_Alphabet *alpha,
00880 fid_Uintsize uisize,
00881 fid_Error *error)
00882 {
00883 fid_Sequences seqs;
00884 Parserfuns_32 funs32;
00885 Parserfuns_64 funs64;
00886 int retcode;
00887
00888 assert(basefilename != NULL);
00889 assert(alpha != NULL);
00890
00891 if((retcode=generic_parser_init(infilename,buffer,bufsize,&format,tables,
00892 error)) < 0)
00893 {
00894 return (retcode == -2)?0:-1;
00895 }
00896
00897 if((retcode=fid_create_online_files(&seqs,alpha,basefilename,
00898 tables&fid_TABLES_ONLINE,uisize,
00899 error)) == 0)
00900 {
00901 fid_SWITCH48(uisize,
00902 {
00903 seqs.num_of_sequences.v_uint32=0;
00904 funs32.appendlinefun=append_seq_to_files;
00905 funs32.appenddescfun=(tables&fid_TABLE_DES)?append_desc_to_files_32:NULL;
00906 funs32.nextseqfun=next_sequence_to_files_32;
00907 retcode=parse_sequences_32(infilename,buffer,bufsize,format,alpha,
00908 &funs32,&seqs,error);
00909 },
00910 {
00911 seqs.num_of_sequences.v_uint64=0;
00912 funs64.appendlinefun=append_seq_to_files;
00913 funs64.appenddescfun=(tables&fid_TABLE_DES)?append_desc_to_files_64:NULL;
00914 funs64.nextseqfun=next_sequence_to_files_64;
00915 retcode=parse_sequences_64(infilename,buffer,bufsize,format,alpha,
00916 &funs64,&seqs,error);
00917 });
00918
00919 fid_file_unmap(&seqs.tisfile);
00920 fid_file_unmap(&seqs.oisfile);
00921 fid_file_unmap(&seqs.sspfile);
00922 fid_file_unmap(&seqs.desfile);
00923 fid_file_unmap(&seqs.sdsfile);
00924 }
00925
00926 return retcode;
00927 }
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977 int fid_sequences_parse_from_memory_to_memory(const char *infilename,
00978 const char *buffer,
00979 size_t bufsize,
00980 fid_Fileformat format,
00981 fid_Tablerequest tables,
00982 fid_Sequences *seqs,
00983 const fid_Alphabet *alpha,
00984 size_t padding,
00985 fid_Uintsize uisize,
00986 fid_Error *error)
00987 {
00988 int retcode;
00989
00990 assert(alpha != NULL);
00991
00992 if((retcode=generic_parser_init(infilename,buffer,bufsize,&format,tables,
00993 error)) < 0)
00994 {
00995 return (retcode == -2)?0:-1;
00996 }
00997
00998 fid_SWITCH48(uisize,
00999 {
01000 retcode=parse_from_mem_to_mem_32(infilename,buffer,bufsize,format,tables,
01001 seqs,alpha,padding,error);
01002 },
01003 {
01004 retcode=parse_from_mem_to_mem_64(infilename,buffer,bufsize,format,tables,
01005 seqs,alpha,padding,error);
01006 });
01007 return retcode;
01008 }
01009
01010
01011
01012
01013
01014
01015
01016 static int count_desc( const char *desc, fid_Uint64 desc_length,
01017 int write_separator, void *user_data,
01018 fid_Error *error)
01019 {
01020 ((fid_Sequencefileinfo *)user_data)->desclen+=desc_length;
01021 return 0;
01022 }
01023
01024
01025
01026
01027
01028
01029
01030 static int count_len( const char *seq, size_t len,
01031 const fid_Alphabet *alpha, void *user_data,
01032 fid_Error *error)
01033 {
01034 ((fid_Sequencefileinfo *)user_data)->seqlen+=len;
01035 return 0;
01036 }
01037
01038
01039
01040
01041
01042
01043
01044
01045 static int count_sep( fid_Uint64 seq_position, void *user_data,
01046 fid_Error *error)
01047 {
01048 ++((fid_Sequencefileinfo *)user_data)->seqlen;
01049 ++((fid_Sequencefileinfo *)user_data)->num_of_sequences;
01050 return 0;
01051 }
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079 int fid_sequences_parse_from_memory_to_lengths(const char *infilename,
01080 const char *buffer,
01081 size_t bufsize,
01082 fid_Fileformat format,
01083 fid_Sequencefileinfo *seqinfo,
01084 const fid_Alphabet *alpha,
01085 fid_Error *error)
01086 {
01087 int retcode;
01088 Parserfuns_64 funs;
01089
01090 assert(alpha != NULL);
01091
01092 if((retcode=generic_parser_init(infilename,buffer,bufsize,&format,
01093 fid_TABLES_ONLINE,error)) < 0)
01094 {
01095 return (retcode == -2)?0:-1;
01096 }
01097
01098 memset(seqinfo,0,sizeof(fid_Sequencefileinfo));
01099 seqinfo->input_file_size=bufsize;
01100 seqinfo->num_of_sequences=1;
01101
01102 funs.appendlinefun=count_len;
01103 funs.appenddescfun=count_desc;
01104 funs.nextseqfun=count_sep;
01105
01106 return parse_sequences_64(infilename,buffer,bufsize,format,alpha,&funs,
01107 seqinfo,error);
01108 }
01109