00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
/*
 * Callback through which the parsers in this file hand over description
 * text (FASTA header lines, UniProt ID/DE lines).
 *
 * desc            points into the buffer being parsed; the parsers also make
 *                 one final call with desc == NULL and desc_length == 0 once
 *                 the whole buffer has been processed.
 * desc_length     number of bytes at desc.
 * write_separator 1 in most calls; read_uniprot_descriptions passes 0 when
 *                 the text continues an entry that was already started
 *                 (presumably "do not begin a new description" -- confirm
 *                 against the callback implementations).
 * user_data       opaque pointer passed through from the parser's caller.
 * error           error object passed through from the parser's caller.
 *
 * A non-zero return value makes the calling parser stop and return -1.
 */
00041 typedef int (*Appenddesc)(const char *desc, UINT desc_length,
00042 int write_separator, void *user_data,
00043 fid_Error *error);
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
/*
 * Callback invoked by next_sequence() when a new sequence starts after at
 * least one earlier sequence has been seen; it is not called for the very
 * first sequence.
 *
 * seq_position  the parser's running position counter at the time of the
 *               call (before next_sequence() increments it by one).
 * user_data     opaque pointer passed through from the parser's caller.
 * error         error object passed through from the parser's caller.
 *
 * A non-zero return value makes next_sequence() return -1.
 */
00058 typedef int (*Nextsequence)(UINT seq_position, void *user_data,
00059 fid_Error *error);
00060
00061
00062
00063
00064
00065
00066
00067
/*
 * Bundle of callbacks that a parser (parse_fasta, parse_uniprot) reports its
 * findings to.
 *
 * appendlinefun  receives each run of validated sequence characters; called
 *                without a NULL check.
 * appenddescfun  receives description text; may be NULL, in which case the
 *                parsers skip all description handling.
 * nextseqfun     notified by next_sequence() at the start of every sequence
 *                but the first; called without a NULL check.
 */
00068 typedef struct
00069 {
00070 Appendline appendlinefun;
00071 Appenddesc appenddescfun;
00072
00073
00074
00075 Nextsequence nextseqfun;
00076
00077 } Parserfuns;
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095 static int sequences_realize(fid_Sequences *seqs, fid_Tablerequest tables,
00096 fid_Error *error)
00097 {
00098 #if defined SEQUENCES_32BIT_VERSION && fid_WORDSIZE == 64
00099 const char *filename=NULL;
00100 #endif
00101
00102 WSIZECHECK(seqs);
00103 if((tables&(fid_TABLE_TIS|fid_TABLE_OIS)) != 0)
00104 {
00105 if((tables&fid_TABLE_TIS) != 0 && seqs->tisfile.content != NULL)
00106 {
00107 TO_UINT(seqs->total_length.VU,seqs->tisfile.occupied,error,
00108 filename=seqs->tisfile.filename;
00109 goto bail_out;);
00110 }
00111 else if((tables&fid_TABLE_OIS) != 0 && seqs->oisfile.content != NULL)
00112 {
00113 TO_UINT(seqs->total_length.VU,seqs->oisfile.occupied,error,
00114 filename=seqs->oisfile.filename;
00115 goto bail_out;);
00116 }
00117 else
00118 {
00119 seqs->total_length.VU=0;
00120 }
00121
00122 if(seqs->total_length.VU > 0)
00123 {
00124 TO_UINT(seqs->num_of_sequences.VU,
00125 (size_t)(seqs->sspfile.occupied/sizeof(UINT))+1,error,
00126 filename=seqs->sspfile.filename;
00127 goto bail_out;);
00128 }
00129 else
00130 {
00131 seqs->num_of_sequences.VU=0;
00132 }
00133
00134 seqs->separators.VU=fid_CAST_POINTER(seqs->sspfile.content,const UINT);
00135 }
00136
00137 if((tables&fid_TABLE_DES) != 0)
00138 {
00139 seqs->descriptions.VU=fid_CAST_POINTER(seqs->sdsfile.content,const UINT);
00140 }
00141 return 0;
00142
00143 #if defined SEQUENCES_32BIT_VERSION && fid_WORDSIZE == 64
00144 bail_out:
00145 fid_error_throw(error,"Size of file \"%s\" too large to be represented in "
00146 "32 bits.\n",filename);
00147 return -1;
00148 #endif
00149 }
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171 static int next_sequence(UINT *seq_position, UINT *seq_count,
00172 Nextsequence nextseqfun, void *user_data,
00173 fid_Error *error)
00174 {
00175 if(*seq_count > 0)
00176 {
00177 if(nextseqfun(*seq_position,user_data,error) != 0)
00178 {
00179 return -1;
00180 }
00181 ++*seq_position;
00182 }
00183 ++*seq_count;
00184 return 0;
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204 static int parse_fasta(const fid_Alphabet *alpha, const char *filename,
00205 const char *buffer, size_t bufsize,
00206 const Parserfuns *funs, void *user_data,
00207 fid_Error *error)
00208 {
00209 const char *ptr, *maxptr, *tempptr;
00210 UINT desc_size, seq_position=0;
00211 UINT num_of_sequences=0;
00212 char c;
00213 int in_sequence=0;
00214
00215 assert(buffer != NULL);
00216 assert(bufsize > 0);
00217 assert(buffer[0] == '>');
00218
00219 maxptr=buffer+bufsize;
00220 for(ptr=buffer; ptr < maxptr; ++ptr)
00221 {
00222 if(*ptr == '>')
00223 {
00224 in_sequence=0;
00225
00226
00227 tempptr=++ptr;
00228 SKIP_CHARS(ptr,maxptr,'\n');
00229 if(ptr == maxptr)
00230 {
00231 fid_error_throw(error,"Cannot parse FASTA file \"%s\", last header "
00232 "('>'-line) not followed by data.",filename);
00233 return -1;
00234 }
00235 else if(ptr < maxptr-1 &&
00236 alpha->char_to_sym[fid_CHAR_AS_INDEX(c=*(ptr+1))] == fid_UNDEF)
00237 {
00238 if(isprint((int)c))
00239 {
00240 fid_error_throw(error,"Cannot parse FASTA file \"%s\", invalid "
00241 "character '%c' in sequence data.",filename,c);
00242 }
00243 else
00244 {
00245 fid_error_throw(error,"Cannot parse FASTA file \"%s\", invalid "
00246 "character in sequence data (non-printable "
00247 "0x%02x).",filename,(unsigned int)c);
00248 }
00249 return -1;
00250 }
00251 if(funs->appenddescfun != NULL)
00252 {
00253 TO_UINT(desc_size,ptr-tempptr+1,error,
00254 fid_error_throw(error,"Cannot parse FASTA file \"%s\", "
00255 "description too long.",filename);
00256 return -1;);
00257 if(funs->appenddescfun(tempptr,desc_size,1,user_data,error) != 0)
00258 {
00259 return -1;
00260 }
00261 }
00262 }
00263 else if(*ptr != '\n')
00264 {
00265 if(!in_sequence)
00266 {
00267 if(next_sequence(&seq_position,&num_of_sequences,funs->nextseqfun,
00268 user_data,error) == -1)
00269 {
00270 return -1;
00271 }
00272 in_sequence=1;
00273 }
00274
00275
00276 for(tempptr=ptr; ptr < maxptr && (c=*ptr) != '\n'; ++ptr)
00277 {
00278 if(alpha->char_to_sym[fid_CHAR_AS_INDEX(c)] == fid_UNDEF)
00279 {
00280 if(isprint((int)c))
00281 {
00282 fid_error_throw(error,"Cannot parse FASTA file \"%s\", invalid "
00283 "character '%c' in sequence data.",filename,c);
00284 }
00285 else
00286 {
00287 fid_error_throw(error,"Cannot parse FASTA file \"%s\", invalid "
00288 "character in sequence data (non-printable "
00289 "0x%02x).",filename,(unsigned int)c);
00290 }
00291 return -1;
00292 }
00293 }
00294
00295 assert(tempptr < ptr);
00296 if(funs->appendlinefun(tempptr,ptr-tempptr,alpha,user_data,error) != 0)
00297 {
00298 return -1;
00299 }
00300 seq_position+=ptr-tempptr;
00301 }
00302 }
00303
00304 if(funs->appenddescfun != NULL)
00305 {
00306 return funs->appenddescfun(NULL,0,1,user_data,error);
00307 }
00308
00309 return 0;
00310 }
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334 static int read_uniprot_identifier(const char **ptr_ptr, const char *maxptr,
00335 const char *filename,
00336 Appenddesc appenddescfun, void *user_data,
00337 fid_Error *error)
00338 {
00339 const char *ptr=*ptr_ptr, *tempptr;
00340 char c=0;
00341 UINT desc_size;
00342
00343 if(appenddescfun != NULL)
00344 {
00345
00346 ptr+=3;
00347 SKIP_GAP(ptr,maxptr,' ');
00348 if(ptr == maxptr)
00349 {
00350 goto error_no_data;
00351 }
00352
00353
00354 tempptr=ptr;
00355 while(ptr < maxptr && (c=*ptr) != '\n' && c != ' ')
00356 {
00357 ++ptr;
00358 }
00359 if(ptr == maxptr)
00360 {
00361 goto error_no_data;
00362 }
00363
00364
00365 TO_UINT(desc_size,ptr-tempptr,error,
00366 fid_error_throw(error,"Cannot parse UniProt file \"%s\", "
00367 "description too long.",filename);
00368 return -1;);
00369 if(appenddescfun(tempptr,desc_size,1,user_data,error) != 0)
00370 {
00371 return -1;
00372 }
00373 }
00374
00375
00376 SKIP_CHARS(ptr,maxptr,'\n');
00377 ++ptr;
00378
00379 *ptr_ptr=ptr;
00380 return 0;
00381
00382 error_no_data:
00383 fid_error_throw(error,"Cannot parse UniProt file \"%s\", last "
00384 "identifier ('ID'-line) not followed by sequence "
00385 "data.",filename);
00386 return -1;
00387 }
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418 static int read_uniprot_descriptions(const char **ptr_ptr, const char *maxptr,
00419 int already_started, int all_descriptions,
00420 const char *filename,
00421 Appenddesc appenddescfun, void *user_data,
00422 fid_Error *error)
00423 {
00424 const char *ptr=*ptr_ptr, *tempptr;
00425 char c=0;
00426 UINT lines=0, desc_size;
00427
00428 while(memcmp(ptr,"DE ",3) == 0)
00429 {
00430
00431 tempptr=ptr+3;
00432 SKIP_GAP(tempptr,maxptr,' ');
00433 if(tempptr == maxptr)
00434 {
00435 goto error_no_data;
00436 }
00437 else if(c == '\n')
00438 {
00439
00440 ptr=tempptr+1;
00441 continue;
00442 }
00443
00444 --tempptr;
00445
00446
00447 ptr=tempptr+1;
00448 SKIP_CHARS(ptr,maxptr,'\n');
00449 if(ptr == maxptr)
00450 {
00451 goto error_no_data;
00452 }
00453 ++ptr;
00454
00455 if(appenddescfun != NULL && (all_descriptions || lines == 0))
00456 {
00457 TO_UINT(desc_size,ptr-tempptr,error,
00458 fid_error_throw(error,"Cannot parse UniProt file \"%s\", "
00459 "description too long.",filename);
00460 return -1;);
00461 if(appenddescfun(tempptr,desc_size,!already_started,user_data,error) != 0)
00462 {
00463 return -1;
00464 }
00465 already_started=1;
00466 }
00467 ++lines;
00468 }
00469
00470 *ptr_ptr=ptr;
00471 return 0;
00472
00473 error_no_data:
00474 fid_error_throw(error,"Cannot parse UniProt file \"%s\", last "
00475 "description ('DE'-lines) not followed by sequence "
00476 "data.",filename);
00477 return -1;
00478 }
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497 static int parse_uniprot(const fid_Alphabet *alpha, const char *filename,
00498 const char *buffer, size_t bufsize,
00499 const Parserfuns *funs, void *user_data,
00500 fid_Error *error)
00501 {
00502 const char *ptr, *maxptr, *tempptr;
00503 UINT seq_position=0;
00504 UINT num_of_sequences=0;
00505 char c;
00506 int in_sequence=0, wrote_identifier=0;
00507
00508 assert(buffer != NULL);
00509 assert(bufsize > 0);
00510
00511 maxptr=buffer+bufsize-2;
00512 for(ptr=buffer; ptr < maxptr; ++ptr)
00513 {
00514 if(memcmp(ptr,"SQ ",3) == 0)
00515 {
00516 if(!in_sequence)
00517 {
00518 if(next_sequence(&seq_position,&num_of_sequences,funs->nextseqfun,
00519 user_data,error) == -1)
00520 {
00521 return -1;
00522 }
00523 in_sequence=1;
00524 wrote_identifier=0;
00525 }
00526
00527
00528 SKIP_CHARS(ptr,maxptr,'\n');
00529
00530
00531 for(++ptr; ptr < maxptr && *ptr == ' '; ++ptr)
00532 {
00533 ++ptr;
00534 SKIP_GAP(ptr,maxptr,' ');
00535 tempptr=ptr;
00536 while(ptr < maxptr && (c=*ptr) != '\n' && !(c >= '0' && c <= '9'))
00537 {
00538 if(c == ' ')
00539 {
00540 if(tempptr < ptr &&
00541 funs->appendlinefun(tempptr,ptr-tempptr,alpha,user_data,
00542 error) != 0)
00543 {
00544 return -1;
00545 }
00546 seq_position+=ptr-tempptr;
00547 SKIP_GAP(ptr,maxptr,' ');
00548 tempptr=ptr;
00549 continue;
00550 }
00551
00552 if(alpha->char_to_sym[fid_CHAR_AS_INDEX(c)] == fid_UNDEF)
00553 {
00554 if(isprint((int)c))
00555 {
00556 fid_error_throw(error,"Cannot parse UniProt file \"%s\", invalid "
00557 "character '%c' in sequence data.",filename,c);
00558 }
00559 else
00560 {
00561 fid_error_throw(error,"Cannot parse UniProt file \"%s\", invalid "
00562 "character in sequence data (non-printable "
00563 "0x%02x).",filename,(unsigned int)c);
00564 }
00565 return -1;
00566 }
00567 ++ptr;
00568 }
00569 SKIP_CHARS(ptr,maxptr,'\n');
00570 ++ptr;
00571 }
00572 }
00573 else if(memcmp(ptr,"ID ",3) == 0)
00574 {
00575 in_sequence=0;
00576 if(read_uniprot_identifier(&ptr,maxptr,filename,funs->appenddescfun,
00577 user_data,error) == -1)
00578 {
00579 return -1;
00580 }
00581 wrote_identifier=1;
00582 }
00583 else if(memcmp(ptr,"DE ",3) == 0)
00584 {
00585 in_sequence=0;
00586 if(read_uniprot_descriptions(&ptr,maxptr,wrote_identifier,0,filename,
00587 funs->appenddescfun,user_data,error) == -1)
00588 {
00589 return -1;
00590 }
00591 wrote_identifier=0;
00592 }
00593 else
00594 {
00595
00596 SKIP_CHARS(ptr,maxptr,'\n');
00597 }
00598 }
00599
00600 if(funs->appenddescfun != NULL)
00601 {
00602 return funs->appenddescfun(NULL,0,1,user_data,error);
00603 }
00604
00605 return 0;
00606 }
00607
00608
00609
00610
00611
00612
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624
00625
00626 static int parse_sequences(const char *infilename, const char *buffer,
00627 size_t bufsize, fid_Fileformat format,
00628 const fid_Alphabet *alpha, const Parserfuns *funs,
00629 void *user_data, fid_Error *error)
00630 {
00631 switch(format)
00632 {
00633 case fid_FORMAT_FASTA:
00634 return parse_fasta(alpha,infilename,buffer,bufsize,funs,user_data,error);
00635 case fid_FORMAT_UNIPROT:
00636 return parse_uniprot(alpha,infilename,buffer,bufsize,funs,user_data,error);
00637 default:
00638 abort();
00639 }
00640 }
00641
00642
00643
00644
00645
00646
00647
00648
00649
00650
00651
00652
00653
00654
00655
00656
00657
00658
00659
00660
00661
00662
00663
00664
00665
00666 static int arrays_to_sequences(fid_Sequences *seqs, Sequencearrays *arrays,
00667 const fid_Alphabet *alpha,
00668 fid_Tablerequest tables, size_t padding,
00669 fid_Error *error)
00670 {
00671 fid_sequences_init(seqs,fid_UINTSIZE,alpha);
00672 FAKEFILE_FROM_ARRAY(seqs->tisfile,arrays->tis,fid_Symbol);
00673 FAKEFILE_FROM_ARRAY(seqs->oisfile,arrays->ois,char);
00674 FAKEFILE_FROM_ARRAY(seqs->desfile,arrays->des,char);
00675 FAKEFILE_FROM_ARRAY(seqs->sspfile,arrays->ssp.VU,UINT);
00676 FAKEFILE_FROM_ARRAY(seqs->sdsfile,arrays->sds.VU,UINT);
00677 if(seqs->tisfile.occupied >= padding) seqs->tisfile.occupied-=padding;
00678 if(seqs->oisfile.occupied >= padding) seqs->oisfile.occupied-=padding;
00679 return fid_sequences_realize(seqs,tables,error);
00680 }
00681
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700 static int parse_from_mem_to_mem(const char *infilename, const char *buffer,
00701 size_t bufsize, fid_Fileformat format,
00702 fid_Tablerequest tables, fid_Sequences *seqs,
00703 const fid_Alphabet *alpha, size_t padding,
00704 fid_Error *error)
00705 {
00706 Parserfuns funs;
00707 Sequencearrays arrays;
00708
00709 funs.appendlinefun=append_seq_to_dynarrays;
00710 funs.appenddescfun=(tables&fid_TABLE_DES)?append_desc_to_dynarrays:NULL;
00711 funs.nextseqfun=next_sequence_to_dynarrays;
00712
00713 fid_DYNARRAY_INIT(&arrays.tis,fid_Symbol);
00714 fid_DYNARRAY_INIT(&arrays.ois,char);
00715 fid_DYNARRAY_INIT(&arrays.des,char);
00716 fid_DYNARRAY_INIT(&arrays.ssp.VU,UINT);
00717 fid_DYNARRAY_INIT(&arrays.sds.VU,UINT);
00718 arrays.filename=infilename;
00719
00720 if((tables&fid_TABLE_TIS) != 0)
00721 {
00722 fid_DYNARRAY_ENSURE_NEXT(&arrays.tis,fid_Symbol,fid_MAPPEDFILE_GROWSIZE,
00723 goto cleanup;);
00724 }
00725 if((tables&fid_TABLE_OIS) != 0)
00726 {
00727 fid_DYNARRAY_ENSURE_NEXT(&arrays.ois,char,fid_MAPPEDFILE_GROWSIZE,
00728 goto cleanup;);
00729 }
00730 if((tables&(fid_TABLE_TIS|fid_TABLE_OIS)) != 0)
00731 {
00732 fid_DYNARRAY_ENSURE_NEXT(&arrays.ssp.VU,UINT,fid_MAPPEDFILE_GROWSIZE,
00733 goto cleanup;);
00734 }
00735 if((tables&fid_TABLE_DES) != 0)
00736 {
00737 fid_DYNARRAY_ENSURE_NEXT(&arrays.des,char,fid_MAPPEDFILE_GROWSIZE,
00738 goto cleanup;);
00739 fid_DYNARRAY_ENSURE_NEXT(&arrays.sds.VU,UINT,fid_MAPPEDFILE_GROWSIZE,
00740 goto cleanup;);
00741 }
00742
00743 if(parse_sequences(infilename,buffer,bufsize,format,alpha,&funs,&arrays,
00744 error) == 0)
00745 {
00746
00747 if(append_padding(&arrays,padding,error) == 0 &&
00748 arrays_to_sequences(seqs,&arrays,alpha,tables,padding,error) == 0)
00749 {
00750 return 0;
00751 }
00752 }
00753
00754 if(0)
00755 {
00756 cleanup:
00757 fid_OUTOFMEM(error);
00758 }
00759
00760 fid_DYNARRAY_FREE(&arrays.tis,fid_Symbol);
00761 fid_DYNARRAY_FREE(&arrays.ois,char);
00762 fid_DYNARRAY_FREE(&arrays.des,char);
00763 fid_DYNARRAY_FREE(&arrays.ssp.VU,UINT);
00764 fid_DYNARRAY_FREE(&arrays.sds.VU,UINT);
00765 return -1;
00766 }
00767
00768
00769
00770
00771
00772
00773
00774
00775
00776
00777
00778
00779 UINT fid_sequences_offset_to_index(const fid_Sequences *seqs,
00780 UINT offset)
00781 {
00782 UINT left, right, pivot;
00783 UINT entry;
00784
00785 assert(seqs != NULL);
00786 assert(seqs->num_of_sequences.VU > 0);
00787 assert(seqs->num_of_sequences.VU == (UINT)1 || seqs->separators.VU != NULL);
00788 WSIZECHECK(seqs);
00789
00790 if(seqs->num_of_sequences.VU == (UINT)1)
00791 {
00792 return 0;
00793 }
00794
00795 left=0;
00796 right=seqs->num_of_sequences.VU-1;
00797 while(left+1 < right)
00798 {
00799 pivot=left+((right-left) >> 1);
00800 entry=seqs->separators.VU[pivot];
00801 if(entry < offset)
00802 {
00803 left=pivot;
00804 }
00805 else
00806 {
00807 assert(entry != offset);
00808 right=pivot;
00809 }
00810 }
00811
00812 if(left == 0 && seqs->separators.VU[left] > offset)
00813 {
00814 return 0;
00815 }
00816 else if(left == right || seqs->separators.VU[left] < offset)
00817 {
00818 return left+1;
00819 }
00820 else
00821 {
00822 return right+1;
00823 }
00824 }
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837 void fid_sequences_index_to_boundaries(const fid_Sequences *seqs,
00838 UINT seqindex,
00839 UINT *left, UINT *right)
00840 {
00841 assert(seqs != NULL);
00842 assert(seqs->num_of_sequences.VU > 0);
00843 assert(seqs->num_of_sequences.VU == (UINT)1 || seqs->separators.VU != NULL);
00844 assert(seqindex < seqs->num_of_sequences.VU);
00845 assert(left != NULL);
00846 assert(right != NULL);
00847 WSIZECHECK(seqs);
00848
00849 if(seqindex == 0)
00850 {
00851 *left=0;
00852 }
00853 else
00854 {
00855 *left=seqs->separators.VU[seqindex-1]+1;
00856 }
00857
00858 if(seqindex+1 < seqs->num_of_sequences.VU)
00859 {
00860 *right=seqs->separators.VU[seqindex]-1;
00861 }
00862 else
00863 {
00864 *right=seqs->total_length.VU-1;
00865 }
00866
00867 assert(*left <= *right);
00868 }
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886 void fid_sequences_offset_to_boundaries(const fid_Sequences *seqs,
00887 UINT offset,
00888 UINT *left, UINT *right)
00889 {
00890 WSIZECHECK(seqs);
00891 fid_sequences_index_to_boundaries(seqs,
00892 fid_sequences_offset_to_index(seqs,offset),
00893 left,right);
00894 }
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916 int fid_sequences_iterate_range(const fid_Sequences *seqs,
00917 UINT from, UINT to,
00918 const fid_Sequenceiterfun iterfun,
00919 void *user_data)
00920 {
00921 UINT seqnum, length, offset, last;
00922 int retval;
00923
00924 assert(seqs != NULL);
00925 assert(seqs->num_of_sequences.VU <= (UINT)1 || seqs->separators.VU != NULL);
00926 assert(from <= to);
00927 assert(to < seqs->num_of_sequences.VU);
00928 assert(iterfun != NULL);
00929 WSIZECHECK(seqs);
00930
00931 if(seqs->num_of_sequences.VU > (UINT)1)
00932 {
00933 seqnum=from;
00934 if(seqnum < seqs->num_of_sequences.VU-1)
00935 {
00936 if(seqnum == 0)
00937 {
00938 offset=0;
00939 length=seqs->separators.VU[0];
00940 }
00941 else
00942 {
00943 offset=seqs->separators.VU[seqnum-1]+1;
00944 length=seqs->separators.VU[seqnum]-offset;
00945 }
00946
00947 if(to == seqs->num_of_sequences.VU-1)
00948 {
00949 last=seqs->num_of_sequences.VU-2;
00950 }
00951 else
00952 {
00953 last=to;
00954 }
00955
00956 retval=iterfun(seqs,seqnum,seqs->tisfile.content+offset,length,user_data);
00957
00958 for(++seqnum; retval == 0 && seqnum <= last; ++seqnum)
00959 {
00960 offset+=length+1;
00961 length=seqs->separators.VU[seqnum]-offset;
00962 retval=iterfun(seqs,seqnum,seqs->tisfile.content+offset,length,user_data);
00963 }
00964
00965 if(retval == 0 && last != to)
00966 {
00967
00968 assert(to == seqs->num_of_sequences.VU-1);
00969 offset+=length+1;
00970 length=seqs->total_length.VU-offset;
00971 retval=iterfun(seqs,seqnum,seqs->tisfile.content+offset,length,user_data);
00972 }
00973 }
00974 else
00975 {
00976
00977 assert(to == seqs->num_of_sequences.VU-1);
00978 offset=seqs->separators.VU[seqnum-1]+1;
00979 length=seqs->total_length.VU-offset;
00980 retval=iterfun(seqs,seqnum,seqs->tisfile.content+offset,length,user_data);
00981 }
00982 }
00983 else
00984 {
00985 assert(seqs->num_of_sequences.VU == (UINT)1);
00986 assert(from == 0 && to == 0);
00987 retval=iterfun(seqs,0,seqs->tisfile.content,seqs->total_length.VU,user_data);
00988 }
00989
00990 return retval;
00991 }
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008 int fid_sequences_iterate(const fid_Sequences *seqs,
01009 const fid_Sequenceiterfun iterfun, void *user_data)
01010 {
01011 assert(seqs != NULL);
01012 assert(seqs->num_of_sequences.VU <= (UINT)1 || seqs->separators.VU != NULL);
01013 assert(iterfun != NULL);
01014 WSIZECHECK(seqs);
01015
01016 if(seqs->num_of_sequences.VU > 0)
01017 {
01018 return fid_sequences_iterate_range(seqs,0,seqs->num_of_sequences.VU-1,
01019 iterfun,user_data);
01020 }
01021 else
01022 {
01023 return 0;
01024 }
01025 }
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044 void fid_sequences_dump_range(const fid_Symbol *seq, UINT length,
01045 const fid_Alphabet *alpha, const char *str,
01046 int stop_at_separator, FILE *stream)
01047 {
01048 UINT i;
01049
01050 assert(seq != NULL);
01051 assert(alpha != NULL);
01052
01053 if(stream == NULL)
01054 {
01055 return;
01056 }
01057
01058 if(str != NULL)
01059 {
01060 fprintf(stream,"%s",str);
01061 }
01062 if(stop_at_separator)
01063 {
01064 for(i=0; i < length && seq[i] != fid_SEPARATOR; ++i)
01065 {
01066 (void)fputc(fid_PRINT_SYMBOL(alpha,seq[i]),stream);
01067 }
01068 }
01069 else
01070 {
01071 for(i=0; i < length; ++i)
01072 {
01073 (void)fputc(fid_PRINT_SYMBOL(alpha,seq[i]),stream);
01074 }
01075 }
01076 (void)fputc('\n',stream);
01077 }
01078
01079
01080
01081