00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef SEQUENCES_H
00022 #define SEQUENCES_H
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
/*
 * Describes a collection of sequences together with the mapped files that
 * back it.  The fid_Uint48 / fid_Uint48constptr members are accessed in this
 * header through a .v_uint32 or a .v_uint64 view; fid_READ_SYMBOL passes
 * `uisize` to fid_SWITCH48 to choose between the two views of total_length.
 */
00036 typedef struct
00037 {
00038 fid_Uintsize uisize; /* integer-width selector handed to fid_SWITCH48 */
00039 const fid_Alphabet *alpha; /* alphabet of the sequences; not modified through this pointer */
00040
00041 fid_Uint48 num_of_sequences; /* number of sequences, going by the name (not used in this header) */
00042 fid_Uint48 total_length; /* exclusive upper bound asserted on the index by the debug fid_READ_SYMBOL */
00043
00044
00045 fid_Uint48constptr descriptions; /* offsets into desfile.content, indexed by sequence (see fid_SEQUENCE_DESCRIPTION_32/64) */
00046
00047 fid_Uint48constptr separators; /* NOTE(review): presumably separator positions; not used in this header -- confirm */
00048
00049 double distribution[UCHAR_MAX+1]; /* one slot per unsigned char value; presumably filled by fid_sequences_compute_distribution -- confirm */
00050 fid_Mappedfile tisfile; /* its content[] is what fid_READ_SYMBOL indexes */
00051 fid_Mappedfile oisfile; /* NOTE(review): presumably the table selected by fid_TABLE_OIS -- confirm */
00052
00053 fid_Mappedfile sspfile; /* NOTE(review): purpose not visible in this header */
00054 fid_Mappedfile desfile; /* its content holds the description text addressed via `descriptions` */
00055 fid_Mappedfile sdsfile; /* NOTE(review): purpose not visible in this header */
00056 } fid_Sequences;
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
/*
 * Input file format identifier; passed as the `format` argument of the
 * fid_sequences_parse_* functions declared in this header.
 */
00067 typedef enum
00068 {
00069 fid_FORMAT_UNDEF, /* value 0; NOTE(review): whether parsers reject or auto-detect on this value is not visible here */
00070
00071 fid_FORMAT_FASTA, /* FASTA input, going by the name */
00072 fid_FORMAT_UNIPROT /* UniProt input, going by the name */
00073 } fid_Fileformat;
00074
00075
00076
00077
00078
00079
00080
00081
/*
 * Size summary of a sequence input.  A pointer to this type is the `seqinfo`
 * parameter of fid_sequences_parse_from_file_to_lengths and
 * fid_sequences_parse_from_memory_to_lengths.
 * NOTE(review): presumably an output filled in by those functions -- confirm.
 * The field meanings below are read off the field names only.
 */
00082 typedef struct
00083 {
00084 size_t seqlen; /* presumably total length of the sequence data -- confirm units and whether separators count */
00085
00086 size_t desclen; /* presumably total length of the description data -- confirm */
00087
00088 size_t num_of_sequences; /* presumably the number of sequences found */
00089 size_t input_file_size; /* presumably the size of the input in bytes -- confirm */
00090 } fid_Sequencefileinfo;
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
/*
 * Bit set naming tables, used as the `tables` argument of
 * fid_sequences_map, fid_sequences_realize and the fid_sequences_parse_*
 * functions.  Individual flags are combined with bitwise OR.
 */
00101 typedef fid_Uint32 fid_Tablerequest;
00102
00103
00104 #define fid_TABLE_NONE ((fid_Tablerequest)0x00000000) /* empty set: no bit set */
00105
00106
00107 #define fid_TABLE_TIS ((fid_Tablerequest)0x00000001) /* bit 0 */
00108
00109
00110 #define fid_TABLE_OIS ((fid_Tablerequest)0x00000002) /* bit 1 */
00111
00112
00113 #define fid_TABLE_DES ((fid_Tablerequest)0x00000004) /* bit 2 */
00114
00115
00116 #define fid_TABLES_ONLINE (fid_TABLE_TIS|fid_TABLE_OIS|fid_TABLE_DES) /* union of the three flags above (0x07) */
00117
00118
00119 #define fid_TABLES_ONLINE_MASK ((fid_Tablerequest)0x000000ff) /* low 8 bits; wider than the 0x07 currently defined */
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
/*
 * fid_READ_SYMBOL(SEQS,I): element I of (SEQS)->tisfile.content.
 *
 * Checked variant (DEBUG defined, GCC, not ICC): a GNU statement expression
 * that first asserts I < total_length, using the .v_uint32 or .v_uint64 view
 * as chosen by fid_SWITCH48 on (SEQS)->uisize, and then yields the element.
 * Unchecked variant (all other builds): the plain array access.
 *
 * NOTE(review): in the checked variant I and SEQS appear more than once in
 * the expansion, so arguments with side effects should be avoided; also a
 * statement expression yields a value, whereas the unchecked variant is an
 * lvalue.  The exact expansion of fid_SWITCH48 is not visible here.
 */
00130 #if defined DEBUG && defined __GNUC__ && !defined __ICC
00131 #ifdef __cplusplus
00132 #include <cassert>
00133 #else
00134 #include <assert.h>
00135 #endif
00136 #define fid_READ_SYMBOL(SEQS,I)\
00137 ({ fid_SWITCH48((SEQS)->uisize,\
00138 assert((I) < (SEQS)->total_length.v_uint32);,\
00139 assert((I) < (SEQS)->total_length.v_uint64););\
00140 (SEQS)->tisfile.content[I]; })
00141 #else
00142 #define fid_READ_SYMBOL(SEQS,I) (SEQS)->tisfile.content[I]
00143 #endif
00144
00145
00146
00147
00148
00149
00150
00151
00152
/*
 * fid_SEQUENCE_DESCRIPTION_32/64(S,I): pointer (const char *) to the
 * description of sequence I, computed as (S)->desfile.content plus the
 * offset stored in (S)->descriptions at index I.  The _32 form reads the
 * .v_uint32 view of the offset table, the _64 form the .v_uint64 view.
 * NOTE(review): whether the text is NUL-terminated is not visible here;
 * fid_SEQUENCE_DESCRIPTION_LENGTH_32/64 gives the extent from the offsets.
 */
00153 #define fid_SEQUENCE_DESCRIPTION_32(S,I)\
00154 ((const char *)((S)->desfile.content+(S)->descriptions.v_uint32[I]))
00155
00156
00157 #define fid_SEQUENCE_DESCRIPTION_64(S,I)\
00158 ((const char *)((S)->desfile.content+(S)->descriptions.v_uint64[I]))
00159
00160
00161
00162
00163
00164
00165
00166
00167
/*
 * fid_SEQUENCE_DESCRIPTION_LENGTH_32/64(S,I): difference between the
 * description offsets at index I+1 and index I, taken from the .v_uint32
 * or .v_uint64 view of (S)->descriptions respectively.
 * The macro reads entry I+1, so that entry must exist for every I it is
 * used with.  I appears twice in the expansion; avoid side effects in it.
 * NOTE(review): whether the difference includes a terminator or separator
 * byte is not visible in this header.
 */
00168 #define fid_SEQUENCE_DESCRIPTION_LENGTH_32(S,I)\
00169 ((S)->descriptions.v_uint32[(I)+1]-(S)->descriptions.v_uint32[I])
00170
00171
00172 #define fid_SEQUENCE_DESCRIPTION_LENGTH_64(S,I)\
00173 ((S)->descriptions.v_uint64[(I)+1]-(S)->descriptions.v_uint64[I])
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
/*
 * Callback type accepted by fid_sequences_iterate_32 and
 * fid_sequences_iterate_range_32.
 *   seqs      - the sequence collection (read-only)
 *   seqnum    - sequence number, 32-bit
 *   sequence  - pointer to symbols (read-only)
 *   seqlen    - length paired with `sequence`, 32-bit
 *   user_data - opaque pointer; the iterate functions take a parameter of
 *               the same name
 * NOTE(review): the meaning of the int return value (e.g. whether non-zero
 * stops the iteration) is not visible in this header -- confirm.
 */
00194 typedef int (*fid_Sequenceiterfun_32)(const fid_Sequences *seqs,
00195 fid_Uint32 seqnum,
00196 const fid_Symbol *sequence,
00197 fid_Uint32 seqlen, void *user_data);
00198
00199
00200
00201
00202
/*
 * Callback type accepted by fid_sequences_iterate_64 and
 * fid_sequences_iterate_range_64.
 *   seqs      - the sequence collection (read-only)
 *   seqnum    - sequence number, 64-bit
 *   sequence  - pointer to symbols (read-only)
 *   seqlen    - length paired with `sequence`, 64-bit
 *   user_data - opaque pointer; the iterate functions take a parameter of
 *               the same name
 * NOTE(review): the meaning of the int return value (e.g. whether non-zero
 * stops the iteration) is not visible in this header -- confirm.
 */
00203 typedef int (*fid_Sequenceiterfun_64)(const fid_Sequences *seqs,
00204 fid_Uint64 seqnum,
00205 const fid_Symbol *sequence,
00206 fid_Uint64 seqlen, void *user_data);
00207
00208 #ifdef __cplusplus
00209 extern "C" {
00210 #endif
/*
 * Initialize `seqs` with the integer-width selector `uisize` and the
 * alphabet `alpha`.  Returns nothing.
 * NOTE(review): the implementation is not in this header; presumably the
 * arguments end up in the fid_Sequences fields of the same names -- confirm.
 */
00211 void fid_sequences_init(fid_Sequences *seqs, fid_Uintsize uisize,
00212 const fid_Alphabet *alpha);
/*
 * Map tables for `seqs`.
 *   basefilename - base name of the files involved (read-only string)
 *   tables       - set of fid_TABLE_* flags
 *   fnamebuf     - caller-supplied file name buffer; NOTE(review):
 *                  presumably scratch space for building names -- confirm
 *   error        - error object; presumably filled in on failure -- confirm
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00213 int fid_sequences_map(fid_Sequences *seqs, const char *basefilename,
00214 fid_Tablerequest tables, fid_Filenamebuffer *fnamebuf,
00215 fid_Error *error);
/*
 * Operates on `seqs` for the tables named by the fid_TABLE_* flags in
 * `tables`; `error` is an error object.
 * NOTE(review): what "realize" does and what the int return value means
 * are not visible in this header -- consult the implementation.
 */
00216 int fid_sequences_realize(fid_Sequences *seqs, fid_Tablerequest tables,
00217 fid_Error *error);
/*
 * Parse sequences from the file `infilename` into files, going by the name.
 *   infilename      - name of the input file
 *   format          - input format (fid_Fileformat)
 *   tables          - set of fid_TABLE_* flags
 *   basefilename    - base name; presumably of the output files -- confirm
 *   alpha           - alphabet (read-only)
 *   uisize          - integer-width selector
 *   input_file_size - non-const pointer; presumably receives the input size
 *                     -- confirm, including whether NULL is accepted
 *   error           - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00218 int fid_sequences_parse_from_file_to_file(const char *infilename,
00219 fid_Fileformat format,
00220 fid_Tablerequest tables,
00221 const char *basefilename,
00222 const fid_Alphabet *alpha,
00223 fid_Uintsize uisize,
00224 size_t *input_file_size,
00225 fid_Error *error);
/*
 * Parse sequences from the file `infilename` into `result`, going by the
 * name.
 *   infilename      - name of the input file
 *   format          - input format (fid_Fileformat)
 *   tables          - set of fid_TABLE_* flags
 *   result          - fid_Sequences object to populate (non-const)
 *   alpha           - alphabet (read-only)
 *   padding         - NOTE(review): semantics not visible here; presumably
 *                     extra space reserved around the data -- confirm
 *   uisize          - integer-width selector
 *   input_file_size - non-const pointer; presumably receives the input size
 *   error           - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00226 int fid_sequences_parse_from_file_to_memory(const char *infilename,
00227 fid_Fileformat format,
00228 fid_Tablerequest tables,
00229 fid_Sequences *result,
00230 const fid_Alphabet *alpha,
00231 size_t padding, fid_Uintsize uisize,
00232 size_t *input_file_size,
00233 fid_Error *error);
/*
 * Parse the file `infilename` and report sizes through `seqinfo`, going by
 * the name.
 *   infilename - name of the input file
 *   format     - input format (fid_Fileformat)
 *   seqinfo    - fid_Sequencefileinfo to fill in (non-const)
 *   alpha      - alphabet (read-only)
 *   error      - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00234 int fid_sequences_parse_from_file_to_lengths(const char *infilename,
00235 fid_Fileformat format,
00236 fid_Sequencefileinfo *seqinfo,
00237 const fid_Alphabet *alpha,
00238 fid_Error *error);
/*
 * Parse sequences from the memory buffer `buffer` of `bufsize` bytes into
 * files, going by the name.
 *   infilename   - NOTE(review): the data comes from `buffer`, so this name
 *                  is presumably used only for messages -- confirm
 *   buffer       - input data (read-only)
 *   bufsize      - size of `buffer`
 *   format       - input format (fid_Fileformat)
 *   tables       - set of fid_TABLE_* flags
 *   basefilename - base name; presumably of the output files -- confirm
 *   alpha        - alphabet (read-only)
 *   uisize       - integer-width selector
 *   error        - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00239 int fid_sequences_parse_from_memory_to_file(const char *infilename,
00240 const char *buffer, size_t bufsize,
00241 fid_Fileformat format,
00242 fid_Tablerequest tables,
00243 const char *basefilename,
00244 const fid_Alphabet *alpha,
00245 fid_Uintsize uisize,
00246 fid_Error *error);
/*
 * Parse sequences from the memory buffer `buffer` of `bufsize` bytes into
 * `result`, going by the name.
 *   infilename - NOTE(review): the data comes from `buffer`, so this name
 *                is presumably used only for messages -- confirm
 *   buffer     - input data (read-only)
 *   bufsize    - size of `buffer`
 *   format     - input format (fid_Fileformat)
 *   tables     - set of fid_TABLE_* flags
 *   result     - fid_Sequences object to populate (non-const)
 *   alpha      - alphabet (read-only)
 *   padding    - NOTE(review): semantics not visible here -- confirm
 *   uisize     - integer-width selector
 *   error      - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00247 int fid_sequences_parse_from_memory_to_memory(const char *infilename,
00248 const char *buffer,
00249 size_t bufsize,
00250 fid_Fileformat format,
00251 fid_Tablerequest tables,
00252 fid_Sequences *result,
00253 const fid_Alphabet *alpha,
00254 size_t padding,
00255 fid_Uintsize uisize,
00256 fid_Error *error);
/*
 * Parse the memory buffer `buffer` of `bufsize` bytes and report sizes
 * through `seqinfo`, going by the name.
 *   infilename - NOTE(review): the data comes from `buffer`, so this name
 *                is presumably used only for messages -- confirm
 *   buffer     - input data (read-only)
 *   bufsize    - size of `buffer`
 *   format     - input format (fid_Fileformat)
 *   seqinfo    - fid_Sequencefileinfo to fill in (non-const)
 *   alpha      - alphabet (read-only)
 *   error      - error object; presumably filled in on failure
 * NOTE(review): the meaning of the int return value is not visible here.
 */
00257 int fid_sequences_parse_from_memory_to_lengths(const char *infilename,
00258 const char *buffer,
00259 size_t bufsize,
00260 fid_Fileformat format,
00261 fid_Sequencefileinfo *seqinfo,
00262 const fid_Alphabet *alpha,
00263 fid_Error *error);
00264
00265
/*
 * Convert `offset` to an index for the collection `seqs` (32-bit variant).
 * NOTE(review): presumably `offset` is a symbol position such as the index
 * used with fid_READ_SYMBOL, and the result is the number of the sequence
 * containing it; behaviour for out-of-range offsets is not visible here.
 * The #line directive resets the reported line number (the same value is
 * repeated before the 64-bit variant), so it stays directly above the
 * declaration.
 */
00266 #line 265
00267 fid_Uint32 fid_sequences_offset_to_index_32(const fid_Sequences *seqs,
00268 fid_Uint32 offset);
00269
00270
/* 64-bit variant of the declaration above; same contract with fid_Uint64. */
00271 #line 265
00272 fid_Uint64 fid_sequences_offset_to_index_64(const fid_Sequences *seqs,
00273 fid_Uint64 offset);
00274
00275
/*
 * For the sequence with index `seqindex` in `seqs`, store its boundaries
 * through the `left` and `right` pointers (32-bit variant).  Returns nothing.
 * NOTE(review): whether `right` is inclusive or exclusive, and whether
 * either pointer may be NULL, is not visible in this header -- confirm.
 */
00276 #line 267
00277 void fid_sequences_index_to_boundaries_32(const fid_Sequences *seqs,
00278 fid_Uint32 seqindex,
00279 fid_Uint32 *left, fid_Uint32 *right);
00280
00281
/* 64-bit variant of the declaration above; same contract with fid_Uint64. */
00282 #line 267
00283 void fid_sequences_index_to_boundaries_64(const fid_Sequences *seqs,
00284 fid_Uint64 seqindex,
00285 fid_Uint64 *left, fid_Uint64 *right);
00286
00287
/*
 * For the position `offset` in `seqs`, store boundaries through the `left`
 * and `right` pointers (32-bit variant).  Returns nothing.
 * NOTE(review): presumably these are the boundaries of the sequence that
 * contains `offset`; inclusiveness of `right` and NULL handling are not
 * visible in this header -- confirm.
 */
00288 #line 270
00289 void fid_sequences_offset_to_boundaries_32(const fid_Sequences *seqs,
00290 fid_Uint32 offset,
00291 fid_Uint32 *left, fid_Uint32 *right);
00292
00293
/* 64-bit variant of the declaration above; same contract with fid_Uint64. */
00294 #line 270
00295 void fid_sequences_offset_to_boundaries_64(const fid_Sequences *seqs,
00296 fid_Uint64 offset,
00297 fid_Uint64 *left, fid_Uint64 *right);
00298
00299
/*
 * Iterate over part of `seqs`, delimited by `from` and `to`, passing
 * `user_data` along to the callback `iterfun` (32-bit variant).
 * NOTE(review): whether `from`/`to` are sequence numbers or positions,
 * whether `to` is inclusive, and how the callback's return value relates
 * to this function's int result are not visible here -- confirm.
 */
00300 #line 273
00301 int fid_sequences_iterate_range_32(const fid_Sequences *seqs,
00302 fid_Uint32 from, fid_Uint32 to,
00303 const fid_Sequenceiterfun_32 iterfun,
00304 void *user_data);
00305
00306
/* 64-bit variant of the declaration above; same contract with fid_Uint64. */
00307 #line 273
00308 int fid_sequences_iterate_range_64(const fid_Sequences *seqs,
00309 fid_Uint64 from, fid_Uint64 to,
00310 const fid_Sequenceiterfun_64 iterfun,
00311 void *user_data);
00312
00313
/*
 * Iterate over `seqs`, passing `user_data` along to the callback `iterfun`
 * (32-bit variant).
 * NOTE(review): presumably the callback runs once per sequence; how its
 * return value relates to this function's int result is not visible in
 * this header -- confirm.
 */
00314 #line 277
00315 int fid_sequences_iterate_32(const fid_Sequences *seqs,
00316 const fid_Sequenceiterfun_32 iterfun, void *user_data);
00317
00318
/* 64-bit variant of the declaration above; callback type is fid_Sequenceiterfun_64. */
00319 #line 277
00320 int fid_sequences_iterate_64(const fid_Sequences *seqs,
00321 const fid_Sequenceiterfun_64 iterfun, void *user_data);
/*
 * fid_sequences_compute_distribution: takes a modifiable `seqs` and returns
 * nothing.  NOTE(review): presumably fills seqs->distribution from the
 * symbol data -- confirm against the implementation.
 *
 * fid_sequences_free: releases resources held by `seqs`, going by the name.
 * NOTE(review): whether the fid_Sequences object itself is freed, or only
 * what it refers to, is not visible in this header -- confirm.
 */
00322 #line 280
00323 void fid_sequences_compute_distribution(fid_Sequences *seqs);
00324 void fid_sequences_free(fid_Sequences *seqs);
00325
00326
/*
 * Write the `length` symbols starting at `seq` to `stream` (32-bit
 * variant).  Returns nothing.
 *   seq               - symbols to dump (read-only)
 *   length            - number of symbols, 32-bit
 *   alpha             - alphabet (read-only); presumably used to render
 *                       symbols as characters -- confirm
 *   str               - NOTE(review): role not visible here; presumably a
 *                       label or prefix for the output -- confirm
 *   stop_at_separator - int flag; presumably non-zero ends the dump at the
 *                       first separator symbol -- confirm
 *   stream            - destination stdio stream
 */
00327 #line 283
00328 void fid_sequences_dump_range_32(const fid_Symbol *seq, fid_Uint32 length,
00329 const fid_Alphabet *alpha, const char *str,
00330 int stop_at_separator, FILE *stream);
00331
00332
/* 64-bit variant of the declaration above; `length` is fid_Uint64. */
00333 #line 283
00334 void fid_sequences_dump_range_64(const fid_Symbol *seq, fid_Uint64 length,
00335 const fid_Alphabet *alpha, const char *str,
00336 int stop_at_separator, FILE *stream);
00337 #line 287
00338 #ifdef __cplusplus
00339 }
00340 #endif
00341
00342
00343 #endif