Data Structures | |
struct | fid_Projectfile |
Representation of a project file. More... | |
struct | fid_Suffixarray |
Representation of an enhanced suffix array. More... | |
struct | fid_Suffixinterval_32 |
Representation of a suffix-interval in 32 bit enhanced suffix array. More... | |
struct | fid_Suffixinterval_64 |
Representation of a suffix-interval in 64 bit enhanced suffix array. More... | |
Defines | |
#define | fid_SKIPTREE_LOOP(ESA, SKPTAB, NEXT, LCP, DEPTH, MAX, REL) |
The inner skip chain loop. | |
#define | fid_SKIPTREE_DEPTH0_LOOP(ESA, SKPTAB, NEXT, LCP, MAX) |
The inner skip chain loop for special character at depth 0. | |
#define | fid_SKIPTREE_COMMON(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX) |
Skip tree if the last character read from the start suffix was a common character. | |
#define | fid_SKIPTREE_SPECIAL(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX) |
Skip tree if the last character read from the start suffix was a special character. | |
#define | fid_SKIPTREE(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX, COMMON) |
Skip a subtree of a virtual suffix tree via skip table. | |
#define | fid_SUFFIXINTERVAL_SINGLETON_32 (~(fid_Uint32)0) |
Special value for fid_Suffixinterval_32::depth to indicate a singleton. | |
#define | fid_SUFFIXINTERVAL_SINGLETON_64 (~(fid_Uint64)0) |
Special value for fid_Suffixinterval_64::depth to indicate a singleton. | |
#define | fid_TABLE_SUF ((fid_Tablerequest)0x00000100) |
Table request: suffix array. | |
#define | fid_TABLE_LCP ((fid_Tablerequest)0x00000200) |
Table request: lcp table. | |
#define | fid_TABLE_SKP ((fid_Tablerequest)0x00000400) |
Table request: skip table. | |
#define | fid_TABLE_STI ((fid_Tablerequest)0x00000800) |
Table request: inverse suffix array. | |
#define | fid_TABLES_OFFLINE (fid_TABLE_SUF|fid_TABLE_LCP|fid_TABLE_SKP|fid_TABLE_STI) |
Table request: all tables related to offline matching. | |
#define | fid_TABLES_OFFLINE_MASK ((fid_Tablerequest)0xffffff00) |
Table request bit mask: bits related to offline matching. | |
#define | fid_TABLES_ALL (fid_TABLES_ONLINE|fid_TABLES_OFFLINE) |
Table request: all tables. | |
#define | fid_INTERVALBUFFERSIZE ((size_t)fid_SYMBOLMAX+2) |
Size of the buffer for fid_suffixarray_get_intervals(). | |
#define | fid_LCP_32(L, ESA, I) |
Determine lcp-value of a 32 bit suffix. | |
#define | fid_LCP_64(L, ESA, I) |
Determine lcp-value of a 64 bit suffix. | |
#define | fid_suffixinterval_init(SI, D, L, R) |
Initialize a fid_Suffixinterval structure. | |
#define | fid_suffixinterval_init_root_32(SI, ESA) fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint32-1) |
Initialize a fid_Suffixinterval_32 structure corresponding to the root interval of a given enhanced suffix array. | |
#define | fid_suffixinterval_init_root_64(SI, ESA) fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint64-1) |
Initialize a fid_Suffixinterval_64 structure corresponding to the root interval of a given enhanced suffix array. | |
#define | fid_TRAVERSE_CONTINUE ((int)0) |
Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions. | |
#define | fid_TRAVERSE_SKIP ((int)-1) |
Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions. | |
Typedefs | |
typedef int(* | fid_Esatraversecallback_32 )(const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 pdepth, fid_Error *error, void *user_data) |
Callback function prototype for enhanced suffix array traversals. | |
typedef int(* | fid_Esatraversecallback_64 )(const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 pdepth, fid_Error *error, void *user_data) |
Callback function prototype for enhanced suffix array traversals. | |
Functions | |
int | fid_projectfile_init (fid_Projectfile *project, const char *prjbasename, fid_Error *error) |
Initialize a project file data structure. | |
int | fid_projectfile_parse_from_file (fid_Projectfile *project, const char *prjbasename, fid_Error *error) |
Parse project file content from file. | |
int | fid_projectfile_write (const fid_Projectfile *project, fid_Error *error) |
Write textual representation of project file data structure to file. | |
void | fid_projectfile_free (fid_Projectfile *project) |
Free memory associated with a project file structure. | |
void | fid_projectfile_dump (const fid_Projectfile *project, FILE *stream) |
Print contents of fid_Projectfile structure to output stream. | |
int | fid_suffixarray_load_from_project (fid_Suffixarray *esa, fid_Projectfile *project, fid_Tablerequest tables, fid_Error *error) |
Load enhanced suffix array based on the given project data structure. | |
int | fid_suffixarray_load_special (fid_Suffixarray *esa, const char *prjbasename, fid_Tablerequest tables, fid_Uintsize uisize_request, int ignore_byteorder, fid_Error *error) |
Load enhanced suffix array project and initialize data structures. | |
int | fid_suffixarray_load (fid_Suffixarray *esa, const char *prjbasename, fid_Tablerequest tables, fid_Error *error) |
Load enhanced suffix array project and initialize data structures. | |
int | fid_projectfile_init_from_suffixarray (fid_Projectfile *project, const fid_DBfiles *dbfiles, const char *prjbasename, const fid_Suffixarray *esa, fid_Error *error) |
Initialize fid_Projectfile structure from mapped enhanced suffix array. | |
void | fid_suffixarray_init (fid_Suffixarray *esa, fid_Uintsize uisize) |
Initialize enhanced suffix array structure. | |
int | fid_suffixarray_load_from_files (fid_Suffixarray *esa, const char *basefilename, fid_Tablerequest tables, fid_Uintsize uisize, fid_Error *error) |
Load enhanced suffix array from disk. | |
int | fid_suffixarray_realize (fid_Suffixarray *esa, fid_Tablerequest tables, fid_Error *error) |
Update data fields in fid_Suffixarray structure according to mapped files. | |
int | fid_suffixarray_traverse_32 (const fid_Suffixarray *esa, fid_Esatraversecallback_32 callback, void *user_data, fid_Error *error) |
32 bit version of fid_suffixarray_traverse(). | |
int | fid_suffixarray_traverse_64 (const fid_Suffixarray *esa, fid_Esatraversecallback_64 callback, void *user_data, fid_Error *error) |
64 bit version of fid_suffixarray_traverse(). | |
fid_Uint32 | fid_suffixarray_get_intervals_32 (const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 *intervals) |
32 bit version of fid_suffixarray_get_intervals(). | |
fid_Uint64 | fid_suffixarray_get_intervals_64 (const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 *intervals) |
64 bit version of fid_suffixarray_get_intervals(). | |
int | fid_suffixarray_find_embedded_interval_32 (const fid_Suffixarray *esa, fid_Suffixinterval_32 *si, fid_Symbol symbol) |
32 bit version of fid_suffixarray_find_embedded_interval(). | |
int | fid_suffixarray_find_embedded_interval_64 (const fid_Suffixarray *esa, fid_Suffixinterval_64 *si, fid_Symbol symbol) |
64 bit version of fid_suffixarray_find_embedded_interval(). | |
int | fid_suffixarray_extend_interval_32 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint32 plen, fid_Suffixinterval_32 *si) |
32 bit version of fid_suffixarray_extend_interval(). | |
int | fid_suffixarray_extend_interval_64 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint64 plen, fid_Suffixinterval_64 *si) |
64 bit version of fid_suffixarray_extend_interval(). | |
int | fid_suffixarray_find_interval_32 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint32 plen, fid_Suffixinterval_32 *si) |
32 bit version of fid_suffixarray_find_interval(). | |
int | fid_suffixarray_find_interval_64 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint64 plen, fid_Suffixinterval_64 *si) |
64 bit version of fid_suffixarray_find_interval(). | |
fid_Uint32 | fid_suffixarray_find_large_lcp_32 (const fid_Suffixarray *esa, fid_Uint32 suffix) |
32 bit version of fid_suffixarray_find_large_lcp(). | |
fid_Uint64 | fid_suffixarray_find_large_lcp_64 (const fid_Suffixarray *esa, fid_Uint64 suffix) |
64 bit version of fid_suffixarray_find_large_lcp(). | |
fid_Uint32 | fid_suffixarray_suffix_length_32 (const fid_Suffixarray *esa, fid_Uint32 suffix) |
32 bit version of fid_suffixarray_suffix_length(). | |
fid_Uint64 | fid_suffixarray_suffix_length_64 (const fid_Suffixarray *esa, fid_Uint64 suffix) |
64 bit version of fid_suffixarray_suffix_length(). | |
void | fid_suffixarray_compute_distribution_32 (fid_Suffixarray *esa) |
32 bit version of fid_suffixarray_compute_distribution(). | |
void | fid_suffixarray_compute_distribution_64 (fid_Suffixarray *esa) |
64 bit version of fid_suffixarray_compute_distribution(). | |
void | fid_suffixarray_free (fid_Suffixarray *esa) |
Free enhanced suffix array structure. | |
void | fid_suffixarray_dump (const fid_Suffixarray *esa, FILE *stream) |
Show information about an enhanced suffix array. | |
void | fid_suffixarray_dump_intervals_32 (const fid_Suffixarray *esa, const fid_Uint32 intervals[fid_INTERVALBUFFERSIZE], FILE *stream) |
32 bit version of fid_suffixarray_dump_intervals(). | |
void | fid_suffixarray_dump_intervals_64 (const fid_Suffixarray *esa, const fid_Uint64 intervals[fid_INTERVALBUFFERSIZE], FILE *stream) |
64 bit version of fid_suffixarray_dump_intervals(). | |
void | fid_suffixarray_dump_suffix_32 (const fid_Suffixarray *esa, fid_Uint32 suffix, fid_Uint32 length, FILE *stream) |
32 bit version of fid_suffixarray_dump_suffix(). | |
void | fid_suffixarray_dump_suffix_64 (const fid_Suffixarray *esa, fid_Uint64 suffix, fid_Uint64 length, FILE *stream) |
64 bit version of fid_suffixarray_dump_suffix(). | |
void | fid_suffixarray_print_32 (const fid_Suffixarray *esa, fid_Tablerequest request, FILE *stream) |
32 bit version of fid_suffixarray_print(). | |
void | fid_suffixarray_print_64 (const fid_Suffixarray *esa, fid_Tablerequest request, FILE *stream) |
64 bit version of fid_suffixarray_print(). | |
fid_Uint32 | fid_suffixinterval_lcpvalue_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa) |
32 bit version of fid_suffixinterval_lcpvalue(). | |
fid_Uint64 | fid_suffixinterval_lcpvalue_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa) |
64 bit version of fid_suffixinterval_lcpvalue(). | |
void | fid_suffixinterval_to_lcpinterval_32 (fid_Suffixinterval_32 *si, const fid_Suffixarray *esa) |
32 bit version of fid_suffixinterval_to_lcpinterval(). | |
void | fid_suffixinterval_to_lcpinterval_64 (fid_Suffixinterval_64 *si, const fid_Suffixarray *esa) |
64 bit version of fid_suffixinterval_to_lcpinterval(). | |
void | fid_suffixinterval_find_right_32 (fid_Suffixinterval_32 *si, const fid_Suffixarray *esa) |
32 bit version of fid_suffixinterval_find_right(). | |
void | fid_suffixinterval_find_right_64 (fid_Suffixinterval_64 *si, const fid_Suffixarray *esa) |
64 bit version of fid_suffixinterval_find_right(). | |
fid_Uint32 | fid_suffixinterval_homepos_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa) |
32 bit version of fid_suffixinterval_homepos(). | |
fid_Uint64 | fid_suffixinterval_homepos_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa) |
64 bit version of fid_suffixinterval_homepos(). | |
void | fid_suffixinterval_dump_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa, FILE *stream) |
32 bit version of fid_suffixinterval_dump(). | |
void | fid_suffixinterval_dump_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa, FILE *stream) |
64 bit version of fid_suffixinterval_dump(). | |
char * | fid_tablerequest_to_string (fid_Tablerequest request, const char *sep) |
Construct string from table request. |
#define fid_SKIPTREE_LOOP | ( | ESA, | |||
SKPTAB, | |||||
NEXT, | |||||
LCP, | |||||
DEPTH, | |||||
MAX, | |||||
REL | ) |
Value:
while((NEXT) < (MAX) && (LCP) REL (DEPTH))\
{\
NEXT=(SKPTAB)[NEXT]+1;\
fid_LCP(LCP,ESA,NEXT);\
}
If the last character read from the start suffix was a special character and depth is 0, then use fid_SKIPTREE_DEPTH0_LOOP() instead to prevent reading invalid memory.
Definition at line 32 of file skiptree.h.
#define fid_SKIPTREE_DEPTH0_LOOP | ( | ESA, | |||
SKPTAB, | |||||
NEXT, | |||||
LCP, | |||||
MAX | ) |
Value:
while((NEXT) < (MAX))\
{\
  NEXT=(SKPTAB)[NEXT]+1;\
  if((NEXT) < (MAX))\
  {\
    fid_LCP(LCP,ESA,NEXT);\
  }\
}
Definition at line 42 of file skiptree.h.
#define fid_SKIPTREE_COMMON | ( | ESA, | |||
SKPTAB, | |||||
NEXT, | |||||
START, | |||||
LCP, | |||||
DEPTH, | |||||
MAX | ) |
Value:
if((START) < (MAX))\
{\
  (NEXT)=(START)+1;\
  fid_LCP(LCP,ESA,NEXT);\
  fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>);\
}\
else\
{\
  NEXT=(MAX);\
  fid_LCP(LCP,ESA,NEXT);\
}
Definition at line 55 of file skiptree.h.
#define fid_SKIPTREE_SPECIAL | ( | ESA, | |||
SKPTAB, | |||||
NEXT, | |||||
START, | |||||
LCP, | |||||
DEPTH, | |||||
MAX | ) |
Value:
if((START) < (MAX))\
{\
  (NEXT)=(START)+1;\
  fid_LCP(LCP,ESA,NEXT);\
  if((DEPTH) > 0)\
  {\
    fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>=);\
  }\
  else\
  {\
    fid_SKIPTREE_DEPTH0_LOOP(ESA,SKPTAB,NEXT,LCP,MAX);\
  }\
}\
else\
{\
  NEXT=(MAX);\
  fid_LCP(LCP,ESA,NEXT);\
}
Definition at line 71 of file skiptree.h.
#define fid_SKIPTREE | ( | ESA, | |||
SKPTAB, | |||||
NEXT, | |||||
START, | |||||
LCP, | |||||
DEPTH, | |||||
MAX, | |||||
COMMON | ) |
Value:
if((START) < (MAX))\
{\
  (NEXT)=(START)+1;\
  fid_LCP(LCP,ESA,NEXT);\
  if(COMMON)\
  {\
    fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>);\
  }\
  else if((DEPTH) > 0)\
  {\
    fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>=);\
  }\
  else\
  {\
    fid_SKIPTREE_DEPTH0_LOOP(ESA,SKPTAB,NEXT,LCP,MAX);\
  }\
}\
else\
{\
  NEXT=(MAX);\
  fid_LCP(LCP,ESA,NEXT);\
}
The macro will always update NEXT and LCP; the other arguments are used read-only.
ESA | Pointer to an enhanced suffix array. | |
SKPTAB | Pointer to the skip table (part of ESA ). This can be given separately because you may want to have a pointer around that is directly pointing to the skip table instead of referencing it through the fid_Suffixarray structure each time. | |
NEXT | An fid_Uint that stores the next index in the suffix table, that is the first suffix that does not belong to the skipped subtree. | |
START | The current index into the suffix table, that is the index of the suffix whose subtree should be skipped. | |
LCP | An fid_Uint that stores the current index in the LCP table. | |
DEPTH | The current depth in the suffix tree. | |
MAX | The length of the sequence underlying the suffix array. The length could also be read directly from a fid_Suffixarray structure, but this may take too much time, so it should have been stored in a separate fid_Uint by the caller. | |
COMMON | Boolean indicating if the last character of the start suffix is a special (0) or a common (not 0) one as determined by the fid_REGULARSYMBOL() and fid_SPECIALSYMBOL() macros. |
Definition at line 117 of file skiptree.h.
#define fid_SUFFIXINTERVAL_SINGLETON_32 (~(fid_Uint32)0) |
Special value for fid_Suffixinterval_32::depth to indicate a singleton.
Definition at line 106 of file suffixarray.h.
#define fid_SUFFIXINTERVAL_SINGLETON_64 (~(fid_Uint64)0) |
Special value for fid_Suffixinterval_64::depth to indicate a singleton.
Definition at line 113 of file suffixarray.h.
#define fid_TABLE_SUF ((fid_Tablerequest)0x00000100) |
#define fid_TABLE_LCP ((fid_Tablerequest)0x00000200) |
#define fid_TABLE_SKP ((fid_Tablerequest)0x00000400) |
#define fid_TABLE_STI ((fid_Tablerequest)0x00000800) |
Table request: inverse suffix array.
Definition at line 125 of file suffixarray.h.
Referenced by fid_suffixarray_print().
#define fid_TABLES_OFFLINE (fid_TABLE_SUF|fid_TABLE_LCP|fid_TABLE_SKP|fid_TABLE_STI) |
Table request: all tables related to offline matching.
Definition at line 128 of file suffixarray.h.
Referenced by fid_suffixarray_free(), and fid_tablerequest_to_string().
#define fid_TABLES_OFFLINE_MASK ((fid_Tablerequest)0xffffff00) |
Table request bit mask: bits related to offline matching.
Definition at line 131 of file suffixarray.h.
Referenced by fid_suffixarray_load_from_files().
#define fid_TABLES_ALL (fid_TABLES_ONLINE|fid_TABLES_OFFLINE) |
Table request: all tables.
Definition at line 134 of file suffixarray.h.
Referenced by fid_suffixarray_print(), and fid_tablerequest_to_string().
#define fid_INTERVALBUFFERSIZE ((size_t)fid_SYMBOLMAX+2) |
Size of the buffer for fid_suffixarray_get_intervals().
The size is determined as the maximum number of regular symbols, plus 1 for the left boundary of the special interval, plus 1 for the (non-existent) left boundary of the interval following the special interval.
Definition at line 144 of file suffixarray.h.
#define fid_LCP_32 | ( | L, | |||
ESA, | |||||
I | ) |
Value:
if(((L)=(ESA)->lcpfile.content[I]) == (unsigned char)UCHAR_MAX)\
{\
  (L)=fid_suffixarray_find_large_lcp_32(ESA,I);\
}
This is implemented as a macro since it would be a huge performance penalty to call a function for something that is a simple assignment in the vast majority of all cases. A function call occurs only if the lcp-value stored in the main lcp-table is equal to UCHAR_MAX (255), since then the real, potentially larger, lcp-value must be looked up inside an exception table.
L | The lcp-value is assigned to this variable, which should be of type fid_Uint32 or fid_Uint64. | |
ESA | An enhanced suffix array. | |
I | The suffix index for which the lcp-value should be determined. |
Definition at line 162 of file suffixarray.h.
#define fid_LCP_64 | ( | L, | |||
ESA, | |||||
I | ) |
Value:
if(((L)=(ESA)->lcpfile.content[I]) == (unsigned char)UCHAR_MAX)\
{\
  (L)=fid_suffixarray_find_large_lcp_64(ESA,I);\
}
Definition at line 172 of file suffixarray.h.
#define fid_suffixinterval_init | ( | SI, | |||
D, | |||||
L, | |||||
R | ) |
Value:
(SI)->depth=(D);\
(SI)->left=(L);\
(SI)->right=(R)
This has been implemented as a macro mainly for speed since it is used all the time. We don't want to rely on the compiler to inline this automatically, do we?
SI | The fid_Suffixinterval structure to be initialized. | |
D | Depth. | |
L,R | Left and right boundaries. |
Definition at line 189 of file suffixarray.h.
Referenced by fid_suffixarray_dump_intervals(), and fid_suffixarray_traverse().
#define fid_suffixinterval_init_root_32 | ( | SI, | |||
ESA | ) | fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint32-1) |
Initialize a fid_Suffixinterval_32 structure corresponding to the root interval of a given enhanced suffix array.
SI | The fid_Suffixinterval_32 structure to be initialized. | |
ESA | A 32 bit enhanced suffix array. |
Definition at line 203 of file suffixarray.h.
#define fid_suffixinterval_init_root_64 | ( | SI, | |||
ESA | ) | fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint64-1) |
Initialize a fid_Suffixinterval_64 structure corresponding to the root interval of a given enhanced suffix array.
SI | The fid_Suffixinterval_64 structure to be initialized. | |
ESA | A 64 bit enhanced suffix array. |
Definition at line 215 of file suffixarray.h.
#define fid_TRAVERSE_CONTINUE ((int)0) |
Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.
Definition at line 220 of file suffixarray.h.
#define fid_TRAVERSE_SKIP ((int)-1) |
Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.
Definition at line 224 of file suffixarray.h.
Referenced by fid_suffixarray_traverse().
typedef int(* fid_Esatraversecallback_32)(const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 pdepth, fid_Error *error, void *user_data) |
Callback function prototype for enhanced suffix array traversals.
For each lcp-interval in an enhanced suffix array passed to fid_suffixarray_traverse(), a callback function is called. That callback function must follow the signature of this type.
esa | The traversed enhanced suffix array. | |
si | An lcp-interval. This is really an lcp-interval, not just a suffix-interval, so there is no need to turn this into an lcp-interval inside the function. | |
pdepth | The depth of the parent lcp-interval. | |
error | Error messages go here. | |
user_data | Pointer to any data, may be used freely within the callback function. |
fid_TRAVERSE_CONTINUE | Continue traversal. | |
fid_TRAVERSE_SKIP | Do not traverse deeper into current subtree, continue with next sibling. |
Definition at line 253 of file suffixarray.h.
typedef int(* fid_Esatraversecallback_64)(const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 pdepth, fid_Error *error, void *user_data) |
Callback function prototype for enhanced suffix array traversals.
Definition at line 262 of file suffixarray.h.
int fid_projectfile_init | ( | fid_Projectfile * | project, | |
const char * | prjbasename, | |||
fid_Error * | error | |||
) |
Initialize a project file data structure.
project | Data structure to be initialized. | |
prjbasename | Base name of the enhanced suffix array project. The function makes a private copy of this string, so be sure to call fid_projectfile_free() when done. | |
error | Error messages go here. |
Definition at line 54 of file projectfile.c.
References fid_Projectfile::dbfiles, fid_Projectfile::endianess_known, fid_DYNARRAY_INIT, fid_error_throw(), fid_filename_create(), fid_OUTOFMEM, fid_UINTSIZE_32, fid_UINTSIZE_64, fid_WORDSIZE, fid_Projectfile::filename, fid_Projectfile::integer_size_in_bits, fid_Projectfile::integer_size_in_bytes, fid_Projectfile::large_lcp_values, fid_Projectfile::len_of_special_prefix, fid_Projectfile::len_of_special_suffix, fid_Projectfile::littleendian, fid_Projectfile::longest, fid_Projectfile::max_branchdepth, fid_Projectfile::num_of_query_sequences, fid_Projectfile::num_of_sequences, fid_Projectfile::num_of_special_ranges, fid_Projectfile::num_of_specials, fid_Projectfile::prefixlength, fid_Projectfile::prjbasename, fid_Projectfile::totallength, and fid_Projectfile::uisize.
Referenced by fid_projectfile_init_from_suffixarray(), and fid_projectfile_parse_from_file().
int fid_projectfile_parse_from_file | ( | fid_Projectfile * | project, | |
const char * | prjbasename, | |||
fid_Error * | error | |||
) |
Parse project file content from file.
This function calls fid_projectfile_init(), so the caller should not do so.
project | Content of the file is parsed into this structure. | |
prjbasename | Base name of the enhanced suffix array project. | |
error | Error messages go here. |
Definition at line 649 of file projectfile.c.
References fid_Mappedfile::content, fid_file_map(), fid_file_unmap(), fid_projectfile_free(), fid_projectfile_init(), fid_Projectfile::filename, and fid_Mappedfile::occupied.
Referenced by fid_suffixarray_load_special().
int fid_projectfile_write | ( | const fid_Projectfile * | project, | |
fid_Error * | error | |||
) |
Write textual representation of project file data structure to file.
A new file is created and the content of project is written to it in textual form.
project | The project file data structure to be written. The filename is taken from fid_Projectfile::filename. | |
error | Error messages go here. |
Definition at line 688 of file projectfile.c.
References fid_Projectfile::dbfiles, fid_DBfiles::dyndata, fid_Projectfile::endianess_known, fid_file_new(), fid_file_unmap(), fid_file_write(), fid_U64FMT, fid_Projectfile::filename, fid_DBfileinfo::inputfile_length, fid_Projectfile::integer_size_in_bits, fid_Projectfile::large_lcp_values, fid_Projectfile::len_of_special_prefix, fid_Projectfile::len_of_special_suffix, fid_Projectfile::littleendian, fid_Projectfile::longest, fid_Projectfile::max_branchdepth, fid_DBfileinfo::name, fid_Projectfile::num_of_query_sequences, fid_Projectfile::num_of_sequences, fid_Projectfile::num_of_special_ranges, fid_Projectfile::num_of_specials, fid_DBfiles::occupied, fid_Projectfile::prefixlength, fid_DBfileinfo::sequence_length, and fid_Projectfile::totallength.
void fid_projectfile_free | ( | fid_Projectfile * | project | ) |
Free memory associated with a project file structure.
project | The structure to be freed. |
Definition at line 755 of file projectfile.c.
References fid_Projectfile::dbfiles, fid_Projectfile::filename, and fid_Projectfile::prjbasename.
Referenced by fid_projectfile_parse_from_file(), and fid_suffixarray_load_special().
void fid_projectfile_dump | ( | const fid_Projectfile * | project, | |
FILE * | stream | |||
) |
Print contents of fid_Projectfile structure to output stream.
project | Structure to be printed out. | |
stream | An output stream to which the structure is printed. If NULL , nothing will be printed. |
Definition at line 775 of file projectfile.c.
References fid_Projectfile::dbfiles, fid_DBfiles::dyndata, fid_U64FMT, fid_Projectfile::filename, fid_DBfileinfo::inputfile_length, fid_Projectfile::integer_size_in_bits, fid_Projectfile::integer_size_in_bytes, fid_Projectfile::large_lcp_values, fid_Projectfile::max_branchdepth, fid_DBfileinfo::name, fid_Projectfile::num_of_query_sequences, fid_Projectfile::num_of_sequences, fid_DBfiles::occupied, fid_Projectfile::prefixlength, fid_Projectfile::prjbasename, fid_DBfileinfo::sequence_length, and fid_Projectfile::totallength.
int fid_suffixarray_load_from_project | ( | fid_Suffixarray * | esa, | |
fid_Projectfile * | project, | |||
fid_Tablerequest | tables, | |||
fid_Error * | error | |||
) |
Load enhanced suffix array based on the given project data structure.
Given a parsed project file, open all requested tables and put these into the passed enhanced suffix array data structure.
esa | The enhanced suffix array to be initialized. | |
project | Structure representing a parsed enhanced suffix array project file. This is required to initialize several data fields of esa and to perform some integrity checks. | |
tables | Bitvector of requested tables. | |
error | Error messages go here. |
Definition at line 67 of file suffixarrayext.c.
References fid_suffixarray_load_from_files(), fid_SWITCH48, fid_Projectfile::prjbasename, and fid_Projectfile::uisize.
Referenced by fid_suffixarray_load_special().
int fid_suffixarray_load_special | ( | fid_Suffixarray * | esa, | |
const char * | prjbasename, | |||
fid_Tablerequest | tables, | |||
fid_Uintsize | uisize_request, | |||
int | ignore_byteorder, | |||
fid_Error * | error | |||
) |
Load enhanced suffix array project and initialize data structures.
Like fid_suffixarray_load(), this function opens and parses a project file, opens all requested tables and puts these into the passed enhanced suffix array data structure. It takes two additional arguments, which, however, are not needed in most cases.
esa | The enhanced suffix array to be initialized. | |
prjbasename | Base filename of the enhanced suffix array project. | |
tables | Bitvector of requested tables. | |
uisize_request | Loading fails if the integer size does not match this value. Pass fid_UINTSIZE_UNDEF to accept any integer size. | |
ignore_byteorder | Ignore byte order as given in the project file if set to True. If the project file does not contain a littleendian line, then this option has no effect. This option is only useful for programs that can either handle binary data in non-native byte order, or that just need to check if all files needed can be read and have certain sizes. | |
error | Error messages go here. |
Definition at line 130 of file suffixarrayext.c.
References fid_Projectfile::endianess_known, fid_error_throw(), fid_projectfile_free(), fid_projectfile_parse_from_file(), fid_suffixarray_load_from_project(), fid_UINTSIZE_32, fid_UINTSIZE_UNDEF, fid_Projectfile::littleendian, and fid_Projectfile::uisize.
Referenced by fid_suffixarray_load().
int fid_suffixarray_load | ( | fid_Suffixarray * | esa, | |
const char * | prjbasename, | |||
fid_Tablerequest | tables, | |||
fid_Error * | error | |||
) |
Load enhanced suffix array project and initialize data structures.
This is arguably the most important and high-level function of the whole library. Given a base filename, open and parse the corresponding project file, then open all requested tables and put these into the passed enhanced suffix array data structure.
esa | The enhanced suffix array to be initialized. | |
prjbasename | Base filename of the enhanced suffix array project. | |
tables | Bitvector of requested tables. | |
error | Error messages go here. |
Definition at line 211 of file suffixarrayext.c.
References fid_suffixarray_load_special(), and fid_UINTSIZE_UNDEF.
int fid_projectfile_init_from_suffixarray | ( | fid_Projectfile * | project, | |
const fid_DBfiles * | dbfiles, | |||
const char * | prjbasename, | |||
const fid_Suffixarray * | esa, | |||
fid_Error * | error | |||
) |
Initialize fid_Projectfile structure from mapped enhanced suffix array.
This function is useful for generating enhanced suffix array projects from parsed sequence data. Generate enhanced suffix array tables first, then call this function to fill the fid_Projectfile data structure from these.
project | The project file structure to be initialized. | |
dbfiles | Names and lengths of the original input sequence files. | |
prjbasename | Base filename of the enhanced suffix array project. | |
esa | Structure that has some enhanced suffix array tables associated. | |
error | Error messages go here. |
Definition at line 235 of file suffixarrayext.c.
References fid_Projectfile::dbfiles, fid_projectfile_init(), fid_SWITCH48, fid_Projectfile::integer_size_in_bytes, fid_Suffixarray::uisize, and fid_Projectfile::uisize.
void fid_suffixarray_init | ( | fid_Suffixarray * | esa, | |
fid_Uintsize | uisize | |||
) |
Initialize enhanced suffix array structure.
esa | The structure to be initialized. | |
uisize | The size of integers in enhanced suffix array tables. This must be fid_UINTSIZE_64 for sequence data longer than 4GB. |
Definition at line 60 of file suffixarray.c.
References fid_Suffixarray::alpha, fid_Mappedfile::content, fid_sequences_init(), fid_SWITCH48, fid_Suffixarray::lcpfile, fid_Suffixarray::llvfile, fid_Suffixarray::num_of_large_lcps, fid_Suffixarray::sequences, fid_Suffixarray::skiptab, fid_Suffixarray::skpfile, fid_Suffixarray::stifile, fid_Suffixarray::stitab, fid_Suffixarray::suffile, fid_Suffixarray::suftab, fid_Suffixarray::uisize, fid_Uint48::v_uint32, fid_Uint48constptr::v_uint32, fid_Uint48::v_uint64, and fid_Uint48constptr::v_uint64.
Referenced by fid_suffixarray_load_from_files().
int fid_suffixarray_load_from_files | ( | fid_Suffixarray * | esa, | |
const char * | basefilename, | |||
fid_Tablerequest | tables, | |||
fid_Uintsize | uisize, | |||
fid_Error * | error | |||
) |
Load enhanced suffix array from disk.
On systems where the madvise() system call is available, the kernel will be advised how to treat the mapped files in hope of improved performance. It is not necessary to call fid_suffixarray_init() before calling this function.
esa | The structure to be filled. | |
basefilename | The base filename of the suffix array tables on disk. | |
tables | Requested tables, online and offline. | |
uisize | When loading files from disk, assume that integers stored in files are of this size. | |
error | Error messages go here. |
Definition at line 262 of file suffixarray.c.
References fid_Suffixarray::alpha, fid_Filenamebuffer::buffer, fid_Filenamebuffer::bufptr, fid_alphabet_init_from_specfile(), fid_filenamebuffer_free(), fid_filenamebuffer_init(), fid_sequences_free(), fid_sequences_map(), fid_suffixarray_init(), fid_suffixarray_realize(), fid_TABLES_OFFLINE_MASK, fid_TABLES_ONLINE_MASK, and fid_Suffixarray::sequences.
Referenced by fid_suffixarray_load_from_project().
int fid_suffixarray_realize | ( | fid_Suffixarray * | esa, | |
fid_Tablerequest | tables, | |||
fid_Error * | error | |||
) |
Update data fields in fid_Suffixarray structure according to mapped files.
All data fields whose correct values can be derived from the sizes of the associated mapped files are updated by this function (e.g., fid_Suffixarray::num_of_large_lcps can be derived from the size of the file associated with fid_Suffixarray::llvfile). No function should set these fields manually; just call this function instead. A table request can be defined to ask the function to look at only some of the files and update only the values derived from these. Note that all online tables in fid_Suffixarray::sequences can also be updated if the request tables
asks for it.
esa | The structure to be updated. | |
tables | Update only values derivable from the tables specified in this request. To update all values, pass fid_TABLES_ALL, or fid_TABLES_OFFLINE to update only values derivable from the index tables. | |
error | Error messages go here. |
Definition at line 366 of file suffixarray.c.
References fid_sequences_realize(), fid_SWITCH48, fid_TABLES_ONLINE_MASK, fid_Suffixarray::sequences, and fid_Suffixarray::uisize.
Referenced by fid_suffixarray_free(), and fid_suffixarray_load_from_files().
int fid_suffixarray_traverse_32 | ( | const fid_Suffixarray * | esa, | |
fid_Esatraversecallback_32 | callback, | |||
void * | user_data, | |||
fid_Error * | error | |||
) |
32 bit version of fid_suffixarray_traverse().
Generic depth-first traversal of an enhanced suffix array.
This function calls a callback function for each lcp-interval in an enhanced suffix array. The callback function is free to limit the maximum depth or to stop the traversal by returning special return values. See fid_Esatraversecallback_32 and fid_Esatraversecallback_64 for more details.
While it might not be the most efficient way to perform a depth-first traversal by means of a callback-driven generic function, it is certainly a very convenient way to try out some ideas. Once an algorithm works when using this function (leading to a reference implementation), it can be rewritten with the traversal inlined to speed it up.
esa | An enhanced suffix array to be traversed. | |
callback | A callback function that processes lcp-intervals. | |
user_data | Pointer to data used by the callback function. | |
error | Error messages go here. |
int fid_suffixarray_traverse_64 | ( | const fid_Suffixarray * | esa, | |
fid_Esatraversecallback_64 | callback, | |||
void * | user_data, | |||
fid_Error * | error | |||
) |
64 bit version of fid_suffixarray_traverse().
Generic depth-first traversal of an enhanced suffix array.
This function calls a callback function for each lcp-interval in an enhanced suffix array. The callback function is free to limit the maximum depth or to stop the traversal by returning special return values. See fid_Esatraversecallback_32 and fid_Esatraversecallback_64 for more details.
While it might not be the most efficient way to perform a depth-first traversal by means of a callback-driven generic function, it is certainly a very convenient way to try out some ideas. Once an algorithm works when using this function (leading to a reference implementation), it can be rewritten with the traversal inlined to speed it up.
esa | An enhanced suffix array to be traversed. | |
callback | A callback function that processes lcp-intervals. | |
user_data | Pointer to data used by the callback function. | |
error | Error messages go here. |
fid_Uint32 fid_suffixarray_get_intervals_32 | ( | const fid_Suffixarray * | esa, | |
const fid_Suffixinterval_32 * | si, | |||
fid_Uint32 * | intervals | |||
) |
32 bit version of fid_suffixarray_get_intervals().
Return all suffix-intervals embedded in a suffix-interval.
Given a suffix-interval, determine the suffix indices of all embedded suffix-intervals. The result is written to intervals
. Let s
be a symbol, then the embedded suffix-interval for symbol s
starts at suffix intervals
[s], and its length is intervals
[s+1]-intervals[s]. If the length is 0, then the interval does not exist.
esa | The enhanced suffix array the intervals are to be searched in. | |
si | The suffix-interval whose child-intervals are to be determined. | |
intervals | Array of suffix indices. This array must have fid_INTERVALBUFFERSIZE entries, all elements will be set. |
si
. fid_Uint64 fid_suffixarray_get_intervals_64 | ( | const fid_Suffixarray * | esa, | |
const fid_Suffixinterval_64 * | si, | |||
fid_Uint64 * | intervals | |||
) |
64 bit version of fid_suffixarray_get_intervals().
Return all suffix-intervals embedded in a suffix-interval.
Given a suffix-interval, determine the suffix indices of all embedded suffix-intervals. The result is written to intervals
. Let s
be a symbol, then the embedded suffix-interval for symbol s
starts at suffix intervals
[s], and its length is intervals
[s+1]-intervals[s]. If the length is 0, then the interval does not exist.
esa | The enhanced suffix array the intervals are to be searched in. | |
si | The suffix-interval whose child-intervals are to be determined. | |
intervals | Array of suffix indices. This array must have fid_INTERVALBUFFERSIZE entries, all elements will be set. |
si
. int fid_suffixarray_find_embedded_interval_32 | ( | const fid_Suffixarray * | esa, | |
fid_Suffixinterval_32 * | si, | |||
fid_Symbol | symbol | |||
) |
32 bit version of fid_suffixarray_find_embedded_interval().
Determine the embedded suffix-interval for a given symbol.
The child-interval is returned in argument si
, i.e., si
will be modified if the requested child-interval exists. If no such interval exists, then si
will remain unchanged.
esa | The enhanced suffix array the interval is to be searched in. | |
si | The suffix-interval whose child-interval is to be determined. | |
symbol | The embedded child-interval shall be the suffix-interval for the longest common prefix of si concatenated with this symbol. |
int fid_suffixarray_find_embedded_interval_64 | ( | const fid_Suffixarray * | esa, | |
fid_Suffixinterval_64 * | si, | |||
fid_Symbol | symbol | |||
) |
64 bit version of fid_suffixarray_find_embedded_interval().
Determine the embedded suffix-interval for a given symbol.
The child-interval is returned in argument si
, i.e., si
will be modified if the requested child-interval exists. If no such interval exists, then si
will remain unchanged.
esa | The enhanced suffix array the interval is to be searched in. | |
si | The suffix-interval whose child-interval is to be determined. | |
symbol | The embedded child-interval shall be the suffix-interval for the longest common prefix of si concatenated with this symbol. |
int fid_suffixarray_extend_interval_32 | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
fid_Uint32 | plen, | |||
fid_Suffixinterval_32 * | si | |||
) |
32 bit version of fid_suffixarray_extend_interval().
Extend the common prefix of a suffix-interval by some pattern.
This function can be used to continue searching inside a suffix-interval by prolonging the common prefix of length fid_Suffixinterval::depth by some string, possibly further narrowing down the suffix-interval.
esa | An enhanced suffix array to search in. | |
pattern | Extension of the common prefix of the given suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | A valid suffix-interval to start with, will be modified due to the extension with pattern . If the common prefix cannot be extended by pattern , then the content of this structure will be undefined. |
si
set to the requested suffix-interval, or -1 if the suffix-interval could not be extended by pattern
. int fid_suffixarray_extend_interval_64 | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
fid_Uint64 | plen, | |||
fid_Suffixinterval_64 * | si | |||
) |
64 bit version of fid_suffixarray_extend_interval().
Extend the common prefix of a suffix-interval by some pattern.
This function can be used to continue searching inside a suffix-interval by prolonging the common prefix of length fid_Suffixinterval::depth by some string, possibly further narrowing down the suffix-interval.
esa | An enhanced suffix array to search in. | |
pattern | Extension of the common prefix of the given suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | A valid suffix-interval to start with, will be modified due to the extension with pattern . If the common prefix cannot be extended by pattern , then the content of this structure will be undefined. |
si
set to the requested suffix-interval, or -1 if the suffix-interval could not be extended by pattern
. int fid_suffixarray_find_interval_32 | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
fid_Uint32 | plen, | |||
fid_Suffixinterval_32 * | si | |||
) |
32 bit version of fid_suffixarray_find_interval().
Find suffix-interval with suffixes sharing a common given prefix.
Simply put, this function returns all occurrences of pattern
in the suffix array as suffix-interval (which is not necessarily an lcp-interval).
esa | An enhanced suffix array to search in. | |
pattern | Common prefix of searched suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | The suffix-interval corresponding to pattern if it exists. |
si
set to the requested suffix-interval, or -1 if pattern
does not occur in the text. int fid_suffixarray_find_interval_64 | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
fid_Uint64 | plen, | |||
fid_Suffixinterval_64 * | si | |||
) |
64 bit version of fid_suffixarray_find_interval().
Find suffix-interval with suffixes sharing a common given prefix.
Simply put, this function returns all occurrences of pattern
in the suffix array as suffix-interval (which is not necessarily an lcp-interval).
esa | An enhanced suffix array to search in. | |
pattern | Common prefix of searched suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | The suffix-interval corresponding to pattern if it exists. |
si
set to the requested suffix-interval, or -1 if pattern
does not occur in the text. fid_Uint32 fid_suffixarray_find_large_lcp_32 | ( | const fid_Suffixarray * | esa, | |
fid_Uint32 | suffix | |||
) |
32 bit version of fid_suffixarray_find_large_lcp().
Find large LCP value in LCP exception table.
esa | An enhanced suffix array. | |
suffix | The suffix index whose large LCP value should be retrieved. |
fid_Uint64 fid_suffixarray_find_large_lcp_64 | ( | const fid_Suffixarray * | esa, | |
fid_Uint64 | suffix | |||
) |
64 bit version of fid_suffixarray_find_large_lcp().
Find large LCP value in LCP exception table.
esa | An enhanced suffix array. | |
suffix | The suffix index whose large LCP value should be retrieved. |
fid_Uint32 fid_suffixarray_suffix_length_32 | ( | const fid_Suffixarray * | esa, | |
fid_Uint32 | suffix | |||
) |
32 bit version of fid_suffixarray_suffix_length().
Determine length of suffix up to next sequence separator.
It is easy to determine the length of a suffix in an enhanced suffix array when there is only one sequence inside. This function also takes sequence separators into account and returns the length of the sequence in the given suffix up to the next sequence separator, or to the end of the input.
esa | An enhanced suffix array. | |
suffix | The suffix whose length is to be found. |
fid_Uint64 fid_suffixarray_suffix_length_64 | ( | const fid_Suffixarray * | esa, | |
fid_Uint64 | suffix | |||
) |
64 bit version of fid_suffixarray_suffix_length().
Determine length of suffix up to next sequence separator.
It is easy to determine the length of a suffix in an enhanced suffix array when there is only one sequence inside. This function also takes sequence separators into account and returns the length of the sequence in the given suffix up to the next sequence separator, or to the end of the input.
esa | An enhanced suffix array. | |
suffix | The suffix whose length is to be found. |
void fid_suffixarray_compute_distribution_32 | ( | fid_Suffixarray * | esa | ) |
32 bit version of fid_suffixarray_compute_distribution().
Compute character distribution of given enhanced suffix array.
The fid_Sequences::distribution of fid_Suffixarray::sequences will be filled by this function, such that each entry s contains the relative frequency of symbol s.
The length of the sequence is corrected by the number of sequence separators present in the input sequence, so these will not be taken into account. Note that the entry for wildcards will be filled twice, once at index fid_WILDCARD, and once at the first index after the last normal symbol. The entries for separators and undefined characters will both be set to 0.0.
Note that this function is faster than fid_sequences_compute_distribution() for it makes use of the suffix array.
esa | The enhanced suffix array whose character distribution should be determined. |
void fid_suffixarray_compute_distribution_64 | ( | fid_Suffixarray * | esa | ) |
64 bit version of fid_suffixarray_compute_distribution().
Compute character distribution of given enhanced suffix array.
The fid_Sequences::distribution of fid_Suffixarray::sequences will be filled by this function, such that each entry s contains the relative frequency of symbol s.
The length of the sequence is corrected by the number of sequence separators present in the input sequence, so these will not be taken into account. Note that the entry for wildcards will be filled twice, once at index fid_WILDCARD, and once at the first index after the last normal symbol. The entries for separators and undefined characters will both be set to 0.0.
Note that this function is faster than fid_sequences_compute_distribution() for it makes use of the suffix array.
esa | The enhanced suffix array whose character distribution should be determined. |
void fid_suffixarray_free | ( | fid_Suffixarray * | esa | ) |
Free enhanced suffix array structure.
All associated files will be closed.
esa | The structure to be freed. |
Definition at line 320 of file suffixarray.c.
References fid_file_unmap(), fid_sequences_free(), fid_suffixarray_realize(), fid_TABLES_OFFLINE, fid_Suffixarray::lcpfile, fid_Suffixarray::llvfile, fid_Suffixarray::sequences, fid_Suffixarray::skpfile, fid_Suffixarray::stifile, and fid_Suffixarray::suffile.
void fid_suffixarray_dump | ( | const fid_Suffixarray * | esa, | |
FILE * | stream | |||
) |
Show information about an enhanced suffix array.
esa | The enhanced suffix array structure to be printed out. | |
stream | An output stream to which the structure is printed. If NULL , nothing will be printed. |
Definition at line 391 of file suffixarray.c.
References fid_Sequences::alpha, fid_Suffixarray::alpha, fid_Mappedfile::content, fid_Sequences::desfile, fid_PRINT48, fid_SWITCH48, fid_U64FMT, fid_UINTSIZE_32, fid_Suffixarray::lcpfile, fid_Suffixarray::llvfile, fid_Alphabet::num_of_chars, fid_Sequences::num_of_sequences, fid_Alphabet::num_of_syms, fid_Sequences::oisfile, fid_Suffixarray::sequences, fid_Suffixarray::skpfile, fid_Sequences::sspfile, fid_Suffixarray::stifile, fid_Suffixarray::suffile, fid_Sequences::tisfile, and fid_Suffixarray::uisize.
void fid_suffixarray_dump_intervals_32 | ( | const fid_Suffixarray * | esa, | |
const fid_Uint32 | intervals[fid_INTERVALBUFFERSIZE], | |||
FILE * | stream | |||
) |
32 bit version of fid_suffixarray_dump_intervals().
Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals().
Each printed suffix-interval is preceded by its distinguishing character. Empty intervals are left out and appear in a summary below the list of dumped intervals.
esa | The enhanced suffix array the intervals belong to. | |
intervals | An array of suffix-intervals encoded by integers. | |
stream | An output stream to which the suffix-intervals are printed. If NULL, nothing will be printed. |
void fid_suffixarray_dump_intervals_64 | ( | const fid_Suffixarray * | esa, | |
const fid_Uint64 | intervals[fid_INTERVALBUFFERSIZE], | |||
FILE * | stream | |||
) |
64 bit version of fid_suffixarray_dump_intervals().
Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals().
Each printed suffix-interval is preceded by its distinguishing character. Empty intervals are left out and appear in a summary below the list of dumped intervals.
esa | The enhanced suffix array the intervals belong to. | |
intervals | An array of suffix-intervals encoded by integers. | |
stream | An output stream to which the suffix-intervals are printed. If NULL, nothing will be printed. |
void fid_suffixarray_dump_suffix_32 | ( | const fid_Suffixarray * | esa, | |
fid_Uint32 | suffix, | |||
fid_Uint32 | length, | |||
FILE * | stream | |||
) |
32 bit version of fid_suffixarray_dump_suffix().
Dump prefix of a suffix to given stream.
This function will not print more than length
characters, and will stop at sequence separators.
esa | The enhanced suffix array containing the suffix to be dumped. | |
suffix | The index of the suffix to be dumped. | |
length | Maximum number of characters to be printed. Dump stops at end of the sequence. | |
stream | An output stream to which the suffix is printed. If NULL, nothing will be printed. |
void fid_suffixarray_dump_suffix_64 | ( | const fid_Suffixarray * | esa, | |
fid_Uint64 | suffix, | |||
fid_Uint64 | length, | |||
FILE * | stream | |||
) |
64 bit version of fid_suffixarray_dump_suffix().
Dump prefix of a suffix to given stream.
This function will not print more than length
characters, and will stop at sequence separators.
esa | The enhanced suffix array containing the suffix to be dumped. | |
suffix | The index of the suffix to be dumped. | |
length | Maximum number of characters to be printed. Dump stops at end of the sequence. | |
stream | An output stream to which the suffix is printed. If NULL, nothing will be printed. |
void fid_suffixarray_print_32 | ( | const fid_Suffixarray * | esa, | |
fid_Tablerequest | request, | |||
FILE * | stream | |||
) |
32 bit version of fid_suffixarray_print().
Print suffix array tables to given stream.
Suffixes are not printed to full length since this would not be practical even for tiny suffix arrays. Instead, only the first few characters corresponding to a suffix' LCP-value are printed. Thus, when printing the suffixes, the LCP-table is usually also of interest.
esa | The enhanced suffix array to be printed. | |
request | Which tables to print. Note that suffixes themselves are printed by adding fid_TABLE_TIS to the request, in which case the LCP-table must be mapped, too (even if it is not to be printed). | |
stream | An output stream to which the tables are printed. If NULL, nothing will be printed. |
void fid_suffixarray_print_64 | ( | const fid_Suffixarray * | esa, | |
fid_Tablerequest | request, | |||
FILE * | stream | |||
) |
64 bit version of fid_suffixarray_print().
Print suffix array tables to given stream.
Suffixes are not printed to full length since this would not be practical even for tiny suffix arrays. Instead, only the first few characters corresponding to a suffix' LCP-value are printed. Thus, when printing the suffixes, the LCP-table is usually also of interest.
esa | The enhanced suffix array to be printed. | |
request | Which tables to print. Note that suffixes themselves are printed by adding fid_TABLE_TIS to the request, in which case the LCP-table must be mapped, too (even if it is not to be printed). | |
stream | An output stream to which the tables are printed. If NULL, nothing will be printed. |
fid_Uint32 fid_suffixinterval_lcpvalue_32 | ( | const fid_Suffixinterval_32 * | si, | |
const fid_Suffixarray * | esa | |||
) |
32 bit version of fid_suffixinterval_lcpvalue().
Determine LCP value of suffix-interval.
This function computes the length of the longest common prefix of the suffixes represented by a given suffix-interval by sequence comparisons. Setting field fid_Suffixinterval::depth of si
to the return value of this function yields an lcp-interval. Note that longest common prefixes never include special symbols like wildcards or sequence separators since these symbols are not equal to others, not even to themselves.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
si
, or fid_SUFFIXINTERVAL_SINGLETON if si
is a singleton. fid_Uint64 fid_suffixinterval_lcpvalue_64 | ( | const fid_Suffixinterval_64 * | si, | |
const fid_Suffixarray * | esa | |||
) |
64 bit version of fid_suffixinterval_lcpvalue().
Determine LCP value of suffix-interval.
This function computes the length of the longest common prefix of the suffixes represented by a given suffix-interval by sequence comparisons. Setting field fid_Suffixinterval::depth of si
to the return value of this function yields an lcp-interval. Note that longest common prefixes never include special symbols like wildcards or sequence separators since these symbols are not equal to others, not even to themselves.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
si
, or fid_SUFFIXINTERVAL_SINGLETON if si
is a singleton. void fid_suffixinterval_to_lcpinterval_32 | ( | fid_Suffixinterval_32 * | si, | |
const fid_Suffixarray * | esa | |||
) |
32 bit version of fid_suffixinterval_to_lcpinterval().
Turn suffix-interval into lcp-interval.
This function determines the LCP value of the given suffix-interval and assigns that value to its fid_Suffixinterval::depth field.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
void fid_suffixinterval_to_lcpinterval_64 | ( | fid_Suffixinterval_64 * | si, | |
const fid_Suffixarray * | esa | |||
) |
64 bit version of fid_suffixinterval_to_lcpinterval().
Turn suffix-interval into lcp-interval.
This function determines the LCP value of the given suffix-interval and assigns that value to its fid_Suffixinterval::depth field.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
void fid_suffixinterval_find_right_32 | ( | fid_Suffixinterval_32 * | si, | |
const fid_Suffixarray * | esa | |||
) |
32 bit version of fid_suffixinterval_find_right().
Find right boundary of suffix-interval from left boundary and depth.
This function assumes that the left boundary and the depth of the passed suffix-interval are correctly set up. From these two values the right boundary is determined and assigned to fid_Suffixinterval::right of si
.
si | An incomplete suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
void fid_suffixinterval_find_right_64 | ( | fid_Suffixinterval_64 * | si, | |
const fid_Suffixarray * | esa | |||
) |
64 bit version of fid_suffixinterval_find_right().
Find right boundary of suffix-interval from left boundary and depth.
This function assumes that the left boundary and the depth of the passed suffix-interval are correctly set up. From these two values the right boundary is determined and assigned to fid_Suffixinterval::right of si
.
si | An incomplete suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
fid_Uint32 fid_suffixinterval_homepos_32 | ( | const fid_Suffixinterval_32 * | si, | |
const fid_Suffixarray * | esa | |||
) |
32 bit version of fid_suffixinterval_homepos().
Determine home-position of given lcp-interval.
si | A suffix-interval; strictly speaking, home-positions are defined for lcp-intervals only, but since home-positions are derived just from interval boundaries, you may safely pass in suffix-intervals. | |
esa | An enhanced suffix array. |
si
, i.e., either its left or right boundary. fid_Uint64 fid_suffixinterval_homepos_64 | ( | const fid_Suffixinterval_64 * | si, | |
const fid_Suffixarray * | esa | |||
) |
64 bit version of fid_suffixinterval_homepos().
Determine home-position of given lcp-interval.
si | A suffix-interval; strictly speaking, home-positions are defined for lcp-intervals only, but since home-positions are derived just from interval boundaries, you may safely pass in suffix-intervals. | |
esa | An enhanced suffix array. |
si
, i.e., either its left or right boundary. void fid_suffixinterval_dump_32 | ( | const fid_Suffixinterval_32 * | si, | |
const fid_Suffixarray * | esa, | |||
FILE * | stream | |||
) |
32 bit version of fid_suffixinterval_dump().
Dump suffix-interval to given stream.
si | The suffix-interval to be dumped. | |
esa | The enhanced suffix array containing the suffix-interval. | |
stream | An output stream to which the suffix-interval is printed. If NULL, nothing will be printed. |
void fid_suffixinterval_dump_64 | ( | const fid_Suffixinterval_64 * | si, | |
const fid_Suffixarray * | esa, | |||
FILE * | stream | |||
) |
64 bit version of fid_suffixinterval_dump().
Dump suffix-interval to given stream.
si | The suffix-interval to be dumped. | |
esa | The enhanced suffix array containing the suffix-interval. | |
stream | An output stream to which the suffix-interval is printed. If NULL, nothing will be printed. |
char* fid_tablerequest_to_string | ( | fid_Tablerequest | request, | |
const char * | sep | |||
) |
Construct string from table request.
request | A table request. | |
sep | Separator string that is to be inserted between table names. |
NULL
pointer in an out-of-memory condition. Definition at line 499 of file suffixarray.c.
References fid_TABLE_TIS, fid_TABLES_ALL, fid_TABLES_OFFLINE, and fid_TABLES_ONLINE.