libfid.32
and libfid.64
via preprocessor to obtain a simple simulation of C++ templates. The symbols defined in libfid.32
and libfid.64
can be undefined again by including libfid.undef
. This means you can do something like:
    #include <libfid.h>
    /* switch to 32 bit interface */
    #include <libfid.32>
    /* use generic names */
    #include "mygenericcode.h"
    #include <libfid.undef>
    /* switch to 64 bit interface */
    #include <libfid.64>
    /* use generic names */
    #include "mygenericcode.h"
    #include <libfid.undef>
    /* code below cannot use _32/_64-free versions anymore */
If you are using C++, however, consider using the traits classes from libfidxx.h
in conjunction with real C++ templates (experimental).
fid_gen48.pl
to generate template support macros. Data Structures | |
struct | UINT |
Type of unsigned integers in template code. More... | |
struct | fid_Sequenceiterfun |
struct | fid_Suffixinterval |
struct | fid_Esatraversecallback |
Defines | |
#define | UINT |
Name of type of unsigned integers in template code. | |
#define | UINTFMT |
Format string for printing values of type UINT in template code. | |
#define | VU |
Name of correct member of fid_Uint48 and fid_Uint48constptr unions in template code. | |
Functions | |
UINT | fid_sequences_offset_to_index (const fid_Sequences *seqs, UINT offset) |
Determine sequence index from sequence offset. | |
void | fid_sequences_index_to_boundaries (const fid_Sequences *seqs, UINT seqindex, UINT *left, UINT *right) |
Determine sequence boundaries from sequence index. | |
void | fid_sequences_offset_to_boundaries (const fid_Sequences *seqs, UINT offset, UINT *left, UINT *right) |
Determine sequence boundaries from sequence offset. | |
int | fid_sequences_iterate_range (const fid_Sequences *seqs, UINT from, UINT to, const fid_Sequenceiterfun iterfun, void *user_data) |
Call a callback function for each sequence in given range. | |
int | fid_sequences_iterate (const fid_Sequences *seqs, const fid_Sequenceiterfun iterfun, void *user_data) |
Call a callback function for each sequence. | |
void | fid_sequences_dump_range (const fid_Symbol *seq, UINT length, const fid_Alphabet *alpha, const char *str, int stop_at_separator, FILE *stream) |
Print piece of sequence to stream. | |
int | fid_suffixarray_traverse (const fid_Suffixarray *esa, fid_Esatraversecallback callback, void *user_data, fid_Error *error) |
Generic depth-first traversal of an enhanced suffix array. | |
UINT | fid_suffixarray_get_intervals (const fid_Suffixarray *esa, const fid_Suffixinterval *si, UINT *intervals) |
Return all suffix-intervals embedded in a suffix-interval. | |
int | fid_suffixarray_find_embedded_interval (const fid_Suffixarray *esa, fid_Suffixinterval *si, fid_Symbol symbol) |
Determine the embedded suffix-interval for a given symbol. | |
int | fid_suffixarray_extend_interval (const fid_Suffixarray *esa, const fid_Symbol *pattern, UINT plen, fid_Suffixinterval *si) |
Extend the common prefix of a suffix-interval by some pattern. | |
int | fid_suffixarray_find_interval (const fid_Suffixarray *esa, const fid_Symbol *pattern, UINT plen, fid_Suffixinterval *si) |
Find suffix-interval with suffixes sharing a common given prefix. | |
UINT | fid_suffixarray_find_large_lcp (const fid_Suffixarray *esa, UINT suffix) |
Find large LCP value in LCP exception table. | |
UINT | fid_suffixarray_suffix_length (const fid_Suffixarray *esa, UINT suffix) |
Determine length of suffix up to next sequence separator. | |
void | fid_suffixarray_compute_distribution (fid_Suffixarray *esa) |
Compute character distribution of given enhanced suffix array. | |
void | fid_suffixarray_dump_intervals (const fid_Suffixarray *esa, const UINT intervals[fid_INTERVALBUFFERSIZE], FILE *stream) |
Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals(). | |
void | fid_suffixarray_dump_suffix (const fid_Suffixarray *esa, UINT suffix, UINT length, FILE *stream) |
Dump prefix of a suffix to given stream. | |
void | fid_suffixarray_print (const fid_Suffixarray *esa, fid_Tablerequest request, FILE *stream) |
Print suffix array tables to given stream. | |
UINT | fid_suffixinterval_lcpvalue (const fid_Suffixinterval *si, const fid_Suffixarray *esa) |
Determine LCP value of suffix-interval. | |
void | fid_suffixinterval_to_lcpinterval (fid_Suffixinterval *si, const fid_Suffixarray *esa) |
Turn suffix-interval into lcp-interval. | |
void | fid_suffixinterval_find_right (fid_Suffixinterval *si, const fid_Suffixarray *esa) |
Find right boundary of suffix-interval from left boundary and depth. | |
UINT | fid_suffixinterval_homepos (const fid_Suffixinterval *si, const fid_Suffixarray *esa) |
Determine home-position of given lcp-interval. | |
void | fid_suffixinterval_dump (const fid_Suffixinterval *si, const fid_Suffixarray *esa, FILE *stream) |
Dump suffix-interval to given stream. |
#define UINT |
#define UINTFMT |
Format string for printing values of type UINT in template code.
When writing template code for 32 and 64 bit integers, use this macro to print a value of type UINT via printf()
. The format string defined by this macro is a simple one; it is not possible to add modifiers to the format string.
Example:
Definition at line 79 of file names_32_64.txt.
Referenced by fid_suffixarray_dump_intervals(), and fid_suffixinterval_dump().
#define VU |
Name of correct member of fid_Uint48 and fid_Uint48constptr unions in template code.
When writing template code for 32 and 64 bit integers, use this macro to access the 32 or 64 bit part of a fid_Uint48 and fid_Uint48constptr value.
Example:
fid_Uint48 myvalue; myvalue.VU = 42; printf("My value is " UINTFMT "\n", myvalue.VU);
Definition at line 98 of file names_32_64.txt.
UINT fid_sequences_offset_to_index | ( | const fid_Sequences * | seqs, | |
UINT | offset | |||
) |
Determine sequence index from sequence offset.
It is an error to pass the position of a sequence separator as offset.
seqs | Set of sequences. | |
offset | Offset into the concatenated set of sequences. |
Definition at line 779 of file sequences.templ.c.
References fid_Sequences::num_of_sequences, and fid_Sequences::separators.
Referenced by fid_sequences_offset_to_boundaries().
void fid_sequences_index_to_boundaries | ( | const fid_Sequences * | seqs, | |
UINT | seqindex, | |||
UINT * | left, | |||
UINT * | right | |||
) |
Determine sequence boundaries from sequence index.
seqs | Set of sequences. | |
seqindex | Sequence index; the first sequence is identified by 0. | |
left,right | The sequence boundaries will be written to these integers. Note that sequence separators are excluded from the returned sequence. |
Definition at line 837 of file sequences.templ.c.
References fid_Sequences::num_of_sequences, fid_Sequences::separators, and fid_Sequences::total_length.
Referenced by fid_sequences_offset_to_boundaries().
void fid_sequences_offset_to_boundaries | ( | const fid_Sequences * | seqs, | |
UINT | offset, | |||
UINT * | left, | |||
UINT * | right | |||
) |
Determine sequence boundaries from sequence offset.
This function performs a binary search to find the sequence index first, so use fid_sequences_index_to_boundaries() instead if you already know the sequence index. It is an error to pass the position of a sequence separator as offset.
seqs | Set of sequences. | |
offset | Offset into the concatenated set of sequences. | |
left,right | The sequence boundaries will be written to these integers. Note that sequence separators are excluded from the returned sequence. |
Definition at line 886 of file sequences.templ.c.
References fid_sequences_index_to_boundaries(), and fid_sequences_offset_to_index().
Referenced by fid_suffixarray_suffix_length().
int fid_sequences_iterate_range | ( | const fid_Sequences * | seqs, | |
UINT | from, | |||
UINT | to, | |||
const fid_Sequenceiterfun | iterfun, | |||
void * | user_data | |||
) |
Call a callback function for each sequence in given range.
For each sequence stored in seqs
in the range specified by sequence indices from
and to
, call the callback function iterfun
. The iteration can be interrupted by the callback function returning a non-zero value.
seqs | The sequences to be iterated over. | |
from,to | The range of sequences to be processed. These values must be valid, i.e., from must not be greater than to , and neither of them may be greater than fid_Sequences::num_of_sequences-1; behavior is undefined in these cases. | |
iterfun | The callback function. | |
user_data | Arbitrary pointer passed through to iterfun . |
iterfun
otherwise. Definition at line 916 of file sequences.templ.c.
References fid_Mappedfile::content, fid_Sequences::num_of_sequences, fid_Sequences::separators, fid_Sequences::tisfile, and fid_Sequences::total_length.
Referenced by fid_sequences_iterate().
int fid_sequences_iterate | ( | const fid_Sequences * | seqs, | |
const fid_Sequenceiterfun | iterfun, | |||
void * | user_data | |||
) |
Call a callback function for each sequence.
For each sequence stored in seqs
, call the callback function iterfun
. The iteration can be interrupted by the callback function returning a non-zero value.
seqs | The sequences to be iterated over. | |
iterfun | The callback function. | |
user_data | Arbitrary pointer passed through to iterfun . |
iterfun
otherwise. Definition at line 1008 of file sequences.templ.c.
References fid_sequences_iterate_range(), fid_Sequences::num_of_sequences, and fid_Sequences::separators.
void fid_sequences_dump_range | ( | const fid_Symbol * | seq, | |
UINT | length, | |||
const fid_Alphabet * | alpha, | |||
const char * | str, | |||
int | stop_at_separator, | |||
FILE * | stream | |||
) |
Print piece of sequence to stream.
seq | Some binary encoded sequence of symbols. | |
length | The number of symbols to be read from seq and printed to stream . | |
alpha | Alphabet used to transform symbols to printable characters. | |
str | If not NULL , then print this ASCII string in front of the transformed sequence (useful to distinguish multiple sequences). | |
stop_at_separator | If true, then stop printing when a sequence separator is encountered. If false, print some special character for each sequence separator encountered and continue. | |
stream | An output stream to which the sequence is printed. If NULL , nothing will be printed. |
Definition at line 1044 of file sequences.templ.c.
References fid_PRINT_SYMBOL, and fid_SEPARATOR.
Referenced by fid_suffixarray_dump_suffix().
int fid_suffixarray_traverse | ( | const fid_Suffixarray * | esa, | |
fid_Esatraversecallback | callback, | |||
void * | user_data, | |||
fid_Error * | error | |||
) |
Generic depth-first traversal of an enhanced suffix array.
This function calls a callback function for each lcp-interval in an enhanced suffix array. The callback function is free to limit the maximum depth or to stop the traversal by returning special return values. See fid_Esatraversecallback_32 and fid_Esatraversecallback_64 for more details.
While it might not be the most efficient way to perform a depth-first traversal by means of a callback-driven generic function, it is for sure a very convenient one to try out some ideas. Once an algorithm works when using this function (leading to a reference implementation), it can be rewritten with the traversal inlined to speed it up.
esa | An enhanced suffix array to be traversed. | |
callback | A callback function that processes lcp-intervals. | |
user_data | Pointer to data used by the callback function. | |
error | Error messages go here. |
Definition at line 156 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_DYNARRAY_FREE, fid_DYNARRAY_POP, fid_DYNARRAY_PUSH, fid_DYNARRAY_TOPELEM, fid_OUTOFMEM, fid_REGULARSYMBOL, fid_suffixarray_get_intervals(), fid_suffixinterval_init, fid_suffixinterval_to_lcpinterval(), fid_TRAVERSE_SKIP, and fid_Alphabet::num_of_syms.
UINT fid_suffixarray_get_intervals | ( | const fid_Suffixarray * | esa, | |
const fid_Suffixinterval * | si, | |||
UINT * | intervals | |||
) |
Return all suffix-intervals embedded in a suffix-interval.
Given a suffix-interval, determine the suffix indices of all embedded suffix-intervals. The result is written to intervals
. Let s
be a symbol, then the embedded suffix-interval for symbol s
starts at suffix intervals
[s], and its length is intervals
[s+1]-intervals[s]. If the length is 0, then the interval does not exist.
esa | The enhanced suffix array the intervals are to be searched in. | |
si | The suffix-interval whose child-intervals are to be determined. | |
intervals | Array of suffix indices. This array must have fid_INTERVALBUFFERSIZE entries, all elements will be set. |
si
. Definition at line 492 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_Mappedfile::content, fid_READ_SYMBOL, fid_REGULARSYMBOL, fid_SPECIALSYMBOL, fid_Alphabet::num_of_syms, fid_Suffixarray::sequences, fid_Suffixarray::suffile, fid_Suffixarray::suftab, fid_Sequences::tisfile, and fid_Sequences::total_length.
Referenced by fid_suffixarray_compute_distribution(), and fid_suffixarray_traverse().
int fid_suffixarray_find_embedded_interval | ( | const fid_Suffixarray * | esa, | |
fid_Suffixinterval * | si, | |||
fid_Symbol | symbol | |||
) |
Determine the embedded suffix-interval for a given symbol.
The child-interval is returned in argument si
, i.e., si
will be modified if the requested child-interval exists. If no such interval exists, then si
will remain unchanged.
esa | The enhanced suffix array the interval is to be searched in. | |
si | The suffix-interval whose child-interval is to be determined. | |
symbol | The embedded child-interval shall be the suffix-interval for the longest common prefix of si concatenated with this symbol. |
Definition at line 614 of file suffixarray.templ.c.
References fid_Mappedfile::content, fid_READ_SYMBOL, fid_REGULARSYMBOL, fid_SPECIALSYMBOL, fid_Suffixarray::sequences, fid_Suffixarray::suftab, fid_Sequences::tisfile, and fid_Sequences::total_length.
Referenced by fid_suffixarray_extend_interval().
int fid_suffixarray_extend_interval | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
UINT | plen, | |||
fid_Suffixinterval * | si | |||
) |
Extend the common prefix of a suffix-interval by some pattern.
This function can be used to continue searching inside a suffix-interval by prolonging the common prefix of length fid_Suffixinterval::depth by some string, possibly further narrowing down the suffix-interval.
esa | An enhanced suffix array to search in. | |
pattern | Extension of the common prefix of the given suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | A valid suffix-interval to start with, will be modified due to the extension with pattern . If the common prefix cannot be extended by pattern , then the content of this structure will be undefined. |
si
set to the requested suffix-interval, or -1 if the suffix-interval could not be extended by pattern
. Definition at line 777 of file suffixarray.templ.c.
References fid_suffixarray_find_embedded_interval(), fid_Suffixarray::sequences, and fid_Sequences::total_length.
Referenced by fid_suffixarray_find_interval().
int fid_suffixarray_find_interval | ( | const fid_Suffixarray * | esa, | |
const fid_Symbol * | pattern, | |||
UINT | plen, | |||
fid_Suffixinterval * | si | |||
) |
Find suffix-interval with suffixes sharing a common given prefix.
Simply put, this function returns all occurrences of pattern
in the suffix array as suffix-interval (which is not necessarily an lcp-interval).
esa | An enhanced suffix array to search in. | |
pattern | Common prefix of searched suffix-interval, alphabet encoded. | |
plen | Length of pattern . | |
si | The suffix-interval corresponding to pattern if it exists. |
si
set to the requested suffix-interval, or -1 if pattern
does not occur in the text. Definition at line 815 of file suffixarray.templ.c.
References fid_suffixarray_extend_interval(), fid_Suffixarray::sequences, and fid_Sequences::total_length.
UINT fid_suffixarray_find_large_lcp | ( | const fid_Suffixarray * | esa, | |
UINT | suffix | |||
) |
Find large LCP value in LCP exception table.
esa | An enhanced suffix array. | |
suffix | The suffix index whose large LCP value should be retrieved. |
Definition at line 836 of file suffixarray.templ.c.
References fid_Mappedfile::content, fid_CAST_POINTER, fid_Suffixarray::llvfile, and fid_Suffixarray::num_of_large_lcps.
UINT fid_suffixarray_suffix_length | ( | const fid_Suffixarray * | esa, | |
UINT | suffix | |||
) |
Determine length of suffix up to next sequence separator.
It is easy to determine the length of a suffix in an enhanced suffix array when there is only one sequence inside. This function also takes sequence separators into account and returns the length of the sequence in the given suffix up to the next sequence separator, or to the end of the input.
esa | An enhanced suffix array. | |
suffix | The suffix whose length is to be found. |
Definition at line 888 of file suffixarray.templ.c.
References fid_Mappedfile::content, fid_sequences_offset_to_boundaries(), fid_Suffixarray::sequences, fid_Suffixarray::suftab, and fid_Sequences::tisfile.
void fid_suffixarray_compute_distribution | ( | fid_Suffixarray * | esa | ) |
Compute character distribution of given enhanced suffix array.
The fid_Sequences::distribution of fid_Suffixarray::sequences will be filled by this function, such that each entry s contains the relative frequency of symbol s.
The length of the sequence is corrected by the number of sequence separators present in the input sequence, so these will not be taken into account. Note that the entry for wildcards will be filled twice, once at index fid_WILDCARD, and once at the first index after the last normal symbol. The entries for separators and undefined characters will both be set to 0.0.
Note that this function is faster than fid_sequences_compute_distribution() for it makes use of the suffix array.
esa | The enhanced suffix array whose character distribution should be determined. |
Definition at line 925 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_Sequences::alpha, fid_Mappedfile::content, fid_Sequences::distribution, fid_suffixarray_get_intervals(), fid_WILDCARD, fid_Sequences::num_of_sequences, fid_Alphabet::num_of_syms, fid_Mappedfile::occupied, fid_Suffixarray::sequences, fid_Suffixarray::suftab, and fid_Sequences::tisfile.
void fid_suffixarray_dump_intervals | ( | const fid_Suffixarray * | esa, | |
const UINT | intervals[fid_INTERVALBUFFERSIZE], | |||
FILE * | stream | |||
) |
Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals().
Each printed suffix-interval is preceded by its distinguishing character. Empty intervals are left out and appear in a summary below the list of dumped intervals.
esa | The enhanced suffix array the intervals belong to. | |
intervals | An array of suffix-intervals encoded by integers. | |
stream | An output stream to which the suffix-intervals are printed. If NULL, nothing will be printed. |
Definition at line 1014 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_PRINT_SYMBOL, fid_suffixinterval_init, fid_suffixinterval_to_lcpinterval(), fid_Alphabet::num_of_syms, and UINTFMT.
void fid_suffixarray_dump_suffix | ( | const fid_Suffixarray * | esa, | |
UINT | suffix, | |||
UINT | length, | |||
FILE * | stream | |||
) |
Dump prefix of a suffix to given stream.
This function will not print more than length
characters, and will stop at sequence separators.
esa | The enhanced suffix array containing the suffix to be dumped. | |
suffix | The index of the suffix to be dumped. | |
length | Maximum number of characters to be printed. Dump stops at end of the sequence. | |
stream | An output stream to which the suffix is printed. If NULL, nothing will be printed. |
Definition at line 1079 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_Mappedfile::content, fid_sequences_dump_range(), fid_Suffixarray::sequences, fid_Suffixarray::suftab, fid_Sequences::tisfile, and fid_Sequences::total_length.
Referenced by fid_suffixarray_print().
void fid_suffixarray_print | ( | const fid_Suffixarray * | esa, | |
fid_Tablerequest | request, | |||
FILE * | stream | |||
) |
Print suffix array tables to given stream.
Suffixes are not printed to full length since this would not be practical even for tiny suffix arrays. Instead, only the first few characters corresponding to a suffix's LCP-value are printed. Thus, when printing the suffixes, the LCP-table is usually also of interest.
esa | The enhanced suffix array to be printed. | |
request | Which tables to print. Note that suffixes themselves are printed by adding fid_TABLE_TIS to the request, in which case the LCP-table must be mapped, too (even if it is not to be printed). | |
stream | An output stream to which the suffix is printed. If NULL, nothing will be printed. |
Definition at line 1116 of file suffixarray.templ.c.
References fid_suffixarray_dump_suffix(), fid_TABLE_STI, fid_TABLE_TIS, fid_TABLES_ALL, fid_Suffixarray::sequences, fid_Suffixarray::skiptab, fid_Suffixarray::stitab, fid_Suffixarray::suftab, and fid_Sequences::total_length.
UINT fid_suffixinterval_lcpvalue | ( | const fid_Suffixinterval * | si, | |
const fid_Suffixarray * | esa | |||
) |
Determine LCP value of suffix-interval.
This function computes the length of the longest common prefix of the suffixes represented by a given suffix-interval by sequence comparisons. Setting field fid_Suffixinterval::depth of si
to the return value of this function yields an lcp-interval. Note that longest common prefixes never include special symbols like wildcards or sequence separators since these symbols are not equal to others, not even to themselves.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
si
, or fid_SUFFIXINTERVAL_SINGLETON if si
is a singleton. Definition at line 1177 of file suffixarray.templ.c.
References fid_READ_SYMBOL, fid_REGULARSYMBOL, fid_SPECIALSYMBOL, fid_Suffixarray::sequences, fid_Suffixarray::suftab, and fid_Sequences::total_length.
Referenced by fid_suffixinterval_to_lcpinterval().
void fid_suffixinterval_to_lcpinterval | ( | fid_Suffixinterval * | si, | |
const fid_Suffixarray * | esa | |||
) |
Turn suffix-interval into lcp-interval.
This function determines the LCP value of the given suffix-interval and assigns that value to its fid_Suffixinterval::depth field.
si | A suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
Definition at line 1231 of file suffixarray.templ.c.
References fid_suffixinterval_lcpvalue().
Referenced by fid_suffixarray_dump_intervals(), and fid_suffixarray_traverse().
void fid_suffixinterval_find_right | ( | fid_Suffixinterval * | si, | |
const fid_Suffixarray * | esa | |||
) |
Find right boundary of suffix-interval from left boundary and depth.
This function assumes that the left boundary and the depth of the passed suffix-interval are correctly set up. From these two values the right boundary is determined and assigned to fid_Suffixinterval::right of si
.
si | An incomplete suffix-interval. | |
esa | The enhanced suffix array the suffix-interval refers to. |
Definition at line 1251 of file suffixarray.templ.c.
References fid_Suffixarray::sequences, and fid_Sequences::total_length.
UINT fid_suffixinterval_homepos | ( | const fid_Suffixinterval * | si, | |
const fid_Suffixarray * | esa | |||
) |
Determine home-position of given lcp-interval.
si | A suffix-interval; strictly speaking, home-positions are defined for lcp-intervals only, but since home-positions are derived just from interval boundaries, you may safely pass in suffix-intervals. | |
esa | An enhanced suffix array. |
si
, i.e., either its left or right boundary. Definition at line 1283 of file suffixarray.templ.c.
References fid_Mappedfile::content, fid_Suffixarray::lcpfile, fid_Suffixarray::sequences, and fid_Sequences::total_length.
void fid_suffixinterval_dump | ( | const fid_Suffixinterval * | si, | |
const fid_Suffixarray * | esa, | |||
FILE * | stream | |||
) |
Dump suffix-interval to given stream.
si | The suffix-interval to be dumped. | |
esa | The enhanced suffix array containing the suffix-interval. | |
stream | An output stream to which the suffix-interval is printed. If NULL, nothing will be printed. |
Definition at line 1317 of file suffixarray.templ.c.
References fid_Suffixarray::alpha, fid_Mappedfile::content, fid_PRINT_SYMBOL, fid_Suffixarray::sequences, fid_Suffixarray::suftab, fid_Sequences::tisfile, and UINTFMT.