Enhanced Suffix Arrays


Detailed Description

The enhanced suffix array data types and accompanying functions are defined in this group. The central data type is fid_Suffixarray. Suffix-intervals are represented by the fid_Suffixinterval data structure.

Data Structures

struct  fid_Projectfile
 Representation of a project file. More...
struct  fid_Suffixarray
 Representation of an enhanced suffix array. More...
struct  fid_Suffixinterval_32
 Representation of a suffix-interval in 32 bit enhanced suffix array. More...
struct  fid_Suffixinterval_64
 Representation of a suffix-interval in 64 bit enhanced suffix array. More...

Defines

#define fid_SKIPTREE_LOOP(ESA, SKPTAB, NEXT, LCP, DEPTH, MAX, REL)
 The inner skip chain loop.
#define fid_SKIPTREE_DEPTH0_LOOP(ESA, SKPTAB, NEXT, LCP, MAX)
 The inner skip chain loop for special character at depth 0.
#define fid_SKIPTREE_COMMON(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX)
 Skip tree if the last character read from the start suffix was a common character.
#define fid_SKIPTREE_SPECIAL(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX)
 Skip tree if the last character read from the start suffix was a special character.
#define fid_SKIPTREE(ESA, SKPTAB, NEXT, START, LCP, DEPTH, MAX, COMMON)
 Skip a subtree of a virtual suffix tree via skip table.
#define fid_SUFFIXINTERVAL_SINGLETON_32   (~(fid_Uint32)0)
 Special value for fid_Suffixinterval_32::depth to indicate a singleton.
#define fid_SUFFIXINTERVAL_SINGLETON_64   (~(fid_Uint64)0)
 Special value for fid_Suffixinterval_64::depth to indicate a singleton.
#define fid_TABLE_SUF   ((fid_Tablerequest)0x00000100)
 Table request: suffix array.
#define fid_TABLE_LCP   ((fid_Tablerequest)0x00000200)
 Table request: lcp table.
#define fid_TABLE_SKP   ((fid_Tablerequest)0x00000400)
 Table request: skip table.
#define fid_TABLE_STI   ((fid_Tablerequest)0x00000800)
 Table request: inverse suffix array.
#define fid_TABLES_OFFLINE   (fid_TABLE_SUF|fid_TABLE_LCP|fid_TABLE_SKP|fid_TABLE_STI)
 Table request: all tables related to offline matching.
#define fid_TABLES_OFFLINE_MASK   ((fid_Tablerequest)0xffffff00)
 Table request bit mask: bits related to offline matching.
#define fid_TABLES_ALL   (fid_TABLES_ONLINE|fid_TABLES_OFFLINE)
 Table request: all tables.
#define fid_INTERVALBUFFERSIZE   ((size_t)fid_SYMBOLMAX+2)
 Size of the buffer for fid_suffixarray_get_intervals().
#define fid_LCP_32(L, ESA, I)
 Determine lcp-value of a 32 bit suffix.
#define fid_LCP_64(L, ESA, I)
 Determine lcp-value of a 64 bit suffix.
#define fid_suffixinterval_init(SI, D, L, R)
 Initialize a fid_Suffixinterval structure.
#define fid_suffixinterval_init_root_32(SI, ESA)   fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint32-1)
 Initialize a fid_Suffixinterval_32 structure corresponding to the root interval of a given enhanced suffix array.
#define fid_suffixinterval_init_root_64(SI, ESA)   fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint64-1)
 Initialize a fid_Suffixinterval_64 structure corresponding to the root interval of a given enhanced suffix array.
#define fid_TRAVERSE_CONTINUE   ((int)0)
 Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.
#define fid_TRAVERSE_SKIP   ((int)-1)
 Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.

Typedefs

typedef int(* fid_Esatraversecallback_32 )(const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 pdepth, fid_Error *error, void *user_data)
 Callback function prototype for enhanced suffix array traversals.
typedef int(* fid_Esatraversecallback_64 )(const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 pdepth, fid_Error *error, void *user_data)
 Callback function prototype for enhanced suffix array traversals.

Functions

int fid_projectfile_init (fid_Projectfile *project, const char *prjbasename, fid_Error *error)
 Initialize a project file data structure.
int fid_projectfile_parse_from_file (fid_Projectfile *project, const char *prjbasename, fid_Error *error)
 Parse project file content from file.
int fid_projectfile_write (const fid_Projectfile *project, fid_Error *error)
 Write textual representation of project file data structure to file.
void fid_projectfile_free (fid_Projectfile *project)
 Free memory associated with a project file structure.
void fid_projectfile_dump (const fid_Projectfile *project, FILE *stream)
 Print contents of fid_Projectfile structure to output stream.
int fid_suffixarray_load_from_project (fid_Suffixarray *esa, fid_Projectfile *project, fid_Tablerequest tables, fid_Error *error)
 Load enhanced suffix array based on the given project data structure.
int fid_suffixarray_load_special (fid_Suffixarray *esa, const char *prjbasename, fid_Tablerequest tables, fid_Uintsize uisize_request, int ignore_byteorder, fid_Error *error)
 Load enhanced suffix array project and initialize data structures.
int fid_suffixarray_load (fid_Suffixarray *esa, const char *prjbasename, fid_Tablerequest tables, fid_Error *error)
 Load enhanced suffix array project and initialize data structures.
int fid_projectfile_init_from_suffixarray (fid_Projectfile *project, const fid_DBfiles *dbfiles, const char *prjbasename, const fid_Suffixarray *esa, fid_Error *error)
 Initialize fid_Projectfile structure from mapped enhanced suffix array.
void fid_suffixarray_init (fid_Suffixarray *esa, fid_Uintsize uisize)
 Initialize enhanced suffix array structure.
int fid_suffixarray_load_from_files (fid_Suffixarray *esa, const char *basefilename, fid_Tablerequest tables, fid_Uintsize uisize, fid_Error *error)
 Load enhanced suffix array from disk.
int fid_suffixarray_realize (fid_Suffixarray *esa, fid_Tablerequest tables, fid_Error *error)
 Update data fields in fid_Suffixarray structure according to mapped files.
int fid_suffixarray_traverse_32 (const fid_Suffixarray *esa, fid_Esatraversecallback_32 callback, void *user_data, fid_Error *error)
 32 bit version of fid_suffixarray_traverse().
int fid_suffixarray_traverse_64 (const fid_Suffixarray *esa, fid_Esatraversecallback_64 callback, void *user_data, fid_Error *error)
 64 bit version of fid_suffixarray_traverse().
fid_Uint32 fid_suffixarray_get_intervals_32 (const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 *intervals)
 32 bit version of fid_suffixarray_get_intervals().
fid_Uint64 fid_suffixarray_get_intervals_64 (const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 *intervals)
 64 bit version of fid_suffixarray_get_intervals().
int fid_suffixarray_find_embedded_interval_32 (const fid_Suffixarray *esa, fid_Suffixinterval_32 *si, fid_Symbol symbol)
 32 bit version of fid_suffixarray_find_embedded_interval().
int fid_suffixarray_find_embedded_interval_64 (const fid_Suffixarray *esa, fid_Suffixinterval_64 *si, fid_Symbol symbol)
 64 bit version of fid_suffixarray_find_embedded_interval().
int fid_suffixarray_extend_interval_32 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint32 plen, fid_Suffixinterval_32 *si)
 32 bit version of fid_suffixarray_extend_interval().
int fid_suffixarray_extend_interval_64 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint64 plen, fid_Suffixinterval_64 *si)
 64 bit version of fid_suffixarray_extend_interval().
int fid_suffixarray_find_interval_32 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint32 plen, fid_Suffixinterval_32 *si)
 32 bit version of fid_suffixarray_find_interval().
int fid_suffixarray_find_interval_64 (const fid_Suffixarray *esa, const fid_Symbol *pattern, fid_Uint64 plen, fid_Suffixinterval_64 *si)
 64 bit version of fid_suffixarray_find_interval().
fid_Uint32 fid_suffixarray_find_large_lcp_32 (const fid_Suffixarray *esa, fid_Uint32 suffix)
 32 bit version of fid_suffixarray_find_large_lcp().
fid_Uint64 fid_suffixarray_find_large_lcp_64 (const fid_Suffixarray *esa, fid_Uint64 suffix)
 64 bit version of fid_suffixarray_find_large_lcp().
fid_Uint32 fid_suffixarray_suffix_length_32 (const fid_Suffixarray *esa, fid_Uint32 suffix)
 32 bit version of fid_suffixarray_suffix_length().
fid_Uint64 fid_suffixarray_suffix_length_64 (const fid_Suffixarray *esa, fid_Uint64 suffix)
 64 bit version of fid_suffixarray_suffix_length().
void fid_suffixarray_compute_distribution_32 (fid_Suffixarray *esa)
 32 bit version of fid_suffixarray_compute_distribution().
void fid_suffixarray_compute_distribution_64 (fid_Suffixarray *esa)
 64 bit version of fid_suffixarray_compute_distribution().
void fid_suffixarray_free (fid_Suffixarray *esa)
 Free enhanced suffix array structure.
void fid_suffixarray_dump (const fid_Suffixarray *esa, FILE *stream)
 Show information about an enhanced suffix array.
void fid_suffixarray_dump_intervals_32 (const fid_Suffixarray *esa, const fid_Uint32 intervals[fid_INTERVALBUFFERSIZE], FILE *stream)
 32 bit version of fid_suffixarray_dump_intervals().
void fid_suffixarray_dump_intervals_64 (const fid_Suffixarray *esa, const fid_Uint64 intervals[fid_INTERVALBUFFERSIZE], FILE *stream)
 64 bit version of fid_suffixarray_dump_intervals().
void fid_suffixarray_dump_suffix_32 (const fid_Suffixarray *esa, fid_Uint32 suffix, fid_Uint32 length, FILE *stream)
 32 bit version of fid_suffixarray_dump_suffix().
void fid_suffixarray_dump_suffix_64 (const fid_Suffixarray *esa, fid_Uint64 suffix, fid_Uint64 length, FILE *stream)
 64 bit version of fid_suffixarray_dump_suffix().
void fid_suffixarray_print_32 (const fid_Suffixarray *esa, fid_Tablerequest request, FILE *stream)
 32 bit version of fid_suffixarray_print().
void fid_suffixarray_print_64 (const fid_Suffixarray *esa, fid_Tablerequest request, FILE *stream)
 64 bit version of fid_suffixarray_print().
fid_Uint32 fid_suffixinterval_lcpvalue_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa)
 32 bit version of fid_suffixinterval_lcpvalue().
fid_Uint64 fid_suffixinterval_lcpvalue_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa)
 64 bit version of fid_suffixinterval_lcpvalue().
void fid_suffixinterval_to_lcpinterval_32 (fid_Suffixinterval_32 *si, const fid_Suffixarray *esa)
 32 bit version of fid_suffixinterval_to_lcpinterval().
void fid_suffixinterval_to_lcpinterval_64 (fid_Suffixinterval_64 *si, const fid_Suffixarray *esa)
 64 bit version of fid_suffixinterval_to_lcpinterval().
void fid_suffixinterval_find_right_32 (fid_Suffixinterval_32 *si, const fid_Suffixarray *esa)
 32 bit version of fid_suffixinterval_find_right().
void fid_suffixinterval_find_right_64 (fid_Suffixinterval_64 *si, const fid_Suffixarray *esa)
 64 bit version of fid_suffixinterval_find_right().
fid_Uint32 fid_suffixinterval_homepos_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa)
 32 bit version of fid_suffixinterval_homepos().
fid_Uint64 fid_suffixinterval_homepos_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa)
 64 bit version of fid_suffixinterval_homepos().
void fid_suffixinterval_dump_32 (const fid_Suffixinterval_32 *si, const fid_Suffixarray *esa, FILE *stream)
 32 bit version of fid_suffixinterval_dump().
void fid_suffixinterval_dump_64 (const fid_Suffixinterval_64 *si, const fid_Suffixarray *esa, FILE *stream)
 64 bit version of fid_suffixinterval_dump().
char * fid_tablerequest_to_string (fid_Tablerequest request, const char *sep)
 Construct string from table request.

Define Documentation

#define fid_SKIPTREE_LOOP ( ESA,
SKPTAB,
NEXT,
LCP,
DEPTH,
MAX,
REL   ) 

Value:

while((NEXT) < (MAX) && (LCP) REL (DEPTH))\
  {\
    NEXT=(SKPTAB)[NEXT]+1;\
    fid_LCP(LCP,ESA,NEXT);\
  }
The inner skip chain loop.

If the last character read from the start suffix was a special character and depth is 0, then use fid_SKIPTREE_DEPTH0_LOOP() instead to prevent reading invalid memory.

Definition at line 32 of file skiptree.h.

#define fid_SKIPTREE_DEPTH0_LOOP ( ESA,
SKPTAB,
NEXT,
LCP,
MAX   ) 

Value:

while((NEXT) < (MAX))\
  {\
    NEXT=(SKPTAB)[NEXT]+1;\
    if((NEXT) < (MAX))\
    {\
      fid_LCP(LCP,ESA,NEXT);\
    }\
  }
The inner skip chain loop for special character at depth 0.

Definition at line 42 of file skiptree.h.

#define fid_SKIPTREE_COMMON ( ESA,
SKPTAB,
NEXT,
START,
LCP,
DEPTH,
MAX   ) 

Value:

if((START) < (MAX))\
  {\
    (NEXT)=(START)+1;\
    fid_LCP(LCP,ESA,NEXT);\
    fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>);\
  }\
  else\
  {\
    NEXT=(MAX);\
    fid_LCP(LCP,ESA,NEXT);\
  }
Skip tree if the last character read from the start suffix was a common character.

Definition at line 55 of file skiptree.h.

#define fid_SKIPTREE_SPECIAL ( ESA,
SKPTAB,
NEXT,
START,
LCP,
DEPTH,
MAX   ) 

Value:

if((START) < (MAX))\
  {\
    (NEXT)=(START)+1;\
    fid_LCP(LCP,ESA,NEXT);\
    if((DEPTH) > 0)\
    {\
      fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>=);\
    }\
    else\
    {\
      fid_SKIPTREE_DEPTH0_LOOP(ESA,SKPTAB,NEXT,LCP,MAX);\
    }\
  }\
  else\
  {\
    NEXT=(MAX);\
    fid_LCP(LCP,ESA,NEXT);\
  }
Skip tree if the last character read from the start suffix was a special character.

Definition at line 71 of file skiptree.h.

#define fid_SKIPTREE ( ESA,
SKPTAB,
NEXT,
START,
LCP,
DEPTH,
MAX,
COMMON   ) 

Value:

if((START) < (MAX))\
  {\
    (NEXT)=(START)+1;\
    fid_LCP(LCP,ESA,NEXT);\
    if(COMMON)\
    {\
      fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>);\
    }\
    else if((DEPTH) > 0)\
    {\
      fid_SKIPTREE_LOOP(ESA,SKPTAB,NEXT,LCP,DEPTH,MAX,>=);\
    }\
    else\
    {\
      fid_SKIPTREE_DEPTH0_LOOP(ESA,SKPTAB,NEXT,LCP,MAX);\
    }\
  }\
  else\
  {\
    NEXT=(MAX);\
    fid_LCP(LCP,ESA,NEXT);\
  }
Skip a subtree of a virtual suffix tree via skip table.

The macro will always update NEXT and LCP; the other arguments are used read-only.

Parameters:
ESA Pointer to an enhanced suffix array.
SKPTAB Pointer to the skip table (part of ESA). This can be given separately because you may want to have a pointer around that is directly pointing to the skip table instead of referencing it through the fid_Suffixarray structure each time.
NEXT An fid_Uint that stores the next index in the suffix table, that is the first suffix that does not belong to the skipped subtree.
START The current index into the suffix table, that is the index of the suffix whose subtree should be skipped.
LCP An fid_Uint that stores the current index in the LCP table.
DEPTH The current depth in the suffix tree.
MAX The length of the sequence underlying the suffix array. The length could also be read directly from a fid_Suffixarray structure, but this may take too much time, so it should have been stored in a separate fid_Uint by the caller.
COMMON Boolean indicating if the last character of the start suffix is a special (0) or a common (not 0) one as determined by the fid_REGULARSYMBOL() and fid_SPECIALSYMBOL() macros.

Definition at line 117 of file skiptree.h.

#define fid_SUFFIXINTERVAL_SINGLETON_32   (~(fid_Uint32)0)

Special value for fid_Suffixinterval_32::depth to indicate a singleton.

See also:
fid_SUFFIXINTERVAL_SINGLETON_64

Definition at line 106 of file suffixarray.h.

#define fid_SUFFIXINTERVAL_SINGLETON_64   (~(fid_Uint64)0)

Special value for fid_Suffixinterval_64::depth to indicate a singleton.

See also:
fid_SUFFIXINTERVAL_SINGLETON_32

Definition at line 113 of file suffixarray.h.

#define fid_TABLE_SUF   ((fid_Tablerequest)0x00000100)

Table request: suffix array.

Definition at line 116 of file suffixarray.h.

#define fid_TABLE_LCP   ((fid_Tablerequest)0x00000200)

Table request: lcp table.

Definition at line 119 of file suffixarray.h.

#define fid_TABLE_SKP   ((fid_Tablerequest)0x00000400)

Table request: skip table.

Definition at line 122 of file suffixarray.h.

#define fid_TABLE_STI   ((fid_Tablerequest)0x00000800)

Table request: inverse suffix array.

Definition at line 125 of file suffixarray.h.

Referenced by fid_suffixarray_print().

#define fid_TABLES_OFFLINE   (fid_TABLE_SUF|fid_TABLE_LCP|fid_TABLE_SKP|fid_TABLE_STI)

Table request: all tables related to offline matching.

Definition at line 128 of file suffixarray.h.

Referenced by fid_suffixarray_free(), and fid_tablerequest_to_string().

#define fid_TABLES_OFFLINE_MASK   ((fid_Tablerequest)0xffffff00)

Table request bit mask: bits related to offline matching.

Definition at line 131 of file suffixarray.h.

Referenced by fid_suffixarray_load_from_files().

#define fid_TABLES_ALL   (fid_TABLES_ONLINE|fid_TABLES_OFFLINE)

Table request: all tables.

Definition at line 134 of file suffixarray.h.

Referenced by fid_suffixarray_print(), and fid_tablerequest_to_string().

#define fid_INTERVALBUFFERSIZE   ((size_t)fid_SYMBOLMAX+2)

Size of the buffer for fid_suffixarray_get_intervals().

The size is determined as the maximum number of regular symbols, plus 1 for the left boundary of the special interval, plus 1 for the (non-existent) left boundary of the interval following the special interval.

Definition at line 144 of file suffixarray.h.

#define fid_LCP_32 ( L,
ESA,
I   ) 

Value:

if(((L)=(ESA)->lcpfile.content[I]) == (unsigned char)UCHAR_MAX)\
  {\
    (L)=fid_suffixarray_find_large_lcp_32(ESA,I);\
  }
Determine lcp-value of a 32 bit suffix.

This is implemented as a macro since it would be a huge performance penalty to call a function for something that is a simple assignment in the vast majority of all cases. A function call occurs only if the lcp-value stored in the main lcp-table is equal to UCHAR_MAX (255), since in that case the real, potentially larger, lcp-value must be looked up inside an exception table.

Parameters:
L The lcp-value is assigned to this variable, which should be of type fid_Uint32 or fid_Uint64.
ESA An enhanced suffix array.
I The suffix index for which the lcp-value should be determined.
See also:
fid_LCP_64()

Definition at line 162 of file suffixarray.h.

#define fid_LCP_64 ( L,
ESA,
I   ) 

Value:

if(((L)=(ESA)->lcpfile.content[I]) == (unsigned char)UCHAR_MAX)\
  {\
    (L)=fid_suffixarray_find_large_lcp_64(ESA,I);\
  }
Determine lcp-value of a 64 bit suffix.

See also:
fid_LCP_32()

Definition at line 172 of file suffixarray.h.

#define fid_suffixinterval_init ( SI,
D,
L,
R   ) 

Value:

(SI)->depth=(D);\
  (SI)->left=(L);\
  (SI)->right=(R)
Initialize a fid_Suffixinterval structure.

This has been implemented as a macro mainly for speed since it is used all the time. We don't want to rely on the compiler to inline this automatically, do we?

Parameters:
SI The fid_Suffixinterval structure to be initialized.
D Depth.
L,R Left and right boundaries.

Definition at line 189 of file suffixarray.h.

Referenced by fid_suffixarray_dump_intervals(), and fid_suffixarray_traverse().

#define fid_suffixinterval_init_root_32 ( SI,
ESA   )     fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint32-1)

Initialize a fid_Suffixinterval_32 structure corresponding to the root interval of a given enhanced suffix array.

Parameters:
SI The fid_Suffixinterval_32 structure to be initialized.
ESA A 32 bit enhanced suffix array.
See also:
fid_suffixinterval_init_root_64()

Definition at line 203 of file suffixarray.h.

#define fid_suffixinterval_init_root_64 ( SI,
ESA   )     fid_suffixinterval_init(SI,0,0,(ESA)->sequences.total_length.v_uint64-1)

Initialize a fid_Suffixinterval_64 structure corresponding to the root interval of a given enhanced suffix array.

Parameters:
SI The fid_Suffixinterval_64 structure to be initialized.
ESA A 64 bit enhanced suffix array.
See also:
fid_suffixinterval_init_root_32()

Definition at line 215 of file suffixarray.h.

#define fid_TRAVERSE_CONTINUE   ((int)0)

Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.

Definition at line 220 of file suffixarray.h.

#define fid_TRAVERSE_SKIP   ((int)-1)

Special return value for fid_Esatraversecallback_32 and fid_Esatraversecallback_64 functions.

Definition at line 224 of file suffixarray.h.

Referenced by fid_suffixarray_traverse().


Typedef Documentation

typedef int(* fid_Esatraversecallback_32)(const fid_Suffixarray *esa, const fid_Suffixinterval_32 *si, fid_Uint32 pdepth, fid_Error *error, void *user_data)

Callback function prototype for enhanced suffix array traversals.

For each lcp-interval in an enhanced suffix array passed to fid_suffixarray_traverse(), a callback function is called. That callback function must follow the signature of this type.

Parameters:
esa The traversed enhanced suffix array.
si An lcp-interval. This is really an lcp-interval, not just a suffix-interval, so there is no need to turn this into an lcp-interval inside the function.
pdepth The depth of the parent lcp-interval.
error Error messages go here.
user_data Pointer to any data, may be used freely within the callback function.
Returns:
A positive integer for stopping the traversal, or one of the following special return codes. If the function returns a positive integer, then that exact value will be the return value of fid_suffixarray_traverse().
Return values:
fid_TRAVERSE_CONTINUE Continue traversal.
fid_TRAVERSE_SKIP Do not traverse deeper into current subtree, continue with next sibling.
See also:
fid_Esatraversecallback_64

Definition at line 253 of file suffixarray.h.

typedef int(* fid_Esatraversecallback_64)(const fid_Suffixarray *esa, const fid_Suffixinterval_64 *si, fid_Uint64 pdepth, fid_Error *error, void *user_data)

Callback function prototype for enhanced suffix array traversals.

See also:
fid_Esatraversecallback_32

Definition at line 262 of file suffixarray.h.


Function Documentation

int fid_projectfile_init ( fid_Projectfile *  project,
const char *  prjbasename,
fid_Error *  error 
)

int fid_projectfile_parse_from_file ( fid_Projectfile *  project,
const char *  prjbasename,
fid_Error *  error 
)

Parse project file content from file.

This function calls fid_projectfile_init(), so the caller should not do so.

Parameters:
project Content of the file is parsed into this structure.
prjbasename Base name of the enhanced suffix array project.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 649 of file projectfile.c.

References fid_Mappedfile::content, fid_file_map(), fid_file_unmap(), fid_projectfile_free(), fid_projectfile_init(), fid_Projectfile::filename, and fid_Mappedfile::occupied.

Referenced by fid_suffixarray_load_special().

int fid_projectfile_write ( const fid_Projectfile *  project,
fid_Error *  error 
)

void fid_projectfile_free ( fid_Projectfile *  project  ) 

Free memory associated with a project file structure.

Parameters:
project The structure to be freed.

Definition at line 755 of file projectfile.c.

References fid_Projectfile::dbfiles, fid_Projectfile::filename, and fid_Projectfile::prjbasename.

Referenced by fid_projectfile_parse_from_file(), and fid_suffixarray_load_special().

void fid_projectfile_dump ( const fid_Projectfile *  project,
FILE *  stream 
)

int fid_suffixarray_load_from_project ( fid_Suffixarray *  esa,
fid_Projectfile *  project,
fid_Tablerequest  tables,
fid_Error *  error 
)

Load enhanced suffix array based on the given project data structure.

Given a parsed project file, open all requested tables and put these into the passed enhanced suffix array data structure.

Parameters:
esa The enhanced suffix array to be initialized.
project Structure representing a parsed enhanced suffix array project file. This is required to initialize several data fields of esa and to perform some integrity checks.
tables Bitvector of requested tables.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 67 of file suffixarrayext.c.

References fid_suffixarray_load_from_files(), fid_SWITCH48, fid_Projectfile::prjbasename, and fid_Projectfile::uisize.

Referenced by fid_suffixarray_load_special().

int fid_suffixarray_load_special ( fid_Suffixarray *  esa,
const char *  prjbasename,
fid_Tablerequest  tables,
fid_Uintsize  uisize_request,
int  ignore_byteorder,
fid_Error *  error 
)

Load enhanced suffix array project and initialize data structures.

Like fid_suffixarray_load(), this function opens and parses a project file, opens all requested tables and puts these into the passed enhanced suffix array data structure. It takes two additional arguments, which, however, are not needed in most cases.

Parameters:
esa The enhanced suffix array to be initialized.
prjbasename Base filename of the enhanced suffix array project.
tables Bitvector of requested tables.
uisize_request Loading fails if the integer size does not match this value. Pass fid_UINTSIZE_UNDEF to accept any integer size.
ignore_byteorder Ignore byte order as given in the project file if set to True. If the project file does not contain a littleendian line, then this option has no effect. This option is only useful for programs that can either handle binary data in non-native byte order, or that just need to check if all files needed can be read and have certain sizes.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 130 of file suffixarrayext.c.

References fid_Projectfile::endianess_known, fid_error_throw(), fid_projectfile_free(), fid_projectfile_parse_from_file(), fid_suffixarray_load_from_project(), fid_UINTSIZE_32, fid_UINTSIZE_UNDEF, fid_Projectfile::littleendian, and fid_Projectfile::uisize.

Referenced by fid_suffixarray_load().

int fid_suffixarray_load ( fid_Suffixarray *  esa,
const char *  prjbasename,
fid_Tablerequest  tables,
fid_Error *  error 
)

Load enhanced suffix array project and initialize data structures.

This is arguably the most important and high-level function of the whole library. Given a base filename, open and parse the corresponding project file, then open all requested tables and put these into the passed enhanced suffix array data structure.

Parameters:
esa The enhanced suffix array to be initialized.
prjbasename Base filename of the enhanced suffix array project.
tables Bitvector of requested tables.
error Error messages go here.
Returns:
0 on success, -1 on error.
Note:
If you need to ignore the byte order or need a special integer size, use fid_suffixarray_load_special() instead. This function is just a wrapper around that function.

Definition at line 211 of file suffixarrayext.c.

References fid_suffixarray_load_special(), and fid_UINTSIZE_UNDEF.

int fid_projectfile_init_from_suffixarray ( fid_Projectfile *  project,
const fid_DBfiles *  dbfiles,
const char *  prjbasename,
const fid_Suffixarray *  esa,
fid_Error *  error 
)

Initialize fid_Projectfile structure from mapped enhanced suffix array.

This function is useful for generating enhanced suffix array projects from parsed sequence data. Generate enhanced suffix array tables first, then call this function to fill the fid_Projectfile data structure from these.

Parameters:
project The project file structure to be initialized.
dbfiles Names and lengths of the original input sequence files.
prjbasename Base filename of the enhanced suffix array project.
esa Structure that has some enhanced suffix array tables associated.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 235 of file suffixarrayext.c.

References fid_Projectfile::dbfiles, fid_projectfile_init(), fid_SWITCH48, fid_Projectfile::integer_size_in_bytes, fid_Suffixarray::uisize, and fid_Projectfile::uisize.

void fid_suffixarray_init ( fid_Suffixarray *  esa,
fid_Uintsize  uisize 
)

int fid_suffixarray_load_from_files ( fid_Suffixarray *  esa,
const char *  basefilename,
fid_Tablerequest  tables,
fid_Uintsize  uisize,
fid_Error *  error 
)

Load enhanced suffix array from disk.

On systems where the madvise() system call is available, the kernel will be advised how to treat the mapped files in hope of improved performance. It is not necessary to call fid_suffixarray_init() before calling this function.

Parameters:
esa The structure to be filled.
basefilename The base filename of the suffix array tables on disk.
tables Requested tables, online and offline.
uisize When loading files from disk, assume that integers stored in files are of this size.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 262 of file suffixarray.c.

References fid_Suffixarray::alpha, fid_Filenamebuffer::buffer, fid_Filenamebuffer::bufptr, fid_alphabet_init_from_specfile(), fid_filenamebuffer_free(), fid_filenamebuffer_init(), fid_sequences_free(), fid_sequences_map(), fid_suffixarray_init(), fid_suffixarray_realize(), fid_TABLES_OFFLINE_MASK, fid_TABLES_ONLINE_MASK, and fid_Suffixarray::sequences.

Referenced by fid_suffixarray_load_from_project().

int fid_suffixarray_realize ( fid_Suffixarray *  esa,
fid_Tablerequest  tables,
fid_Error *  error 
)

Update data fields in fid_Suffixarray structure according to mapped files.

All data fields whose correct values can be derived from the sizes of the associated mapped files are updated by this function (e.g., fid_Sequences::num_of_large_lcps can be derived from the size of the file associated with fid_Sequences::llvfile). No function should set these fields manually, just call this function instead. A table request can be defined to ask the function to look at only some of the files and update only values derived from these. Note that also all online tables in fid_Suffixarray::sequences can be updated if the request tables asks for it.

Parameters:
esa The structure to be updated.
tables Update only values derivable from the tables specified in this request. To update all values, pass fid_TABLES_ALL, or fid_TABLES_OFFLINE to update only values derivable from the index tables.
error Error messages go here.
Returns:
0 on success, -1 on error.

Definition at line 366 of file suffixarray.c.

References fid_sequences_realize(), fid_SWITCH48, fid_TABLES_ONLINE_MASK, fid_Suffixarray::sequences, and fid_Suffixarray::uisize.

Referenced by fid_suffixarray_free(), and fid_suffixarray_load_from_files().

int fid_suffixarray_traverse_32 ( const fid_Suffixarray *  esa,
fid_Esatraversecallback_32  callback,
void *  user_data,
fid_Error *  error 
)

32 bit version of fid_suffixarray_traverse().

Generic depth-first traversal of an enhanced suffix array.

This function calls a callback function for each lcp-interval in an enhanced suffix array. The callback function is free to limit the maximum depth or to stop the traversal by returning special return values. See fid_Esatraversecallback_32 and fid_Esatraversecallback_64 for more details.

While it might not be the most efficient way to perform a depth-first traversal by means of a callback driven generic function, it is for sure a very convenient one to try out some ideas. Once an algorithm works when using this function (leading to a reference implementation), it can be rewritten with the traversal inlined to speed it up.

Parameters:
esa An enhanced suffix array to be traversed.
callback A callback function that processes lcp-intervals.
user_data Pointer to data used by the callback function.
error Error messages go here.
Returns:
-1 on error, 0 on success, or the non-zero return value returned by the callback function.

int fid_suffixarray_traverse_64 ( const fid_Suffixarray *  esa,
fid_Esatraversecallback_64  callback,
void *  user_data,
fid_Error *  error 
)

64 bit version of fid_suffixarray_traverse().

Generic depth-first traversal of an enhanced suffix array.

This function calls a callback function for each lcp-interval in an enhanced suffix array. The callback function is free to limit the maximum depth or to stop the traversal by returning special return values. See fid_Esatraversecallback_32 and fid_Esatraversecallback_64 for more details.

While it might not be the most efficient way to perform a depth-first traversal by means of a callback driven generic function, it is for sure a very convenient one to try out some ideas. Once an algorithm works when using this function (leading to a reference implementation), it can be rewritten with the traversal inlined to speed it up.

Parameters:
esa An enhanced suffix array to be traversed.
callback A callback function that processes lcp-intervals.
user_data Pointer to data used by the callback function.
error Error messages go here.
Returns:
-1 on error, 0 on success, or the non-zero return value returned by the callback function.

fid_Uint32 fid_suffixarray_get_intervals_32 ( const fid_Suffixarray *  esa,
const fid_Suffixinterval_32 *  si,
fid_Uint32 *  intervals 
)

32 bit version of fid_suffixarray_get_intervals().

Return all suffix-intervals embedded in a suffix-interval.

Given a suffix-interval, determine the suffix indices of all embedded suffix-intervals. The result is written to intervals. Let s be a symbol, then the embedded suffix-interval for symbol s starts at suffix intervals[s], and its length is intervals[s+1]-intervals[s]. If the length is 0, then the interval does not exist.

Parameters:
esa The enhanced suffix array the intervals are to be searched in.
si The suffix-interval whose child-intervals are to be determined.
intervals Array of suffix indices. This array must have fid_INTERVALBUFFERSIZE entries, all elements will be set.
Returns:
The number of suffix-intervals embedded in si.

fid_Uint64 fid_suffixarray_get_intervals_64 ( const fid_Suffixarray esa,
const fid_Suffixinterval_64 si,
fid_Uint64 intervals 
)

64 bit version of fid_suffixarray_get_intervals().

Return all suffix-intervals embedded in a suffix-interval.

Given a suffix-interval, determine the suffix indices of all embedded suffix-intervals. The result is written to intervals. Let s be a symbol, then the embedded suffix-interval for symbol s starts at suffix intervals[s], and its length is intervals[s+1]-intervals[s]. If the length is 0, then the interval does not exist.

Parameters:
esa The enhanced suffix array the intervals are to be searched in.
si The suffix-interval whose child-intervals are to be determined.
intervals Array of suffix indices. This array must have fid_INTERVALBUFFERSIZE entries, all elements will be set.
Returns:
The number of suffix-intervals embedded in si.

int fid_suffixarray_find_embedded_interval_32 ( const fid_Suffixarray esa,
fid_Suffixinterval_32 si,
fid_Symbol  symbol 
)

32 bit version of fid_suffixarray_find_embedded_interval().

Determine the embedded suffix-interval for a given symbol.

The child-interval is returned in argument si, i.e., si will be modified if the requested child-interval exists. If no such interval exists, then si will remain unchanged.

Parameters:
esa The enhanced suffix array the interval is to be searched in.
si The suffix-interval whose child-interval is to be determined.
symbol The embedded child-interval shall be the suffix-interval for the longest common prefix of si concatenated with this symbol.
Returns:
0 if found, -1 if no such interval exists.

int fid_suffixarray_find_embedded_interval_64 ( const fid_Suffixarray esa,
fid_Suffixinterval_64 si,
fid_Symbol  symbol 
)

64 bit version of fid_suffixarray_find_embedded_interval().

Determine the embedded suffix-interval for a given symbol.

The child-interval is returned in argument si, i.e., si will be modified if the requested child-interval exists. If no such interval exists, then si will remain unchanged.

Parameters:
esa The enhanced suffix array the interval is to be searched in.
si The suffix-interval whose child-interval is to be determined.
symbol The embedded child-interval shall be the suffix-interval for the longest common prefix of si concatenated with this symbol.
Returns:
0 if found, -1 if no such interval exists.

int fid_suffixarray_extend_interval_32 ( const fid_Suffixarray esa,
const fid_Symbol pattern,
fid_Uint32  plen,
fid_Suffixinterval_32 si 
)

32 bit version of fid_suffixarray_extend_interval().

Extend the common prefix of a suffix-interval by some pattern.

This function can be used to continue searching inside a suffix-interval by prolonging the common prefix of length fid_Suffixinterval::depth by some string, possibly further narrowing down the suffix-interval.

Parameters:
esa An enhanced suffix array to search in.
pattern Extension of the common prefix of the given suffix-interval, alphabet encoded.
plen Length of pattern.
si A valid suffix-interval to start with, will be modified due to the extension with pattern. If the common prefix cannot be extended by pattern, then the content of this structure will be undefined.
Returns:
0 on success with si set to the requested suffix-interval, or -1 if the suffix-interval could not be extended by pattern.

int fid_suffixarray_extend_interval_64 ( const fid_Suffixarray esa,
const fid_Symbol pattern,
fid_Uint64  plen,
fid_Suffixinterval_64 si 
)

64 bit version of fid_suffixarray_extend_interval().

Extend the common prefix of a suffix-interval by some pattern.

This function can be used to continue searching inside a suffix-interval by prolonging the common prefix of length fid_Suffixinterval::depth by some string, possibly further narrowing down the suffix-interval.

Parameters:
esa An enhanced suffix array to search in.
pattern Extension of the common prefix of the given suffix-interval, alphabet encoded.
plen Length of pattern.
si A valid suffix-interval to start with, will be modified due to the extension with pattern. If the common prefix cannot be extended by pattern, then the content of this structure will be undefined.
Returns:
0 on success with si set to the requested suffix-interval, or -1 if the suffix-interval could not be extended by pattern.

int fid_suffixarray_find_interval_32 ( const fid_Suffixarray esa,
const fid_Symbol pattern,
fid_Uint32  plen,
fid_Suffixinterval_32 si 
)

32 bit version of fid_suffixarray_find_interval().

Find suffix-interval with suffixes sharing a common given prefix.

Simply put, this function returns all occurrences of pattern in the suffix array as suffix-interval (which is not necessarily an lcp-interval).

Parameters:
esa An enhanced suffix array to search in.
pattern Common prefix of searched suffix-interval, alphabet encoded.
plen Length of pattern.
si The suffix-interval corresponding to pattern if it exists.
Returns:
0 on success with si set to the requested suffix-interval, or -1 if pattern does not occur in the text.

int fid_suffixarray_find_interval_64 ( const fid_Suffixarray esa,
const fid_Symbol pattern,
fid_Uint64  plen,
fid_Suffixinterval_64 si 
)

64 bit version of fid_suffixarray_find_interval().

Find suffix-interval with suffixes sharing a common given prefix.

Simply put, this function returns all occurrences of pattern in the suffix array as suffix-interval (which is not necessarily an lcp-interval).

Parameters:
esa An enhanced suffix array to search in.
pattern Common prefix of searched suffix-interval, alphabet encoded.
plen Length of pattern.
si The suffix-interval corresponding to pattern if it exists.
Returns:
0 on success with si set to the requested suffix-interval, or -1 if pattern does not occur in the text.

fid_Uint32 fid_suffixarray_find_large_lcp_32 ( const fid_Suffixarray esa,
fid_Uint32  suffix 
)

32 bit version of fid_suffixarray_find_large_lcp().

Find large LCP value in LCP exception table.

Parameters:
esa An enhanced suffix array.
suffix The suffix index whose large LCP value should be retrieved.
Returns:
The LCP value for the given suffix.

fid_Uint64 fid_suffixarray_find_large_lcp_64 ( const fid_Suffixarray esa,
fid_Uint64  suffix 
)

64 bit version of fid_suffixarray_find_large_lcp().

Find large LCP value in LCP exception table.

Parameters:
esa An enhanced suffix array.
suffix The suffix index whose large LCP value should be retrieved.
Returns:
The LCP value for the given suffix.

fid_Uint32 fid_suffixarray_suffix_length_32 ( const fid_Suffixarray esa,
fid_Uint32  suffix 
)

32 bit version of fid_suffixarray_suffix_length().

Determine length of suffix up to next sequence separator.

It is easy to determine the length of a suffix in an enhanced suffix array when there is only one sequence inside. This function also takes sequence separators into account and returns the length of the sequence in the given suffix up to the next sequence separator, or to the end of the input.

Parameters:
esa An enhanced suffix array.
suffix The suffix whose length is to be found.
Returns:
The length of the sequence in the given suffix.

fid_Uint64 fid_suffixarray_suffix_length_64 ( const fid_Suffixarray esa,
fid_Uint64  suffix 
)

64 bit version of fid_suffixarray_suffix_length().

Determine length of suffix up to next sequence separator.

It is easy to determine the length of a suffix in an enhanced suffix array when there is only one sequence inside. This function also takes sequence separators into account and returns the length of the sequence in the given suffix up to the next sequence separator, or to the end of the input.

Parameters:
esa An enhanced suffix array.
suffix The suffix whose length is to be found.
Returns:
The length of the sequence in the given suffix.

void fid_suffixarray_compute_distribution_32 ( fid_Suffixarray esa  ) 

32 bit version of fid_suffixarray_compute_distribution().

Compute character distribution of given enhanced suffix array.

The fid_Sequences::distribution of fid_Suffixarray::sequences will be filled by this function, such that each entry s contains the relative frequency of symbol s.

The length of the sequence is corrected by the number of sequence separators present in the input sequence, so these will not be taken into account. Note that the entry for wildcards will be filled twice, once at index fid_WILDCARD, and once at the first index after the last normal symbol. The entries for separators and undefined characters will both be set to 0.0.

Note that this function is faster than fid_sequences_compute_distribution() for it makes use of the suffix array.

Parameters:
esa The enhanced suffix array whose character distribution should be determined.

void fid_suffixarray_compute_distribution_64 ( fid_Suffixarray esa  ) 

64 bit version of fid_suffixarray_compute_distribution().

Compute character distribution of given enhanced suffix array.

The fid_Sequences::distribution of fid_Suffixarray::sequences will be filled by this function, such that each entry s contains the relative frequency of symbol s.

The length of the sequence is corrected by the number of sequence separators present in the input sequence, so these will not be taken into account. Note that the entry for wildcards will be filled twice, once at index fid_WILDCARD, and once at the first index after the last normal symbol. The entries for separators and undefined characters will both be set to 0.0.

Note that this function is faster than fid_sequences_compute_distribution() for it makes use of the suffix array.

Parameters:
esa The enhanced suffix array whose character distribution should be determined.

void fid_suffixarray_free ( fid_Suffixarray esa  ) 

Free enhanced suffix array structure.

All associated files will be closed.

Parameters:
esa The structure to be freed.

Definition at line 320 of file suffixarray.c.

References fid_file_unmap(), fid_sequences_free(), fid_suffixarray_realize(), fid_TABLES_OFFLINE, fid_Suffixarray::lcpfile, fid_Suffixarray::llvfile, fid_Suffixarray::sequences, fid_Suffixarray::skpfile, fid_Suffixarray::stifile, and fid_Suffixarray::suffile.

void fid_suffixarray_dump ( const fid_Suffixarray esa,
FILE *  stream 
)

void fid_suffixarray_dump_intervals_32 ( const fid_Suffixarray esa,
const fid_Uint32  intervals[fid_INTERVALBUFFERSIZE],
FILE *  stream 
)

32 bit version of fid_suffixarray_dump_intervals().

Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals().

Each printed suffix-interval is preceded by its distinguishing character. Empty intervals are left out and appear in a summary below the list of dumped intervals.

Parameters:
esa The enhanced suffix array the intervals belong to.
intervals An array of suffix-intervals encoded by integers.
stream An output stream to which the suffix-intervals are printed. If NULL, nothing will be printed.

void fid_suffixarray_dump_intervals_64 ( const fid_Suffixarray esa,
const fid_Uint64  intervals[fid_INTERVALBUFFERSIZE],
FILE *  stream 
)

64 bit version of fid_suffixarray_dump_intervals().

Dump array of suffix-intervals as returned by fid_suffixarray_get_intervals().

Each printed suffix-interval is preceded by its distinguishing character. Empty intervals are left out and appear in a summary below the list of dumped intervals.

Parameters:
esa The enhanced suffix array the intervals belong to.
intervals An array of suffix-intervals encoded by integers.
stream An output stream to which the suffix-intervals are printed. If NULL, nothing will be printed.

void fid_suffixarray_dump_suffix_32 ( const fid_Suffixarray esa,
fid_Uint32  suffix,
fid_Uint32  length,
FILE *  stream 
)

32 bit version of fid_suffixarray_dump_suffix().

Dump prefix of a suffix to given stream.

This function will not print more than length characters, and will stop at sequence separators.

Parameters:
esa The enhanced suffix array containing the suffix to be dumped.
suffix The index of the suffix to be dumped.
length Maximum number of characters to be printed. Dump stops at end of the sequence.
stream An output stream to which the suffix is printed. If NULL, nothing will be printed.

void fid_suffixarray_dump_suffix_64 ( const fid_Suffixarray esa,
fid_Uint64  suffix,
fid_Uint64  length,
FILE *  stream 
)

64 bit version of fid_suffixarray_dump_suffix().

Dump prefix of a suffix to given stream.

This function will not print more than length characters, and will stop at sequence separators.

Parameters:
esa The enhanced suffix array containing the suffix to be dumped.
suffix The index of the suffix to be dumped.
length Maximum number of characters to be printed. Dump stops at end of the sequence.
stream An output stream to which the suffix is printed. If NULL, nothing will be printed.

void fid_suffixarray_print_32 ( const fid_Suffixarray esa,
fid_Tablerequest  request,
FILE *  stream 
)

32 bit version of fid_suffixarray_print().

Print suffix array tables to given stream.

Suffixes are not printed to full length since this would not be practical even for tiny suffix arrays. Instead, only the first few characters corresponding to a suffix's LCP-value are printed. Thus, when printing the suffixes, the LCP-table is usually also of interest.

Parameters:
esa The enhanced suffix array to be printed.
request Which tables to print. Note that suffixes themselves are printed by adding fid_TABLE_TIS to the request, in which case the LCP-table must be mapped, too (even if it is not to be printed).
stream An output stream to which the tables are printed. If NULL, nothing will be printed.

void fid_suffixarray_print_64 ( const fid_Suffixarray esa,
fid_Tablerequest  request,
FILE *  stream 
)

64 bit version of fid_suffixarray_print().

Print suffix array tables to given stream.

Suffixes are not printed to full length since this would not be practical even for tiny suffix arrays. Instead, only the first few characters corresponding to a suffix's LCP-value are printed. Thus, when printing the suffixes, the LCP-table is usually also of interest.

Parameters:
esa The enhanced suffix array to be printed.
request Which tables to print. Note that suffixes themselves are printed by adding fid_TABLE_TIS to the request, in which case the LCP-table must be mapped, too (even if it is not to be printed).
stream An output stream to which the tables are printed. If NULL, nothing will be printed.

fid_Uint32 fid_suffixinterval_lcpvalue_32 ( const fid_Suffixinterval_32 si,
const fid_Suffixarray esa 
)

32 bit version of fid_suffixinterval_lcpvalue().

Determine LCP value of suffix-interval.

This function computes the length of the longest common prefix of the suffixes represented by a given suffix-interval by sequence comparisons. Setting field fid_Suffixinterval::depth of si to the return value of this function yields an lcp-interval. Note that longest common prefixes never include special symbols like wildcards or sequence separators since these symbols are not equal to others, not even to themselves.

Parameters:
si A suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.
Returns:
The LCP value of si, or fid_SUFFIXINTERVAL_SINGLETON if si is a singleton.

fid_Uint64 fid_suffixinterval_lcpvalue_64 ( const fid_Suffixinterval_64 si,
const fid_Suffixarray esa 
)

64 bit version of fid_suffixinterval_lcpvalue().

Determine LCP value of suffix-interval.

This function computes the length of the longest common prefix of the suffixes represented by a given suffix-interval by sequence comparisons. Setting field fid_Suffixinterval::depth of si to the return value of this function yields an lcp-interval. Note that longest common prefixes never include special symbols like wildcards or sequence separators since these symbols are not equal to others, not even to themselves.

Parameters:
si A suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.
Returns:
The LCP value of si, or fid_SUFFIXINTERVAL_SINGLETON if si is a singleton.

void fid_suffixinterval_to_lcpinterval_32 ( fid_Suffixinterval_32 si,
const fid_Suffixarray esa 
)

32 bit version of fid_suffixinterval_to_lcpinterval().

Turn suffix-interval into lcp-interval.

This function determines the LCP value of the given suffix-interval and assigns that value to its fid_Suffixinterval::depth field.

Parameters:
si A suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.

void fid_suffixinterval_to_lcpinterval_64 ( fid_Suffixinterval_64 si,
const fid_Suffixarray esa 
)

64 bit version of fid_suffixinterval_to_lcpinterval().

Turn suffix-interval into lcp-interval.

This function determines the LCP value of the given suffix-interval and assigns that value to its fid_Suffixinterval::depth field.

Parameters:
si A suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.

void fid_suffixinterval_find_right_32 ( fid_Suffixinterval_32 si,
const fid_Suffixarray esa 
)

32 bit version of fid_suffixinterval_find_right().

Find right boundary of suffix-interval from left boundary and depth.

This function assumes that the left boundary and the depth of the passed suffix-interval are correctly set up. From these two values the right boundary is determined and assigned to fid_Suffixinterval::right of si.

Parameters:
si An incomplete suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.
Bug:
This is a naive implementation and should be improved.

void fid_suffixinterval_find_right_64 ( fid_Suffixinterval_64 si,
const fid_Suffixarray esa 
)

64 bit version of fid_suffixinterval_find_right().

Find right boundary of suffix-interval from left boundary and depth.

This function assumes that the left boundary and the depth of the passed suffix-interval are correctly set up. From these two values the right boundary is determined and assigned to fid_Suffixinterval::right of si.

Parameters:
si An incomplete suffix-interval.
esa The enhanced suffix array the suffix-interval refers to.
Bug:
This is a naive implementation and should be improved.

fid_Uint32 fid_suffixinterval_homepos_32 ( const fid_Suffixinterval_32 si,
const fid_Suffixarray esa 
)

32 bit version of fid_suffixinterval_homepos().

Determine home-position of given lcp-interval.

Parameters:
si A suffix-interval; strictly speaking, home-positions are defined for lcp-intervals only, but since home-positions are derived just from interval boundaries, you may safely pass in suffix-intervals.
esa An enhanced suffix array.
Returns:
The home-position of lcp-interval si, i.e., either its left or right boundary.

fid_Uint64 fid_suffixinterval_homepos_64 ( const fid_Suffixinterval_64 si,
const fid_Suffixarray esa 
)

64 bit version of fid_suffixinterval_homepos().

Determine home-position of given lcp-interval.

Parameters:
si A suffix-interval; strictly speaking, home-positions are defined for lcp-intervals only, but since home-positions are derived just from interval boundaries, you may safely pass in suffix-intervals.
esa An enhanced suffix array.
Returns:
The home-position of lcp-interval si, i.e., either its left or right boundary.

void fid_suffixinterval_dump_32 ( const fid_Suffixinterval_32 si,
const fid_Suffixarray esa,
FILE *  stream 
)

32 bit version of fid_suffixinterval_dump().

Dump suffix-interval to given stream.

Parameters:
si The suffix-interval to be dumped.
esa The enhanced suffix array containing the suffix-interval.
stream An output stream to which the suffix-interval is printed. If NULL, nothing will be printed.

void fid_suffixinterval_dump_64 ( const fid_Suffixinterval_64 si,
const fid_Suffixarray esa,
FILE *  stream 
)

64 bit version of fid_suffixinterval_dump().

Dump suffix-interval to given stream.

Parameters:
si The suffix-interval to be dumped.
esa The enhanced suffix array containing the suffix-interval.
stream An output stream to which the suffix-interval is printed. If NULL, nothing will be printed.

char* fid_tablerequest_to_string ( fid_Tablerequest  request,
const char *  sep 
)

Construct string from table request.

Parameters:
request A table request.
sep Separator string that is to be inserted between table names.
Returns:
A newly allocated string containing the names of the tables requested in the given table request. The caller must free the memory when it is not needed anymore.
Note:
This function returns the NULL pointer in an out-of-memory condition.

Definition at line 499 of file suffixarray.c.

References fid_TABLE_TIS, fid_TABLES_ALL, fid_TABLES_OFFLINE, and fid_TABLES_ONLINE.


Generated on Wed Jul 8 17:21:16 2009 for Full-text Index Data structure library by  doxygen 1.5.9