Dbh Reference Manual

Disk Based HashTables (1.0.10)

Name

Disk Based HashTables -- associations between keys and values so that given a key the value can be found quickly, being disk based allows for large and persistent hashes.

Synopsis

#include <dbh.h>


struct       DBHashTable;
void         (*DBHashFunc)                  (struct DBHashTable * hash);
DBHashTable* DBH_create                     (char * path,
                                             unsigned char key_length);
DBHashTable* DBH_open                       (char * path);
DBHashTable* DBH_openR                      (char * path);
int          DBH_close                      (DBHashTable *hash_table);
DBHashTable* DBH_destroy                    (DBHashTable* hash_table);
FILEPOINTER  DBH_update                     (DBHashTable *hash_table);
FILEPOINTER  DBH_load                       (DBHashTable *hash_table);
FILEPOINTER  DBH_load_address               (DBHashTable *hash_table);
FILEPOINTER  DBH_load_parent                (DBHashTable *hash_table);
FILEPOINTER  DBH_load_child                 (DBHashTable *hash_table, unsigned char key_index);
FILEPOINTER  DBH_find                       (DBHashTable *hash_table,
                                             int ignored_digits);
int          DBH_erase                       (DBHashTable *hash_table);
int          DBH_unerase                       (DBHashTable *hash_table);
int          DBH_prune                       (DBHashTable *hash_table,
                                             unsigned char *key,
                                             int subtree_length);
int          DBH_unprune                       (DBHashTable *hash_table,
                                             unsigned char *key,
                                             int subtree_length);

int          DBH_foreach_sweep              (DBHashTable *hash_table,
                                             *DBHashFunc) func);

int          DBH_foreach_fanout             (DBHashTable *hash_table,
                                             *DBHashFunc) func);

int          DBH_sweep                     (DBHashTable *hash_table,
                                             *DBHashFunc) func,
                                             unsigned char key1,
                                             unsigned char key2,
                                             unsigned char ignore_portion);
int          DBH__fanout                    (DBHashTable *hash_table,
                                             *DBHashFunc) func,
                                             unsigned char key1,
                                             unsigned char key2,
                                             unsigned char ignore_portion);

void         DBH_orderkey                   (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n, 
                                             unsigned char base);

void         DBH_genkey                     (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n);

void         DBH_genkey2                    (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n);
DBHashTable* DBH_regen                      (DBHashTable* hash_table);

int          DBH_Size                       (DBHashTable *hash_table,
                                             int new_max_size);

int          DBH_writeheader                (DBHashTable *hash_table);

void         DBH_exitsweep                  (DBHashTable *hash_table);

void         DBH_set_recordsize             (DBHashTable *hash_table,
                                             int recordsize);

void         DBH_set_key                    (DBHashTable *hash_table,
                                             char *key);
void         DBH_set_data                   (DBHashTable *hash_table,
                                             void * data,
                                             int size);

unsigned char DBH_KEYLENGTH                  (DBHashTable *hash_table);

int          DBH_RECORD_SIZE                (DBHashTable *hash_table);

void *       DBH_PATH                       (DBHashTable *hash_table);

void *       DBH_DATA                       (DBHashTable *hash_table);

unsigned char * DBH_KEY                     (DBHashTable *hash_table);


FILEPOINTER  DBH_ERASED_SPACE               (DBHashTable *hash_table);

FILEPOINTER  DBH_DATA_SPACE                 (DBHashTable *hash_table);

FILEPOINTER  DBH_TOTAL_SPACE                (DBHashTable *hash_table);

FILEPOINTER  DBH_FORMAT_SPACE               (DBHashTable *hash_table);

FILEPOINTER  DBH_RECORDS                    (DBHashTable *hash_table);

FILEPOINTER  DBH_MAXIMUM_RECORD_SIZE        (DBHashTable *hash_table);

Description

A DBHashTable provides associations between keys and values which is optimized so that given a key, the associated value can be found very quickly.

Note that only one hash record is loaded from disk to memory at any given moment for a DBHashTable. Both keys and values should be copied into the DBHashTable record, so they need not exist for the lifetime of the DBHashTable. This means that the use of static strings and temporary strings (i.e. those created in buffers and those returned by GTK+ widgets) should be copied with DBH_set_key and DBH_set_data ()into the DBHashTable record before being inserted.

You must be careful to ensure that copied key length matches the defined key length of the DBHashTable, and also that the copied data does not exceed the maximum length of the DBHashTable record (1024 bytes by default, and expandable by DBH_Size). If the DBHashTable record length is to be variable, be sure to set the appropriate length before each DBH_update(), with DBH_set_recordsize(), otherwise the record length need only be set before the first DBH_update().

To create a DBHashTable, use DBH_create().

To insert a key and value into a DBHashTable, use DBH_update(). The DBHashTable will not be modified until this command is given. All changes to the current DBHashTable record only reside in memory. DBH_update is necesary to commit the changes to the DBHashTable.

To lookup a value corresponding to a given key, use DBH_load().

To erase and unerase a key and value, use DBH_erase() and DBH_unerase().

To call a function for each key and value pair (using a sweep route) use DBH_foreach_sweep() and DBH_sweep().

To call a function for each key and value pair (using a fanout route) use DBH_foreach_fanout() and DBH_foreach_fanout().

To destroy a DBHashTable use DBH_destroy().


Details

typedef FILEPOINTER

typedef FILEPOINTER

Either a 32 bit or 64 bit integer.

struct DBHashTable

struct DBHashTable;

The DBHashTable struct is an opaque data structure to represent a Disk Based Hash Table. It should only be accessed via the following functions.


DBH_create ()

DBHashTable* DBH_create                     (char * path,
                                             unsigned char key_length);

Creates a new DBHashTable.

path :

path on disk where DBHashTable will reside.

key_length :

The length of the key to access the DBHashTable. This is fixed length. If you want variable length, use a g_hash_table to associate cuantified keys generated by genkey(), and create an extra DBHashTable to save the g_hash. Cuantified keys assure that large DBHashes are spread out evenly.

Returns :

A pointer to the newly created and opened DBHashTable, or NULL if it fails.


DBH_open()



DBHashTable* DBH_open                       (char * path);

Open a existing DBHashTable for read/write operations.

path :

path to file containing DBHashTable

Returns :

pointer to DBHashTable or NULL if it fails.


DBH_openR()

DBHashTable* DBH_openR                      (char * path);

Open a existing DBHashTable for read-only operations.

path :

path to file containing DBHashTable

Returns :

pointer to DBHashTable or NULL if it fails.


DBH_close()



int          DBH_close                      (DBHashTable *hash_table);

Close a previously opened DBHashTable

hash_table :

A pointer to a open DBHashTable.

Returns :

TRUE if successful, FALSE if fails.


DBH_destroy()



DBHashTable* DBH_destroy                    (DBHashTable* hash_table);

Close an open DBHashTable and erase file from disk.

hash_table :

A pointer to a open DBHashTable.

Returns :

NULL (fixme: should be a TRUE/FALSE routine)


DBH_update()



FILEPOINTER  DBH_update                     (DBHashTable *hash_table);

Insert or replace the current hash record into the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

Current hash record FILEPOINTER or FALSE if it fails


DBH_load()

FILEPOINTER  DBH_load                       (DBHashTable *hash_table);

Load the record with the key that matches the one in the current hash record.

hash_table :

A pointer to a open DBHashTable.

Returns :

Current hash record FILEPOINTER or FALSE if it fails


DBH_load_address()

FILEPOINTER  DBH_load_address               (DBHashTable *hash_table);

Load the record directly by means of the FILEPOINTER into the current hash record.

hash_table :

A pointer to a open DBHashTable.

Returns :

Current hash record number of branches or FALSE if it fails

DBH_load_parent() (1.0.10)

FILEPOINTER  DBH_load_parent                 (DBHashTable *hash_table);

Load the record which is the parent of the in the current hash record.

hash_table :

A pointer to a open DBHashTable.

Returns :

Current hash record number of branches or FALSE if it fails


DBH_load_child() (1.0.10)

FILEPOINTER  DBH_load_child                  (DBHashTable *hash_table, unsigned char key_index);

Load the record which is the child on the key_index branch of the current hash record.

This is a convenience function. If you are doing a repetitive operation on a set of linked records it is much faster and better to use DBH_sweep().

hash_table :

A pointer to a open DBHashTable.

Returns :

Current hash record number of branches or FALSE if it fails




DBH_find()

FILEPOINTER  DBH_find                       (DBHashTable *hash_table,
                                             int n_digits);

Find the top level subtree node for the key in the current hash record, but ignoring the last n_digits.

hash_table :

A pointer to a open DBHashTable.

n_digits :

number of trailing bytes to ignore on the key of the current hash record

Returns :

FILEPOINTER for top level node of subtree


DBH_unerase()

FILEPOINTER  DBH_unerase                       (DBHashTable *hash_table);

Mark the current DBHashTable record as *not* erased

hash_table :

A pointer to a open DBHashTable.

Returns :

TRUE if successfull. FALSE otherwise.


DBH_erase()

FILEPOINTER  DBH_erase                       (DBHashTable *hash_table);

Mark the current DBHashTable record as erased

hash_table :

A pointer to a open DBHashTable.

Returns :

TRUE if successfull. FALSE otherwise.


DBH_prune()

FILEPOINTER  DBH_prune                       (DBHashTable *hash_table,
                                             unsigned char *key,
                                             int subtree_length);

Mark the entire subtree defined by key and subtree_length as erased

hash_table :

A pointer to a open DBHashTable.

key :

key to top of subtree

subtree_length :

number of trailing bytes to ignore on the key to define the subtree

Returns :

TRUE if successfull, FALSE otherwise.


DBH_unprune()

FILEPOINTER  DBH_unprune                       (DBHashTable *hash_table,
                                             unsigned char *key,
                                             int subtree_length);

Mark the entire subtree defined by key and subtree_length as *not* erased

hash_table :

A pointer to a open DBHashTable.

key :

key to top of subtree

subtree_length :

number of trailing bytes to ignore on the key to define the subtree

Returns :

TRUE if successfull, FALSE otherwise.


DBH_foreach_sweep()

int          DBH_foreach_sweep              (DBHashTable *hash_table,
                                             *DBHashFunc) func);

Apply a function to each member of the hash, following a sweep trajectory (vertically through branches).

hash_table :

A pointer to a open DBHashTable.

func :

The function to apply to each selected member of the hash.

Returns :

TRUE on success, FALSE if it fails


DBH_foreach_fanout()

int          DBH_foreach_fanout             (DBHashTable *hash_table,
                                             *DBHashFunc) func);

Apply a function to each member of the hash, following a fanout trajectory (horizontally through leafs).

hash_table :

A pointer to a open DBHashTable.

func :

The function to apply to each selected member of the hash.

Returns :

TRUE on success, FALSE if it fails


DBH_sweep()

int          DBH_sweep                     (DBHashTable *hash_table,
                                             *DBHashFunc) func,
                                             unsigned char key1,
                                             unsigned char key2,
                                             unsigned char ignore_portion);

Apply a function to subtree members of the hash, following a sweep trajectory (vertically through branches).

hash_table :

A pointer to a open DBHashTable.

func :

The function to apply to each selected member of the hash.

key1 :

The key from which to start the sweep. (Make sure it is a top level node of a subtree. This is done by using DBH_find() previously), or NULL if don't care.

key2 :

The key which will trigger an exit condition from the sweep, or NULL if don't care.

ignore_portion :

The ignored trailing bytes of key1 which will define the magnitud of the subtree to be sweeped, or zero if don't care.

Returns :

TRUE on success, FALSE if it fails


DBH_fanout()

int          DBH__fanout                    (DBHashTable *hash_table,
                                             *DBHashFunc) func,
                                             unsigned char key1,
                                             unsigned char key2,
                                             unsigned char ignore_portion);

Apply a function to subtree members of the hash, following a fanout trajectory (horizontally through leafs). s

hash_table :

A pointer to a open DBHashTable.

func :

The function to apply to each selected member of the hash.

key1 :

The key from which to start the sweep. (Make sure it is a top level node of a subtree. This is done by using DBH_find() previously), or NULL if don't care.

key2 :

The key which will trigger an exit condition from the sweep, or NULL if don't care.

ignore_portion :

The ignored trailing bytes of key1 which will define the magnitud of the subtree to be sweeped, or zero if don't care.

Returns :

TRUE on success, FALSE if it fails


DBH_orderkey()

void         DBH_orderkey                   (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n, 
                                             unsigned char base);

Obtain a key from a secuential series of natural numbers (positive integers without zero) which conserves the order of the natural numbers.

key :

The address where to put the generated key.

length :

The key length

n :

The natural number from which to generate the key

base :

The number system base to use. This will equal the maximum number of nodes per branch. This ---along with the keylength--- will also define a maximum number of records for the DBHashTable

Returns :

an unsigned caracter array based on 'A'


DBH_genkey()




void         DBH_genkey                     (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n);

Obtain a key from a secuential series of natural numbers (positive integers without zero) which does not conserve the order of the natural numbers, but which are optimized for construction of a balanced hash tree. These keys are expressed in cuantified numbers based on the 0 digit.

key :

The address where to put the generated key.

length :

The key length

n :

The natural number from which to generate the key

Returns :

an unsigned caracter array based on '0'


DBH_genkey2()

void         DBH_genkey2                    (unsigned char *key,
                                             unsigned char length,
                                             unsigned int n);

Obtain a key from a secuential series of natural numbers (positive integers without zero) which does not conserve the order of the natural numbers, but which are optimized for construction of a balanced hash tree. These keys are expressed in cuantified numbers based on the A symbol.

key :

The address where to put the generated key.

length :

The key length

n :

The natural number from which to generate the key

Returns :

an unsigned caracter array based on 'A'


DBH_regen()

DBHashTable* DBH_regen                      (DBHashTable* hash_table);

Regenerate the DBHashTable, eliminating erased records and optimizing disk access and speed.

hash_table :

A pointer to a open DBHashTable.

Returns :

The new pointer to the DBHashTable.


DBH_Size()

int          DBH_Size                       (DBHashTable *hash_table,
                                             int new_max_size);

Define the maximum amount of memory to be allocated to the current DBHashTable data record. The default is 1024 bytes.

hash_table :

A pointer to a open DBHashTable.

new_max_size :

The new maximum memory allocation for the data of the DBHashTable record.

Returns :

TRUE if success, FALSE if fails (I guess)


DBH_writeheader()

int          DBH_writeheader                (DBHashTable *hash_table);

Write out the DBHashTable header information. It is advisable to call this function inmediately after creation of a new DBHashTable to force a buffer flush.

hash_table :

A pointer to a open DBHashTable.

Returns :

TRUE if success, FALSE if fails


DBH_exitsweep()

void         DBH_exitsweep                  (DBHashTable *hash_table);

Calling this function from within a DBHashFunc will cause an exit of the sweep.

hash_table :

A pointer to a open DBHashTable.

Returns :

void


DBH_set_recordsize()

void         DBH_set_recordsize             (DBHashTable *hash_table,
                                             int recordsize);

This sets the recordsize of the the data in the current DBHashTable record. It is called implicitly by calling DBH_set_data().

hash_table :

A pointer to a open DBHashTable.

new_max_recordsize :

The amount of valid bytes in the current DBHashTable record.

Returns :

void


DBH_set_key()

void         DBH_set_key                    (DBHashTable *hash_table,
                                             char *key);

This function sets the key of the current DBHashTable record.

hash_table :

A pointer to a open DBHashTable.

key :

The key to set as the current DBHashTable record key.

Returns :

void


DBH_set_data()

void         DBH_set_data                   (DBHashTable *hash_table,
                                             void * data,
                                             int size);

This function copies the user data into the current DBHashTable record and along with DBH_set_key, makes the current DBHashTable record ready for the DBH_update function to commit the record to the actual DBHashTable on disk.

hash_table :

A pointer to a open DBHashTable.

data :

Pointer to the data to copy to the current DBHashTable record.

size :

The amount of bytes to copy to the current DBHashTable record.

Returns :

void (FIXME: should return TRUE or FALSE)


DBH_KEYLENGTH()

unsigned char DBH_KEYLENGTH                  (DBHashTable *hash_table);

This macro returns the DBHashTable's defined keylength.

hash_table :

A pointer to a open DBHashTable.

Returns :

the DBHashTable's defined keylength


DBH_RECORD_SIZE()

int         DBH_RECORD_SIZE                (DBHashTable *hash_table);

This macro returns the current DBHashTable's data size.

hash_table :

A pointer to a open DBHashTable.

Returns :

the current DBHashTable's data size


DBH_PATH()

void *       DBH_PATH                       (DBHashTable *hash_table);

This macro returns a pointer to a string containing the path to the current DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

a pointer to a string with the DBHashTable path.


DBH_DATA()

void *       DBH_DATA                       (DBHashTable *hash_table);

This macro returns a pointer to the current DBHashTable data area.

hash_table :

A pointer to a open DBHashTable.

Returns :

a pointer to the current DBHashTable data


DBH_KEY()

void *       DBH_KEY                       (DBHashTable *hash_table);

This macro returns a pointer to the current DBHashTable key area.

hash_table :

A pointer to a open DBHashTable.

Returns :

a pointer to the current DBHashTable key


DBH_ERASED_SPACE()

FILEPOINTER  DBH_ERASED_SPACE               (DBHashTable *hash_table);

This macro returns an approximation of the amount of bytes taken up by erased data in the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

the amount of bytes taken up by erased data


DBH_DATA_SPACE()

FILEPOINTER  DBH_DATA_SPACE                 (DBHashTable *hash_table);

This macro returns an approximation of the amount of bytes taken up by valid data in the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

the amount of bytes taken up by valid data


DBH_TOTAL_SPACE()

FILEPOINTER  DBH_TOTAL_SPACE                (DBHashTable *hash_table);

This macro returns an approximation of the total amount of bytes taken up by the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

the total amount of bytes taken up by the DBHashTable


DBH_FORMAT_SPACE()

FILEPOINTER  DBH_FORMAT_SPACE               (DBHashTable *hash_table);

This macro returns an approximation of the total amount of bytes taken up by the format of the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

the total amount of bytes taken up by the format of the DBHashTable


DBH_RECORDS()

FILEPOINTER  DBH_RECORDS                    (DBHashTable *hash_table);

This macro returns the number of records in the DBHashTable.

hash_table :

A pointer to a open DBHashTable.

Returns :

the number of records in the DBHashTable


DBH_MAXIMUM_RECORD_SIZE()

FILEPOINTER  DBH_MAXIMUM_RECORD_SIZE        (DBHashTable *hash_table);

This macro returns the maximum allocated space for data in the current DBHashTable record.

hash_table :

A pointer to a open DBHashTable.

Returns :

the maximum allocated space for data in the current DBHashTable record