blob: d1ac70a93cc16497abe3a7c053bf98c33cd67754 [file] [log] [blame]
/*
------------------------------------------------------------------------------
perfect.h: code to generate code for a hash for perfect hashing.
(c) Bob Jenkins, September 1996
You may use this code in any way you wish, and it is free. No warranty.
I hereby place this in the public domain.
Source is http://burtleburtle.net/bob/c/perfect.h
------------------------------------------------------------------------------
*/
#ifndef STANDARD
#include "standard.h"
#endif
#ifndef PERFECT
#define PERFECT
#define MAXKEYLEN 256 /* maximum length of a key */
#define USE_SCRAMBLE 4096 /* use scramble if blen >= USE_SCRAMBLE */
#define SCRAMBLE_LEN ((uint32_t)1<<16) /* length of *scramble* */
#define RETRY_INITKEY 2048 /* number of times to try to find distinct (a,b) */
#define RETRY_PERFECT 1 /* number of times to try to make a perfect hash */
#define RETRY_HEX 200 /* RETRY_PERFECT when hex keys given */
/* the generated code for the final hash, assumes initial hash is done */
struct gencode
{
char **line; /* array of text lines, 80 bytes apiece */
/*
* The code placed here must declare "uint32_t rsl"
* and assign it the value of the perfect hash using the function inputs.
* Later code will be tacked on which returns rsl or manipulates it according
* to the user directives.
*
* This code is at the top of the routine; it may and must declare any
* local variables it needs.
*
* Each way of filling in **line should be given a comment that is a unique
* tag. A testcase named with that tag should also be found which tests
* the generated code.
*/
uint32_t len; /* number of lines available for final hash */
uint32_t used; /* number of lines used by final hash */
uint32_t lowbit; /* for HEX, lowest interesting bit */
uint32_t highbit; /* for HEX, highest interesting bit */
uint32_t diffbits; /* bits which differ for some key */
uint32_t i,j,k,l,m,n,o; /* state machine used in hexn() */
};
typedef struct gencode gencode;
/* user directives: perfect hash? minimal perfect hash? input is an int? */
struct hashform
{
enum {
NORMAL_HM, /* key is a string */
INLINE_HM, /* user will do initial hash, we must choose salt for them */
HEX_HM, /* key to be hashed is a hexidecimal 4-byte integer */
DECIMAL_HM, /* key to be hashed is a decimal 4-byte integer */
AB_HM, /* key to be hashed is "A B", where A and B are (A,B) in hex */
ABDEC_HM /* like AB_HM, but in decimal */
} mode;
enum {
STRING_HT, /* key is a string */
INT_HT, /* key is an integer */
AB_HT /* dunno what key is, but input is distinct (A,B) pair */
} hashtype;
enum {
NORMAL_HP, /* just find a perfect hash */
MINIMAL_HP /* find a minimal perfect hash */
} perfect;
enum {
FAST_HS, /* fast mode */
SLOW_HS /* slow mode */
} speed;
};
typedef struct hashform hashform;
/* representation of a key */
struct key
{
uint8_t *name_k; /* the actual key */
uint32_t len_k; /* the length of the actual key */
uint32_t hash_k; /* the initial hash value for this key */
struct key *next_k; /* next key */
/* beyond this point is mapping-dependent */
uint32_t a_k; /* a, of the key maps to (a,b) */
uint32_t b_k; /* b, of the key maps to (a,b) */
struct key *nextb_k; /* next key with this b */
};
typedef struct key key;
/* things indexed by b of original (a,b) pair */
struct bstuff
{
uint16_t val_b; /* hash=a^tabb[b].val_b */
key *list_b; /* tabb[i].list_b is list of keys with b==i */
uint32_t listlen_b; /* length of list_b */
uint32_t water_b; /* high watermark of who has visited this map node */
};
typedef struct bstuff bstuff;
/* things indexed by final hash value */
struct hstuff
{
key *key_h; /* tabh[i].key_h is the key with a hash of i */
};
typedef struct hstuff hstuff;
/* things indexed by queue position */
struct qstuff
{
bstuff *b_q; /* b that currently occupies this hash */
uint32_t parent_q; /* queue position of parent that could use this hash */
uint16_t newval_q; /* what to change parent tab[b] to to use this hash */
uint16_t oldval_q; /* original value of tab[b] */
};
typedef struct qstuff qstuff;
/* return ceiling(log based 2 of x) */
uint32_t ilog2(uint32_t val);
/* Given the keys, scramble[], and hash mode, find the perfect hash */
void findhash(bstuff **tabb, uint32_t *alen, uint32_t *blen, uint32_t *salt, gencode *final, uint32_t *scramble, uint32_t *smax, key *keys, uint32_t nkeys, hashform *form);
/* private, but in a different file because it's excessively verbose */
int inithex(key *keys, uint32_t nkeys, uint32_t alen, uint32_t blen, uint32_t smax, uint32_t salt, gencode *final, hashform *form);
#endif /* PERFECT */