Code Reading TinyPy dict.c
Start with the basic function.
int tp_lua_hash(void const *v,int l) {
int i,step = (l>>5)+1;
int h = l + (l >= 4?*(int*)v:0);
for (i=l; i>=step; i-=step) {
h = h^((h<<5)+(h>>2)+((unsigned char *)v)[i-1]);
}
return h;
}
Annotate each line “freshman style”
int tp_lua_hash(void const *v,int l) {
// declare i, step = l[ength] right shifted 5 + 1
int i,step = (l>>5)+1;
// if l[ength] >= 4: h[ash] = l[ength] + int value of v
// otherwise: h[ash] = l[ength]
int h = l + (l >= 4?*(int*)v:0);
// For end of v until first step of v, go step by step negatively
for (i=l; i>=step; i-=step) {
// h[ash] = previous hash XOR (hash shift left 5 + has shift right 2)
// + v as char[current step]
h = h^((h<<5)+(h>>2)+((unsigned char *)v)[i-1]);
}
// Return hash
return h;
}
Find instances of tp_lua_hash called in rest of the program
In dict.c function tp_hash(TP,tp_obj v)
case TP_NUMBER: return tp_lua_hash(&v.number.val,sizeof(tp_num));
case TP_STRING: return tp_lua_hash(v.string.val,v.string.len);
case TP_DICT: return tp_lua_hash(&v.dict.val,sizeof(void*));
// (...)
// Reformmated to allow for easier reading
tp_obj vv = v.list.val->items[n];
r += vv.type != TP_LIST ? tp_hash(tp,v.list.val->items[n])
: tp_lua_hash(&vv.list.val,sizeof(void*));
Aha! This implies that the parameter v is a void pointer to any TinyPy data type and that
l is the length of said data type in BYTES.
Let’s update our code snippet.
/* Hash an arbitrary TinyPy object.
* v is a void pointer that points to the TinyPy object to be hashed.
* l is the length/size of the object pointed to by the void pointer.
*/
int tp_lua_hash(void const *v,int l) {
// Declare step to be 1 or 1/32nd the length, whichever is larger.
// declare i, step = l[ength] right shifted 5 + 1
int i,step = (l>>5)+1;
// If the value is more than 4 bytes long (32 bits) add that to its length
// as the initial value for the hash.
// Otherwise start the hash as the length.
// if l[ength] >= 4: h[ash] = l[ength] + int value of v
// otherwise: h[ash] = l[ength]
int h = l + (l >= 4?*(int*)v:0);
// Take into account the hash values of each “step” of the value of v.
// For end of v until first step of v, go step by step negatively
for (i=l; i>=step; i-=step) {
// h[ash] = previous hash XOR (hash shift left 5 + has shift right 2)
// + v as char[current step]
h = h^((h<<5)+(h>>2)+((unsigned char *)v)[i-1]);
}
// Return hash
return h;
}
And I think we’re done. Didn’t have to go as deep as the examples in the
Structured Programming book I have, and I haven’t proven anything about how
good the hash is, but hey, now I understand what it’s doing.
There are a few additional simplifications I can make to the markup, using max() instead of “whichever is larger” and other mathematical notations, I’ll poke in the book sometime to see what they are.