root/trunk/qd/python_hash.d

Revision 778, 9.5 kB (checked in by FeepingCreature, 2 years ago)

Small fixes

Line 
1 module python_hash;
2
3 import std.random, tools.base;
4 static import std.gc, std.c.stdlib;
5 bool dontScan;
6 void* my_calloc(size_t length) {
7   auto res = (cast(ubyte*) std.c.stdlib.malloc(length))[0..length];
8   res[] = 0;
9   if (!dontScan) std.gc.addRange(res.ptr, res.ptr + length);
10   return res.ptr;
11 }
12
13 void my_free(void* p) {
14   if (!dontScan) std.gc.removeRange(p);
15   std.c.stdlib.free(p);
16 }
17
18 pragma(msg, "This class was modified for hashing purposes and may drop entries!");
19
20 import tools.log;
21
22 // models a write-only cache.
23 template PyDict(K, V) {
24 final class PyDict {
25   private:
26   //ulong, double etc. on 32bit
27   struct BigPODWrapper(T)
28   {
29     union {
30       T data;
31       uint[(T.sizeof - 1) / 4 + 1] field;
32     }
33     size_t hash;
34    
35     void ctor()
36     {
37       static assert(T.sizeof >= size_t.sizeof);
38      
39       static if (is(typeof(data._0): void*) && is(typeof(data._1): void*) && is(typeof(data._2): void*) && is(typeof(data._3): void*)) {
40         // presume 16-byte alignment
41         hash = ((cast(size_t) data._0)+
42                 (cast(size_t) data._1)+
43                 (cast(size_t) data._2)+
44                 (cast(size_t) data._3)) >> 4;
45       } else hash = typeid(T).getHash(&data);
46       //will work for ulong with additional hash
47       //hash = *cast(size_t*) &data + (cast(size_t*) &data)[1];
48       //avoid special hashes
49       if(isSpecialKey(*this)) hash += 2;
50     }
51    
52     void markDummy() { hash = dummy_hash; }
53
54     alias typeof(*this) TT;
55     // static bool cmp1(TT a, TT b) { return a.data == b.data; }
56     // static bool cmp2(TT a, TT b) { return false; }
57     // static bool cmp3(TT a, TT b) { return a.data == b.data; }
58     static bool cmp1(TT a, TT b) { for (int i = 0; i < field.length; ++i) if (a.field[i] != b.field[i]) return false; return true; }
59     static bool cmp2(TT a, TT b) { return false; }
60     static bool cmp3(TT a, TT b) { for (int i = 0; i < field.length; ++i) if (a.field[i] != b.field[i]) return false; return true; }
61   }
62  
63   //byte, uint etc. on 32bit
64   struct SmallPODWrapper(T)
65   {
66     union {
67       T data;
68       size_t hash;
69     }
70
71     void ctor() { static assert(T.sizeof <= size_t.sizeof); }
72
73     void markDummy() { this.hash = dummy_hash; }
74
75     alias typeof(*this) TT;
76     static bool cmp1(TT a, TT b) { return a.data == b.data; }
77     static bool cmp2(TT a, TT b) { return false; }
78     static bool cmp3(TT a, TT b) { return a.data == b.data; }
79   }
80  
81   template SelectKeyWrapper(K)
82   {
83     static if (isArray!(K)) static assert(false);
84     else static if (isRefType!(K)) static assert(false);
85     else static if (K.sizeof <= size_t.sizeof) alias SmallPODWrapper!(K) type;
86     else static if (K.sizeof > size_t.sizeof) alias BigPODWrapper!(K) type;
87     pragma(msg, "Selected ", type.stringof, " for ", K.stringof);
88   }
89
90   //key wrapper type
91   alias SelectKeyWrapper!(K).type KW;
92
93   //need to be 0 for the algorithm to terminate
94   static const size_t unused_hash = 0;
95   static const size_t dummy_hash = 1;
96
97   //need to be a power of two
98   static const size_t MINSIZE = 8;
99   static const size_t PERTURB_SHIFT = 5;
100
101   struct Entry
102   {
103     KW key;
104     V value;
105   }
106
107   //active + dummy entries
108   size_t fill = 0;
109
110   //active entries
111   size_t used = 0;
112
113   /*
114   * The table contains mask + 1 slots, and that's a power of 2.
115   * We store the mask instead of the size because the mask
116   * is more frequently needed.
117   */
118   size_t mask = MINSIZE - 1;
119
120   //table of size 2**n
121   Entry* table = void;
122
123   /*
124   * Since this.table can't hold entries for both special keys,
125   * they have to be stored and handled separately.
126   */
127   bool is_unused = false;
128   KW unused_key = KW.init;
129   V unused_value = V.init;
130
131   bool is_dummy = false;
132   KW dummy_key = KW.init;
133   V dummy_value = V.init;
134
135   public this()
136   {
137     // this.table = cast(Entry*) GC.calloc(Entry.sizeof * MINSIZE);
138     // this.table = cast(Entry*) std.gc.malloc(Entry.sizeof * MINSIZE);
139     this.table = cast(Entry*) my_calloc(Entry.sizeof * MINSIZE);
140     /*const SIZE=1024*1024*4 / Entry.sizeof;
141     dictresize(SIZE);*/
142   }
143
144   ~this()
145   {
146           delete this.table;
147   }
148
149   /*
150   * Any key that is not special is active.
151   */
152   private static bool isActiveKey(KW key)
153   {
154           return (key.hash > 1);
155   }
156
157   private static bool isDummyKey(KW key)
158   {
159           return (key.hash == dummy_hash);
160   }
161
162   private static bool isUnusedKey(KW key)
163   {
164           return (key.hash == unused_hash);
165   }
166
167   private static bool isSpecialKey(KW key)
168   {
169           return (key.hash < 2);
170   }
171
172   /*
173   * Lookup an entry in the table.
174   * This is the workhorse.
175   */
176   private Entry* lookdict(KW key)
177   {
178     assert(!isSpecialKey(key));
179     auto hash = key.hash, i = hash, ep = &table[i & mask];
180    
181     /*
182     * This first lookup will succeed in the very most cases
183     */
184     if (isUnusedKey(ep.key) || KW.cmp1(ep.key, key)) return ep;
185    
186     Entry* freeslot = void;
187     if (isDummyKey(ep.key)) freeslot = ep;
188     else {
189       if (KW.cmp2(ep.key, key)) return ep;
190       freeslot = null;
191     }
192    
193     /*
194     * In the loop, key == dummy is by far (factor of 100s) the
195     * least likely outcome, so test for that last.
196     */
197     for (size_t perturb = hash; ; perturb >>= PERTURB_SHIFT)
198     {
199       i = (i << 2) + i + perturb + 1;
200       ep = &table[i & mask];
201      
202       if (isUnusedKey(ep.key)) return freeslot ? freeslot : ep;
203       if (KW.cmp3(ep.key, key)) return ep;
204       if (!freeslot && isDummyKey(ep.key)) freeslot = ep;
205     }
206     assert(0);      //never reached
207   }
208  
209   public V* opIn_r(K k)
210   {
211     //wrap
212     auto key = KW(k);
213     key.ctor();
214    
215     if (isSpecialKey(key))
216     {
217       if (isUnusedKey(key))
218       {
219         return is_unused ? &unused_value : null;
220       }
221       else //must be dummy
222       {
223         assert(isDummyKey(key));
224         return is_dummy ? &dummy_value : null;
225       }
226       assert(0);
227     }
228    
229     Entry* ep = lookdict(key);
230     assert(ep);
231    
232     if (isActiveKey(ep.key)) return &ep.value;
233     else return null;
234   }
235  
236   public void opIndexAssign(V value, K k)
237   {
238     assert(this.fill <= this.mask);  //algorithm need at least one empty slot
239    
240     //wrap
241     auto key = KW(k);
242     key.ctor();
243    
244     if (isSpecialKey(key))
245     {
246       if (isUnusedKey(key))
247       {
248         is_unused = true;
249         unused_key = key;
250         unused_value = value;
251         return;
252       }
253       else //must be dummy
254       {
255         assert(isDummyKey(key));
256         is_dummy = true;
257         dummy_key = key;
258         dummy_value = value;
259         return;
260       }
261     }
262    
263     Entry* ep = lookdict(key);
264     assert(ep);
265
266     if (isActiveKey(ep.key)) ep.value = value; // this may overwrite, but so what
267     else {
268       if (isUnusedKey(ep.key)) this.fill++;
269       else assert(isDummyKey(ep.key));
270       ep.key = key;
271       ep.value = value;
272       this.used++;
273       checkLoad();
274     }
275   }
276
277   /*
278   * Check load factor and allocate new table
279   */
280   private void checkLoad()
281   {
282     //Make table bigger if load factor > 3/4.
283     //This can also result in smaller table if there are many dummy entries)
284     // if (this.fill * 4 >= (this.mask + 1) * 3) //load factor is 3/4
285     if (this.fill * 8 >= (this.mask + 1) * 7) //load factor is 7/8
286     {
287       const limit = 64*1024*1024;
288       // dictresize(2 * this.used);
289       if (this.used * Entry.sizeof < limit) dictresize(2 * this.used);
290       else {
291         logln("PURGE THE UNCLEAN");
292         dictresize(limit / Entry.sizeof, false); // drop dict. This can only work because we use it as a cache.
293       }
294     }
295     /*
296     //make table smaller, table size > MINSIZE and load factor is < 1/8
297     else if ((this.mask + 1) > MINSIZE && this.fill * 4 < (this.mask + 1))
298     {
299             dictresize(this.used / (this.used > 50000 ? 4 : 2));
300     }*/
301   }
302  
303   private void dictresize(size_t minused, bool copy = true)
304   {
305     // Find the smallest table size > minused and size == 2**n.
306     size_t newsize = MINSIZE;
307     while(newsize <= minused) newsize <<= 1;
308     // Get space for a new table.
309     Entry* oldtable = this.table;
310     assert(oldtable !is null);
311    
312     logln("Resize to ", newsize, " * ", Entry.sizeof, " -> ", Entry.sizeof * newsize);
313     auto newtable = oldtable;
314     if (newsize != used || copy) newtable = cast(Entry*) my_calloc(Entry.sizeof * newsize);
315     else (cast(int[])newtable[0..used])[] = 0;
316    
317     assert(newtable);
318     // assert(newtable != oldtable);
319    
320     this.table = newtable;
321     this.mask = newsize - 1;
322    
323     this.used = 0;
324     size_t i = this.fill;
325     this.fill = 0;
326    
327     //copy the data over; filter out dummies
328     if (copy) for (Entry* ep = oldtable; i > 0; ep++)
329     {
330       if (isActiveKey(ep.key))
331       {
332         --i;
333         insertdict_clean(ep.key, ep.value);
334       }
335       else if (isDummyKey(ep.key))
336       {
337         --i;
338       }
339     }
340     // delete oldtable;
341     if (oldtable != newtable) my_free(oldtable);
342   }
343
344   /*
345   * Insert an item which is known to be absent from the dict.
346   * This routine also assumes that the dict contains no deleted entries.
347   */
348   private void insertdict_clean(KW key, V value)
349   {
350           assert(!isSpecialKey(key));
351
352           size_t hash = key.hash;
353           size_t perturb = void;
354           size_t mask = this.mask;
355           Entry *ep0 = this.table;
356
357           size_t i = hash & mask;
358           Entry* ep = &ep0[i];
359
360           for (perturb = hash; !isUnusedKey(ep.key); perturb >>= PERTURB_SHIFT)
361           {
362                   i = (i << 2) + i + perturb + 1;
363                   ep = &ep0[i & mask];
364           }
365
366           this.fill++;
367           ep.key = key;
368           ep.value = value;
369           this.used++;
370   }
371
372   alias Unstatic!(K) K_;
373   alias Unstatic!(V) V_;
374   /*
375   * Get number of active entries stored.
376   */
377   public size_t size() { return used + is_dummy + is_unused; }
378 }
379 }
Note: See TracBrowser for help on using the browser.