[start moving to new C based atom format.
John Meacham <john@repetae.net>**20080210054515] adddir ./StringTable
addfile ./StringTable/StringTable_cbits.c
addfile ./StringTable/StringTable_cbits.h
addfile ./StringTable/Atom.hsc
hunk ./Atom.hs 18
-import Data.Generics
-import Data.Monoid
hunk ./Atom.hs 19
-import List(sort)
-import System.IO.Unsafe
-import Data.IORef
+import Control.Monad
hunk ./Atom.hs 21
-import qualified Data.HashTable as HT
-import qualified Data.IntMap as IM
+import PackedString(PackedString(..))
+import qualified Data.ByteString as BS
+import StringTable.Atom
hunk ./Atom.hs 25
-import PackedString
hunk ./Atom.hs 26
+toString :: Atom -> String
+toString = fromAtom
hunk ./Atom.hs 29
-instance Monoid Atom where
-    mempty = toAtom nilPS
-    mappend x y = toAtom $ appendPS (fromAtom x)  (fromAtom y)
-    mconcat xs = toAtom $ concatPS (map fromAtom xs)
-
-{-# NOINLINE table #-}
-table :: HT.HashTable PackedString Atom
-table = unsafePerformIO (HT.new (==) (fromIntegral . hashPS))
-
-{-# NOINLINE reverseTable #-}
-reverseTable :: IORef (IM.IntMap PackedString)
-reverseTable = unsafePerformIO (newIORef IM.empty)
-
-{-# NOINLINE intPtr #-}
-intPtr :: Ptr Int
-intPtr = unsafePerformIO (new 1)
-
-
-newtype Atom = Atom Int
-    deriving(Typeable, Data,Eq,Ord)
-
-instance Show Atom where
-    showsPrec _ atom = (toString atom ++)
-
-instance Read Atom where
-    readsPrec _ s = [ (fromString s,"") ]
-    --readsPrec p s = [ (fromString x,y) |  (x,y) <- readsPrec p s]
+fromString :: String -> Atom
+fromString string = toAtom string
hunk ./Atom.hs 32
-toString atom = unpackPS $ toPackedString atom
-atomIndex (Atom x) = x
-unsafeAtomToInt (Atom x) = x
hunk ./Atom.hs 33
-{- these are separate in case operations are one-way -}
-class ToAtom a where
-    toAtom :: a -> Atom
-class FromAtom a where
-    fromAtom :: Atom -> a
+atomIndex,unsafeAtomToInt :: Atom -> Int
+atomIndex = fromAtom
+unsafeAtomToInt = fromAtom
hunk ./Atom.hs 37
-instance ToAtom String where
-    toAtom = fromString
-instance FromAtom String where
-    fromAtom = toString
hunk ./Atom.hs 38
-    fromAtom x = showsPS (fromAtom x)
+    fromAtom x = shows (fromAtom x :: String)
hunk ./Atom.hs 41
-    toAtom x = unsafePerformIO $ fromPackedStringIO x
+    toAtomIO (PS x) = toAtomIO x
hunk ./Atom.hs 43
-    fromAtom = toPackedString
+    fromAtomIO atom = PS `liftM` fromAtomIO atom
hunk ./Atom.hs 45
-instance ToAtom Atom where
-    toAtom x = x
-instance FromAtom Atom where
-    fromAtom x = x
-
-instance ToAtom Char where
-    toAtom x = toAtom [x]
-
-
-fromString :: String -> Atom
-fromString xs = unsafePerformIO $ fromStringIO xs
hunk ./Atom.hs 47
-fromStringIO cs = fromPackedStringIO (packString cs)
+fromStringIO cs = toAtomIO cs
hunk ./Atom.hs 49
-{-# NOINLINE fromPackedStringIO #-}
hunk ./Atom.hs 50
-fromPackedStringIO ps = HT.lookup table ps >>= \x -> case x of
-    Just z -> return z
-    Nothing -> do
-        i <- peek intPtr
-        poke intPtr (i + 2)
-        let a = Atom i
-        HT.insert table ps a
-        modifyIORef reverseTable (IM.insert ((i - 1) `div` 2) ps)
-        return a
+fromPackedStringIO ps = toAtomIO ps
hunk ./Atom.hs 53
--- The following are 'unwise' in that they may reveal internal structure that may differ between program runs
-
hunk ./Atom.hs 54
-    x <- HT.toList table
-    mapM_ putStrLn [ show i ++ " " ++ show ps  | (ps,Atom i) <- sort x]
-
-
-{-# NOINLINE intToAtom #-}
-intToAtom :: Monad m => Int -> m Atom
-intToAtom i | odd i && i > 0 = unsafePerformIO $ readIORef (i `seq` reverseTable) >>= \x -> case IM.member ((i-1) `div` 2) x of
-    True -> return $ return $ Atom i
-    False -> return $ fail $ "intToAtom: " ++ show i
-intToAtom i = fail $ "intToAtom: " ++ show i
+    dumpTable
+    dumpStringTableStats
hunk ./Atom.hs 57
-unsafeIntToAtom :: Int -> Atom
-unsafeIntToAtom x = Atom x
hunk ./Atom.hs 58
-{-# NOINLINE toPackedString #-}
hunk ./Atom.hs 59
-toPackedString (Atom i) = unsafePerformIO $ readIORef (i `seq` reverseTable) >>= \x -> case IM.lookup ((i-1) `div` 2) x of
-    Just ps -> return ps
-    Nothing -> do
-        x' <- readIORef reverseTable
-        return $ error $ "toPackedString: " ++ show i ++ " " ++ (show (x,x'))
+toPackedString = fromAtom
hunk ./Atom.hs 62
-    get = do fmap (unsafePerformIO . fromPackedStringIO) get
-    put a = put (toPackedString a)
+    get = (toAtom :: BS.ByteString -> Atom) `fmap` get
+    put a = put (fromAtom a :: BS.ByteString)
hunk ./E/Binary.hs 24
-        unless (even i) $ fail "number not even"
hunk ./E/LambdaLift.hs 5
+import Data.Maybe
hunk ./E/LambdaLift.hs 279
-        globalName tvr | even (tvrIdent tvr) = do
+        globalName tvr | isNothing $ intToAtom (tvrIdent tvr) = do
hunk ./E/Program.hs 10
+import StringTable.Atom
hunk ./E/Program.hs 68
-programSetDs ds prog | flint && any even (map (tvrIdent . fst) ds) = error $ "programSetDs: trying to set non unique top level name: \n" ++ intercalate "\n"  (sort $ map (show . tvrShowName . fst) ds)
+programSetDs ds prog | flint && any (isNothing . intToAtom) (map (tvrIdent . fst) ds) = error $ "programSetDs: trying to set non unique top level name: \n" ++ intercalate "\n"  (sort $ map (show . tvrShowName . fst) ds)
hunk ./E/Show.hs 7
+import StringTable.Atom
hunk ./E/Show.hs 132
-allocTVr tvr (SEM action) | even (tvrIdent tvr) = do
+allocTVr tvr (SEM action) | isNothing $ intToAtom (tvrIdent tvr) = do
hunk ./E/Traverse.hs 17
+import Data.Maybe
hunk ./E/Traverse.hs 20
+import StringTable.Atom
hunk ./E/Traverse.hs 156
-        n' <- if n > 0 && (not ralways || odd n) then uniqueName  n else newName
+        n' <- if n > 0 && (not ralways || isJust (intToAtom n)) then uniqueName  n else newName
hunk ./Main.hs 8
-import Data.Monoid
hunk ./Main.hs 9
-import List hiding(group,union,delete)
hunk ./Main.hs 15
+import Util.Util
+import StringTable.Atom
hunk ./Main.hs 96
-main = runMain $ bracketHtml $ do
+catom action = Control.Exception.catch action (\e -> dumpTable >> dumpStringTableStats >> throw e)
+
+main = runMain $ catom $ bracketHtml $ do
hunk ./Main.hs 402
-        put $ fromList [ (tvrIdent v,(v,lc)) | (v,lc) <- programDs mprog] `union` smap
+        put $ fromList [ (tvrIdent v,(v,lc)) | (v,lc) <- programDs mprog] `S.union` smap
hunk ./Main.hs 565
-    prog <- return $ runIdentity $ annotateProgram mempty (\_ nfo -> return $ modifyProperties (flip (foldr delete) [prop_HASRULE,prop_WORKER]) nfo) letann (\_ -> return) prog
+    prog <- return $ runIdentity $ annotateProgram mempty (\_ nfo -> return $ modifyProperties (flip (foldr S.delete) [prop_HASRULE,prop_WORKER]) nfo) letann (\_ -> return) prog
hunk ./Main.hs 901
-    let f (tvr@TVr { tvrIdent = n },e) | even n = do
+    let f (tvr@TVr { tvrIdent = n },e) | isNothing $ intToAtom n = do
hunk ./Makefile.am 42
-         Util/VarName.hs Version/Version.hs Version/Config.hs
+         Util/VarName.hs Version/Version.hs Version/Config.hs \
+	 StringTable/Atom.hsc
hunk ./Makefile.am 51
-GHCOPTS=  $(HSOPTS)  -O -ignore-package lang  $(GHCDEBUGOPTS) $(GHCINC) $(PACKAGES) $(GHCLANG)
+GHCOPTS=  $(HSOPTS)  -O -ignore-package lang  $(GHCDEBUGOPTS) $(GHCINC) $(PACKAGES) $(GHCLANG) -optc-std=c99
hunk ./Makefile.am 55
-EXTRA_DIST = data utils arch docs FrontEnd/HsParser.y $(BUILT_SOURCES) lib/base lib/haskell98 lib/haskell98.cabal $(JHC_LIBS)
+EXTRA_DIST = data utils arch docs FrontEnd/HsParser.y $(BUILT_SOURCES) lib/base lib/haskell98 lib/haskell98.cabal $(JHC_LIBS) StringTable/StringTable_cbits.c StringTable_cbits.h
hunk ./Makefile.am 90
-jhc: Main.hs  $(HSFILES) $(BUILT_SOURCES)
-	$(HC) $(GHCOPTS) --make $< -o $@
+jhc: Main.hs  $(HSFILES) $(BUILT_SOURCES) StringTable/Atom.hs StringTable/StringTable_cbits.c
+	$(HC) $(GHCOPTS) --make $< StringTable/StringTable_cbits.c -o $@
hunk ./Makefile.am 111
-	$(HSC2HS) -C "$(CFLAGS)" -C -I. -C -I@srcdir@ -o $@ $<
+	$(HSC2HS) -I@srcdir@/StringTable -o $@ $<
hunk ./PackedString.hs 77
-    deriving(Typeable,Binary)
+    deriving(Typeable,Binary,Eq,Ord)
hunk ./PackedString.hs 80
-instance Eq PackedString where
-    (==) (PS x) (PS y) =  x == y
-    (/=) (PS x) (PS y) =  x /= y
-
-instance Ord PackedString where
-    compare (PS x) (PS y) = compare x y
-
hunk ./StringTable/Atom.hsc 1
+{-# OPTIONS_GHC -fffi -XTypeSynonymInstances -XDeriveDataTypeable  #-}
+module StringTable.Atom(
+    Atom(),
+    ToAtom(..),
+    FromAtom(..),
+    intToAtom,
+    unsafeIntToAtom,
+    atomCompare,
+    dumpTable,
+    dumpStringTableStats
+    ) where
+
+#include "StringTable_cbits.h"
+
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Internal as BS
+import System.IO.Unsafe
+import Foreign
+import Data.Word
+import Data.Char
+import Foreign.C
+import Data.Monoid
+import Data.Dynamic
+import Data.Bits
+
+
+
+newtype Atom = Atom (#type atom_t)
+    deriving(Typeable,Eq,Ord)
+
+class FromAtom a where
+    fromAtom :: Atom -> a
+    fromAtomIO :: Atom -> IO a
+
+    fromAtomIO a = return (fromAtom a)
+    fromAtom a = unsafePerformIO (fromAtomIO a)
+
+class ToAtom a where
+    toAtom :: a -> Atom
+    toAtomIO :: a -> IO Atom
+
+    toAtomIO a = return (toAtom a)
+    toAtom a = unsafePerformIO (toAtomIO a)
+
+instance ToAtom Atom where
+    toAtom x = x
+
+instance FromAtom Atom where
+    fromAtom x = x
+
+instance ToAtom Char where
+    toAtom x = toAtom [x]
+
+instance ToAtom CStringLen where
+    toAtomIO (cs,len) = do
+        if (len > (#const MAX_ENTRY_SIZE))
+            then fail "StringTable: atom is too big"
+            else stAdd cs (fromIntegral len)
+
+
+
+instance ToAtom CString where
+    toAtomIO cs = do
+        len <- BS.c_strlen cs
+        toAtomIO (cs,fromIntegral len :: Int)
+
+instance ToAtom String where
+    toAtomIO s = toAtomIO (BS.pack (toUTF s))
+
+instance FromAtom String where
+    fromAtom = fromUTF . BS.unpack . fromAtom
+
+instance ToAtom BS.ByteString where
+    toAtomIO bs = BS.useAsCStringLen bs toAtomIO
+
+instance FromAtom CStringLen where
+    fromAtom a@(Atom v) = (stPtr a,fromIntegral $ v .&. (#const ATOM_LEN_MASK))
+
+instance FromAtom Word where
+    fromAtom (Atom i) = fromIntegral i
+
+instance FromAtom Int where
+    fromAtom (Atom i) = fromIntegral i
+
+instance FromAtom BS.ByteString where
+    fromAtomIO a = do
+        (p,l) <- fromAtomIO a :: IO CStringLen
+        BS.packCStringLen (p,fromIntegral l)
+    --    fp <- newForeignPtr_ =<< (castPtr `fmap` peek p)
+    --    return $ BS.fromForeignPtr fp 0 (fromIntegral l)
+
+instance Monoid Atom where
+    mempty = toAtom BS.empty
+    mappend x y = unsafePerformIO $ atomAppend x y
+
+instance Show Atom where
+    showsPrec _ atom = (fromAtom atom ++)
+
+instance Read Atom where
+    readsPrec _ s = [ (toAtom s,"") ]
+
+intToAtom :: Monad m => Int -> m Atom
+intToAtom i = if 0 /= (#const VALID_BITMASK) .&. i then return (Atom $ fromIntegral i) else fail $ "intToAtom: " ++ show i
+
+unsafeIntToAtom :: Int -> Atom
+unsafeIntToAtom x = Atom (fromIntegral x)
+
+foreign import ccall unsafe "stringtable_lookup" stAdd :: CString -> CInt -> IO Atom
+foreign import ccall unsafe "stringtable_ptr" stPtr :: Atom -> CString
+foreign import ccall unsafe "stringtable_get" stGet :: Atom -> Ptr CChar -> IO CInt
+foreign import ccall unsafe "stringtable_stats" dumpStringTableStats :: IO ()
+foreign import ccall unsafe "dump_table" dumpTable :: IO ()
+foreign import ccall unsafe "atom_append" atomAppend :: Atom -> Atom -> IO Atom
+foreign import ccall unsafe "lexigraphic_compare" c_atomCompare :: Atom -> Atom -> CInt
+
+atomCompare a b = if c == 0 then EQ else if c > 0 then GT else LT where
+    c = c_atomCompare a b
+
+
+
+-- | Convert Unicode characters to UTF-8.
+toUTF :: String -> [Word8]
+toUTF [] = []
+toUTF (x:xs) | ord x<=0x007F = (fromIntegral $ ord x):toUTF xs
+	     | ord x<=0x07FF = fromIntegral (0xC0 .|. ((ord x `shift` (-6)) .&. 0x1F)):
+			       fromIntegral (0x80 .|. (ord x .&. 0x3F)):
+			       toUTF xs
+	     | otherwise     = fromIntegral (0xE0 .|. ((ord x `shift` (-12)) .&. 0x0F)):
+			       fromIntegral (0x80 .|. ((ord x `shift` (-6)) .&. 0x3F)):
+			       fromIntegral (0x80 .|. (ord x .&. 0x3F)):
+			       toUTF xs
+
+-- | Convert UTF-8 to Unicode.
+
+fromUTF :: [Word8] -> String
+fromUTF xs = fromUTF' (map fromIntegral xs) where
+    fromUTF' [] = []
+    fromUTF' (all@(x:xs))
+	| x<=0x7F = (chr (x)):fromUTF' xs
+	| x<=0xBF = err
+	| x<=0xDF = twoBytes all
+	| x<=0xEF = threeBytes all
+	| otherwise   = err
+    twoBytes (x1:x2:xs) = chr  ((((x1 .&. 0x1F) `shift` 6) .|.
+			       (x2 .&. 0x3F))):fromUTF' xs
+    twoBytes _ = error "fromUTF: illegal two byte sequence"
+
+    threeBytes (x1:x2:x3:xs) = chr ((((x1 .&. 0x0F) `shift` 12) .|.
+				    ((x2 .&. 0x3F) `shift` 6) .|.
+				    (x3 .&. 0x3F))):fromUTF' xs
+    threeBytes _ = error "fromUTF: illegal three byte sequence"
+
+    err = error "fromUTF: illegal UTF-8 character"
hunk ./StringTable/StringTable_cbits.c 1
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+//#define NDEBUG 1
+#include <assert.h>
+
+#include "StringTable_cbits.h"
+
+static pthread_mutex_t mutex_hash = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t mutex_string = PTHREAD_MUTEX_INITIALIZER;
+
+// 23 bits of chunk space to leave one bit for 'valid' flag.
+// valid flag must be set to 1 for it to be a valid atom
+
+static void dieif(bool,char *);
+static uint32_t hash2(uint32_t salt,unsigned char *key, int key_len);
+static uint32_t hash3(uint32_t salt,unsigned char *key, int key_len);
+
+// string allocation stuff
+
+#define NUM_CHUNKS 256
+#define CHUNK_SIZE 32768
+
+
+
+#define ATOM_LEN(c)     (((atom_t)(c)) & ATOM_LEN_MASK)
+#define CHUNK_INDEX(c)  (((atom_t)(c) >> 8)&0xFF)
+#define CHUNK_OFFSET(c) (((atom_t)(c) & ~VALID_BITMASK) >> 16 )
+
+#define MAKE_ATOM(ci,co,len) ((((len) & 0xFF) | ((((unsigned)ci) & 0xff) << 8) | (((unsigned)co) << 16))|VALID_BITMASK)
+
+#define ATOM_PTR(c) (&(stringtable_chunks[CHUNK_INDEX(c)][CHUNK_OFFSET(c)]))
+
+#define ATOM_VALID(a) ((a) & VALID_BITMASK)
+
+
+
+static unsigned char first_chunk[CHUNK_SIZE];
+static unsigned char *stringtable_chunks[NUM_CHUNKS] = { first_chunk };
+
+static uint16_t current_chunk = 0;
+static uint16_t next_free_offset = 0;
+
+static atom_t
+add_string(unsigned char *cs, int len)
+{
+    pthread_mutex_lock(&mutex_string);
+        //printf("add_string(%c,%c,%i)\n",cs[0],cs[1],len);
+    assert(len >= 0);
+    assert(len < MAX_ENTRY_SIZE);
+    assert(next_free_offset < CHUNK_SIZE);
+    if(next_free_offset + 1 > CHUNK_SIZE - MAX_ENTRY_SIZE) {
+        dieif(current_chunk >= NUM_CHUNKS - 1, "No more chunks");
+        current_chunk++;
+        assert(!stringtable_chunks[current_chunk]);
+        stringtable_chunks[current_chunk] = malloc(CHUNK_SIZE);
+        dieif(!stringtable_chunks[current_chunk], "error alocating memory");
+        next_free_offset = 0;
+    }
+    memcpy(stringtable_chunks[current_chunk] + next_free_offset, cs, len);
+    atom_t r = MAKE_ATOM(current_chunk, next_free_offset, len);
+    assert(CHUNK_INDEX(r) == current_chunk);
+    assert(CHUNK_OFFSET(r) == next_free_offset);
+    assert(ATOM_PTR(r) == stringtable_chunks[current_chunk] + next_free_offset);
+    assert(ATOM_LEN(r) == len);
+    next_free_offset += len;
+    assert(next_free_offset < CHUNK_SIZE);
+    assert(current_chunk < NUM_CHUNKS);
+    pthread_mutex_unlock(&mutex_string);
+    return r;
+}
+
+// hashtable stuff
+
+#define KEEP_HASH 2
+
+#define CUCKOO_HASHES 2U
+#define CUCKOO_BUCKETS 2U
+
+typedef uint32_t hash_t;
+
+struct hentry {
+#if KEEP_HASH
+        hash_t hashes[CUCKOO_HASHES];
+#endif
+        atom_t atom;
+};
+
+#define INIT_SIZE 2
+static uint32_t hsize = INIT_SIZE;
+static struct hentry init_htable[(1 << INIT_SIZE) * CUCKOO_HASHES];
+static struct hentry *htable = init_htable;
+
+#define HASHSIZE  (1 << hsize)
+#define HASHMASK (HASHSIZE - 1)
+
+#define INDEX_HASH(i)  ((i) / HASHSIZE)
+#define HASH_INDEX(h,x) ((h * HASHSIZE) + (HASHMASK & ((uint32_t)x)))
+
+#define HASH_BUCKET(x,b) ((((x) + (b)) % HASHSIZE) + (INDEX_HASH(x)*HASHSIZE))
+
+#define DEPTH_LIMIT 512
+
+static void hash_insert(struct hentry x);
+
+static void
+fast_insert(int t, int tb, struct hentry hb) {
+        hash_insert(hb);
+}
+
+bool
+atom_exists(atom_t a) {
+        for(int i = 0; i < HASHSIZE*CUCKOO_HASHES; i++) {
+                if(a == htable[i].atom) return true;
+        }
+        return false;
+}
+bool
+item_exists(char *cs, int len) {
+        for(int i = 0; i < HASHSIZE*CUCKOO_HASHES; i++) {
+                atom_t a = htable[i].atom;
+                if(ATOM_VALID(a)) {
+                    if(len == ATOM_LEN(a) && !memcmp(ATOM_PTR(a),cs,len))
+                        return true;
+                }
+        }
+        return false;
+}
+
+void
+dump_table(void) {
+        for(int i = 0; i < HASHSIZE*CUCKOO_HASHES; i++) {
+                atom_t a = htable[i].atom;
+                if(ATOM_VALID(a)) {
+                        printf("%p %u: ",ATOM_PTR(a),ATOM_LEN(a));
+                        fwrite(ATOM_PTR(a),1,ATOM_LEN(a),stdout);
+                        fwrite("\n",1,1,stdout);
+                }
+        }
+}
+
+
+static void
+grow_table(void) {
+    fprintf(stderr,"grow_table[[[\n");
+        uint32_t os = (1 << hsize++) * CUCKOO_HASHES;
+        struct hentry *ot = htable;
+        htable = calloc(sizeof(struct hentry),CUCKOO_HASHES * (1 << hsize));
+
+        for(int i = 0; i < os; i++) {
+                if(ATOM_VALID(ot[i].atom))
+                        fast_insert(0,0,ot[i]);
+        }
+        if(ot != init_htable) free(ot);
+    fprintf(stderr,"]]]\n");
+}
+
+#if KEEP_HASH
+#define FHASH(x,i) ((x).hashes[i])
+#else
+#define FHASH(x,i) (hash2(i,ATOM_PTR((x).atom),ATOM_LEN((x).atom)))
+#endif
+
+static void
+hash_insert(struct hentry x) {
+        assert(ATOM_VALID(x.atom));
+         fprintf(stderr,"hash_insert(%x,%p:%i,%x,%x,[%x,%x]", x.atom, ATOM_PTR(x.atom), ATOM_LEN(x.atom), x.hashes[0], x.hashes[1],HASH_INDEX(0,x.hashes[0]),HASH_INDEX(1,x.hashes[1]));
+        assert(!atom_exists(x.atom));
+        if(item_exists(ATOM_PTR(x.atom),ATOM_LEN(x.atom))) 
+            dump_table();
+        assert(!item_exists(ATOM_PTR(x.atom),ATOM_LEN(x.atom)));
+        atom_t start = x.atom;
+        for(int loop = 0; loop < DEPTH_LIMIT;loop++) {
+                for(int i = 0; i < CUCKOO_HASHES; i++) {
+//                        int e = HASH_INDEX(i,FHASH(x,i));
+                        for(int j = 0; j < CUCKOO_BUCKETS; j++) {
+                                //#struct hentry *b = &(htable[(e + j) & HASHJMASK ]);
+                                //struct hentry *b = &htable[HASH_BUCKET(e,j)];
+                                struct hentry *b = &htable[HASH_INDEX(i,FHASH(x,i) + j)];
+                                if(!ATOM_VALID(b->atom)) {
+                                        *b = x;
+                                        fprintf(stderr,")\n");
+                                        return;
+                                }
+                                struct hentry tb = x;
+                                x = *b;
+                                *b = tb;
+                        }
+                        // struct hentry *b = &(htable[e]);
+                }
+                if(x.atom == start) {
+                        break;
+                }
+        }
+        grow_table();
+        fprintf(stderr,"R");
+        return hash_insert(x);
+}
+
+
+
+atom_t
+stringtable_lookup(unsigned char *cs, int len)
+{
+        fprintf(stderr,"stringtable_lookup(%c,%c,%*s,%i)\n",cs[0],cs[1],len,cs,len);
+        pthread_mutex_lock(&mutex_hash);
+        assert(len >= 0);
+        assert(len < MAX_ENTRY_SIZE);
+        hash_t h[CUCKOO_HASHES];
+        for(uint32_t i = 0; i < CUCKOO_HASHES; i++) {
+                h[i] = hash2(i,cs,len);
+                //int e = HASH_INDEX(i,h[i]);
+                for(int j = 0; j < CUCKOO_BUCKETS; j++) {
+                        //struct hentry *b = &htable[(e + i) & HASHJMASK ];
+                        //struct hentry *b = &htable[HASH_BUCKET(e,j)];
+                        struct hentry *b = &htable[HASH_INDEX(i,h[i] + j)];
+#if KEEP_HASH
+                        if (ATOM_VALID(b->atom) && h[i] == b->hashes[i] && len == ATOM_LEN(b->atom) &&  !memcmp(ATOM_PTR(b->atom),cs,len)) {
+                            pthread_mutex_unlock(&mutex_hash);
+                            return b->atom;
+                        }
+#else
+                        if (ATOM_VALID(b->atom) && len == ATOM_LEN(b->atom) && !memcmp(ATOM_PTR(b->atom),cs,len)) {
+                            pthread_mutex_unlock(&mutex_hash);
+                            return b->atom;
+                        }
+#endif
+                }
+        }
+        atom_t na = add_string(cs,len);
+        struct hentry hb;
+        hb.atom = na;
+#if KEEP_HASH
+        memcpy(hb.hashes,h,sizeof hb.hashes);
+#endif
+        hash_insert(hb);
+        pthread_mutex_unlock(&mutex_hash);
+        return na;
+}
+
+
+
+int
+lexigraphic_compare(atom_t x, atom_t y)
+{
+    int xl = ATOM_LEN(x);
+    int yl = ATOM_LEN(y);
+    return memcmp(ATOM_PTR(x),ATOM_PTR(y),xl < yl ? xl : yl) || xl - yl;
+}
+
+
+atom_t
+atom_append(atom_t x,atom_t y)
+{
+    unsigned char *xs,*ys;
+    int xl,yl;
+
+    xl = stringtable_find(x,&xs);
+    yl = stringtable_find(y,&ys);
+
+    unsigned char buf[MAX_ENTRY_SIZE];
+
+    memcpy(buf,xs,xl);
+    memcpy(buf + xl,ys,yl);
+
+    return stringtable_lookup(buf,xl + yl);
+}
+
+char *
+stringtable_ptr(atom_t cl)
+{
+        assert(ATOM_VALID(cl));
+        return ATOM_PTR(cl);
+}
+
+int
+stringtable_get(atom_t cl, char buf[MAX_ENTRY_SIZE])
+{
+        assert(ATOM_VALID(cl));
+        memcpy(buf,ATOM_PTR(cl),ATOM_LEN(cl));
+        return ATOM_LEN(cl);
+}
+
+int
+stringtable_find(atom_t cl, unsigned char **res)
+{
+        assert(ATOM_VALID(cl));
+        *res = ATOM_PTR(cl);
+        return ATOM_LEN(cl);
+}
+
+void
+stringtable_stats(void)
+{
+    unsigned static_memory = sizeof(stringtable_chunks);
+    printf("Static Memory: %u\n", static_memory);
+    unsigned dynamic_memory = (current_chunk + 1) * CHUNK_SIZE;
+    unsigned data_memory = current_chunk*CHUNK_SIZE + next_free_offset;;
+    printf("Used Chunks: %u/%u - %u bytes\n", current_chunk + 1, NUM_CHUNKS, data_memory);
+    unsigned num_entries = 0;
+    unsigned hash_types[CUCKOO_HASHES];
+    memset(hash_types,0,sizeof hash_types);
+    unsigned num_total = 0;
+    for(int i = 0; i < HASHSIZE * CUCKOO_HASHES; i++) {
+            num_total++;
+            dynamic_memory += sizeof(struct hentry);
+            if(ATOM_VALID(htable[i].atom)) {
+                    num_entries++;
+                    hash_types[i / HASHSIZE]++;
+            }
+    }
+
+    for(int i = 0; i < CUCKOO_HASHES; i++)
+            printf("Hash Table  %i: %u\n", i, hash_types[i]);
+
+    printf("Usage: %u/%u %.3f%%\n", num_entries, num_total, (double)num_entries * 100.0 / num_total);
+
+    printf("Dynamic Memory: %u\n", dynamic_memory);
+    printf("Storage Efficiency: %.3f%%\n", (double)data_memory * 100.0/ dynamic_memory);
+
+}
+
+
+
+
+static void
+dieif(bool w,char *str)
+{
+    if(w) {
+        fprintf(stderr, "stringlib: %s\n", str);
+        exit(1);
+    }
+}
+
+// hash functions
+
+
+static uint32_t
+hash2(uint32_t salt, unsigned char *key, int key_len)
+{
+        uint32_t hash = salt;
+        for (int i = 0; i < key_len; i++) {
+                hash += key[i];
+                hash += (hash << 10);
+                hash ^= (hash >> 6);
+        }
+        hash += (hash << 3);
+        hash ^= (hash >> 11);
+        hash += (hash << 15);
+        return hash;
+}
+
+/*
+int
+main(int argc, char *argv[])
+{
+    unsigned char buf[MAX_ENTRY_SIZE];
+    while(fgets((char *)buf,MAX_ENTRY_SIZE,stdin)) {
+        buf[MAX_ENTRY_SIZE - 1] = '\0';
+        unsigned len = strlen((char *)buf);
+        if(!len) continue;
+        if(buf[len - 1] == '\n') buf[--len] = '\0';
+        stringtable_lookup(buf,len);
+        //printf("%x: %s\n", a, buf);
+
+    }
+
+    dump_table();
+    stringtable_stats();
+
+    return 0;
+}
+static hash_t
+hash3(uint32_t salt, unsigned char* str, size_t len)
+{
+        const uint32_t fnv_prime = 0x811C9DC5;
+        unsigned int hash      = salt;
+        for(int i = 0; i < len; i++) {
+                hash *= fnv_prime;
+                hash ^= str[i];
+                hash ^= salt;
+        }
+        return hash;
+}
+*/
hunk ./StringTable/StringTable_cbits.h 1
+#ifndef ST_CBITS_H
+#define ST_CBITS_H
+
+#include <inttypes.h>
+
+#define MAX_ENTRY_SIZE 256
+#define VALID_BITMASK 0x80000000
+#define ATOM_LEN_MASK 0xff
+
+typedef uint32_t atom_t;
+
+
+atom_t stringtable_lookup(unsigned char *cs, int len);
+void stringtable_stats(void);
+int stringtable_find(atom_t cl, unsigned char **res);
+char *stringtable_ptr(atom_t cl);
+
+
+#endif
hunk ./selftest/Makefile 3
-GHCOPTS= -fglasgow-exts -i.. -i../FrontEnd -O -ignore-package lang
+GHCOPTS= -fallow-overlapping-instances -fallow-undecidable-instances -fglasgow-exts -i -i../drift_processed -i../drift_processed/FrontEnd -i.. -i../FrontEnd -O -ignore-package lang  -optc-std=c99 -optc-g
hunk ./selftest/Makefile 7
-all: sha1 TestParse cff
+all: sha1 TestParse cff SelfTest
+
hunk ./selftest/Makefile 11
-	$(HC) $(GHCOPTS) --make $< -o $@
+	$(HC) $(GHCOPTS) --make $< -o $@ ../StringTable/StringTable_cbits.c
hunk ./selftest/SelfTest.hs 40
+    quickCheck prop_aappend
hunk ./selftest/SelfTest.hs 63
+prop_aappend (xs,ys) = (toAtom xs `mappend` toAtom ys) == toAtom ((xs::String) ++ ys)
+prop_aappend' (xs,ys) = fromAtom (toAtom xs `mappend` toAtom ys) == ((xs::String) ++ ys)
hunk ./selftest/SelfTest.hs 153
-    let test = ("hello",3::Int)
+    let test = ("hello",3::Int,toAtom "Up and Atom!")
hunk ./selftest/SelfTest.hs 159
+    print x