[move rest of rts into seperate c files rather than including it directly.
John Meacham <john@repetae.net>**20120216090215
 Ignore-this: d0bf719a38e306f78e182a5c0107573d
] move ./rts/jhc_rts2.c ./rts/rts/jhc_rts.c
hunk ./Makefile.am 7
-BUILT_SOURCES= src/RtsFiles.hs src/RawFiles.hs src/FrontEnd/HsParser.hs src/FlagDump.hs \
+BUILT_SOURCES= src/RawFiles.hs src/FrontEnd/HsParser.hs src/FlagDump.hs \
hunk ./Makefile.am 31
-	src/RawFiles.hs src/RtsFiles.hs src/Stats.hs src/Support/CanType.hs src/Support/CFF.hs src/Support/Compat.hs \
+	src/RawFiles.hs src/Stats.hs src/Support/CanType.hs src/Support/CFF.hs src/Support/Compat.hs \
hunk ./Makefile.am 115
-	   rts/rts/gc_jgc.c rts/rts/profile.c rts/rts/profile.h rts/rts/cdefs.h rts/rts/rts_support.c \
-	   rts/rts/rts_support.h rts/rts/gc.h rts/rts/gc_none.c rts/rts/gc_none.h
-
-RTSFILES = rts/jhc_rts_header.h rts/jhc_rts2.c rts/lib/lib_cbits.c
+	   rts/rts/gc_jgc.c rts/rts/gc_jgc.h rts/rts/profile.c rts/rts/profile.h rts/rts/cdefs.h rts/rts/rts_support.c \
+	   rts/rts/rts_support.h rts/rts/gc.h rts/rts/gc_none.c rts/rts/gc_none.h rts/rts/jhc_rts.c rts/rts/jhc_rts.h \
+	   rts/lib/lib_cbits.c rts/jhc_rts_header.h rts/lib/lib_cbits.h
hunk ./Makefile.am 171
-src/RtsFiles.hs: $(RTSFILES)
-	perl $(srcdir)/utils/op_raw.prl -c $(basename $@) $^ > $@
-
hunk ./Makefile.am 381
-	docs/unboxed.mkd rts/jhc_rts2.c src/FlagOpts.mkd src/FlagDump.mkd src/E/PrimOpt.hs \
+	docs/unboxed.mkd rts/rts/jhc_rts.c src/FlagOpts.mkd src/FlagDump.mkd src/E/PrimOpt.hs \
hunk ./rts/jhc_rts_header.h 58
-static void _amain(void);
+// the program will provide the following
+void _amain(void);
+void jhc_hs_init(void);
+extern const void * const nh_stuff[];
hunk ./rts/jhc_rts_header.h 66
-#define JHC_HEADER
-#include "rts/gc_jgc.c"
-#include "rts/slub.c"
-#undef JHC_HEADER
+#include "rts/jhc_rts.h"
+#include "lib/lib_cbits.h"
hunk ./rts/lib/lib_cbits.c 4
-static HsInt jhc_stdrnd[2] A_UNUSED = { 1 , 1 };
-static HsInt jhc_data_unique A_UNUSED;
+#include <stdio.h>
+
+#include "HsFFI.h"
+#include "rts/cdefs.h"
+
+HsInt jhc_stdrnd[2] A_UNUSED = { 1 , 1 };
+HsInt jhc_data_unique A_UNUSED;
hunk ./rts/lib/lib_cbits.c 12
-static HsBool A_UNUSED
+HsBool A_UNUSED
hunk ./rts/lib/lib_cbits.c 29
-#ifdef __WIN32__
-#define getchar_unlocked() getchar()
-#define putchar_unlocked(x) putchar(x)
-#define getc_unlocked(x) getc(x)
-#define putc_unlocked(x,y) putc(x,y)
-#endif
-
-inline static int A_UNUSED
-jhc_utf8_getchar(void)
-{
-    return getchar_unlocked();
-}
-
-inline static int A_UNUSED
-jhc_utf8_getc(FILE *f)
-{
-    return getc_unlocked(f);
-}
-
-inline static int A_UNUSED
-jhc_utf8_putchar(int ch)
-{
-    return putchar_unlocked(ch);
-}
-
-inline static int A_UNUSED
-jhc_utf8_putc(int ch, FILE *f)
-{
-    return putc_unlocked(ch,f);
-}
-
addfile ./rts/lib/lib_cbits.h
hunk ./rts/lib/lib_cbits.h 1
+#ifndef LIB_CBITS_H
+#define LIB_CBITS_H
+
+#include "HsFFI.h"
+struct FILE;
+
+extern HsInt jhc_stdrnd[2];
+extern HsInt jhc_data_unique;
+HsBool jhc_wait_for_input(FILE *f,HsInt timeout);
+
+#ifdef __WIN32__
+#define getchar_unlocked() getchar()
+#define putchar_unlocked(x) putchar(x)
+#define getc_unlocked(x) getc(x)
+#define putc_unlocked(x,y) putc(x,y)
+#endif
+
+inline static int A_UNUSED
+jhc_utf8_getchar(void)
+{
+    return getchar_unlocked();
+}
+
+inline static int A_UNUSED
+jhc_utf8_getc(FILE *f)
+{
+    return getc_unlocked(f);
+}
+
+inline static int A_UNUSED
+jhc_utf8_putchar(int ch)
+{
+    return putchar_unlocked(ch);
+}
+
+inline static int A_UNUSED
+jhc_utf8_putc(int ch, FILE *f)
+{
+    return putc_unlocked(ch,f);
+}
+
+#endif
addfile ./rts/rts/cdefs.h
hunk ./rts/rts/cdefs.h 1
+#ifndef RTS_CDEFS_H
+#define RTS_CDEFS_H
+
+// GNU attributes
+#if !defined(__predict_true)
+#ifdef __GNUC__
+#  define __predict_true(exp)     __builtin_expect(!!(exp), 1)
+#  define __predict_false(exp)    __builtin_expect(!!(exp), 0)
+#else
+#  define __predict_true(exp)     (exp)
+#  define __predict_false(exp)    (exp)
+#endif
+#endif
+
+#ifdef __GNUC__
+#define A_ALIGNED  __attribute__ ((aligned))
+#define A_CONST    __attribute__ ((const))
+#define A_MALLOC   __attribute__ ((malloc))
+#define A_MAYALIAS __attribute__ ((__may_alias__))
+#define A_NORETURN __attribute__ ((noreturn))
+#define A_PURE     __attribute__ ((pure))
+#define A_UNUSED   __attribute__ ((unused))
+#ifdef __i386__
+#define A_REGPARM __attribute__ ((fastcall))
+#else
+#define A_REGPARM
+#endif
+#define A_STD    A_REGPARM
+
+#else
+#define A_ALIGNED
+#define A_CONST
+#define A_MALLOC
+#define A_MAYALIAS
+#define A_NORETURN
+#define A_PURE
+#define A_UNUSED
+#define A_STD
+#endif
+
+// these should be enabled with newer versions of gcc
+#define A_HOT
+#define A_COLD
+#define A_FALIGNED
+
+#endif
hunk ./rts/rts/gc.h 18
+#include "rts/gc_jgc.h"
hunk ./rts/rts/gc_jgc.c 1
-#if _JHC_GC == _JHC_GC_JGC
-#ifdef JHC_HEADER
+#include "jhc_rts_header.h"
+#include "sys/queue.h"
+#include "sys/bitarray.h"
hunk ./rts/rts/gc_jgc.c 5
-#ifdef JHC_JGC_STACK
-struct frame;
-typedef struct frame *gc_t;
-#else
-typedef void* *gc_t;
-static gc_t gc_stack_base;
-static unsigned number_gcs;             // number of garbage collections
-static unsigned number_allocs;          // number of allocations since last garbage collection
-#endif
+#if _JHC_GC == _JHC_GC_JGC
hunk ./rts/rts/gc_jgc.c 7
-static gc_t saved_gc;
+struct s_arena {
+        struct s_megablock *current_megablock;
+        SLIST_HEAD(,s_block) free_blocks;
+        unsigned block_used;
+        unsigned block_threshold;
+        SLIST_HEAD(,s_cache) caches;
+        SLIST_HEAD(,s_megablock) megablocks;
+};
hunk ./rts/rts/gc_jgc.c 16
-#define GC_BASE sizeof(void *)
-#define TO_BLOCKS(x) ((x) <= GC_BASE ? 1 : (((x) - 1)/GC_BASE) + 1)
+struct s_megablock {
+        void *base;
+        unsigned next_free;
+        SLIST_ENTRY(s_megablock) next;
+};
hunk ./rts/rts/gc_jgc.c 22
-static void gc_perform_gc(gc_t gc);
+struct s_block_info {
+        unsigned char color;
+        unsigned char size;
+        unsigned char num_ptrs;
+        unsigned char flags;
+};
hunk ./rts/rts/gc_jgc.c 29
-#else
+struct s_block {
+        SLIST_ENTRY(s_block) link;
+        struct s_block_info pi;
+        unsigned short num_free;
+        unsigned short next_free;
+        bitarray_t used[];
+};
hunk ./rts/rts/gc_jgc.c 37
-#include "sys/queue.h"
+struct s_cache {
+        SLIST_ENTRY(s_cache) next;
+        SLIST_HEAD(,s_block) blocks;
+        SLIST_HEAD(,s_block) full_blocks;
+        struct s_block_info pi;
+        unsigned short num_entries;
+        struct s_arena *arena;
+};
hunk ./rts/rts/gc_jgc.c 46
-//static gc_t saved_gc;
+gc_t saved_gc;
hunk ./rts/rts/gc_jgc.c 48
+static unsigned number_gcs;             // number of garbage collections
+static unsigned number_allocs;          // number of allocations since last garbage collection
+static gc_t gc_stack_base;
hunk ./rts/rts/gc_jgc.c 71
+static void gc_perform_gc(gc_t gc);
+static bool s_set_used_bit(void *val) A_UNUSED;
+static void clear_used_bits(struct s_arena *arena) A_UNUSED;
+static struct s_arena *new_arena(void);
+static void s_cleanup_blocks(struct s_arena *arena);
+static void print_cache(struct s_cache *sc);
+
hunk ./rts/rts/gc_jgc.c 268
-static const void * const nh_stuff[];
hunk ./rts/rts/gc_jgc.c 314
+#include "sys/bitarray.h"
+#include "sys/queue.h"
+
+/* This finds a bit that isn't set, sets it, then returns its index.  It
+ * assumes that a bit is available to be found, otherwise it goes into an
+ * infinite loop. */
+
+static unsigned
+bitset_find_free(unsigned *next_free,int n,bitarray_t ba[static n]) {
+        assert(*next_free < (unsigned)n);
+        unsigned i = *next_free;
+        do {
+                int o = __builtin_ffsl(~ba[i]);
+                if(__predict_true(o)) {
+                        ba[i] |= (1UL << (o - 1));
+                        *next_free = i;
+                        return (i*BITS_PER_UNIT + (o - 1));
+                }
+                i = (i + 1) % n;
+                assert(i != *next_free);
+        } while (1);
+}
+
+struct s_megablock *
+s_new_megablock(struct s_arena *arena)
+{
+        struct s_megablock *mb = malloc(sizeof(*mb));
+#if defined(__WIN32__)
+        mb->base = _aligned_malloc(MEGABLOCK_SIZE, BLOCK_SIZE);
+        int ret = !mb->base;
+#elif defined(__ARM_EABI__)
+        mb->base = memalign(BLOCK_SIZE,MEGABLOCK_SIZE);
+        int ret = !mb->base;
+#elif (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <  1060)
+        assert(sysconf(_SC_PAGESIZE) == BLOCK_SIZE);
+        mb->base = valloc(MEGABLOCK_SIZE);
+        int ret = !mb->base;
+#else
+        int ret = posix_memalign(&mb->base,BLOCK_SIZE,MEGABLOCK_SIZE);
hunk ./rts/rts/gc_jgc.c 354
+        if(ret != 0) {
+                fprintf(stderr,"Unable to allocate memory for megablock\n");
+                abort();
+        }
+        VALGRIND_MAKE_MEM_NOACCESS(mb->base,MEGABLOCK_SIZE);
+        //VALGRIND_FREELIKE_BLOCK(mb->base,0);
+        mb->next_free = 0;
+        return mb;
+}
+
+/* block allocator */
+
+static struct s_block *
+get_free_block(gc_t gc, struct s_arena *arena) {
+        arena->block_used++;
+        if(__predict_true(SLIST_FIRST(&arena->free_blocks))) {
+                struct s_block *pg = SLIST_FIRST(&arena->free_blocks);
+                SLIST_REMOVE_HEAD(&arena->free_blocks,link);
+                return pg;
+        } else {
+                if((arena->block_used >= arena->block_threshold)) {
+                        gc_perform_gc(gc);
+                        // if we are still using 80% of the heap after a gc, raise the threshold.
+                        if(__predict_false((unsigned)arena->block_used * 10 >= arena->block_threshold * 9)) {
+                                arena->block_threshold *= 2;
+                        }
+                }
+                if(__predict_false(!arena->current_megablock))
+                        arena->current_megablock = s_new_megablock(arena);
+                struct s_megablock *mb = arena->current_megablock;
+                struct s_block *pg = mb->base + BLOCK_SIZE*mb->next_free;
+                mb->next_free++;
+                if(mb->next_free == MEGABLOCK_SIZE / BLOCK_SIZE) {
+                        SLIST_INSERT_HEAD(&arena->megablocks,mb, next);
+                        arena->current_megablock = NULL;
+                }
+                VALGRIND_MAKE_MEM_UNDEFINED(pg,sizeof(struct s_block));
+                pg->num_free = 0;
+                return pg;
+        }
+}
+
+static void
+s_cleanup_blocks(struct s_arena *arena) {
+        struct s_cache *sc = SLIST_FIRST(&arena->caches);
+        for(;sc;sc = SLIST_NEXT(sc,next)) {
+
+                // 'best' keeps track of the block with the fewest free spots
+                // and percolates it to the front, effectively a single pass
+                // of a bubblesort to help combat fragmentation. It does
+                // not increase the complexity of the cleanup algorithm as
+                // we had to scan every block anyway, but over many passes
+                // of the GC it will eventually result in a more sorted list
+                // than would occur by chance.
+
+                struct s_block *best = NULL;
+                int free_best = 4096;
+                struct s_block *pg = SLIST_FIRST(&sc->blocks);
+                struct s_block *fpg = SLIST_FIRST(&sc->full_blocks);
+                SLIST_INIT(&sc->blocks);
+                SLIST_INIT(&sc->full_blocks);
+                if(!pg) {
+                        pg = fpg;
+                        fpg = NULL;
+                }
+                while(pg) {
+                        struct s_block *npg = SLIST_NEXT(pg,link);
+                        if(__predict_false(pg->num_free == 0)) {
+                                SLIST_INSERT_HEAD(&sc->full_blocks,pg,link);
+                        } else if(__predict_true(pg->num_free == sc->num_entries)) {
+                                arena->block_used--;
+                                VALGRIND_MAKE_MEM_NOACCESS((char *)pg + sizeof(struct s_block), BLOCK_SIZE - sizeof(struct s_block));
+                                SLIST_INSERT_HEAD(&arena->free_blocks,pg,link);
+                        } else {
+                                if(!best) {
+                                        free_best = pg->num_free;
+                                        best = pg;
+                                } else {
+                                        if(pg->num_free < free_best) {
+                                                struct s_block *tmp = best;
+                                                best = pg; pg = tmp;
+                                                free_best = pg->num_free;
+                                        }
+                                        SLIST_INSERT_HEAD(&sc->blocks,pg,link);
+                                }
+                        }
+                        if(!npg && fpg) {
+                                pg = fpg;
+                                fpg = NULL;
+                        } else
+                                pg = npg;
+                }
+                if(best)
+                        SLIST_INSERT_HEAD(&sc->blocks,best,link);
+        }
+}
+
+inline static void
+clear_block_used_bits(unsigned num_entries, struct s_block *pg)
+{
+        pg->num_free = num_entries;
+        memset(pg->used,0,BITARRAY_SIZE_IN_BYTES(num_entries) - sizeof(pg->used[0]));
+        int excess = num_entries % BITS_PER_UNIT;
+        pg->used[BITARRAY_SIZE(num_entries) - 1] = ~((1UL << excess) - 1);
+#if JHC_VALGRIND
+                unsigned header =  sizeof(struct s_block) + BITARRAY_SIZE_IN_BYTES(num_entries);
+                VALGRIND_MAKE_MEM_NOACCESS((char *)pg + header, BLOCK_SIZE - header);
+#endif
+}
+
+void *
+s_alloc(gc_t gc, struct s_cache *sc)
+{
+        struct s_block *pg = SLIST_FIRST(&sc->blocks);
+        if(__predict_false(!pg)) {
+                pg = get_free_block(gc, sc->arena);
+                VALGRIND_MAKE_MEM_NOACCESS(pg, BLOCK_SIZE);
+                VALGRIND_MAKE_MEM_DEFINED(pg, sizeof(struct s_block));
+                if(sc->num_entries != pg->num_free)
+                        VALGRIND_MAKE_MEM_UNDEFINED((char *)pg->used,BITARRAY_SIZE_IN_BYTES(sc->num_entries));
+                else
+                        VALGRIND_MAKE_MEM_DEFINED((char *)pg->used,BITARRAY_SIZE_IN_BYTES(sc->num_entries));
+                assert(pg);
+                pg->pi = sc->pi;
+                pg->next_free = 0;
+                SLIST_INSERT_HEAD(&sc->blocks,pg,link);
+                if(sc->num_entries != pg->num_free)
+                        clear_block_used_bits(sc->num_entries, pg);
+                pg->used[0] = 1; //set the first bit
+                pg->num_free = sc->num_entries - 1;
+                return (uintptr_t *)pg + pg->pi.color;
+        } else {
+                __builtin_prefetch(pg->used,1);
+                pg->num_free--;
+                unsigned next_free = pg->next_free;
+                unsigned found = bitset_find_free(&next_free,BITARRAY_SIZE(sc->num_entries),pg->used);
+                pg->next_free = next_free;
+                void *val = (uintptr_t *)pg + pg->pi.color + found*pg->pi.size;
+                if(__predict_false(0 == pg->num_free)) {
+                        assert(pg == SLIST_FIRST(&sc->blocks));
+                        SLIST_REMOVE_HEAD(&sc->blocks,link);
+                        SLIST_INSERT_HEAD(&sc->full_blocks,pg,link);
+                }
+                assert(S_BLOCK(val) == pg);
+                //printf("s_alloc: val: %p s_block: %p size: %i color: %i found: %i num_free: %i\n", val, pg, pg->pi.size, pg->pi.color, found, pg->num_free);
+                return val;
+        }
+}
+
+/*
+static void
+s_free(void *val)
+{
+        assert(val);
+        struct s_block *pg = s_block(val);
+        unsigned int offset = ((uintptr_t *)val - (uintptr_t *)pg) - pg->pi.color;
+//        printf("s_free:  val: %p s_block: %p size: %i color: %i num_free: %i offset: %i bit: %i\n", val, pg, pg->pi.size, pg->pi.color, pg->num_free, offset, offset/pg->pi.size);
+        assert(BIT_VALUE(pg->used,offset/(pg->pi.size)));
+        BIT_UNSET(pg->used,offset/(pg->pi.size));
+        pg->num_free++;
+}
+*/
+
+static struct s_cache *
+new_cache(struct s_arena *arena, unsigned short size, unsigned short num_ptrs)
+{
+        struct s_cache *sc = malloc(sizeof(*sc));
+        sc->arena = arena;
+        sc->pi.size = size;
+        sc->pi.num_ptrs = num_ptrs;
+        sc->pi.flags = 0;
+        size_t excess = BLOCK_SIZE - sizeof(struct s_block);
+        sc->num_entries = (8*excess) / (8*sizeof(uintptr_t)*size + 1) - 1;
+        //sc->num_entries = (8*excess) / (8*size*sizeof(uintptr_t) + 1);
+        sc->pi.color = (sizeof(struct s_block) + BITARRAY_SIZE_IN_BYTES(sc->num_entries) + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
+        SLIST_INIT(&sc->blocks);
+        SLIST_INIT(&sc->full_blocks);
+        SLIST_INSERT_HEAD(&arena->caches,sc,next);
+        //print_cache(sc);
+        return sc;
+}
+
+// clear all used bits, must be followed by a marking phase.
+static void
+clear_used_bits(struct s_arena *arena)
+{
+        struct s_cache *sc = SLIST_FIRST(&arena->caches);
+        for(;sc;sc = SLIST_NEXT(sc,next)) {
+                struct s_block *pg = SLIST_FIRST(&sc->blocks);
+                struct s_block *fpg = SLIST_FIRST(&sc->full_blocks);
+                do {
+                        for(;pg;pg = SLIST_NEXT(pg,link))
+                                clear_block_used_bits(sc->num_entries,pg);
+                        pg = fpg;
+                        fpg = NULL;
+                }  while(pg);
+        }
+}
+
+// set a used bit. returns true if the
+// tagged node should be scanned by the GC.
+// this happens when the used bit was not previously set
+// and the node contains internal pointers.
+
+static bool
+s_set_used_bit(void *val)
+{
+        assert(val);
+        struct s_block *pg = S_BLOCK(val);
+        unsigned int offset = ((uintptr_t *)val - (uintptr_t *)pg) - pg->pi.color;
+        if(__predict_true(BIT_IS_UNSET(pg->used,offset/pg->pi.size))) {
+                BIT_SET(pg->used,offset/pg->pi.size);
+                pg->num_free--;
+                return (bool)pg->pi.num_ptrs;
+        }
+        return false;
+}
+
+struct s_cache *
+find_cache(struct s_cache **rsc, struct s_arena *arena, unsigned short size, unsigned short num_ptrs)
+{
+        if(__predict_true(rsc && *rsc))
+                return *rsc;
+        struct s_cache *sc = SLIST_FIRST(&arena->caches);
+        for(;sc;sc = SLIST_NEXT(sc,next)) {
+                if(sc->pi.size == size && sc->pi.num_ptrs == num_ptrs)
+                        goto found;
+        }
+        sc = new_cache(arena,size,num_ptrs);
+found:
+        if(rsc)
+                *rsc = sc;
+        return sc;
+}
+
+struct s_arena *
+new_arena(void) {
+        struct s_arena *arena = malloc(sizeof(struct s_arena));
+        SLIST_INIT(&arena->caches);
+        SLIST_INIT(&arena->free_blocks);
+        SLIST_INIT(&arena->megablocks);
+        arena->block_used = 0;
+        arena->block_threshold = 8;
+        arena->current_megablock = NULL;
+        return arena;
+}
+
+void
+print_cache(struct s_cache *sc) {
+        fprintf(stderr, "num_entries: %i\n",(int)sc->num_entries);
+//        printf("  entries: %i words\n",(int)(sc->num_entries*sc->pi.size));
+        fprintf(stderr, "  header: %lu bytes\n", sizeof(struct s_block) + BITARRAY_SIZE_IN_BYTES(sc->num_entries));
+        fprintf(stderr, "  size: %i words\n",(int)sc->pi.size);
+//        printf("  color: %i words\n",(int)sc->pi.color);
+        fprintf(stderr, "  nptrs: %i words\n",(int)sc->pi.num_ptrs);
+//        printf("  end: %i bytes\n",(int)(sc->pi.color+ sc->num_entries*sc->pi.size)*sizeof(uintptr_t));
+        fprintf(stderr, "%20s %9s %9s %s\n", "block", "num_free", "next_free", "status");
+        struct s_block *pg;
+        SLIST_FOREACH(pg,&sc->blocks,link) {
+            fprintf(stderr, "%20p %9i %9i %c\n", pg, pg->num_free, pg->next_free, 'P');
+        }
+        fprintf(stderr, "  full_blocks:\n");
+        SLIST_FOREACH(pg,&sc->full_blocks,link) {
+            fprintf(stderr, "%20p %9i %9i %c\n", pg, pg->num_free, pg->next_free, 'F');
+        }
+}
+
addfile ./rts/rts/gc_jgc.h
hunk ./rts/rts/gc_jgc.h 1
+#ifndef JHC_GC_JGC_H
+#define JHC_GC_JGC_H
+
+#include <stdbool.h>
+
+struct s_arena;
+struct s_cache;
+
+#define S_BLOCK(val) ((struct s_block *)((uintptr_t)(val) & ~ (BLOCK_SIZE - 1)))
+#define BLOCK_SIZE     (1UL << 12)
+#define MEGABLOCK_SIZE (1UL << 20)
+
+#ifdef JHC_JGC_STACK
+struct frame;
+typedef struct frame *gc_t;
+#else
+typedef void* *gc_t;
+#endif
+
+extern gc_t saved_gc;
+
+#define GC_BASE sizeof(void *)
+#define TO_BLOCKS(x) ((x) <= GC_BASE ? 1 : (((x) - 1)/GC_BASE) + 1)
+
+#define gc_frame0(gc,n,...) void *ptrs[n] = { __VA_ARGS__ }; for(int i = 0; i < n; i++) gc[i] = (sptr_t)ptrs[i]; gc_t sgc = gc;  gc_t gc = sgc + n;
+
+void *s_alloc(gc_t gc, struct s_cache *sc);
+struct s_cache *find_cache(struct s_cache **rsc, struct s_arena *arena, unsigned short size, unsigned short num_ptrs);
+
+#endif
hunk ./rts/rts/jhc_rts.c 107
-#define P_WHNF  0x0
-#define P_LAZY  0x1
-#define P_VALUE 0x2
-#define P_FUNC  0x3
+#include "jhc_rts_header.h"
hunk ./rts/rts/jhc_rts.c 109
-#define IS_LAZY(x)     (bool)(((uintptr_t)(x)) & 0x1)
-#define IS_PTR(x)      (bool)(!(((uintptr_t)(x)) & 0x2))
-
-#define FROM_SPTR(x)   (typeof (x))((uintptr_t)(x) & ~0x3)  // remove a ptype from a smart pointer
-#define GET_PTYPE(x)   ((uintptr_t)(x) & 0x3)               // return the ptype associated with a smart pointer
-#define TO_SPTR(t,x)   (typeof (x))((uintptr_t)(x) | (t))   // attach a ptype to a smart pointer
-#define TO_SPTR_C(t,x) (typeof (x))((uintptr_t)(x) + (t))   // attach a ptype to a smart pointer, suitable for use by constant initialializers
-
-#define GETHEAD(x)   (NODEP(x)->head)
-#define NODEP(x)     ((node_t *)(x))
-#define DNODEP(x)    ((dnode_t *)(x))
-
-#define MKLAZY(fn)    TO_SPTR(P_LAZY,(sptr_t)fn)
-#define MKLAZY_C(fn)  TO_SPTR_C(P_LAZY,(sptr_t)fn)
-#define TO_FPTR(fn)   TO_SPTR_C(P_FUNC,(fptr_t)fn)
-
-#define RAW_SET_F(n)   ((wptr_t)(((intptr_t)(n) << 2) | P_VALUE))
-#define RAW_SET_UF(n)  ((wptr_t)(((uintptr_t)(n) << 2) | P_VALUE))
-#define RAW_GET_F(n)   ((intptr_t)(n) >> 2)
-#define RAW_GET_UF(n)  ((uintptr_t)(n) >> 2)
-
-#define RAW_SET_16(w)  (wptr_t)(((uintptr_t)(w) << 16) | P_VALUE)
-#define RAW_GET_16(n)  ((intptr_t)(n) >> 16)
-#define RAW_GET_U16(n) ((uintptr_t)(n) >> 16)
-
-// demote is always safe, we must only promote when we know the argument is a WHNF
-#define PROMOTE(n)   ((wptr_t)(n))
-#define DEMOTE(n)    ((sptr_t)(n))
-
-#define FETCH_TAG(x)      RAW_GET_U16(IS_PTR(x) ? FETCH_MEM_TAG(x) : (what_t)(x))
-#define FETCH_RAW_TAG(x)  RAW_GET_U16(x)
-#define SET_RAW_TAG(x)    RAW_SET_16(x)
-#define FETCH_MEM_TAG(x)  (DNODEP(x)->what)
-#define SET_MEM_TAG(x,v)  (DNODEP(x)->what = (what_t)RAW_SET_16(v))
hunk ./rts/rts/jhc_rts.c 112
-struct sptr {};
-struct wptr {};
-struct fptr {};
-
-// we use dummy structs here so the compiler will catch any attempt
-// to use one type in anothers place
-typedef struct sptr * sptr_t;
-typedef struct sptr * wptr_t;
-typedef struct fptr * fptr_t;
-typedef uintptr_t     what_t;
-
-typedef struct node {
-        fptr_t head;
-        sptr_t rest[];
-} A_MAYALIAS node_t;
-
-typedef struct dnode {
-        what_t what;
-        sptr_t rest[];
-} A_MAYALIAS dnode_t;
-
hunk ./rts/rts/jhc_rts.c 160
-static inline wptr_t A_STD A_UNUSED  A_HOT
+#if _JHC_DEBUG
+wptr_t A_STD A_UNUSED
hunk ./rts/rts/jhc_rts.c 169
-static inline sptr_t A_STD A_UNUSED A_HOT
+sptr_t A_STD A_UNUSED
hunk ./rts/rts/jhc_rts.c 176
+#endif
hunk ./rts/rts/jhc_rts.c 191
-static wptr_t A_STD A_UNUSED  A_HOT
+wptr_t A_STD A_UNUSED  A_HOT
hunk ./rts/rts/jhc_rts.c 234
-#include "rts/slub.c"
addfile ./rts/rts/jhc_rts.h
hunk ./rts/rts/jhc_rts.h 1
+#ifndef JHC_RTS_H
+#define JHC_RTS_H
+
+#include "rts/profile.h"
+#include "rts/gc.h"
+
+struct sptr;
+struct wptr;
+struct fptr;
+
+// we use dummy structs here so the compiler will catch any attempt
+// to use one type in anothers place
+typedef struct sptr * sptr_t;
+typedef struct sptr * wptr_t;
+typedef struct fptr * fptr_t;
+typedef uintptr_t     what_t;
+
+typedef struct node {
+        fptr_t head;
+        sptr_t rest[];
+} A_MAYALIAS node_t;
+
+typedef struct dnode {
+        what_t what;
+        sptr_t rest[];
+} A_MAYALIAS dnode_t;
+
+#define P_WHNF  0x0
+#define P_LAZY  0x1
+#define P_VALUE 0x2
+#define P_FUNC  0x3
+
+#define IS_LAZY(x)     (bool)(((uintptr_t)(x)) & 0x1)
+#define IS_PTR(x)      (bool)(!(((uintptr_t)(x)) & 0x2))
+
+#define FROM_SPTR(x)   (typeof (x))((uintptr_t)(x) & ~0x3)  // remove a ptype from a smart pointer
+#define GET_PTYPE(x)   ((uintptr_t)(x) & 0x3)               // return the ptype associated with a smart pointer
+#define TO_SPTR(t,x)   (typeof (x))((uintptr_t)(x) | (t))   // attach a ptype to a smart pointer
+#define TO_SPTR_C(t,x) (typeof (x))((uintptr_t)(x) + (t))   // attach a ptype to a smart pointer, suitable for use by constant initialializers
+
+#define GETHEAD(x)   (NODEP(x)->head)
+#define NODEP(x)     ((node_t *)(x))
+#define DNODEP(x)    ((dnode_t *)(x))
+
+#define MKLAZY(fn)    TO_SPTR(P_LAZY,(sptr_t)fn)
+#define MKLAZY_C(fn)  TO_SPTR_C(P_LAZY,(sptr_t)fn)
+#define TO_FPTR(fn)   TO_SPTR_C(P_FUNC,(fptr_t)fn)
+
+#define RAW_SET_F(n)   ((wptr_t)(((intptr_t)(n) << 2) | P_VALUE))
+#define RAW_SET_UF(n)  ((wptr_t)(((uintptr_t)(n) << 2) | P_VALUE))
+#define RAW_GET_F(n)   ((intptr_t)(n) >> 2)
+#define RAW_GET_UF(n)  ((uintptr_t)(n) >> 2)
+
+#define RAW_SET_16(w)  (wptr_t)(((uintptr_t)(w) << 16) | P_VALUE)
+#define RAW_GET_16(n)  ((intptr_t)(n) >> 16)
+#define RAW_GET_U16(n) ((uintptr_t)(n) >> 16)
+
+// demote is always safe, we must only promote when we know the argument is a WHNF
+#define PROMOTE(n)   ((wptr_t)(n))
+#define DEMOTE(n)    ((sptr_t)(n))
+
+#define FETCH_TAG(x)      RAW_GET_U16(IS_PTR(x) ? FETCH_MEM_TAG(x) : (what_t)(x))
+#define FETCH_RAW_TAG(x)  RAW_GET_U16(x)
+#define SET_RAW_TAG(x)    RAW_SET_16(x)
+#define FETCH_MEM_TAG(x)  (DNODEP(x)->what)
+#define SET_MEM_TAG(x,v)  (DNODEP(x)->what = (what_t)RAW_SET_16(v))
+
+wptr_t A_STD
+#if _JHC_GC == _JHC_GC_JGC
+eval(gc_t gc,sptr_t s);
+#else
+eval(sptr_t s)
+#endif
+
+#if _JHC_DEBUG
+wptr_t A_STD promote(sptr_t s);
+sptr_t A_STD demote(wptr_t s);
+#else
+#define promote(x) PROMOTE(x)
+#define demote(x) DEMOTE(x)
+#endif
+
+#endif
addfile ./rts/rts/profile.h
hunk ./rts/rts/profile.h 1
+#ifndef RTS_PROFILE_H
+#define RTS_PROFILE_H
+
+#include <stdio.h>
+#include "rts/cdefs.h"
+
+#ifndef JHC_VALGRIND
+#define JHC_VALGRIND 0
+#endif
+
+#ifndef JHC_MEM_ANNOTATE
+#define JHC_MEM_ANNOTATE 0
+#endif
+
+#ifndef _JHC_PROFILE
+#define _JHC_PROFILE 0
+#endif
+
+#if JHC_VALGRIND
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
+#else
+#define VALGRIND_MAKE_MEM_UNDEFINED(x,y) \
+    do { } while (0)
+#define VALGRIND_MAKE_MEM_DEFINED(x,y) \
+    do { } while (0)
+#define VALGRIND_MAKE_MEM_NOACCESS(x,y) \
+    do { } while (0)
+#define VALGRIND_PRINTF(...) \
+    do { } while (0)
+#endif
+
+void A_UNUSED profile_print_header(FILE *file, char *value_unit);
+void A_COLD jhc_print_profile(void);
+
+#if _JHC_PROFILE
+struct profile_stack;
+extern struct profile_stack gc_alloc_time;
+extern struct profile_stack gc_gc_time;
+void jhc_profile_push(struct profile_stack *ps);
+void jhc_profile_pop(struct profile_stack *ps);
+#define profile_push(x) jhc_profile_push(x)
+#define profile_pop(x)  jhc_profile_pop(x)
+#else
+#define profile_push(x)          do { } while(0)
+#define profile_pop(x)           do { } while(0)
+#define alloc_count(x,y)         do { } while(0)
+#define print_alloc_size_stats() do { } while(0)
+#endif
+
+#if JHC_STATUS > 1
+#define debugf(...) fprintf(stderr,__VA_ARGS__)
+#else
+#define debugf(...) do { } while (0)
+#endif
+
+#endif
hunk ./src/C/FromGrin2.hs 32
-import RtsFiles
hunk ./src/C/FromGrin2.hs 129
-        theData,
+        BS.fromString "#include \"jhc_rts_header.h\"\n",
hunk ./src/C/FromGrin2.hs 150
-    nh_stuff  = text "static const void * const nh_stuff[] = {" $$ fsep (punctuate (char ',') (cafnames ++ constnames ++ [text "NULL"]))  $$ text "};"
+    nh_stuff  = text "const void * const nh_stuff[] = {" $$ fsep (punctuate (char ',') (cafnames ++ constnames ++ [text "NULL"]))  $$ text "};"
hunk ./src/Grin/Main.hs 107
+           ("rts/jhc_rts.c",jhc_rts_c),
+           ("rts/jhc_rts.h",jhc_rts_h),
+           ("lib/lib_cbits.c",lib_cbits_c),
+           ("lib/lib_cbits.h",lib_cbits_h),
hunk ./src/Grin/Main.hs 114
+           ("jhc_rts_header.h",jhc_rts_header_h),
hunk ./src/Grin/Main.hs 117
+           ("rts/gc_jgc.h",gc_jgc_h),
hunk ./src/Grin/Main.hs 122
-    let cFiles = ["rts/profile.c","rts/rts_support.c", "rts/gc_none.c"]
-
+    let cFiles = ["rts/profile.c", "rts/rts_support.c", "rts/gc_none.c",
+                  "rts/jhc_rts.c", "lib/lib_cbits.c"]