diff options
author | bryce <bryce@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-04-19 10:10:01 +0000 |
---|---|---|
committer | bryce <bryce@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-04-19 10:10:01 +0000 |
commit | a3e9d271353f431ddf2ff7c1cc0fbc9d59cd1951 (patch) | |
tree | fec69f60b37ca7ee4a47582f914dabbc7b3ee0c4 /boehm-gc | |
parent | f13bf5f6901b9992d51e08626a54684e3f87b065 (diff) | |
download | gcc-a3e9d271353f431ddf2ff7c1cc0fbc9d59cd1951.tar.gz |
Imported version 5.0alpha6.
* acinclude.m4: Bump version to 5.0a6.
* configure.in: Don't use alpha_mach_dep.s.
* include/private/config.h, irix_threads.c, gc_watcom.asm: Delete
obsolete files.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@33251 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'boehm-gc')
44 files changed, 1480 insertions, 2161 deletions
diff --git a/boehm-gc/ChangeLog b/boehm-gc/ChangeLog index 26e0fae9fff..6831e8dfb44 100644 --- a/boehm-gc/ChangeLog +++ b/boehm-gc/ChangeLog @@ -1,3 +1,11 @@ +2000-04-19 Bryce McKinlay <bryce@albatross.co.nz> + + Imported version version 5.0alpha6. + * acinclude.m4: Bump version to 5.0a6. + * configure.in: Don't use alpha_mach_dep.s. + * include/private/config.h, irix_threads.c gc_watcom.asm: Delete + obsolete files. + 2000-03-26 Anthony Green <green@redhat.com> * misc.c (GC_enable): Always define GC_enable and GC_disable. @@ -105,7 +113,7 @@ Fri Jan 28 17:13:20 2000 Anthony Green <green@cygnus.com> Tue Aug 10 00:08:29 1999 Rainer Orth <ro@TechFak.Uni-Bielefeld.DE> - * gc_priv.h: Merged IRIX thread changes from + * gc_priv.h: IRIX thread changes from include/private/gc_priv.h. Mon Aug 9 18:33:38 1999 Rainer Orth <ro@TechFak.Uni-Bielefeld.DE> diff --git a/boehm-gc/README b/boehm-gc/README index 4461e303102..f800ee46322 100644 --- a/boehm-gc/README +++ b/boehm-gc/README @@ -15,7 +15,7 @@ Permission to modify the code and to distribute modified code is granted, provided the above notices are retained, and a notice that the code was modified is included with the above copyright notice. -This is version 5.0alpha4 of a conservative garbage collector for C and C++. +This is version 5.0alpha6 of a conservative garbage collector for C and C++. You might find a more recent version of this at @@ -1506,10 +1506,11 @@ Since 5.0 alpha3 Henderson and Roman Hodek. - Removed the tests for SGI_SOURCE in new_gc_alloc.h. This was causing that interface to fail on nonSGI platforms. - - Changed the Linux stack finding code to use /proc, after chnging it + - Changed the Linux stack finding code to use /proc, after changing it to use HEURISTIC1. (Thanks to David Mossberger for pointing out the /proc hook.) - Added HP/UX incremental GC support and HP/UX 11 thread support. + Thread support is currently still flakey. - Added basic Linux/IA64 support. 
- Integrated Anthony Green's PicoJava support. - Integrated Scott Ananian's StrongARM/NetBSD support. @@ -1527,6 +1528,58 @@ Since 5.0 alpha3 - GC_debug_free(0, ...) failed. Thanks to Fergus Henderson for the bug report and fix. +Since 5.0 alpha4 + - GC_malloc_explicitly_typed and friends sometimes failed to + initialize first word. + - Added allocation routines and support in the marker for mark descriptors + in a type structure referenced by the first word of an object. This was + introduced to support gcj, but hopefully in a way that makes it + generically useful. + - Added GC_requested_heapsize, and inhibited collections in nonincremental + mode if the actual used heap size is less than what was explicitly + requested. + - The Solaris pthreads version of GC_pthread_create didn't handle a NULL + attribute pointer. Solaris thread support used the wrong default thread + stack size. (Thanks to Melissa O'Neill for the patch.) + - Changed PUSH_CONTENTS macro to no longer modify first parameter. + This usually doesn't matter, but it was certainly an accident waiting + to happen ... + - Added GC_register_finalizer_no_order and friends to gc.h. They're + needed by Java implementations. + - Integrated a fix for a win32 deadlock resulting from clock() calling + malloc. (Thanks to Chris Dodd.) + - Integrated Hiroshi Kawashima's port to Linux/MIPS. This was designed + for a handheld platform, and may or may not be sufficient for other + machines. + - Fixed a va_arg problem with the %c specifier in cordprnt.c. It appears + that this was always broken, but recent versions of gcc are the first to + report the (statically detectable) bug. + - Added an attempt at a more general solution to dlopen races/deadlocks. + GC_dlopen now temporarily disables collection. Still not ideal, but ... + - Added -DUSE_I686_PREFETCH, -DUSE_3DNOW_PREFETCH, and support for IA64 + prefetch instructions. 
May improve performance measurably, but I'm not + sure the code will run correctly on processors that don't support the + instruction. Won't build except with very recent gcc. + - Added caching for header lookups in the marker. This seems to result + in a barely measurable performance gain. Added support for interleaved + lookups of two pointers, but unconfigured that since the performance + gain is currently near zero, and it adds to code size. + - Changed Linux DATA_START definition to check both data_start and + __data_start, since nothing else seems to be portable. + - Added -DUSE_LD_WRAP to optionally take advantage of the GNU ld function + wrapping mechanism. Probably currently useful only on Linux. + - Moved some variables for the scratch allocator into GC_arrays, on + Martin Hirzel's suggestion. + - Fixed a win32 threads bug that caused the collector to not look for + interior pointers from one of the thread stacks without + ALL_INTERIOR_POINTERS. (Thanks to Jeff Sturm.) + - Added Mingw32 support. (Thanks again to Jeff Sturm for the patch.) + - Changed the alpha port to use the generic register scanning code instead + of alpha_mach_dep.s. Alpha_mach_dep.s doesn't look for pointers in fp + registers, but gcc sometimes spills pointers there. (Thanks to Manuel Serrano + for helping me debug this by email.) Changed the IA64 code to do something + similar for similar reasons. + To do: - Very large root set sizes (> 16 MB or so) could cause the collector to abort with an unexpected mark stack overflow. (Thanks again to @@ -1543,3 +1596,7 @@ To do: - Incremental collector should handle large objects better. Currently, it looks like the whole object is treated as dirty if any part of it is. + - Cord/cordprnt.c doesn't build on a few platforms (notably PowerPC), since + we make some unwarranted assumptions about how varargs are handled. This + currently makes the cord-aware versions of printf unusable on some platforms. + Fixing this is unfortunately not trivial. 
diff --git a/boehm-gc/README.debugging b/boehm-gc/README.debugging index 80635c22301..f4dd65676aa 100644 --- a/boehm-gc/README.debugging +++ b/boehm-gc/README.debugging @@ -40,7 +40,8 @@ void * big_realloc(void *p, size_t new_size) 1) Consider using GC_malloc_atomic for objects containing nonpointers. This is especially important for large arrays containg compressed data, pseudo-random numbers, and the like. (This isn't all that likely to solve your problem, but it's a useful and easy optimization anyway, and this is a good time to try it.) If you allocate large objects containg only one or two pointers at the beginning, either try the typed allocation primitives is gc.h, or separate out the pointerfree component. 2) If you are using the collector in its default mode, with interior pointer recognition enabled, consider using GC_malloc_ignore_off_page to allocate large objects. (See gc.h and above for details. Large means > 100K in most environments.) 3) GC_print_block_list() will print a list of all currently allocated heap blocks and what size objects they contain. GC_print_hblkfreelist() will print a list of free heap blocks, and whether they are blacklisted. GC_dump calls both of these, and also prints information about heap sections, and root segments. -4) Write a tool that traces back references to the appropriate root. Send me the code. (I have code that does this for old PCR.) +4) Build the collector with -DKEEP_BACK_PTRS, and use the backptr.h +interface to determine why objects are being retained. ****If the collector appears to be losing objects: @@ -54,5 +55,14 @@ void * big_realloc(void *p, size_t new_size) 6) "print *GC_find_header(p)" in dbx or gdb will print the garbage collector block header information associated with the object p (e.g. object size, etc.) 7) GC_is_marked(p) determines whether p is the base address of a marked object. 
Note that objects allocated since the last collection should not be marked, and that unmarked objects are reclaimed incrementally. It's usually most interesting to set a breakpoint in GC_finish_collection and then to determine how much of the damaged data structure is marked at that point. 8) Look at the tracing facility in mark.c. (Ignore this suggestion unless you are very familiar with collector internals.) +9) [From Melissa O'Neill:] +If you're using multiple threads, double check that all thread +creation goes through the GC_ wrapper functions rather than +calling the thread-creation functions themselves (e.g., +GC_pthread_create rather than pthread_create). The gc.h header +file includes suitable preprocessor definitions to accomplish +this mapping transparently -- the question is: are you including +it in all the modules that create threads? + diff --git a/boehm-gc/README.linux b/boehm-gc/README.linux index b4f136afb5c..e35e712ef95 100644 --- a/boehm-gc/README.linux +++ b/boehm-gc/README.linux @@ -31,16 +31,28 @@ To use threads, you need to abide by the following requirements: 2) You must compile the collector with -DLINUX_THREADS and -D_REENTRANT specified in the Makefile. -3) Every file that makes thread calls should define LINUX_THREADS and +3a) Every file that makes thread calls should define LINUX_THREADS and _REENTRANT and then include gc.h. Gc.h redefines some of the pthread primitives as macros which also provide the collector with information it requires. -4) Currently dlopen() is probably not safe. The collector must traverse - the list of libraries maintained by the runtime loader. That can - probably be an inconsistent state when a thread calling the loader is - is stopped for GC. (It's possible that this is fixable in the - same way it is handled for SOLARIS_THREADS, with GC_dlopen.) 
+3b) A new alternative to (3a) is to build the collector with + -DUSE_LD_WRAP, and to link the final program with + + (for ld) --wrap read --wrap dlopen --wrap pthread_create \ + --wrap pthread_join --wrap pthread_sigmask + + (for gcc) -Wl,--wrap -Wl,read -Wl,--wrap -Wl,dlopen -Wl,--wrap \ + -Wl,pthread_create -Wl,--wrap -Wl,pthread_join -Wl,--wrap \ + -Wl,pthread_sigmask + + In any case, _REENTRANT should be defined during compilation. + +4) Dlopen() disables collection during its execution. (It can't run + concurrently with the collector, since the collector looks at its + data structures. It can't acquire the allocator lock, since arbitrary + user startup code may run as part of dlopen().) Under unusual + conditions, this may cause unexpected heap growth. 5) The combination of LINUX_THREADS, REDIRECT_MALLOC, and incremental collection fails in seemingly random places. This hasn't been tracked @@ -48,3 +60,9 @@ To use threads, you need to abide by the following requirements: uses malloc, and thus can presumably get SIGSEGVs while inside the package. There is no real guarantee that signals are handled properly at that point. + +6) Thread local storage may not be viewed as part of the root set by the + collector. This probably depends on the linuxthreads version. For the + time being, any collectable memory referenced by thread local storage should + also be referenced from elsewhere, or be allocated as uncollectable. + (This is really a bug that should be fixed somehow.) diff --git a/boehm-gc/README.solaris2 b/boehm-gc/README.solaris2 index e5935131c77..cb15e30a19c 100644 --- a/boehm-gc/README.solaris2 +++ b/boehm-gc/README.solaris2 @@ -37,13 +37,10 @@ a thread stack. If you know that you will only be running Solaris2.5 or later, it should be possible to fix this by compiling the collector with -DSOLARIS23_MPROTECT_BUG_FIXED. 
-Jeremy Fitzhardinge points out that there is a problem with the dlopen -replacement, in that startup code in the library is run while the allocation -lock is held. This appears to be difficult to fix, since the collector does -look at data structures maintained by dlopen, and hence some locking is needed -around the dlopen call. Defining USE_PROC_FOR_LIBRARIES will get address -space layout information from /proc avoiding the dlopen lock. But this has -other disadvanatages, e.g. mmapped files may be scanned. +Since 5.0 alpha5, dlopen disables collection temporarily, +unless USE_PROC_FOR_LIBRARIES is defined. In some unlikely cases, this +can result in unpleasant heap growth. But it seems better than the +race/deadlock issues we had before. If solaris_threads are used on an X86 processor with malloc redirected to GC_malloc, it is necessary to call GC_thr_init explicitly before forking the diff --git a/boehm-gc/acinclude.m4 b/boehm-gc/acinclude.m4 index 916579ad820..e413c6ab9df 100644 --- a/boehm-gc/acinclude.m4 +++ b/boehm-gc/acinclude.m4 @@ -31,7 +31,7 @@ AC_SUBST(boehm_gc_basedir) AC_CANONICAL_HOST -AM_INIT_AUTOMAKE(boehm-gc, 5.0a4, no-define) +AM_INIT_AUTOMAKE(boehm-gc, 5.0a6, no-define) # FIXME: We temporarily define our own version of AC_PROG_CC. This is # copied from autoconf 2.12, but does not call AC_PROG_CC_WORKS. We diff --git a/boehm-gc/aclocal.m4 b/boehm-gc/aclocal.m4 index a83cfba58e0..6510bcd3133 100644 --- a/boehm-gc/aclocal.m4 +++ b/boehm-gc/aclocal.m4 @@ -43,7 +43,7 @@ AC_SUBST(boehm_gc_basedir) AC_CANONICAL_HOST -AM_INIT_AUTOMAKE(boehm-gc, 5.0a4, no-define) +AM_INIT_AUTOMAKE(boehm-gc, 5.0a6, no-define) # FIXME: We temporarily define our own version of AC_PROG_CC. This is # copied from autoconf 2.12, but does not call AC_PROG_CC_WORKS. 
We diff --git a/boehm-gc/allchblk.c b/boehm-gc/allchblk.c index 189b94214a7..1505f8e2c71 100644 --- a/boehm-gc/allchblk.c +++ b/boehm-gc/allchblk.c @@ -19,6 +19,7 @@ #include <stdio.h> #include "gc_priv.h" +GC_bool GC_use_entire_heap = 0; /* * Free heap blocks are kept on one of several free lists, @@ -229,11 +230,15 @@ int n; GC_ASSERT(HDR(GC_hblkfreelist[index]) == hhdr); GC_hblkfreelist[index] = hhdr -> hb_next; } else { - PHDR(hhdr) -> hb_next = hhdr -> hb_next; + hdr *phdr; + GET_HDR(hhdr -> hb_prev, phdr); + phdr -> hb_next = hhdr -> hb_next; } if (0 != hhdr -> hb_next) { + hdr * nhdr; GC_ASSERT(!IS_FORWARDING_ADDR_OR_NIL(NHDR(hhdr))); - NHDR(hhdr) -> hb_prev = hhdr -> hb_prev; + GET_HDR(hhdr -> hb_next, nhdr); + nhdr -> hb_prev = hhdr -> hb_prev; } } @@ -244,13 +249,20 @@ struct hblk * GC_free_block_ending_at(h) struct hblk *h; { struct hblk * p = h - 1; - hdr * phdr = HDR(p); + hdr * phdr; + GET_HDR(p, phdr); while (0 != phdr && IS_FORWARDING_ADDR_OR_NIL(phdr)) { p = FORWARDED_ADDR(p,phdr); phdr = HDR(p); } - if (0 != phdr && HBLK_IS_FREE(phdr)) return p; + if (0 != phdr) { + if(HBLK_IS_FREE(phdr)) { + return p; + } else { + return 0; + } + } p = GC_prev_block(h - 1); if (0 != p) { phdr = HDR(p); @@ -271,6 +283,7 @@ hdr * hhdr; { int index = GC_hblk_fl_from_blocks(divHBLKSZ(hhdr -> hb_sz)); struct hblk *second = GC_hblkfreelist[index]; + hdr * second_hdr; # ifdef GC_ASSERTIONS struct hblk *next = (struct hblk *)((word)h + hhdr -> hb_sz); hdr * nexthdr = HDR(next); @@ -283,7 +296,10 @@ hdr * hhdr; GC_hblkfreelist[index] = h; hhdr -> hb_next = second; hhdr -> hb_prev = 0; - if (0 != second) HDR(second) -> hb_prev = h; + if (0 != second) { + GET_HDR(second, second_hdr); + second_hdr -> hb_prev = h; + } GC_invalidate_map(hhdr); } @@ -330,10 +346,10 @@ void GC_merge_unmapped(void) for (i = 0; i <= N_HBLK_FLS; ++i) { h = GC_hblkfreelist[i]; while (h != 0) { - hhdr = HDR(h); + GET_HDR(h, hhdr); size = hhdr->hb_sz; next = (struct hblk *)((word)h + size); - nexthdr 
= HDR(next); + GET_HDR(next, nexthdr); /* Coalesce with successor, if possible */ if (0 != nexthdr && HBLK_IS_FREE(nexthdr)) { nextsize = nexthdr -> hb_sz; @@ -398,8 +414,8 @@ int index; GC_remove_from_fl(hhdr, index); if (total_size == bytes) return h; rest = (struct hblk *)((word)h + bytes); - if (!GC_install_header(rest)) return(0); - rest_hdr = HDR(rest); + rest_hdr = GC_install_header(rest); + if (0 == rest_hdr) return(0); rest_hdr -> hb_sz = total_size - bytes; rest_hdr -> hb_flags = 0; # ifdef GC_ASSERTIONS @@ -506,16 +522,17 @@ int n; /* search for a big enough block in free list */ hbp = GC_hblkfreelist[n]; - hhdr = HDR(hbp); - for(; 0 != hbp; hbp = hhdr -> hb_next, hhdr = HDR(hbp)) { + for(; 0 != hbp; hbp = hhdr -> hb_next) { + GET_HDR(hbp, hhdr); size_avail = hhdr->hb_sz; if (size_avail < size_needed) continue; -# ifdef PRESERVE_LAST + if (!GC_use_entire_heap) { if (size_avail != size_needed + && USED_HEAP_SIZE >= GC_requested_heapsize && !GC_incremental && GC_should_collect()) { continue; } -# endif + } /* If the next heap block is obviously better, go on. */ /* This prevents us from disassembling a single large block */ /* to get tiny blocks. */ @@ -524,7 +541,7 @@ int n; thishbp = hhdr -> hb_next; if (thishbp != 0) { - thishdr = HDR(thishbp); + GET_HDR(thishbp, thishdr); next_size = (signed_word)(thishdr -> hb_sz); if (next_size < size_avail && next_size >= size_needed @@ -551,7 +568,8 @@ int n; size_avail -= (ptr_t)lasthbp - (ptr_t)hbp; thishbp = lasthbp; if (size_avail >= size_needed) { - if (thishbp != hbp && GC_install_header(thishbp)) { + if (thishbp != hbp && + 0 != (thishdr = GC_install_header(thishbp))) { /* Make sure it's mapped before we mangle it. 
*/ # ifdef USE_MUNMAP if (!IS_MAPPED(hhdr)) { @@ -560,7 +578,6 @@ int n; } # endif /* Split the block at thishbp */ - thishdr = HDR(thishbp); GC_split_block(hbp, hhdr, thishbp, thishdr, n); /* Advance to thishbp */ hbp = thishbp; @@ -598,8 +615,7 @@ int n; GC_large_free_bytes -= total_size; GC_remove_from_fl(hhdr, n); for (h = hbp; h < limit; h++) { - if (h == hbp || GC_install_header(h)) { - hhdr = HDR(h); + if (h == hbp || 0 != (hhdr = GC_install_header(h))) { (void) setup_header( hhdr, BYTES_TO_WORDS(HBLKSIZE - HDR_BYTES), @@ -686,7 +702,7 @@ hdr *hhdr, *prevhdr, *nexthdr; signed_word size; - hhdr = HDR(hbp); + GET_HDR(hbp, hhdr); size = hhdr->hb_sz; size = HBLKSIZE * OBJ_SZ_TO_BLOCKS(size); GC_remove_counts(hbp, (word)size); @@ -701,7 +717,7 @@ signed_word size; GC_ASSERT(IS_MAPPED(hhdr)); GC_invalidate_map(hhdr); next = (struct hblk *)((word)hbp + size); - nexthdr = HDR(next); + GET_HDR(next, nexthdr); prev = GC_free_block_ending_at(hbp); /* Coalesce with successor, if possible */ if(0 != nexthdr && HBLK_IS_FREE(nexthdr) && IS_MAPPED(nexthdr)) { diff --git a/boehm-gc/alloc.c b/boehm-gc/alloc.c index 3d0ddf05b36..7b923885b96 100644 --- a/boehm-gc/alloc.c +++ b/boehm-gc/alloc.c @@ -70,8 +70,6 @@ int GC_full_freq = 19; /* Every 20th collection is a full */ GC_bool GC_need_full_gc = FALSE; /* Need full GC do to heap growth. */ -#define USED_HEAP_SIZE (GC_heapsize - GC_large_free_bytes) - word GC_used_heap_size_after_full = 0; char * GC_copyright[] = @@ -655,7 +653,8 @@ word bytes; if (GC_n_heap_sects >= MAX_HEAP_SECTS) { ABORT("Too many heap sections: Increase MAXHINCR or MAX_HEAP_SECTS"); } - if (!GC_install_header(p)) { + phdr = GC_install_header(p); + if (0 == phdr) { /* This is extremely unlikely. Can't add it. This will */ /* almost certainly result in a 0 return from the allocator, */ /* which is entirely appropriate. 
*/ @@ -665,7 +664,6 @@ word bytes; GC_heap_sects[GC_n_heap_sects].hs_bytes = bytes; GC_n_heap_sects++; words = BYTES_TO_WORDS(bytes - HDR_BYTES); - phdr = HDR(p); phdr -> hb_sz = words; phdr -> hb_map = (char *)1; /* A value != GC_invalid_map */ phdr -> hb_flags = 0; @@ -814,6 +812,7 @@ word n; LOCK(); if (!GC_is_initialized) GC_init_inner(); result = (int)GC_expand_hp_inner(divHBLKSZ((word)bytes)); + if (result) GC_requested_heapsize += bytes; UNLOCK(); ENABLE_SIGNALS(); return(result); @@ -827,7 +826,8 @@ GC_bool GC_collect_or_expand(needed_blocks, ignore_off_page) word needed_blocks; GC_bool ignore_off_page; { - if (!GC_incremental && !GC_dont_gc && GC_should_collect()) { + if (!GC_incremental && !GC_dont_gc && + (GC_dont_expand && GC_words_allocd > 0 || GC_should_collect())) { GC_notify_full_gc(); GC_gcollect_inner(); } else { diff --git a/boehm-gc/alpha_mach_dep.s b/boehm-gc/alpha_mach_dep.s index eed3998914a..124de696601 100644 --- a/boehm-gc/alpha_mach_dep.s +++ b/boehm-gc/alpha_mach_dep.s @@ -1,4 +1,8 @@ +# This is BROKEN on a 21264 running gcc, and probably in other cases. +# The compiler may spill pointers to fp registers, and this code doesn't +# scan those. + # define call_push(x) \ lda $16, 0(x); /* copy x to first argument register */ \ jsr $26, GC_push_one; /* call GC_push_one, ret addr in $26 */ \ diff --git a/boehm-gc/configure b/boehm-gc/configure index f26998170bb..b63d4e8aa86 100755 --- a/boehm-gc/configure +++ b/boehm-gc/configure @@ -2168,9 +2168,11 @@ esac machdep= case "$host" in - alpha*-*-*) - machdep="alpha_mach_dep.lo" - ;; +# alpha_mach_dep.s assumes that pointers are not saved in fp registers. +# Gcc on a 21264 can spill pointers to fp registers. Oops. 
+# alpha*-*-*) +# machdep="alpha_mach_dep.lo" +# ;; mipstx39-*-elf*) machdep="mips_ultrix_mach_dep.lo" cat >> confdefs.h <<\EOF diff --git a/boehm-gc/configure.in b/boehm-gc/configure.in index f3e875ac3d9..5d5e25d0333 100644 --- a/boehm-gc/configure.in +++ b/boehm-gc/configure.in @@ -134,9 +134,11 @@ AC_SUBST(CXXINCLUDES) machdep= case "$host" in - alpha*-*-*) - machdep="alpha_mach_dep.lo" - ;; +# alpha_mach_dep.s assumes that pointers are not saved in fp registers. +# Gcc on a 21264 can spill pointers to fp registers. Oops. +# alpha*-*-*) +# machdep="alpha_mach_dep.lo" +# ;; mipstx39-*-elf*) machdep="mips_ultrix_mach_dep.lo" AC_DEFINE(STACKBASE, __stackbase) diff --git a/boehm-gc/dbg_mlc.c b/boehm-gc/dbg_mlc.c index cf6514b2836..776dc3f6ee8 100644 --- a/boehm-gc/dbg_mlc.c +++ b/boehm-gc/dbg_mlc.c @@ -2,6 +2,7 @@ * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers * Copyright (c) 1991-1995 by Xerox Corporation. All rights reserved. * Copyright (c) 1997 by Silicon Graphics. All rights reserved. + * Copyright (c) 1999 by Hewlett-Packard Company. All rights reserved. * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. @@ -12,64 +13,14 @@ * provided the above notices are retained, and a notice that the code was * modified is included with the above copyright notice. */ -# define I_HIDE_POINTERS -# include "gc_priv.h" -# ifdef KEEP_BACK_PTRS -# include "backptr.h" -# endif + +#include "dbg_mlc.h" void GC_default_print_heap_obj_proc(); GC_API void GC_register_finalizer_no_order GC_PROTO((GC_PTR obj, GC_finalization_proc fn, GC_PTR cd, GC_finalization_proc *ofn, GC_PTR *ocd)); -/* Do we want to and know how to save the call stack at the time of */ -/* an allocation? How much space do we want to use in each object? 
*/ - -# define START_FLAG ((word)0xfedcedcb) -# define END_FLAG ((word)0xbcdecdef) - /* Stored both one past the end of user object, and one before */ - /* the end of the object as seen by the allocator. */ - - -/* Object header */ -typedef struct { -# ifdef KEEP_BACK_PTRS - ptr_t oh_back_ptr; -# define MARKED_FOR_FINALIZATION (ptr_t)(-1) - /* Object was marked because it is finalizable. */ -# ifdef ALIGN_DOUBLE - word oh_dummy; -# endif -# endif - char * oh_string; /* object descriptor string */ - word oh_int; /* object descriptor integers */ -# ifdef NEED_CALLINFO - struct callinfo oh_ci[NFRAMES]; -# endif - word oh_sz; /* Original malloc arg. */ - word oh_sf; /* start flag */ -} oh; -/* The size of the above structure is assumed not to dealign things, */ -/* and to be a multiple of the word length. */ - -#define DEBUG_BYTES (sizeof (oh) + sizeof (word)) -#undef ROUNDED_UP_WORDS -#define ROUNDED_UP_WORDS(n) BYTES_TO_WORDS((n) + WORDS_TO_BYTES(1) - 1) - - -#ifdef SAVE_CALL_CHAIN -# define ADD_CALL_CHAIN(base, ra) GC_save_callers(((oh *)(base)) -> oh_ci) -# define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base)) -> oh_ci) -#else -# ifdef GC_ADD_CALLER -# define ADD_CALL_CHAIN(base, ra) ((oh *)(base)) -> oh_ci[0].ci_pc = (ra) -# define PRINT_CALL_CHAIN(base) GC_print_callers(((oh *)(base)) -> oh_ci) -# else -# define ADD_CALL_CHAIN(base, ra) -# define PRINT_CALL_CHAIN(base) -# endif -#endif /* Check whether object with base pointer p has debugging info */ /* p is assumed to point to a legitimate object in our part */ @@ -116,7 +67,7 @@ ptr_t p; /* Store information about the object referencing dest in *base_p */ /* and *offset_p. */ - /* source is root ==> *base_p = 0, *offset_p = address */ + /* source is root ==> *base_p = address, *offset_p = 0 */ /* source is heap object ==> *base_p != 0, *offset_p = offset */ /* Returns 1 on success, 0 if source couldn't be determined. */ /* Dest can be any address within a heap object. 
*/ @@ -128,6 +79,7 @@ ptr_t p; if (!GC_has_debug_info((ptr_t) hdr)) return GC_NO_SPACE; bp = hdr -> oh_back_ptr; if (MARKED_FOR_FINALIZATION == bp) return GC_FINALIZER_REFD; + if (MARKED_FROM_REGISTER == bp) return GC_REFD_FROM_REG; if (0 == bp) return GC_UNREFERENCED; bp = REVEAL_POINTER(bp); bp_base = GC_base(bp); @@ -177,18 +129,15 @@ ptr_t p; } } - /* Force a garbage collection and generate a backtrace from a */ - /* random heap address. */ - void GC_generate_random_backtrace(void) + /* Print back trace for p */ + void GC_print_backtrace(void *p) { - void * current; + void *current = p; int i; - void * base; - size_t offset; GC_ref_kind source; - GC_gcollect(); - current = GC_generate_random_valid_address(); - GC_printf1("Chose address 0x%lx in object\n", (unsigned long)current); + size_t offset; + void *base; + GC_print_heap_obj(GC_base(current)); GC_err_printf0("\n"); for (i = 0; ; ++i) { @@ -207,6 +156,9 @@ ptr_t p; case GC_REFD_FROM_ROOT: GC_err_printf1("root at 0x%lx\n", (unsigned long)base); goto out; + case GC_REFD_FROM_REG: + GC_err_printf0("root in register\n"); + goto out; case GC_FINALIZER_REFD: GC_err_printf0("list of finalizable objects\n"); goto out; @@ -221,6 +173,17 @@ ptr_t p; } out:; } + + /* Force a garbage collection and generate a backtrace from a */ + /* random heap address. 
*/ + void GC_generate_random_backtrace(void) + { + void * current; + GC_gcollect(); + current = GC_generate_random_valid_address(); + GC_printf1("Chose address 0x%lx in object\n", (unsigned long)current); + GC_print_backtrace(current); + } #endif /* KEEP_BACK_PTRS */ @@ -342,16 +305,8 @@ void GC_start_debugging() GC_register_displacement((word)sizeof(oh) + offset); } -# ifdef GC_ADD_CALLER -# define EXTRA_ARGS word ra, CONST char * s, int i -# define OPT_RA ra, -# else -# define EXTRA_ARGS CONST char * s, int i -# define OPT_RA -# endif - # ifdef __STDC__ - GC_PTR GC_debug_malloc(size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_malloc(size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc(lb, s, i) size_t lb; @@ -379,7 +334,7 @@ void GC_start_debugging() } # ifdef __STDC__ - GC_PTR GC_debug_generic_malloc(size_t lb, int k, EXTRA_ARGS) + GC_PTR GC_debug_generic_malloc(size_t lb, int k, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc(lb, k, s, i) size_t lb; @@ -409,7 +364,7 @@ void GC_start_debugging() #ifdef STUBBORN_ALLOC # ifdef __STDC__ - GC_PTR GC_debug_malloc_stubborn(size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_malloc_stubborn(size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc_stubborn(lb, s, i) size_t lb; @@ -476,7 +431,7 @@ GC_PTR p; #endif /* STUBBORN_ALLOC */ # ifdef __STDC__ - GC_PTR GC_debug_malloc_atomic(size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_malloc_atomic(size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc_atomic(lb, s, i) size_t lb; @@ -501,7 +456,7 @@ GC_PTR p; } # ifdef __STDC__ - GC_PTR GC_debug_malloc_uncollectable(size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_malloc_uncollectable(size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc_uncollectable(lb, s, i) size_t lb; @@ -527,7 +482,7 @@ GC_PTR p; #ifdef ATOMIC_UNCOLLECTABLE # ifdef __STDC__ - GC_PTR GC_debug_malloc_atomic_uncollectable(size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_malloc_atomic_uncollectable(size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_malloc_atomic_uncollectable(lb, s, i) 
size_t lb; @@ -607,7 +562,7 @@ GC_PTR p; } # ifdef __STDC__ - GC_PTR GC_debug_realloc(GC_PTR p, size_t lb, EXTRA_ARGS) + GC_PTR GC_debug_realloc(GC_PTR p, size_t lb, GC_EXTRA_PARAMS) # else GC_PTR GC_debug_realloc(p, lb, s, i) GC_PTR p; @@ -810,7 +765,7 @@ struct closure { GC_PTR cd, GC_finalization_proc *ofn, GC_PTR *ocd) # else - void GC_debug_register_finalizer_no_order + void GC_debug_register_finalizer_ignore_self (obj, fn, cd, ofn, ocd) GC_PTR obj; GC_finalization_proc fn; @@ -822,9 +777,9 @@ struct closure { ptr_t base = GC_base(obj); if (0 == base || (ptr_t)obj - base != sizeof(oh)) { GC_err_printf1( - "GC_register_finalizer_no_order called with non-base-pointer 0x%lx\n", + "GC_register_finalizer_ignore_self called with non-base-pointer 0x%lx\n", obj); } - GC_register_finalizer_no_order(base, GC_debug_invoke_finalizer, + GC_register_finalizer_ignore_self(base, GC_debug_invoke_finalizer, GC_make_closure(fn,cd), ofn, ocd); } diff --git a/boehm-gc/dyn_load.c b/boehm-gc/dyn_load.c index f44726bc833..8d00346c790 100644 --- a/boehm-gc/dyn_load.c +++ b/boehm-gc/dyn_load.c @@ -32,7 +32,9 @@ #include "gc_priv.h" /* BTL: avoid circular redefinition of dlopen if SOLARIS_THREADS defined */ -# if (defined(SOLARIS_THREADS) || defined(LINUX_THREADS)) && defined(dlopen) +# if (defined(LINUX_THREADS) || defined(SOLARIS_THREADS) \ + || defined(HPUX_THREADS) || defined(IRIX_THREADS)) && defined(dlopen) \ + && !defined(USE_LD_WRAP) /* To support threads in Solaris, gc.h interposes on dlopen by */ /* defining "dlopen" to be "GC_dlopen", which is implemented below. */ /* However, both GC_FirstDLOpenedLinkMap() and GC_dlopen() use the */ @@ -159,37 +161,77 @@ static ptr_t GC_first_common() #endif /* SUNOS4 ... */ -# if defined(SUNOS4) || defined(SUNOS5DL) -/* Add dynamic library data sections to the root set. 
*/ -# if !defined(PCR) && !defined(SOLARIS_THREADS) && defined(THREADS) -# ifndef SRC_M3 - --> fix mutual exclusion with dlopen -# endif /* We assume M3 programs don't call dlopen for now */ -# endif +# if defined(LINUX_THREADS) || defined(SOLARIS_THREADS) \ + || defined(HPUX_THREADS) || defined(IRIX_THREADS) + /* Make sure we're not in the middle of a collection, and make */ + /* sure we don't start any. Returns previous value of GC_dont_gc. */ + /* This is invoked prior to a dlopen call to avoid synchronization */ + /* issues. We can't just acquire the allocation lock, since startup */ + /* code in dlopen may try to allocate. */ + /* This solution risks heap growth in the presence of many dlopen */ + /* calls in either a multithreaded environment, or if the library */ + /* initialization code allocates substantial amounts of GC'ed memory. */ + /* But I don't know of a better solution. */ + /* This can still deadlock if the client explicitly starts a GC */ + /* during the dlopen. He shouldn't do that. */ + static GC_bool disable_gc_for_dlopen() + { + GC_bool result; + LOCK(); + result = GC_dont_gc; + while (GC_incremental && GC_collection_in_progress()) { + GC_collect_a_little_inner(1000); + } + GC_dont_gc = TRUE; + UNLOCK(); + return(result); + } -# ifdef SOLARIS_THREADS /* Redefine dlopen to guarantee mutual exclusion with */ /* GC_register_dynamic_libraries. */ - /* assumes that dlopen doesn't need to call GC_malloc */ - /* and friends. */ -# include <thread.h> -# include <synch.h> + /* Should probably happen for other operating systems, too. 
*/ -void * GC_dlopen(const char *path, int mode) +#include <dlfcn.h> + +#ifdef USE_LD_WRAP + void * __wrap_dlopen(const char *path, int mode) +#else + void * GC_dlopen(path, mode) + GC_CONST char * path; + int mode; +#endif { void * result; + GC_bool dont_gc_save; # ifndef USE_PROC_FOR_LIBRARIES - mutex_lock(&GC_allocate_ml); + dont_gc_save = disable_gc_for_dlopen(); +# endif +# ifdef USE_LD_WRAP + result = __real_dlopen(path, mode); +# else + result = dlopen(path, mode); # endif - result = dlopen(path, mode); # ifndef USE_PROC_FOR_LIBRARIES - mutex_unlock(&GC_allocate_ml); + GC_dont_gc = dont_gc_save; # endif return(result); } # endif /* SOLARIS_THREADS */ +/* BTL: added to fix circular dlopen definition if SOLARIS_THREADS defined */ +# if defined(GC_must_restore_redefined_dlopen) +# define dlopen GC_dlopen +# endif + +# if defined(SUNOS4) || defined(SUNOS5DL) +/* Add dynamic library data sections to the root set. */ +# if !defined(PCR) && !defined(SOLARIS_THREADS) && defined(THREADS) +# ifndef SRC_M3 + --> fix mutual exclusion with dlopen +# endif /* We assume M3 programs don't call dlopen for now */ +# endif + # ifndef USE_PROC_FOR_LIBRARIES void GC_register_dynamic_libraries() { @@ -255,25 +297,6 @@ void GC_register_dynamic_libraries() # endif /* !USE_PROC ... */ # endif /* SUNOS */ -#ifdef LINUX_THREADS -#include <dlfcn.h> - -void * GC_dlopen(const char *path, int mode) -{ - void * result; - - LOCK(); - result = dlopen(path, mode); - UNLOCK(); - return(result); -} -#endif /* LINUX_THREADS */ - -/* BTL: added to fix circular dlopen definition if SOLARIS_THREADS defined */ -#if defined(GC_must_restore_redefined_dlopen) -# define dlopen GC_dlopen -#endif - #if defined(LINUX) && defined(__ELF__) || defined(SCO_ELF) /* Dynamic loading code for Linux running ELF. 
Somewhat tested on diff --git a/boehm-gc/finalize.c b/boehm-gc/finalize.c index 2ee927fe432..1ab56cee82e 100644 --- a/boehm-gc/finalize.c +++ b/boehm-gc/finalize.c @@ -694,6 +694,14 @@ GC_API void GC_finalize_all() } #endif +/* Returns true if it is worth calling GC_invoke_finalizers. (Useful if */ +/* finalizers can only be called from some kind of `safe state' and */ +/* getting into that safe state is expensive.) */ +int GC_should_invoke_finalizers GC_PROTO((void)) +{ + return GC_finalize_now != 0; +} + /* Invoke finalizers for all objects that are ready to be finalized. */ /* Should be called without allocation lock. */ int GC_invoke_finalizers() diff --git a/boehm-gc/gc.h b/boehm-gc/gc.h index bd7fddf4391..e35f54f7d3f 100644 --- a/boehm-gc/gc.h +++ b/boehm-gc/gc.h @@ -1,7 +1,8 @@ /* * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers * Copyright (c) 1991-1995 by Xerox Corporation. All rights reserved. - * Copyright 1996 by Silicon Graphics. All rights reserved. + * Copyright 1996-1999 by Silicon Graphics. All rights reserved. + * Copyright 1999 by Hewlett-Packard Company. All rights reserved. * * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. @@ -35,6 +36,14 @@ #include "libgc_globals.h" #endif +#if defined(__MINGW32__) && defined(WIN32_THREADS) +# ifdef GC_BUILD +# define GC_API __declspec(dllexport) +# else +# define GC_API __declspec(dllimport) +# endif +#endif + #if defined(_MSC_VER) && defined(_DLL) # ifdef GC_BUILD # define GC_API __declspec(dllexport) @@ -130,6 +139,17 @@ GC_API int GC_dont_expand; /* Dont expand heap unless explicitly requested */ /* or forced to. */ +GC_API int GC_use_entire_heap; + /* Causes the nonincremental collector to use the */ + /* entire heap before collecting. This was the only */ + /* option for GC versions < 5.0. 
This sometimes */ + /* results in more large block fragmentation, since */ + /* very large blocks will tend to get broken up */ + /* during each GC cycle. It is likely to result in a */ + /* larger working set, but lower collection */ + /* frequencies, and hence fewer instructions executed */ + /* in the collector. */ + GC_API int GC_full_freq; /* Number of partial collections between */ /* full collections. Matters only if */ /* GC_incremental is set. */ @@ -352,11 +372,11 @@ GC_API GC_PTR GC_malloc_atomic_ignore_off_page GC_PROTO((size_t lb)); #ifdef GC_ADD_CALLER # define GC_EXTRAS GC_RETURN_ADDR, __FILE__, __LINE__ -# define GC_EXTRA_PARAMS GC_word ra, GC_CONST char * descr_string, - int descr_int +# define GC_EXTRA_PARAMS GC_word ra, GC_CONST char * s, + int i #else # define GC_EXTRAS __FILE__, __LINE__ -# define GC_EXTRA_PARAMS GC_CONST char * descr_string, int descr_int +# define GC_EXTRA_PARAMS GC_CONST char * s, int i #endif /* Debugging (annotated) allocation. GC_gcollect will check */ @@ -494,6 +514,7 @@ GC_API void GC_debug_register_finalizer_no_order GC_PROTO((GC_PTR obj, GC_finalization_proc fn, GC_PTR cd, GC_finalization_proc *ofn, GC_PTR *ocd)); + /* The following routine may be used to break cycles between */ /* finalizable objects, thus causing cyclic finalizable */ /* objects to be finalized in the correct order. Standard */ @@ -550,6 +571,9 @@ GC_API int GC_unregister_disappearing_link GC_PROTO((GC_PTR * /* link */)); GC_API GC_PTR GC_make_closure GC_PROTO((GC_finalization_proc fn, GC_PTR data)); GC_API void GC_debug_invoke_finalizer GC_PROTO((GC_PTR obj, GC_PTR data)); +/* Returns !=0 if GC_invoke_finalizers has something to do. */ +GC_API int GC_should_invoke_finalizers GC_PROTO((void)); + GC_API int GC_invoke_finalizers GC_PROTO((void)); /* Run finalizers for all objects that are ready to */ /* be finalized.
Return the number of finalizers */ @@ -712,12 +736,9 @@ GC_API void (*GC_is_visible_print_proc) # endif /* SOLARIS_THREADS */ -#if defined(LINUX_THREADS) - void * GC_dlopen(const char *path, int mode); -# define dlopen GC_dlopen -#endif -#if defined(IRIX_THREADS) || defined(LINUX_THREADS) || defined(HPUX_THREADS) +#if !defined(USE_LD_WRAP) && \ + (defined(IRIX_THREADS) || defined(LINUX_THREADS) || defined(HPUX_THREADS)) /* We treat these similarly. */ # include <pthread.h> # include <signal.h> @@ -731,8 +752,9 @@ GC_API void (*GC_is_visible_print_proc) # define pthread_create GC_pthread_create # define pthread_sigmask GC_pthread_sigmask # define pthread_join GC_pthread_join +# define dlopen GC_dlopen -#endif /* IRIX_THREADS || LINUX_THREADS */ +#endif /* xxxxx_THREADS */ # if defined(PCR) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || \ defined(IRIX_THREADS) || defined(LINUX_THREADS) || \ diff --git a/boehm-gc/gc_cpp.h b/boehm-gc/gc_cpp.h index ad7df5d71fa..36013e135b9 100644 --- a/boehm-gc/gc_cpp.h +++ b/boehm-gc/gc_cpp.h @@ -16,12 +16,11 @@ the code was modified is included with the above copyright notice. C++ Interface to the Boehm Collector John R. Ellis and Jesse Hull - Last modified on Mon Jul 24 15:43:42 PDT 1995 by ellis This interface provides access to the Boehm collector. It provides basic facilities similar to those described in "Safe, Efficient Garbage Collection for C++", by John R. Elis and David L. Detlefs -(ftp.parc.xerox.com:/pub/ellis/gc). +(ftp://ftp.parc.xerox.com/pub/ellis/gc). All heap-allocated objects are either "collectable" or "uncollectable". Programs must explicitly delete uncollectable @@ -38,7 +37,7 @@ Objects derived from class "gc" are collectable. For example: A* a = new A; // a is collectable. Collectable instances of non-class types can be allocated using the GC -placement: +(or UseGC) placement: typedef int A[ 10 ]; A* a = new (GC) A; @@ -124,6 +123,12 @@ invoked using the ANSI-conforming syntax t->~T(). 
If you're using cfront 3.0, you'll have to comment out the class gc_cleanup, which uses explicit invocation. +5. GC name conflicts: + +Many other systems seem to use the identifier "GC" as an abbreviation +for "Graphics Context". Since version 5.0, GC placement has been replaced +by UseGC. GC is an alias for UseGC, unless GC_NAME_CONFLICT is defined. + ****************************************************************************/ #include "gc.h" @@ -138,7 +143,11 @@ uses explicit invocation. # define OPERATOR_NEW_ARRAY #endif -enum GCPlacement {GC, NoGC, PointerFreeGC}; +enum GCPlacement {UseGC, +#ifndef GC_NAME_CONFLICT + GC=UseGC, +#endif + NoGC, PointerFreeGC}; class gc {public: inline void* operator new( size_t size ); @@ -211,7 +220,7 @@ inline void* gc::operator new( size_t size ) { return GC_MALLOC( size );} inline void* gc::operator new( size_t size, GCPlacement gcp ) { - if (gcp == GC) + if (gcp == UseGC) return GC_MALLOC( size ); else if (gcp == PointerFreeGC) return GC_MALLOC_ATOMIC( size ); @@ -261,7 +270,7 @@ inline void* operator new( { void* obj; - if (gcp == GC) { + if (gcp == UseGC) { obj = GC_MALLOC( size ); if (cleanup != 0) GC_REGISTER_FINALIZER_IGNORE_SELF( diff --git a/boehm-gc/gc_hdrs.h b/boehm-gc/gc_hdrs.h index 60dc2ad37d6..6966a9a1a87 100644 --- a/boehm-gc/gc_hdrs.h +++ b/boehm-gc/gc_hdrs.h @@ -24,6 +24,17 @@ typedef struct hblkhdr hdr; * The 2 level tree data structure that is used to find block headers. * If there are more than 32 bits in a pointer, the top level is a hash * table. + * + * This defines HDR, GET_HDR, and SET_HDR, the main macros used to + * retrieve and set object headers. We also define some variants to + * retrieve 2 unrelated headers in interleaved fashion. This + * slightly improves scheduling. + * + * Since 5.0 alpha 5, we can also take advantage of a header lookup + * cache. This is a locally declared direct mapped cache, used inside + * the marker. 
The HC_GET_HDR and HC_GET_HDR2 macros use and maintain this + * cache. Assuming we get reasonable hit rates, this shaves a few + * memory references from each pointer validation. */ # if CPP_WORDSZ > 32 @@ -45,6 +56,127 @@ typedef struct hblkhdr hdr; # define TOP_SZ (1 << LOG_TOP_SZ) # define BOTTOM_SZ (1 << LOG_BOTTOM_SZ) +#ifndef SMALL_CONFIG +# define USE_HDR_CACHE +#endif + +/* #define COUNT_HDR_CACHE_HITS */ + +extern hdr * GC_invalid_header; /* header for an imaginary block */ + /* containing no objects. */ + + +/* Check whether p and corresponding hhdr point to long or invalid */ +/* object. If so, advance them to */ +/* beginning of block, or set hhdr to GC_invalid_header. */ +#define ADVANCE(p, hhdr, source) \ + if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) { \ + p = GC_FIND_START(p, hhdr, (word)source); \ + if (p == 0) { \ + hhdr = GC_invalid_header; \ + } else { \ + hhdr = GC_find_header(p); \ + } \ + } + +#ifdef USE_HDR_CACHE + +# ifdef COUNT_HDR_CACHE_HITS + extern word GC_hdr_cache_hits; + extern word GC_hdr_cache_misses; +# define HC_HIT() ++GC_hdr_cache_hits +# define HC_MISS() ++GC_hdr_cache_misses +# else +# define HC_HIT() +# define HC_MISS() +# endif + + typedef struct hce { + word block_addr; /* right shifted by LOG_HBLKSIZE */ + hdr * hce_hdr; + } hdr_cache_entry; + +# define HDR_CACHE_SIZE 8 /* power of 2 */ + +# define DECLARE_HDR_CACHE \ + hdr_cache_entry hdr_cache[HDR_CACHE_SIZE] + +# define INIT_HDR_CACHE BZERO(hdr_cache, sizeof(hdr_cache)); + +# define HCE(h) hdr_cache + (((word)(h) >> LOG_HBLKSIZE) & (HDR_CACHE_SIZE-1)) + +# define HCE_VALID_FOR(hce,h) ((hce) -> block_addr == \ + ((word)(h) >> LOG_HBLKSIZE)) + +# define HCE_HDR(h) ((hce) -> hce_hdr) + + +/* Analogous to GET_HDR, except that in the case of large objects, it */ +/* Returns the header for the object beginning, and updates p. */ +/* Returns GC_invalid_header instead of 0. All of this saves a branch */ +/* in the fast path.
*/ +# define HC_GET_HDR(p, hhdr, source) \ + { \ + hdr_cache_entry * hce = HCE(p); \ + if (HCE_VALID_FOR(hce, p)) { \ + HC_HIT(); \ + hhdr = hce -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p, hhdr); \ + ADVANCE(p, hhdr, source); \ + hce -> block_addr = (word)(p) >> LOG_HBLKSIZE; \ + hce -> hce_hdr = hhdr; \ + } \ + } + +# define HC_GET_HDR2(p1, hhdr1, source1, p2, hhdr2, source2) \ + { \ + hdr_cache_entry * hce1 = HCE(p1); \ + hdr_cache_entry * hce2 = HCE(p2); \ + if (HCE_VALID_FOR(hce1, p1)) { \ + HC_HIT(); \ + hhdr1 = hce1 -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p1, hhdr1); \ + ADVANCE(p1, hhdr1, source1); \ + hce1 -> block_addr = (word)(p1) >> LOG_HBLKSIZE; \ + hce1 -> hce_hdr = hhdr1; \ + } \ + if (HCE_VALID_FOR(hce2, p2)) { \ + HC_HIT(); \ + hhdr2 = hce2 -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p2, hhdr2); \ + ADVANCE(p2, hhdr2, source2); \ + hce2 -> block_addr = (word)(p2) >> LOG_HBLKSIZE; \ + hce2 -> hce_hdr = hhdr2; \ + } \ + } + +#else /* !USE_HDR_CACHE */ + +# define DECLARE_HDR_CACHE + +# define INIT_HDR_CACHE + +# define HC_GET_HDR(p, hhdr, source) \ + { \ + GET_HDR(p, hhdr); \ + ADVANCE(p, hhdr, source); \ + } + +# define HC_GET_HDR2(p1, hhdr1, source1, p2, hhdr2, source2) \ + { \ + GET_HDR2(p1, hhdr1, p2, hhdr2); \ + ADVANCE(p1, hhdr1, source1); \ + ADVANCE(p2, hhdr2, source2); \ + } + +#endif + typedef struct bi { hdr * index[BOTTOM_SZ]; /* @@ -97,6 +229,8 @@ typedef struct bi { # define GET_HDR(p, hhdr) (hhdr) = HDR(p) # define SET_HDR(p, hhdr) HDR_INNER(p) = (hhdr) # define GET_HDR_ADDR(p, ha) (ha) = &(HDR_INNER(p)) +# define GET_HDR2(p1, hhdr1, p2, hhdr2) \ + { GET_HDR(p1, hhdr1); GET_HDR(p2, hhdr2); } # else /* hash */ /* Hash function for tree top level */ # define TL_HASH(hi) ((hi) & (TOP_SZ - 1)) @@ -123,6 +257,40 @@ typedef struct bi { # define SET_HDR(p, hhdr) { register hdr ** _ha; GET_HDR_ADDR(p, _ha); \ *_ha = (hhdr); } # define HDR(p) GC_find_header((ptr_t)(p)) + /* And some interleaved versions for two 
pointers at once. */ + /* This hopefully helps scheduling on processors like IA64. */ +# define GET_BI2(p1, bottom_indx1, p2, bottom_indx2) \ + { \ + register word hi1 = \ + (word)(p1) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE); \ + register word hi2 = \ + (word)(p2) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE); \ + register bottom_index * _bi1 = GC_top_index[TL_HASH(hi1)]; \ + register bottom_index * _bi2 = GC_top_index[TL_HASH(hi2)]; \ + \ + while (_bi1 -> key != hi1 && _bi1 != GC_all_nils) \ + _bi1 = _bi1 -> hash_link; \ + while (_bi2 -> key != hi2 && _bi2 != GC_all_nils) \ + _bi2 = _bi2 -> hash_link; \ + (bottom_indx1) = _bi1; \ + (bottom_indx2) = _bi2; \ + } +# define GET_HDR_ADDR2(p1, ha1, p2, ha2) \ + { \ + register bottom_index * bi1; \ + register bottom_index * bi2; \ + \ + GET_BI2(p1, bi1, p2, bi2); \ + (ha1) = &(HDR_FROM_BI(bi1, p1)); \ + (ha2) = &(HDR_FROM_BI(bi2, p2)); \ + } +# define GET_HDR2(p1, hhdr1, p2, hhdr2) \ + { register hdr ** _ha1; \ + register hdr ** _ha2; \ + GET_HDR_ADDR2(p1, _ha1, p2, _ha2); \ + (hhdr1) = *_ha1; \ + (hhdr2) = *_ha2; \ + } # endif /* Is the result a forwarding address to someplace closer to the */ diff --git a/boehm-gc/gc_mark.h b/boehm-gc/gc_mark.h index 4628323f990..3a4908fb908 100644 --- a/boehm-gc/gc_mark.h +++ b/boehm-gc/gc_mark.h @@ -20,6 +20,10 @@ #ifndef GC_MARK_H # define GC_MARK_H +# ifdef KEEP_BACK_PTRS +# include "dbg_mlc.h" +# endif + /* A client supplied mark procedure. Returns new mark stack pointer. */ /* Primary effect should be to push new entries on the mark stack. */ /* Mark stack pointer values are passed and returned explicitly. */ @@ -41,8 +45,10 @@ /* The real declarations of the following are in gc_priv.h, so that */ /* we can avoid scanning the following table. 
*/ /* -typedef struct ms_entry * (*mark_proc)( word * addr, mark_stack_ptr, - mark_stack_limit, env ); +typedef struct ms_entry * (*mark_proc)( word * addr, + struct ms_entry *mark_stack_ptr, + struct ms_entry *mark_stack_limit, + word env ); # define LOG_MAX_MARK_PROCS 6 # define MAX_MARK_PROCS (1 << LOG_MAX_MARK_PROCS) @@ -51,6 +57,12 @@ extern mark_proc GC_mark_procs[MAX_MARK_PROCS]; extern word GC_n_mark_procs; +/* In a few cases it's necessary to assign statically known indices to */ +/* certain mark procs. Thus we reserve a few for well known clients. */ +/* (This is necessary if mark descriptors are compiler generated.) */ +#define GC_RESERVED_MARK_PROCS 8 +# define GCJ_RESERVED_MARK_PROC_INDEX 0 + /* Object descriptors on mark stack or in objects. Low order two */ /* bits are tags distinguishing among the following 4 possibilities */ /* for the high order 30 bits. */ @@ -84,6 +96,13 @@ extern word GC_n_mark_procs; #define DS_PER_OBJECT 3 /* The real descriptor is at the */ /* byte displacement from the beginning of the */ /* object given by descr & ~DS_TAGS */ + /* If the descriptor is negative, the real */ + /* descriptor is at (*<object_start>) - */ + /* (descr & ~DS_TAGS) - INDIR_PER_OBJ_BIAS */ + /* The latter alternative can be used if each */ + /* object contains a type descriptor in the */ + /* first word. 
*/ +#define INDIR_PER_OBJ_BIAS 0x10 typedef struct ms_entry { word * mse_start; /* First word of object */ @@ -98,7 +117,7 @@ extern mse * GC_mark_stack_top; extern mse * GC_mark_stack; -word GC_find_start(); +ptr_t GC_find_start(); mse * GC_signal_mark_stack_overflow(); @@ -144,16 +163,60 @@ mse * GC_signal_mark_stack_overflow(); # define PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, \ source, exit_label) \ { \ - register int displ; /* Displacement in block; first bytes, then words */ \ - register hdr * hhdr; \ - register map_entry_type map_entry; \ - \ - GET_HDR(current,hhdr); \ - if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) { \ - current = GC_FIND_START(current, hhdr, (word)source); \ - if (current == 0) goto exit_label; \ - hhdr = HDR(current); \ + hdr * my_hhdr; \ + ptr_t my_current = current; \ + \ + GET_HDR(my_current, my_hhdr); \ + if (IS_FORWARDING_ADDR_OR_NIL(my_hhdr)) { \ + my_current = GC_FIND_START(my_current, my_hhdr, (word)source); \ + if (my_current == 0) goto exit_label; \ + my_hhdr = GC_find_header(my_current); \ } \ + PUSH_CONTENTS_HDR(my_current, mark_stack_top, mark_stack_limit, \ + source, exit_label, my_hhdr); \ +exit_label: ; \ +} + +/* As above, but use header cache for header lookup. */ +# define HC_PUSH_CONTENTS(current, mark_stack_top, mark_stack_limit, \ + source, exit_label) \ +{ \ + hdr * my_hhdr; \ + ptr_t my_current = current; \ + \ + HC_GET_HDR(my_current, my_hhdr, source); \ + PUSH_CONTENTS_HDR(my_current, mark_stack_top, mark_stack_limit, \ + source, exit_label, my_hhdr); \ +exit_label: ; \ +} + +/* As above, but deal with two pointers in interleaved fashion. 
*/ +# define HC_PUSH_CONTENTS2(current1, current2, mark_stack_top, \ + mark_stack_limit, \ + source1, source2, exit_label1, exit_label2) \ +{ \ + hdr * hhdr1; \ + ptr_t my_current1 = current1; \ + hdr * hhdr2; \ + ptr_t my_current2 = current2; \ + \ + HC_GET_HDR2(my_current1, hhdr1, source1, my_current2, hhdr2, source2); \ + PUSH_CONTENTS_HDR(my_current1, mark_stack_top, mark_stack_limit, \ + source1, exit_label1, hhdr1); \ +exit_label1: ; \ + if (0 != hhdr2) { \ + PUSH_CONTENTS_HDR(my_current2, mark_stack_top, mark_stack_limit, \ + source2, exit_label2, hhdr2); \ + } \ +exit_label2: ; \ +} + +# define PUSH_CONTENTS_HDR(current, mark_stack_top, mark_stack_limit, \ + source, exit_label, hhdr) \ +{ \ + int displ; /* Displacement in block; first bytes, then words */ \ + map_entry_type map_entry; \ + \ displ = HBLKDISPL(current); \ map_entry = MAP_ENTRY((hhdr -> hb_map), displ); \ if (map_entry == OBJ_INVALID) { \ @@ -177,10 +240,9 @@ mse * GC_signal_mark_stack_overflow(); } \ PUSH_OBJ(((word *)(HBLKPTR(current)) + displ), hhdr, \ mark_stack_top, mark_stack_limit) \ - exit_label: ; \ } -#ifdef PRINT_BLACK_LIST +#if defined(PRINT_BLACK_LIST) || defined(KEEP_BACK_PTRS) # define PUSH_ONE_CHECKED(p, ip, source) \ GC_push_one_checked(p, ip, (ptr_t)(source)) #else diff --git a/boehm-gc/gc_priv.h b/boehm-gc/gc_priv.h index 23977d3b37e..a4312b13dca 100644 --- a/boehm-gc/gc_priv.h +++ b/boehm-gc/gc_priv.h @@ -82,6 +82,7 @@ typedef char * ptr_t; /* A generic pointer to which we can add */ # define GC_FAR #endif + /*********************************/ /* */ /* Definitions for conservative */ @@ -173,15 +174,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add */ /* May save significant amounts of space for obj_map */ /* entries. */ -#ifndef OLD_BLOCK_ALLOC - /* Macros controlling large block allocation strategy. 
*/ -# define EXACT_FIRST /* Make a complete pass through the large object */ - /* free list before splitting a block */ -# define PRESERVE_LAST /* Do not divide last allocated heap segment */ - /* unless we would otherwise need to expand the */ - /* heap. */ -#endif - /* ALIGN_DOUBLE requires MERGE_SIZES at present. */ # if defined(ALIGN_DOUBLE) && !defined(MERGE_SIZES) # define MERGE_SIZES @@ -281,6 +273,13 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */); # define MS_TIME_DIFF(a,b) ((double) (a.tv_sec - b.tv_sec) * 1000.0 \ + (double) (a.tv_usec - b.tv_usec) / 1000.0) #else /* !BSD_TIME */ +# ifdef MSWIN32 +# include <windows.h> +# include <winbase.h> +# define CLOCK_TYPE DWORD +# define GET_TIME(x) x = GetTickCount() +# define MS_TIME_DIFF(a,b) ((long)((a)-(b))) +# else /* !MSWIN32, !BSD_TIME */ # include <time.h> # if !defined(__STDC__) && defined(SPARC) && defined(SUNOS4) clock_t clock(); /* Not in time.h, where it belongs */ @@ -306,6 +305,7 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */); # define GET_TIME(x) x = clock() # define MS_TIME_DIFF(a,b) ((unsigned long) \ (1000.0*(double)((a)-(b))/(double)CLOCKS_PER_SEC)) +# endif /* !MSWIN32 */ #endif /* !BSD_TIME */ /* We use bzero and bcopy internally. They may not be available. 
*/ @@ -437,8 +437,11 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */); # define LOCK() mutex_lock(&GC_allocate_ml); # define UNLOCK() mutex_unlock(&GC_allocate_ml); # endif -# ifdef LINUX_THREADS +# if defined(LINUX_THREADS) +# if defined(I386)|| defined(POWERPC) || defined(ALPHA) || defined(IA64) \ + || defined(M68K) # include <pthread.h> +# define USE_SPIN_LOCK # if defined(I386) inline static int GC_test_and_set(volatile unsigned int *addr) { int oldval; @@ -448,9 +451,38 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */); : "0"(1), "m"(*(addr))); return oldval; } -# else -# if defined(POWERPC) +# endif +# if defined(IA64) inline static int GC_test_and_set(volatile unsigned int *addr) { + int oldval; + __asm__ __volatile__("xchg4 %0=%1,%2" + : "=r"(oldval), "=m"(*addr) + : "r"(1), "1"(*addr)); + return oldval; + } + inline static void GC_clear(volatile unsigned int *addr) { + __asm__ __volatile__("st4.rel %0=r0" : "=m" (*addr)); + } +# define GC_CLEAR_DEFINED +# endif +# ifdef M68K + /* Contributed by Tony Mantler. I'm not sure how well it was */ + /* tested. */ + inline static int GC_test_and_set(volatile unsigned int *addr) { + char oldval; /* this must be no longer than 8 bits */ + + /* The return value is semi-phony. 
*/ + /* 'tas' sets bit 7 while the return */ + /* value pretends bit 0 was set */ + __asm__ __volatile__( + "tas %1@; sne %0; negb %0" + : "=d" (oldval) + : "a" (addr)); + return oldval; + } +# endif +# if defined(POWERPC) + inline static int GC_test_and_set(volatile unsigned int *addr) { int oldval; int temp = 1; // locked value @@ -465,46 +497,61 @@ void GC_print_callers (/* struct callinfo info[NFRAMES] */); : "r"(temp), "1"(addr) : "memory"); return (int)oldval; - } -# else -# ifdef ALPHA - inline static int GC_test_and_set(volatile unsigned int * -addr) - { - unsigned long oldvalue; - unsigned long temp; - - __asm__ __volatile__( - "1: ldl_l %0,%1\n" - " and %0,%3,%2\n" - " bne %2,2f\n" - " xor %0,%3,%0\n" - " stl_c %0,%1\n" - " beq %0,3f\n" - " mb\n" - "2:\n" - ".section .text2,\"ax\"\n" - "3: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (*addr), "=&r" -(oldvalue) - :"Ir" (1), "m" (*addr)); - - return oldvalue; - } -# else - -- > Need implementation of GC_test_and_set() -# endif -# endif + } + inline static void GC_clear(volatile unsigned int *addr) { + __asm__ __volatile__("eieio"); + *(addr) = 0; + } +# define GC_CLEAR_DEFINED # endif - inline static void GC_clear(volatile unsigned int *addr) { +# ifdef ALPHA + inline static int GC_test_and_set(volatile unsigned int * addr) + { + unsigned long oldvalue; + unsigned long temp; + + __asm__ __volatile__( + "1: ldl_l %0,%1\n" + " and %0,%3,%2\n" + " bne %2,2f\n" + " xor %0,%3,%0\n" + " stl_c %0,%1\n" + " beq %0,3f\n" + " mb\n" + "2:\n" + ".section .text2,\"ax\"\n" + "3: br 1b\n" + ".previous" + :"=&r" (temp), "=m" (*addr), "=&r" (oldvalue) + :"Ir" (1), "m" (*addr)); + + return oldvalue; + } + /* Should probably also define GC_clear, since it needs */ + /* a memory barrier ?? */ +# endif /* ALPHA */ +# ifdef ARM32 + inline static int GC_test_and_set(volatile unsigned int *addr) { + int oldval; + /* SWP on ARM is very similar to XCHG on x86. Doesn't lock the + * bus because there are no SMP ARM machines. 
If/when there are, + * this code will likely need to be updated. */ + /* See linuxthreads/sysdeps/arm/pt-machine.h in glibc-2.1 */ + __asm__ __volatile__("swp %0, %1, [%2]" + : "=r"(oldval) + : "r"(1), "r"(addr)); + return oldval; + } +# endif +# ifndef GC_CLEAR_DEFINED + inline static void GC_clear(volatile unsigned int *addr) { + /* Try to discourage gcc from moving anything past this. */ + __asm__ __volatile__(" "); *(addr) = 0; - } + } +# endif extern volatile unsigned int GC_allocate_lock; - /* This is not a mutex because mutexes that obey the (optional) */ - /* POSIX scheduling rules are subject to convoys in high contention */ - /* applications. This is basically a spin lock. */ extern pthread_t GC_lock_holder; extern void GC_lock(void); /* Allocation lock holder. Only set if acquired by client through */ @@ -517,12 +564,19 @@ addr) { if (GC_test_and_set(&GC_allocate_lock)) GC_lock(); } # define UNLOCK() \ GC_clear(&GC_allocate_lock) - extern GC_bool GC_collecting; + extern VOLATILE GC_bool GC_collecting; # define ENTER_GC() \ { \ GC_collecting = 1; \ } # define EXIT_GC() GC_collecting = 0; +# else /* LINUX_THREADS on hardware for which we don't know how */ + /* to do test and set. */ +# include <pthread.h> + extern pthread_mutex_t GC_allocate_ml; +# define LOCK() pthread_mutex_lock(&GC_allocate_ml) +# define UNLOCK() pthread_mutex_unlock(&GC_allocate_ml) +# endif # endif /* LINUX_THREADS */ # if defined(HPUX_THREADS) # include <pthread.h> @@ -581,7 +635,7 @@ addr) *(volatile unsigned long *)(&GC_allocate_lock) = 0; } # endif # endif - extern GC_bool GC_collecting; + extern VOLATILE GC_bool GC_collecting; # define ENTER_GC() \ { \ GC_collecting = 1; \ @@ -957,8 +1011,10 @@ struct hblk { /* The type of mark procedures. This really belongs in gc_mark.h. */ /* But we put it here, so that we can avoid scanning the mark proc */ /* table. 
*/ -typedef struct ms_entry * (*mark_proc)(/* word * addr, mark_stack_ptr, - mark_stack_limit, env */); +typedef struct ms_entry * (*mark_proc)(/* word * addr, + struct ms_entry *mark_stack_ptr, + struct ms_entry *mark_stack_limit, + word env */); # define LOG_MAX_MARK_PROCS 6 # define MAX_MARK_PROCS (1 << LOG_MAX_MARK_PROCS) @@ -1035,6 +1091,7 @@ struct roots { struct _GC_arrays { word _heapsize; word _max_heapsize; + word _requested_heapsize; /* Heap size due to explicit expansion */ ptr_t _last_heap_addr; ptr_t _prev_heap_addr; word _large_free_bytes; @@ -1059,6 +1116,10 @@ struct _GC_arrays { word _mem_freed; /* Number of explicitly deallocated words of memory */ /* since last collection. */ + ptr_t _scratch_end_ptr; + ptr_t _scratch_last_end_ptr; + /* Used by headers.c, and can easily appear to point to */ + /* heap. */ mark_proc _mark_procs[MAX_MARK_PROCS]; /* Table of user-defined mark procedures. There is */ /* a small number of these, which can be referenced */ @@ -1223,9 +1284,12 @@ GC_API GC_FAR struct _GC_arrays GC_arrays; # define GC_words_finalized GC_arrays._words_finalized # define GC_non_gc_bytes_at_gc GC_arrays._non_gc_bytes_at_gc # define GC_mem_freed GC_arrays._mem_freed +# define GC_scratch_end_ptr GC_arrays._scratch_end_ptr +# define GC_scratch_last_end_ptr GC_arrays._scratch_last_end_ptr # define GC_mark_procs GC_arrays._mark_procs # define GC_heapsize GC_arrays._heapsize # define GC_max_heapsize GC_arrays._max_heapsize +# define GC_requested_heapsize GC_arrays._requested_heapsize # define GC_words_allocd_before_gc GC_arrays._words_allocd_before_gc # define GC_heap_sects GC_arrays._heap_sects # define GC_last_stack GC_arrays._last_stack @@ -1260,6 +1324,8 @@ GC_API GC_FAR struct _GC_arrays GC_arrays; # define beginGC_arrays ((ptr_t)(&GC_arrays)) # define endGC_arrays (((ptr_t)(&GC_arrays)) + (sizeof GC_arrays)) +#define USED_HEAP_SIZE (GC_heapsize - GC_large_free_bytes) + /* Object kinds: */ # define MAXOBJKINDS 16 @@ -1392,10 +1458,7 @@ 
extern ptr_t GC_greatest_plausible_heap_addr; ptr_t GC_approx_sp(); GC_bool GC_should_collect(); -#ifdef PRESERVE_LAST - GC_bool GC_in_last_heap_sect(/* ptr_t */); - /* In last added heap section? If so, avoid breaking up. */ -#endif + void GC_apply_to_all_blocks(/*fn, client_data*/); /* Invoke fn(hbp, client_data) for each */ /* allocated heap block. */ @@ -1672,9 +1735,10 @@ ptr_t GC_allocobj(/* sz_inn_words, kind */); /* head. */ void GC_init_headers(); -GC_bool GC_install_header(/*h*/); +struct hblkhdr * GC_install_header(/*h*/); /* Install a header for block h. */ - /* Return FALSE on failure. */ + /* Return 0 on failure, or the header */ + /* otherwise. */ GC_bool GC_install_counts(/*h, sz*/); /* Set up forwarding counts for block */ /* h of size sz. */ diff --git a/boehm-gc/gc_typed.h b/boehm-gc/gc_typed.h index e4a6b94756e..2e0598f204c 100644 --- a/boehm-gc/gc_typed.h +++ b/boehm-gc/gc_typed.h @@ -61,6 +61,7 @@ GC_API GC_PTR GC_malloc_explicitly_typed GC_PROTO((size_t size_in_bytes, GC_descr d)); /* Allocate an object whose layout is described by d. */ /* The resulting object MAY NOT BE PASSED TO REALLOC. */ + /* The returned object is cleared. */ GC_API GC_PTR GC_malloc_explicitly_typed_ignore_off_page GC_PROTO((size_t size_in_bytes, GC_descr d)); @@ -75,6 +76,7 @@ GC_API GC_PTR GC_calloc_explicitly_typed /* alignment required for pointers. E.g. on a 32-bit */ /* machine with 16-bit aligned pointers, size_in_bytes */ /* must be a multiple of 2. */ + /* Returned object is cleared. 
*/ #ifdef GC_DEBUG # define GC_MALLOC_EXPLICTLY_TYPED(bytes, d) GC_MALLOC(bytes) diff --git a/boehm-gc/gc_watcom.asm b/boehm-gc/gc_watcom.asm deleted file mode 100644 index 5131ab96505..00000000000 --- a/boehm-gc/gc_watcom.asm +++ /dev/null @@ -1,51 +0,0 @@ - - name gc_watcom - -.386p - - extrn _edata : byte ; end of DATA (start of BSS) - extrn _end : byte ; end of BSS (start of STACK) - extrn __nullarea : word - - extrn "C",_STACKLOW : dword - extrn "C",_STACKTOP : dword - - -DGROUP group _DATA - -_DATA segment dword public 'DATA' -_DATA ends - -_TEXT segment para public use32 'CODE' - assume cs:_TEXT, ds:DGROUP, ss:DGROUP - - public Get_DATASTART - align 4 -Get_DATASTART proc near - - mov eax,offset DGROUP:__nullarea - ret - -Get_DATASTART endp - - public Get_DATAEND - align 4 -Get_DATAEND proc near - - mov eax,offset DGROUP:_end - ret - -Get_DATAEND endp - - public Get_STACKBOTTOM - align 4 -Get_STACKBOTTOM proc near - - mov eax,_STACKTOP - ret - -Get_STACKBOTTOM endp - -_TEXT ends - - end diff --git a/boehm-gc/gcconfig.h b/boehm-gc/gcconfig.h index 25178d095ec..47398a6deba 100644 --- a/boehm-gc/gcconfig.h +++ b/boehm-gc/gcconfig.h @@ -69,15 +69,18 @@ # endif # if defined(mips) || defined(__mips) # define MIPS -# if defined(ultrix) || defined(__ultrix) || defined(__NetBSD__) -# define ULTRIX -# else -# if defined(_SYSTYPE_SVR4) || defined(SYSTYPE_SVR4) || defined(__SYSTYPE_SVR4__) -# define IRIX5 /* or IRIX 6.X */ -# else -# define RISCOS /* or IRIX 4.X */ -# endif -# endif +# if !defined(LINUX) +# if defined(ultrix) || defined(__ultrix) || defined(__NetBSD__) +# define ULTRIX +# else +# if defined(_SYSTYPE_SVR4) || defined(SYSTYPE_SVR4) \ + || defined(__SYSTYPE_SVR4__) +# define IRIX5 /* or IRIX 6.X */ +# else +# define RISCOS /* or IRIX 4.X */ +# endif +# endif +# endif /* !LINUX */ # define mach_type_known # endif # if defined(sequent) && defined(i386) @@ -159,10 +162,14 @@ # define M68K # define mach_type_known # endif -# if defined(LINUX) && defined(sparc) 
+# if defined(LINUX) && (defined(sparc) || defined(__sparc__)) # define SPARC # define mach_type_known # endif +# if defined(LINUX) && defined(arm) +# define ARM32 +# define mach_type_known +# endif # if defined(__alpha) || defined(__alpha__) # define ALPHA # if !defined(LINUX) @@ -255,6 +262,11 @@ # define CYGWIN32 # define mach_type_known # endif +# if defined(__MINGW32__) +# define I386 +# define MSWIN32 +# define mach_type_known +# endif # if defined(__BORLANDC__) # define I386 # define MSWIN32 @@ -323,6 +335,9 @@ /* (CX_UX and DGUX) */ /* S370 ==> 370-like machine */ /* running Amdahl UTS4 */ + /* ARM32 ==> Intel StrongARM */ + /* IA64 ==> Intel IA64 */ + /* (e.g. Itanium) */ /* @@ -408,6 +423,15 @@ * * An architecture may define DYNAMIC_LOADING if dynamic_load.c * defined GC_register_dynamic_libraries() for the architecture. + * + * An architecture may define PREFETCH(x) to preload the cache with *x. + * This defaults to a no-op. + * + * PREFETCH_FOR_WRITE(x) is used if *x is about to be written. + * + * An architecture may also define CLEAR_DOUBLE(x) to be a fast way to + * clear the two words at GC_malloc-aligned address x. By default, + * word stores of 0 are used instead. */ @@ -532,11 +556,9 @@ # undef STACK_GRAN # define STACK_GRAN 0x10000000 /* Stack usually starts at 0x80000000 */ - extern int data_start; -# define DATASTART (&data_start) +# define LINUX_DATA_START extern int _end; # define DATAEND (&_end) -# define DYNAMIC_LOADING # endif # ifdef MACOSX # define ALIGNMENT 4 @@ -633,8 +655,8 @@ # ifdef LINUX # define OS_TYPE "LINUX" # ifdef __ELF__ -# define DATASTART GC_data_start -# define DYNAMIC_LOADING +# define LINUX_DATA_START +# define DYNAMIC_LOADING # else Linux Sparc non elf ? # endif @@ -702,13 +724,16 @@ # endif # ifdef LINUX # define OS_TYPE "LINUX" -# define HEURISTIC1 -# undef STACK_GRAN -# define STACK_GRAN 0x10000000 - /* STACKBOTTOM is usually 0xc0000000, but this changes with */ - /* different kernel configurations. 
In particular, systems */ - /* with 2GB physical memory will usually move the user */ - /* address space limit, and hence initial SP to 0x80000000. */ +# define LINUX_STACKBOTTOM +# if 0 +# define HEURISTIC1 +# undef STACK_GRAN +# define STACK_GRAN 0x10000000 + /* STACKBOTTOM is usually 0xc0000000, but this changes with */ + /* different kernel configurations. In particular, systems */ + /* with 2GB physical memory will usually move the user */ + /* address space limit, and hence initial SP to 0x80000000. */ +# endif # if !defined(LINUX_THREADS) || !defined(REDIRECT_MALLOC) /* libgcj: Linux threads don't interact well with the read() wrapper. Not defining MPROTECT_VDB fixes this. */ @@ -726,8 +751,7 @@ # endif # include <features.h> # if defined(__GLIBC__) && __GLIBC__ >= 2 - extern int __data_start; -# define DATASTART ((ptr_t)(&__data_start)) +# define LINUX_DATA_START # else extern char **__environ; # define DATASTART ((ptr_t)(&__environ)) @@ -746,6 +770,26 @@ extern int etext; # define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff)) # endif +# ifdef USE_I686_PREFETCH +# define PREFETCH(x) \ + __asm__ __volatile__ (" prefetchnta %0": : "m"(*(char *)(x))) + /* Empirically prefetcht0 is much more effective at reducing */ + /* cache miss stalls for the targeted load instructions. But it */ + /* seems to interfere enough with other cache traffic that the net */ + /* result is worse than prefetchnta. */ +# if 0 + /* Using prefetches for write seems to have a slight negative */ + /* impact on performance, at least for a PIII/500. 
*/ +# define PREFETCH_FOR_WRITE(x) \ + __asm__ __volatile__ (" prefetcht0 %0": : "m"(*(char *)(x))) +# endif +# endif +# ifdef USE_3DNOW_PREFETCH +# define PREFETCH(x) \ + __asm__ __volatile__ (" prefetch %0": : "m"(*(char *)(x))) +# define PREFETCH_FOR_WRITE(x) \ + __asm__ __volatile__ (" prefetchw %0": : "m"(*(char *)(x))) +# endif # endif # ifdef CYGWIN32 # define OS_TYPE "CYGWIN32" @@ -862,36 +906,48 @@ extern int _etext; # define DATASTART ((ptr_t)(&_etext)) # else -# ifndef IRIX5 +/* # define STACKBOTTOM ((ptr_t)0x7fff8000) sometimes also works. */ +# ifdef LINUX + /* This was developed for a linuxce style platform. Probably */ + /* needs to be tweaked for workstation class machines. */ +# define OS_TYPE "LINUX" + extern int __data_start; +# define DATASTART ((ptr_t)(&__data_start)) +# define ALIGNMENT 4 +# define USE_GENERIC_PUSH_REGS 1 +# define STACKBOTTOM ((ptr_t)0x80000000) + /* In many cases, this should probably use LINUX_STACKBOTTOM */ + /* instead. But some kernel versions seem to give the wrong */ + /* value from /proc. */ +# endif /* Linux */ +# ifdef ULTRIX +# define HEURISTIC2 # define DATASTART (ptr_t)0x10000000 /* Could probably be slightly higher since */ /* startup code allocates lots of stuff. */ -# else +# define OS_TYPE "ULTRIX" +# define ALIGNMENT 4 +# endif +# ifdef RISCOS +# define HEURISTIC2 +# define DATASTART (ptr_t)0x10000000 +# define OS_TYPE "RISCOS" +# define ALIGNMENT 4 /* Required by hardware */ +# endif +# ifdef IRIX5 +# define HEURISTIC2 extern int _fdata; # define DATASTART ((ptr_t)(&_fdata)) # ifdef USE_MMAP -# define HEAP_START (ptr_t)0x30000000 +# define HEAP_START (ptr_t)0x30000000 # else -# define HEAP_START DATASTART +# define HEAP_START DATASTART # endif /* Lowest plausible heap address. */ /* In the MMAP case, we map there. */ /* In either case it is used to identify */ /* heap sections so they're not */ /* considered as roots. 
*/ -# endif /* IRIX5 */ -# endif /* DATASTART_IS_ETEXT */ -# define HEURISTIC2 -/* # define STACKBOTTOM ((ptr_t)0x7fff8000) sometimes also works. */ -# ifdef ULTRIX -# define OS_TYPE "ULTRIX" -# define ALIGNMENT 4 -# endif -# ifdef RISCOS -# define OS_TYPE "RISCOS" -# define ALIGNMENT 4 /* Required by hardware */ -# endif -# ifdef IRIX5 # define OS_TYPE "IRIX5" # define MPROTECT_VDB # ifdef _MIPS_SZPTR @@ -906,6 +962,7 @@ # endif # define DYNAMIC_LOADING # endif +# endif /* DATASTART_IS_ETEXT */ # endif /* ECOS */ # ifdef ECOS extern char __ram_data_start; @@ -963,12 +1020,16 @@ # endif # include <unistd.h> # define GETPAGESIZE() sysconf(_SC_PAGE_SIZE) - /* They misspelled the Posix macro? */ # endif # ifdef ALPHA # define MACH_TYPE "ALPHA" # define ALIGNMENT 8 +# define USE_GENERIC_PUSH_REGS + /* Gcc and probably the DEC/Compaq compiler spill pointers to preserved */ + /* fp registers in some cases when the target is a 21264. The assembly */ + /* code doesn't handle that yet, and version dependencies make that a */ + /* bit tricky. Do the easy thing for now. */ # ifdef OSF1 # define OS_TYPE "OSF1" # define DATASTART ((ptr_t) 0x140000000) @@ -989,12 +1050,9 @@ # define CPP_WORDSZ 64 # define STACKBOTTOM ((ptr_t) 0x120000000) # ifdef __ELF__ - /* glibc for Linux/Alpha no longer provides a symbol marking - the start of the data segment. So libgcj defines - data_start on its own (in libgcjdata.a). */ - extern int data_start; -# define DATASTART &data_start -# define DYNAMIC_LOADING +# define LINUX_DATA_START +# define DYNAMIC_LOADING + /* This doesn't work if the collector is in a dynamic library. */ # else # define DATASTART ((ptr_t) 0x140000000) # endif @@ -1011,6 +1069,9 @@ # define ALIGN_DOUBLE /* Requires 16 byte alignment for malloc */ # define ALIGNMENT 8 +# define USE_GENERIC_PUSH_REGS + /* We need to get preserved registers in addition to register windows. */ + /* That's easiest to do with setjmp. 
*/ # ifdef HPUX --> needs work # endif @@ -1024,10 +1085,25 @@ /* backing store. There is probably a better way to */ /* get that, too ... */ # define BACKING_STORE_BASE ((ptr_t) 0x9fffffff80000000l) -# define DATASTART GC_data_start +# if 1 +# define SEARCH_FOR_DATA_START +# define DATASTART GC_data_start +# else + extern int data_start; +# define DATASTART ((ptr_t)(&data_start)) +# endif # define DYNAMIC_LOADING +# define MPROTECT_VDB + /* Requires Linux 2.3.47 or later. */ extern int _end; # define DATAEND (&_end) + /* PREFETCH appears to have a large performance impact. */ +# define PREFETCH(x) \ + __asm__ (" lfetch [%0]": : "r"((void *)(x))) +# define PREFETCH_FOR_WRITE(x) \ + __asm__ (" lfetch.excl [%0]": : "r"((void *)(x))) +# define CLEAR_DOUBLE(x) \ + __asm__ (" stf.spill [%0]=f0": : "r"((void *)(x))) # endif # endif @@ -1079,6 +1155,49 @@ # define DATASTART ((ptr_t)(&etext)) # define USE_GENERIC_PUSH_REGS # endif +# ifdef LINUX +# define OS_TYPE "LINUX" +# define HEURISTIC1 +# undef STACK_GRAN +# define STACK_GRAN 0x10000000 +# define USE_GENERIC_PUSH_REGS +# ifdef __ELF__ +# define DYNAMIC_LOADING +# include <features.h> +# if defined(__GLIBC__) && __GLIBC__ >= 2 +# define LINUX_DATA_START +# else + extern char **__environ; +# define DATASTART ((ptr_t)(&__environ)) + /* hideous kludge: __environ is the first */ + /* word in crt0.o, and delimits the start */ + /* of the data segment, no matter which */ + /* ld options were passed through. */ + /* We could use _etext instead, but that */ + /* would include .rodata, which may */ + /* contain large read-only data tables */ + /* that we'd rather not scan. */ +# endif + extern int _end; +# define DATAEND (&_end) +# else + extern int etext; +# define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff)) +# endif +# endif +#endif + +#ifdef LINUX_DATA_START + /* Some Linux distributions arrange to define __data_start. Some */ + /* define data_start as a weak symbol. 
The latter is technically */ + /* broken, since the user program may define data_start, in which */ + /* case we lose. Nonetheless, we try both, preferring __data_start. */ + /* We assume gcc. */ +# pragma weak __data_start + extern int __data_start; +# pragma weak data_start + extern int data_start; +# define DATASTART ((ptr_t)(&__data_start != 0? &__data_start : &data_start)) #endif # ifndef STACK_GROWS_UP @@ -1161,6 +1280,26 @@ # define DEFAULT_VDB # endif +# ifndef PREFETCH +# define PREFETCH(x) +# define NO_PREFETCH +# endif + +# ifndef PREFETCH_FOR_WRITE +# define PREFETCH_FOR_WRITE(x) +# define NO_PREFETCH_FOR_WRITE +# endif + +# ifndef CACHE_LINE_SIZE +# define CACHE_LINE_SIZE 32 /* Wild guess */ +# endif + +# ifndef CLEAR_DOUBLE +# define CLEAR_DOUBLE(x) \ + do { ((word*)(x))[0] = 0; \ + ((word*)(x))[1] = 0; } while (0) +# endif /* CLEAR_DOUBLE */ + # if defined(_SOLARIS_PTHREADS) && !defined(SOLARIS_THREADS) # define SOLARIS_THREADS # endif @@ -1197,4 +1336,4 @@ /* include assembly code to do it well. */ # endif -# endif +# endif /* GCCONFIG_H */ diff --git a/boehm-gc/headers.c b/boehm-gc/headers.c index 9564a6a5359..6e47bba8e3f 100644 --- a/boehm-gc/headers.c +++ b/boehm-gc/headers.c @@ -50,10 +50,8 @@ ptr_t h; static ptr_t scratch_free_ptr = 0; -ptr_t GC_scratch_end_ptr = 0; - -ptr_t GC_scratch_last_end_ptr = 0; - /* End point of last obtained scratch area */ +/* GC_scratch_last_end_ptr is end point of last obtained scratch area. */ +/* GC_scratch_end_ptr is end point of current scratch area. 
*/ ptr_t GC_scratch_alloc(bytes) register word bytes; @@ -128,6 +126,13 @@ hdr * hhdr; hhdr -> hb_next = (struct hblk *) hdr_free_list; hdr_free_list = hhdr; } + +hdr * GC_invalid_header; + +#ifdef USE_HDR_CACHE + word GC_hdr_cache_hits = 0; + word GC_hdr_cache_misses = 0; +#endif void GC_init_headers() { @@ -138,6 +143,8 @@ void GC_init_headers() for (i = 0; i < TOP_SZ; i++) { GC_top_index[i] = GC_all_nils; } + GC_invalid_header = alloc_hdr(); + GC_invalidate_map(GC_invalid_header); } /* Make sure that there is a bottom level index block for address addr */ @@ -191,10 +198,10 @@ word addr; return(TRUE); } -/* Install a header for block h. */ -/* The header is uninitialized. */ -/* Returns FALSE on failure. */ -GC_bool GC_install_header(h) +/* Install a header for block h. */ +/* The header is uninitialized. */ +/* Returns the header or 0 on failure. */ +struct hblkhdr * GC_install_header(h) register struct hblk * h; { hdr * result; @@ -205,7 +212,7 @@ register struct hblk * h; # ifdef USE_MUNMAP result -> hb_last_reclaimed = GC_gc_no; # endif - return(result != 0); + return(result); } /* Set up forwarding counts for block h of size sz */ diff --git a/boehm-gc/include/private/config.h b/boehm-gc/include/private/config.h deleted file mode 100644 index fc8004cebe5..00000000000 --- a/boehm-gc/include/private/config.h +++ /dev/null @@ -1,965 +0,0 @@ -/* - * Copyright 1988, 1989 Hans-J. Boehm, Alan J. Demers - * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved. - * Copyright (c) 1996 by Silicon Graphics. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED - * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program - * for any purpose, provided the above notices are retained on all copies. 
- * Permission to modify the code and to distribute modified code is granted, - * provided the above notices are retained, and a notice that the code was - * modified is included with the above copyright notice. - */ - -#ifndef CONFIG_H - -# define CONFIG_H - -/* Machine dependent parameters. Some tuning parameters can be found */ -/* near the top of gc_private.h. */ - -/* Machine specific parts contributed by various people. See README file. */ - -/* Determine the machine type: */ -# if defined(sun) && defined(mc68000) -# define M68K -# define SUNOS4 -# define mach_type_known -# endif -# if defined(hp9000s300) -# define M68K -# define HP -# define mach_type_known -# endif -# if defined(__NetBSD__) && defined(m68k) -# define M68K -# define NETBSD -# define mach_type_known -# endif -# if defined(vax) -# define VAX -# ifdef ultrix -# define ULTRIX -# else -# define BSD -# endif -# define mach_type_known -# endif -# if defined(mips) || defined(__mips) -# define MIPS -# if defined(ultrix) || defined(__ultrix) || defined(__NetBSD__) -# define ULTRIX -# else -# if defined(_SYSTYPE_SVR4) || defined(SYSTYPE_SVR4) || defined(__SYSTYPE_SVR4__) -# define IRIX5 /* or IRIX 6.X */ -# else -# define RISCOS /* or IRIX 4.X */ -# endif -# endif -# define mach_type_known -# endif -# if defined(sequent) && defined(i386) -# define I386 -# define SEQUENT -# define mach_type_known -# endif -# if defined(sun) && defined(i386) -# define I386 -# define SUNOS5 -# define mach_type_known -# endif -# if (defined(__OS2__) || defined(__EMX__)) && defined(__32BIT__) -# define I386 -# define OS2 -# define mach_type_known -# endif -# if defined(ibm032) -# define RT -# define mach_type_known -# endif -# if defined(sun) && (defined(sparc) || defined(__sparc)) -# define SPARC - /* Test for SunOS 5.x */ -# include <errno.h> -# ifdef ECHRNG -# define SUNOS5 -# else -# define SUNOS4 -# endif -# define mach_type_known -# endif -# if defined(sparc) && defined(unix) && !defined(sun) -# define SPARC -# define 
DRSNX -# define mach_type_known -# endif -# if defined(_IBMR2) -# define RS6000 -# define mach_type_known -# endif -# if defined(_M_XENIX) && defined(_M_SYSV) && defined(_M_I386) - /* The above test may need refinement */ -# define I386 -# if defined(_SCO_ELF) -# define SCO_ELF -# else -# define SCO -# endif -# define mach_type_known -# endif -# if defined(_AUX_SOURCE) -# define M68K -# define SYSV -# define mach_type_known -# endif -# if defined(_PA_RISC1_0) || defined(_PA_RISC1_1) -# define HP_PA -# define mach_type_known -# endif -# if defined(linux) && defined(i386) -# define I386 -# define LINUX -# define mach_type_known -# endif -# if defined(linux) && defined(powerpc) -# define POWERPC -# define LINUX -# define mach_type_known -# endif -# if defined(__alpha) || defined(__alpha__) -# define ALPHA -# if defined(linux) || defined(__linux__) -# define LINUX -# else -# define OSF1 /* a.k.a Digital Unix */ -# endif -# define mach_type_known -# endif -# if defined(_AMIGA) -# define M68K -# define AMIGA -# define mach_type_known -# endif -# if defined(THINK_C) || defined(__MWERKS__) && !defined(__powerc) -# define M68K -# define MACOS -# define mach_type_known -# endif -# if defined(__MWERKS__) && defined(__powerc) -# define POWERPC -# define MACOS -# define mach_type_known -# endif -# if defined(NeXT) && defined(mc68000) -# define M68K -# define NEXT -# define mach_type_known -# endif -# if defined(NeXT) && defined(i386) -# define I386 -# define NEXT -# define mach_type_known -# endif -# if defined(__FreeBSD__) && defined(i386) -# define I386 -# define FREEBSD -# define mach_type_known -# endif -# if defined(__NetBSD__) && defined(i386) -# define I386 -# define NETBSD -# define mach_type_known -# endif -# if defined(bsdi) && defined(i386) -# define I386 -# define BSDI -# define mach_type_known -# endif -# if !defined(mach_type_known) && defined(__386BSD__) -# define I386 -# define THREE86BSD -# define mach_type_known -# endif -# if defined(_CX_UX) && defined(_M88K) 
-# define M88K -# define CX_UX -# define mach_type_known -# endif -# if defined(DGUX) -# define M88K - /* DGUX defined */ -# define mach_type_known -# endif -# if (defined(_MSDOS) || defined(_MSC_VER)) && (_M_IX86 >= 300) -# define I386 -# define MSWIN32 /* or Win32s */ -# define mach_type_known -# endif -# if defined(__DJGPP__) -# define I386 -# ifndef DJGPP -# define DJGPP /* MSDOS running the DJGPP port of GCC */ -# endif -# define mach_type_known -# endif -# if defined(__CYGWIN32__) -# define I386 -# define CYGWIN32 -# define mach_type_known -# endif -# if defined(__BORLANDC__) -# define I386 -# define MSWIN32 -# define mach_type_known -# endif -# if defined(_UTS) && !defined(mach_type_known) -# define S370 -# define UTS4 -# define mach_type_known -# endif -/* Ivan Demakov */ -# if defined(__WATCOMC__) && defined(__386__) -# define I386 -# if !defined(OS2) && !defined(MSWIN32) && !defined(DOS4GW) -# if defined(__OS2__) -# define OS2 -# else -# if defined(__WINDOWS_386__) || defined(__NT__) -# define MSWIN32 -# else -# define DOS4GW -# endif -# endif -# endif -# define mach_type_known -# endif - -/* Feel free to add more clauses here */ - -/* Or manually define the machine type here. A machine type is */ -/* characterized by the architecture. Some */ -/* machine types are further subdivided by OS. */ -/* the macros ULTRIX, RISCOS, and BSD to distinguish. */ -/* Note that SGI IRIX is treated identically to RISCOS. */ -/* SYSV on an M68K actually means A/UX. 
*/ -/* The distinction in these cases is usually the stack starting address */ -# ifndef mach_type_known - --> unknown machine type -# endif - /* Mapping is: M68K ==> Motorola 680X0 */ - /* (SUNOS4,HP,NEXT, and SYSV (A/UX), */ - /* MACOS and AMIGA variants) */ - /* I386 ==> Intel 386 */ - /* (SEQUENT, OS2, SCO, LINUX, NETBSD, */ - /* FREEBSD, THREE86BSD, MSWIN32, */ - /* BSDI, SUNOS5, NEXT variants) */ - /* NS32K ==> Encore Multimax */ - /* MIPS ==> R2000 or R3000 */ - /* (RISCOS, ULTRIX variants) */ - /* VAX ==> DEC VAX */ - /* (BSD, ULTRIX variants) */ - /* RS6000 ==> IBM RS/6000 AIX3.X */ - /* RT ==> IBM PC/RT */ - /* HP_PA ==> HP9000/700 & /800 */ - /* HP/UX */ - /* SPARC ==> SPARC under SunOS */ - /* (SUNOS4, SUNOS5, */ - /* DRSNX variants) */ - /* ALPHA ==> DEC Alpha */ - /* (OSF1 and LINUX variants) */ - /* M88K ==> Motorola 88XX0 */ - /* (CX_UX and DGUX) */ - /* S370 ==> 370-like machine */ - /* running Amdahl UTS4 */ - - -/* - * For each architecture and OS, the following need to be defined: - * - * CPP_WORD_SZ is a simple integer constant representing the word size. - * in bits. We assume byte addressibility, where a byte has 8 bits. - * We also assume CPP_WORD_SZ is either 32 or 64. - * (We care about the length of pointers, not hardware - * bus widths. Thus a 64 bit processor with a C compiler that uses - * 32 bit pointers should use CPP_WORD_SZ of 32, not 64. Default is 32.) - * - * MACH_TYPE is a string representation of the machine type. - * OS_TYPE is analogous for the OS. - * - * ALIGNMENT is the largest N, such that - * all pointer are guaranteed to be aligned on N byte boundaries. - * defining it to be 1 will always work, but perform poorly. - * - * DATASTART is the beginning of the data segment. - * On UNIX systems, the collector will scan the area between DATASTART - * and DATAEND for root pointers. - * - * DATAEND, if not &end. - * - * ALIGN_DOUBLE of GC_malloc should return blocks aligned to twice - * the pointer size. 
- * - * STACKBOTTOM is the cool end of the stack, which is usually the - * highest address in the stack. - * Under PCR or OS/2, we have other ways of finding thread stacks. - * For each machine, the following should: - * 1) define STACK_GROWS_UP if the stack grows toward higher addresses, and - * 2) define exactly one of - * STACKBOTTOM (should be defined to be an expression) - * HEURISTIC1 - * HEURISTIC2 - * If either of the last two macros are defined, then STACKBOTTOM is computed - * during collector startup using one of the following two heuristics: - * HEURISTIC1: Take an address inside GC_init's frame, and round it up to - * the next multiple of STACK_GRAN. - * HEURISTIC2: Take an address inside GC_init's frame, increment it repeatedly - * in small steps (decrement if STACK_GROWS_UP), and read the value - * at each location. Remember the value when the first - * Segmentation violation or Bus error is signalled. Round that - * to the nearest plausible page boundary, and use that instead - * of STACKBOTTOM. - * - * If no expression for STACKBOTTOM can be found, and neither of the above - * heuristics are usable, the collector can still be used with all of the above - * undefined, provided one of the following is done: - * 1) GC_mark_roots can be changed to somehow mark from the correct stack(s) - * without reference to STACKBOTTOM. This is appropriate for use in - * conjunction with thread packages, since there will be multiple stacks. - * (Allocating thread stacks in the heap, and treating them as ordinary - * heap data objects is also possible as a last resort. However, this is - * likely to introduce significant amounts of excess storage retention - * unless the dead parts of the thread stacks are periodically cleared.) - * 2) Client code may set GC_stackbottom before calling any GC_ routines. 
- * If the author of the client code controls the main program, this is - * easily accomplished by introducing a new main program, setting - * GC_stackbottom to the address of a local variable, and then calling - * the original main program. The new main program would read something - * like: - * - * # include "gc_private.h" - * - * main(argc, argv, envp) - * int argc; - * char **argv, **envp; - * { - * int dummy; - * - * GC_stackbottom = (ptr_t)(&dummy); - * return(real_main(argc, argv, envp)); - * } - * - * - * Each architecture may also define the style of virtual dirty bit - * implementation to be used: - * MPROTECT_VDB: Write protect the heap and catch faults. - * PROC_VDB: Use the SVR4 /proc primitives to read dirty bits. - * - * An architecture may define DYNAMIC_LOADING if dynamic_load.c - * defined GC_register_dynamic_libraries() for the architecture. - */ - - -# define STACK_GRAN 0x1000000 -# ifdef M68K -# define MACH_TYPE "M68K" -# define ALIGNMENT 2 -# ifdef NETBSD -# define OS_TYPE "NETBSD" -# define HEURISTIC2 - extern char etext; -# define DATASTART ((ptr_t)(&etext)) -# endif -# ifdef SUNOS4 -# define OS_TYPE "SUNOS4" - extern char etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0x1ffff) & ~0x1ffff)) -# define HEURISTIC1 /* differs */ -# define DYNAMIC_LOADING -# endif -# ifdef HP -# define OS_TYPE "HP" - extern char etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff)) -# define STACKBOTTOM ((ptr_t) 0xffeffffc) - /* empirically determined. seems to work. */ -# include <unistd.h> -# define GETPAGESIZE() sysconf(_SC_PAGE_SIZE) -# endif -# ifdef SYSV -# define OS_TYPE "SYSV" - extern etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0x3fffff) \ - & ~0x3fffff) \ - +((word)&etext & 0x1fff)) - /* This only works for shared-text binaries with magic number 0413. - The other sorts of SysV binaries put the data at the end of the text, - in which case the default of &etext would work. 
Unfortunately, - handling both would require having the magic-number available. - -- Parag - */ -# define STACKBOTTOM ((ptr_t)0xFFFFFFFE) - /* The stack starts at the top of memory, but */ - /* 0x0 cannot be used as setjump_test complains */ - /* that the stack direction is incorrect. Two */ - /* bytes down from 0x0 should be safe enough. */ - /* --Parag */ -# include <sys/mmu.h> -# define GETPAGESIZE() PAGESIZE /* Is this still right? */ -# endif -# ifdef AMIGA -# define OS_TYPE "AMIGA" - /* STACKBOTTOM and DATASTART handled specially */ - /* in os_dep.c */ -# define DATAEND /* not needed */ -# define GETPAGESIZE() 4096 -# endif -# ifdef MACOS -# ifndef __LOWMEM__ -# include <LowMem.h> -# endif -# define OS_TYPE "MACOS" - /* see os_dep.c for details of global data segments. */ -# define STACKBOTTOM ((ptr_t) LMGetCurStackBase()) -# define DATAEND /* not needed */ -# define GETPAGESIZE() 4096 -# endif -# ifdef NEXT -# define OS_TYPE "NEXT" -# define DATASTART ((ptr_t) get_etext()) -# define STACKBOTTOM ((ptr_t) 0x4000000) -# define DATAEND /* not needed */ -# endif -# endif - -# ifdef POWERPC -# define MACH_TYPE "POWERPC" -# define ALIGNMENT 2 -# ifdef MACOS -# ifndef __LOWMEM__ -# include <LowMem.h> -# endif -# define OS_TYPE "MACOS" - /* see os_dep.c for details of global data segments. */ -# define STACKBOTTOM ((ptr_t) LMGetCurStackBase()) -# define DATAEND /* not needed */ -# endif -# ifdef LINUX -# define OS_TYPE "LINUX" -# define STACKBOTTOM ((ptr_t)0x80000000) -# define DATASTART GC_data_start - extern int _end; -# define DATAEND (&_end) -# endif -# endif - -# ifdef VAX -# define MACH_TYPE "VAX" -# define ALIGNMENT 4 /* Pointers are longword aligned by 4.2 C compiler */ - extern char etext; -# define DATASTART ((ptr_t)(&etext)) -# ifdef BSD -# define OS_TYPE "BSD" -# define HEURISTIC1 - /* HEURISTIC2 may be OK, but it's hard to test. 
*/ -# endif -# ifdef ULTRIX -# define OS_TYPE "ULTRIX" -# define STACKBOTTOM ((ptr_t) 0x7fffc800) -# endif -# endif - -# ifdef RT -# define MACH_TYPE "RT" -# define ALIGNMENT 4 -# define DATASTART ((ptr_t) 0x10000000) -# define STACKBOTTOM ((ptr_t) 0x1fffd800) -# endif - -# ifdef SPARC -# define MACH_TYPE "SPARC" -# define ALIGNMENT 4 /* Required by hardware */ -# define ALIGN_DOUBLE - extern int etext; -# ifdef SUNOS5 -# define OS_TYPE "SUNOS5" - extern int _etext; - extern int _end; - extern char * GC_SysVGetDataStart(); -# define DATASTART (ptr_t)GC_SysVGetDataStart(0x10000, &_etext) -# define DATAEND (&_end) -# ifndef USE_MMAP -# define USE_MMAP -# endif -# ifdef USE_MMAP -# define HEAP_START (ptr_t)0x40000000 -# else -# define HEAP_START DATAEND -# endif -# define PROC_VDB -# define HEURISTIC1 -# include <unistd.h> -# define GETPAGESIZE() sysconf(_SC_PAGESIZE) - /* getpagesize() appeared to be missing from at least one */ - /* Solaris 5.4 installation. Weird. */ -# endif -# ifdef SUNOS4 -# define OS_TYPE "SUNOS4" - /* [If you have a weak stomach, don't read this.] */ - /* We would like to use: */ -/* # define DATASTART ((ptr_t)((((word) (&etext)) + 0x1fff) & ~0x1fff)) */ - /* This fails occasionally, due to an ancient, but very */ - /* persistent ld bug. &etext is set 32 bytes too high. */ - /* We instead read the text segment size from the a.out */ - /* header, which happens to be mapped into our address space */ - /* at the start of the text segment. The detective work here */ - /* was done by Robert Ehrlich, Manuel Serrano, and Bernard */ - /* Serpette of INRIA. */ - /* This assumes ZMAGIC, i.e. demand-loadable executables. 
*/ -# define TEXTSTART 0x2000 -# define DATASTART ((ptr_t)(*(int *)(TEXTSTART+0x4)+TEXTSTART)) -# define MPROTECT_VDB -# define HEURISTIC1 -# endif -# ifdef DRSNX -# define CPP_WORDSZ 32 -# define OS_TYPE "DRSNX" - extern char * GC_SysVGetDataStart(); - extern int etext; -# define DATASTART (ptr_t)GC_SysVGetDataStart(0x10000, &etext) -# define MPROTECT_VDB -# define STACKBOTTOM ((ptr_t) 0xdfff0000) -# endif -# define DYNAMIC_LOADING -# endif - -# ifdef I386 -# define MACH_TYPE "I386" -# define ALIGNMENT 4 /* Appears to hold for all "32 bit" compilers */ - /* except Borland. The -a4 option fixes */ - /* Borland. */ - /* Ivan Demakov: For Watcom the option is -zp4. */ -# ifndef SMALL_CONFIG -# define ALIGN_DOUBLE /* Not strictly necessary, but may give speed */ - /* improvement on Pentiums. */ -# endif -# ifdef SEQUENT -# define OS_TYPE "SEQUENT" - extern int etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff)) -# define STACKBOTTOM ((ptr_t) 0x3ffff000) -# endif -# ifdef SUNOS5 -# define OS_TYPE "SUNOS5" - extern int etext, _start; - extern char * GC_SysVGetDataStart(); -# define DATASTART GC_SysVGetDataStart(0x1000, &etext) -# define STACKBOTTOM ((ptr_t)(&_start)) -/** At least in Solaris 2.5, PROC_VDB gives wrong values for dirty bits. 
*/ -/*# define PROC_VDB*/ -# define DYNAMIC_LOADING -# ifndef USE_MMAP -# define USE_MMAP -# endif -# ifdef USE_MMAP -# define HEAP_START (ptr_t)0x40000000 -# else -# define HEAP_START DATAEND -# endif -# endif -# ifdef SCO -# define OS_TYPE "SCO" - extern int etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0x3fffff) \ - & ~0x3fffff) \ - +((word)&etext & 0xfff)) -# define STACKBOTTOM ((ptr_t) 0x7ffffffc) -# endif -# ifdef SCO_ELF -# define OS_TYPE "SCO_ELF" - extern int etext; -# define DATASTART ((ptr_t)(&etext)) -# define STACKBOTTOM ((ptr_t) 0x08048000) -# define DYNAMIC_LOADING -# define ELF_CLASS ELFCLASS32 -# endif -# ifdef LINUX -# define OS_TYPE "LINUX" -# define STACKBOTTOM ((ptr_t)0xc0000000) - /* Appears to be 0xe0000000 for at least one 2.1.91 kernel. */ - /* Probably needs to be more flexible, but I don't yet */ - /* fully understand how flexible. */ -# define MPROTECT_VDB -# ifdef __ELF__ -# define DYNAMIC_LOADING -# ifdef UNDEFINED /* includes ro data */ - extern int _etext; -# define DATASTART ((ptr_t)((((word) (&_etext)) + 0xfff) & ~0xfff)) -# endif -# include <linux/version.h> -# include <features.h> -# if LINUX_VERSION_CODE >= 0x20000 && defined(__GLIBC__) && __GLIBC__ >= 2 - extern int __data_start; -# define DATASTART ((ptr_t)(&__data_start)) -# else - extern char **__environ; -# define DATASTART ((ptr_t)(&__environ)) - /* hideous kludge: __environ is the first */ - /* word in crt0.o, and delimits the start */ - /* of the data segment, no matter which */ - /* ld options were passed through. */ - /* We could use _etext instead, but that */ - /* would include .rodata, which may */ - /* contain large read-only data tables */ - /* that we'd rather not scan. 
*/ -# endif - extern int _end; -# define DATAEND (&_end) -# else - extern int etext; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0xfff) & ~0xfff)) -# endif -# endif -# ifdef CYGWIN32 - extern int _data_start__; - extern int _data_end__; - extern int _bss_start__; - extern int _bss_end__; - /* For binutils 2.9.1, we have */ - /* DATASTART = _data_start__ */ - /* DATAEND = _bss_end__ */ - /* whereas for some earlier versions it was */ - /* DATASTART = _bss_start__ */ - /* DATAEND = _data_end__ */ - /* To get it right for both, we take the */ - /* minumum/maximum of the two. */ -# define MAX(x,y) ((x) > (y) ? (x) : (y)) -# define MIN(x,y) ((x) < (y) ? (x) : (y)) -# define DATASTART ((ptr_t) MIN(_data_start__, _bss_start__)) -# define DATAEND ((ptr_t) MAX(_data_end__, _bss_end__)) -# undef STACK_GRAN -# define STACK_GRAN 0x10000 -# define HEURISTIC1 -# endif -# ifdef OS2 -# define OS_TYPE "OS2" - /* STACKBOTTOM and DATASTART are handled specially in */ - /* os_dep.c. OS2 actually has the right */ - /* system call! */ -# define DATAEND /* not needed */ -# endif -# ifdef MSWIN32 -# define OS_TYPE "MSWIN32" - /* STACKBOTTOM and DATASTART are handled specially in */ - /* os_dep.c. */ -# ifndef __WATCOMC__ -# define MPROTECT_VDB -# endif -# define DATAEND /* not needed */ -# endif -# ifdef DJGPP -# define OS_TYPE "DJGPP" -# include "stubinfo.h" - extern int etext; - extern int _stklen; -# define DATASTART ((ptr_t)((((word) (&etext)) + 0x1ff) & ~0x1ff)) -# define STACKBOTTOM ((ptr_t)((word) _stubinfo + _stubinfo->size \ - + _stklen)) - /* This may not be right. 
*/ -# endif -# ifdef FREEBSD -# define OS_TYPE "FREEBSD" -# define MPROTECT_VDB -# endif -# ifdef NETBSD -# define OS_TYPE "NETBSD" -# endif -# ifdef THREE86BSD -# define OS_TYPE "THREE86BSD" -# endif -# ifdef BSDI -# define OS_TYPE "BSDI" -# endif -# if defined(FREEBSD) || defined(NETBSD) \ - || defined(THREE86BSD) || defined(BSDI) -# define HEURISTIC2 - extern char etext; -# define DATASTART ((ptr_t)(&etext)) -# endif -# ifdef NEXT -# define OS_TYPE "NEXT" -# define DATASTART ((ptr_t) get_etext()) -# define STACKBOTTOM ((ptr_t)0xc0000000) -# define DATAEND /* not needed */ -# endif -# ifdef DOS4GW -# define OS_TYPE "DOS4GW" - /* Get_DATASTART, Get_DATAEND, Get_STACKBOTTOM - * Defined in gc-watcom.asm - */ - extern char* Get_DATASTART (void); - extern char* Get_DATAEND (void); - extern char* Get_STACKBOTTOM (void); -# pragma aux Get_DATASTART "*" value [eax]; -# pragma aux Get_DATAEND "*" value [eax]; -# pragma aux Get_STACKBOTTOM "*" value [eax]; -# define DATASTART ((ptr_t) Get_DATASTART()) -# define STACKBOTTOM ((ptr_t) Get_STACKBOTTOM()) -# define DATAEND ((ptr_t) Get_DATAEND()) -# endif -# endif - -# ifdef NS32K -# define MACH_TYPE "NS32K" -# define ALIGNMENT 4 - extern char **environ; -# define DATASTART ((ptr_t)(&environ)) - /* hideous kludge: environ is the first */ - /* word in crt0.o, and delimits the start */ - /* of the data segment, no matter which */ - /* ld options were passed through. */ -# define STACKBOTTOM ((ptr_t) 0xfffff000) /* for Encore */ -# endif - -# ifdef MIPS -# define MACH_TYPE "MIPS" -# ifndef IRIX5 -# define DATASTART (ptr_t)0x10000000 - /* Could probably be slightly higher since */ - /* startup code allocates lots of stuff. */ -# else - extern int _fdata; -# define DATASTART ((ptr_t)(&_fdata)) -# ifdef USE_MMAP -# define HEAP_START (ptr_t)0x30000000 -# else -# define HEAP_START DATASTART -# endif - /* Lowest plausible heap address. */ - /* In the MMAP case, we map there. 
*/ - /* In either case it is used to identify */ - /* heap sections so they're not */ - /* considered as roots. */ -# endif /* IRIX5 */ -# define HEURISTIC2 -/* # define STACKBOTTOM ((ptr_t)0x7fff8000) sometimes also works. */ -# ifdef ULTRIX -# define OS_TYPE "ULTRIX" -# define ALIGNMENT 4 -# endif -# ifdef RISCOS -# define OS_TYPE "RISCOS" -# define ALIGNMENT 4 /* Required by hardware */ -# endif -# ifdef IRIX5 -# define OS_TYPE "IRIX5" -# define MPROTECT_VDB -# ifdef _MIPS_SZPTR -# define CPP_WORDSZ _MIPS_SZPTR -# define ALIGNMENT (_MIPS_SZPTR/8) -# if CPP_WORDSZ != 64 -# define ALIGN_DOUBLE -# endif -# else -# define ALIGNMENT 4 -# define ALIGN_DOUBLE -# endif -# define DYNAMIC_LOADING -# endif -# endif - -# ifdef RS6000 -# define MACH_TYPE "RS6000" -# define ALIGNMENT 4 -# define DATASTART ((ptr_t)0x20000000) - extern int errno; -# define STACKBOTTOM ((ptr_t)((ulong)&errno)) -# define DYNAMIC_LOADING - /* For really old versions of AIX, this may have to be removed. */ -# endif - -# ifdef HP_PA -# define MACH_TYPE "HP_PA" -# define ALIGNMENT 4 -# define ALIGN_DOUBLE - extern int __data_start; -# define DATASTART ((ptr_t)(&__data_start)) -# if 0 - /* The following appears to work for 7xx systems running HP/UX */ - /* 9.xx Furthermore, it might result in much faster */ - /* collections than HEURISTIC2, which may involve scanning */ - /* segments that directly precede the stack. It is not the */ - /* default, since it may not work on older machine/OS */ - /* combinations. (Thanks to Raymond X.T. Nijssen for uncovering */ - /* this.) */ -# define STACKBOTTOM ((ptr_t) 0x7b033000) /* from /etc/conf/h/param.h */ -# else -# define HEURISTIC2 -# endif -# define STACK_GROWS_UP -# define DYNAMIC_LOADING -# include <unistd.h> -# define GETPAGESIZE() sysconf(_SC_PAGE_SIZE) - /* They misspelled the Posix macro? 
*/ -# endif - -# ifdef ALPHA -# define MACH_TYPE "ALPHA" -# define ALIGNMENT 8 -# ifdef OSF1 -# define OS_TYPE "OSF1" -# define DATASTART ((ptr_t) 0x140000000) -# define HEURISTIC2 - /* Normally HEURISTIC2 is too conervative, since */ - /* the text segment immediately follows the stack. */ - /* Hence we give an upper pound. */ - extern __start; -# define HEURISTIC2_LIMIT ((ptr_t)((word)(&__start) & ~(getpagesize()-1))) -# define CPP_WORDSZ 64 -# define MPROTECT_VDB -# define DYNAMIC_LOADING -# endif -# ifdef LINUX -# define OS_TYPE "LINUX" -# define CPP_WORDSZ 64 -# define STACKBOTTOM ((ptr_t) 0x120000000) -# ifdef __ELF__ - extern int __data_start; -# define DATASTART &__data_start -# define DYNAMIC_LOADING -# else -# define DATASTART ((ptr_t) 0x140000000) -# endif - extern int _end; -# define DATAEND (&_end) - /* As of 1.3.90, I couldn't find a way to retrieve the correct */ - /* fault address from a signal handler. */ - /* Hence MPROTECT_VDB is broken. */ -# endif -# endif - -# ifdef M88K -# define MACH_TYPE "M88K" -# define ALIGNMENT 4 -# define ALIGN_DOUBLE - extern int etext; -# ifdef CX_UX -# define OS_TYPE "CX_UX" -# define DATASTART ((((word)&etext + 0x3fffff) & ~0x3fffff) + 0x10000) -# endif -# ifdef DGUX -# define OS_TYPE "DGUX" - extern char * GC_SysVGetDataStart(); -# define DATASTART (ptr_t)GC_SysVGetDataStart(0x10000, &etext) -# endif -# define STACKBOTTOM ((char*)0xf0000000) /* determined empirically */ -# endif - -# ifdef S370 -# define MACH_TYPE "S370" -# define OS_TYPE "UTS4" -# define ALIGNMENT 4 /* Required by hardware */ - extern int etext; - extern int _etext; - extern int _end; - extern char * GC_SysVGetDataStart(); -# define DATASTART (ptr_t)GC_SysVGetDataStart(0x10000, &_etext) -# define DATAEND (&_end) -# define HEURISTIC2 -# endif - -# ifndef STACK_GROWS_UP -# define STACK_GROWS_DOWN -# endif - -# ifndef CPP_WORDSZ -# define CPP_WORDSZ 32 -# endif - -# ifndef OS_TYPE -# define OS_TYPE "" -# endif - -# ifndef DATAEND - extern int end; -# 
define DATAEND (&end) -# endif - -# if defined(SVR4) && !defined(GETPAGESIZE) -# include <unistd.h> -# define GETPAGESIZE() sysconf(_SC_PAGESIZE) -# endif - -# ifndef GETPAGESIZE -# if defined(SUNOS5) || defined(IRIX5) -# include <unistd.h> -# endif -# define GETPAGESIZE() getpagesize() -# endif - -# if defined(SUNOS5) || defined(DRSNX) || defined(UTS4) - /* OS has SVR4 generic features. Probably others also qualify. */ -# define SVR4 -# endif - -# if defined(SUNOS5) || defined(DRSNX) - /* OS has SUNOS5 style semi-undocumented interface to dynamic */ - /* loader. */ -# define SUNOS5DL - /* OS has SUNOS5 style signal handlers. */ -# define SUNOS5SIGS -# endif - -# if CPP_WORDSZ != 32 && CPP_WORDSZ != 64 - -> bad word size -# endif - -# ifdef PCR -# undef DYNAMIC_LOADING -# undef STACKBOTTOM -# undef HEURISTIC1 -# undef HEURISTIC2 -# undef PROC_VDB -# undef MPROTECT_VDB -# define PCR_VDB -# endif - -# ifdef SRC_M3 -/* Postponed for now. */ -# undef PROC_VDB -# undef MPROTECT_VDB -# endif - -# ifdef SMALL_CONFIG -/* Presumably not worth the space it takes. */ -# undef PROC_VDB -# undef MPROTECT_VDB -# endif - -# if !defined(PCR_VDB) && !defined(PROC_VDB) && !defined(MPROTECT_VDB) -# define DEFAULT_VDB -# endif - -# if defined(IRIX_THREADS) && !defined(IRIX5) ---> inconsistent configuration -# endif -# if defined(LINUX_THREADS) && !defined(LINUX) ---> inconsistent configuration -# endif -# if defined(SOLARIS_THREADS) && !defined(SUNOS5) ---> inconsistent configuration -# endif -# if defined(PCR) || defined(SRC_M3) || \ - defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || \ - defined(IRIX_THREADS) || defined(LINUX_THREADS) -# define THREADS -# endif - -# if defined(SPARC) -# define SAVE_CALL_CHAIN -# define ASM_CLEAR_CODE /* Stack clearing is crucial, and we */ - /* include assembly code to do it well. 
*/ -# endif - -# endif diff --git a/boehm-gc/irix_threads.c b/boehm-gc/irix_threads.c deleted file mode 100644 index 5efca211034..00000000000 --- a/boehm-gc/irix_threads.c +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * Copyright (c) 1996 by Silicon Graphics. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED - * OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program - * for any purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is granted, - * provided the above notices are retained, and a notice that the code was - * modified is included with the above copyright notice. - */ -/* - * Support code for Irix (>=6.2) Pthreads. This relies on properties - * not guaranteed by the Pthread standard. It may or may not be portable - * to other implementations. - * - * Note that there is a lot of code duplication between linux_threads.c - * and irix_threads.c; any changes made here may need to be reflected - * there too. - */ - -# if defined(IRIX_THREADS) - -# include "gc_priv.h" -# include <pthread.h> -# include <semaphore.h> -# include <time.h> -# include <errno.h> -# include <unistd.h> -# include <sys/mman.h> -# include <sys/time.h> - -#undef pthread_create -#undef pthread_sigmask -#undef pthread_join - -void GC_thr_init(); - -#if 0 -void GC_print_sig_mask() -{ - sigset_t blocked; - int i; - - if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0) - ABORT("pthread_sigmask"); - GC_printf0("Blocked: "); - for (i = 1; i <= MAXSIG; i++) { - if (sigismember(&blocked, i)) { GC_printf1("%ld ",(long) i); } - } - GC_printf0("\n"); -} -#endif - -/* We use the allocation lock to protect thread-related data structures. */ - -/* The set of all known threads. We intercept thread creation and */ -/* joins. We never actually create detached threads. 
We allocate all */ -/* new thread stacks ourselves. These allow us to maintain this */ -/* data structure. */ -/* Protected by GC_thr_lock. */ -/* Some of this should be declared volatile, but that's incosnsistent */ -/* with some library routine declarations. */ -typedef struct GC_Thread_Rep { - struct GC_Thread_Rep * next; /* More recently allocated threads */ - /* with a given pthread id come */ - /* first. (All but the first are */ - /* guaranteed to be dead, but we may */ - /* not yet have registered the join.) */ - pthread_t id; - word stop; -# define NOT_STOPPED 0 -# define PLEASE_STOP 1 -# define STOPPED 2 - word flags; -# define FINISHED 1 /* Thread has exited. */ -# define DETACHED 2 /* Thread is intended to be detached. */ -# define CLIENT_OWNS_STACK 4 - /* Stack was supplied by client. */ - ptr_t stack; - ptr_t stack_ptr; /* Valid only when stopped. */ - /* But must be within stack region at */ - /* all times. */ - size_t stack_size; /* 0 for original thread. */ - void * status; /* Used only to avoid premature */ - /* reclamation of any data it might */ - /* reference. */ -} * GC_thread; - -GC_thread GC_lookup_thread(pthread_t id); - -/* - * The only way to suspend threads given the pthread interface is to send - * signals. Unfortunately, this means we have to reserve - * a signal, and intercept client calls to change the signal mask. 
- */ -# define SIG_SUSPEND (SIGRTMIN + 6) - -pthread_mutex_t GC_suspend_lock = PTHREAD_MUTEX_INITIALIZER; - /* Number of threads stopped so far */ -pthread_cond_t GC_suspend_ack_cv = PTHREAD_COND_INITIALIZER; -pthread_cond_t GC_continue_cv = PTHREAD_COND_INITIALIZER; - -void GC_suspend_handler(int sig) -{ - int dummy; - GC_thread me; - sigset_t all_sigs; - sigset_t old_sigs; - int i; - - if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler"); - me = GC_lookup_thread(pthread_self()); - /* The lookup here is safe, since I'm doing this on behalf */ - /* of a thread which holds the allocation lock in order */ - /* to stop the world. Thus concurrent modification of the */ - /* data structure is impossible. */ - if (PLEASE_STOP != me -> stop) { - /* Misdirected signal. */ - pthread_mutex_unlock(&GC_suspend_lock); - return; - } - pthread_mutex_lock(&GC_suspend_lock); - me -> stack_ptr = (ptr_t)(&dummy); - me -> stop = STOPPED; - pthread_cond_signal(&GC_suspend_ack_cv); - pthread_cond_wait(&GC_continue_cv, &GC_suspend_lock); - pthread_mutex_unlock(&GC_suspend_lock); - /* GC_printf1("Continuing 0x%x\n", pthread_self()); */ -} - - -GC_bool GC_thr_initialized = FALSE; - -size_t GC_min_stack_sz; - -size_t GC_page_sz; - -# define N_FREE_LISTS 25 -ptr_t GC_stack_free_lists[N_FREE_LISTS] = { 0 }; - /* GC_stack_free_lists[i] is free list for stacks of */ - /* size GC_min_stack_sz*2**i. */ - /* Free lists are linked through first word. */ - -/* Return a stack of size at least *stack_size. *stack_size is */ -/* replaced by the actual stack size. */ -/* Caller holds allocation lock. 
*/ -ptr_t GC_stack_alloc(size_t * stack_size) -{ - register size_t requested_sz = *stack_size; - register size_t search_sz = GC_min_stack_sz; - register int index = 0; /* = log2(search_sz/GC_min_stack_sz) */ - register ptr_t result; - - while (search_sz < requested_sz) { - search_sz *= 2; - index++; - } - if ((result = GC_stack_free_lists[index]) == 0 - && (result = GC_stack_free_lists[index+1]) != 0) { - /* Try next size up. */ - search_sz *= 2; index++; - } - if (result != 0) { - GC_stack_free_lists[index] = *(ptr_t *)result; - } else { - result = (ptr_t) GC_scratch_alloc(search_sz + 2*GC_page_sz); - result = (ptr_t)(((word)result + GC_page_sz) & ~(GC_page_sz - 1)); - /* Protect hottest page to detect overflow. */ - /* mprotect(result, GC_page_sz, PROT_NONE); */ - result += GC_page_sz; - } - *stack_size = search_sz; - return(result); -} - -/* Caller holds allocation lock. */ -void GC_stack_free(ptr_t stack, size_t size) -{ - register int index = 0; - register size_t search_sz = GC_min_stack_sz; - - while (search_sz < size) { - search_sz *= 2; - index++; - } - if (search_sz != size) ABORT("Bad stack size"); - *(ptr_t *)stack = GC_stack_free_lists[index]; - GC_stack_free_lists[index] = stack; -} - - - -# define THREAD_TABLE_SZ 128 /* Must be power of 2 */ -volatile GC_thread GC_threads[THREAD_TABLE_SZ]; - -/* Add a thread to GC_threads. We assume it wasn't already there. */ -/* Caller holds allocation lock. */ -GC_thread GC_new_thread(pthread_t id) -{ - int hv = ((word)id) % THREAD_TABLE_SZ; - GC_thread result; - static struct GC_Thread_Rep first_thread; - static GC_bool first_thread_used = FALSE; - - if (!first_thread_used) { - result = &first_thread; - first_thread_used = TRUE; - /* Dont acquire allocation lock, since we may already hold it. 
*/ - } else { - result = (struct GC_Thread_Rep *) - GC_generic_malloc_inner(sizeof(struct GC_Thread_Rep), NORMAL); - } - if (result == 0) return(0); - result -> id = id; - result -> next = GC_threads[hv]; - GC_threads[hv] = result; - /* result -> flags = 0; */ - /* result -> stop = 0; */ - return(result); -} - -/* Delete a thread from GC_threads. We assume it is there. */ -/* (The code intentionally traps if it wasn't.) */ -/* Caller holds allocation lock. */ -void GC_delete_thread(pthread_t id) -{ - int hv = ((word)id) % THREAD_TABLE_SZ; - register GC_thread p = GC_threads[hv]; - register GC_thread prev = 0; - - while (!pthread_equal(p -> id, id)) { - prev = p; - p = p -> next; - } - if (prev == 0) { - GC_threads[hv] = p -> next; - } else { - prev -> next = p -> next; - } -} - -/* If a thread has been joined, but we have not yet */ -/* been notified, then there may be more than one thread */ -/* in the table with the same pthread id. */ -/* This is OK, but we need a way to delete a specific one. */ -void GC_delete_gc_thread(pthread_t id, GC_thread gc_id) -{ - int hv = ((word)id) % THREAD_TABLE_SZ; - register GC_thread p = GC_threads[hv]; - register GC_thread prev = 0; - - while (p != gc_id) { - prev = p; - p = p -> next; - } - if (prev == 0) { - GC_threads[hv] = p -> next; - } else { - prev -> next = p -> next; - } -} - -/* Return a GC_thread corresponding to a given thread_t. */ -/* Returns 0 if it's not there. */ -/* Caller holds allocation lock or otherwise inhibits */ -/* updates. */ -/* If there is more than one thread with the given id we */ -/* return the most recent one. */ -GC_thread GC_lookup_thread(pthread_t id) -{ - int hv = ((word)id) % THREAD_TABLE_SZ; - register GC_thread p = GC_threads[hv]; - - while (p != 0 && !pthread_equal(p -> id, id)) p = p -> next; - return(p); -} - - -/* Caller holds allocation lock. 
*/ -void GC_stop_world() -{ - pthread_t my_thread = pthread_self(); - register int i; - register GC_thread p; - register int result; - struct timespec timeout; - - for (i = 0; i < THREAD_TABLE_SZ; i++) { - for (p = GC_threads[i]; p != 0; p = p -> next) { - if (p -> id != my_thread) { - if (p -> flags & FINISHED) { - p -> stop = STOPPED; - continue; - } - p -> stop = PLEASE_STOP; - result = pthread_kill(p -> id, SIG_SUSPEND); - /* GC_printf1("Sent signal to 0x%x\n", p -> id); */ - switch(result) { - case ESRCH: - /* Not really there anymore. Possible? */ - p -> stop = STOPPED; - break; - case 0: - break; - default: - ABORT("pthread_kill failed"); - } - } - } - } - pthread_mutex_lock(&GC_suspend_lock); - for (i = 0; i < THREAD_TABLE_SZ; i++) { - for (p = GC_threads[i]; p != 0; p = p -> next) { - while (p -> id != my_thread && p -> stop != STOPPED) { - clock_gettime(CLOCK_REALTIME, &timeout); - timeout.tv_nsec += 50000000; /* 50 msecs */ - if (timeout.tv_nsec >= 1000000000) { - timeout.tv_nsec -= 1000000000; - ++timeout.tv_sec; - } - result = pthread_cond_timedwait(&GC_suspend_ack_cv, - &GC_suspend_lock, - &timeout); - if (result == ETIMEDOUT) { - /* Signal was lost or misdirected. Try again. */ - /* Duplicate signals should be benign. */ - result = pthread_kill(p -> id, SIG_SUSPEND); - } - } - } - } - pthread_mutex_unlock(&GC_suspend_lock); - /* GC_printf1("World stopped 0x%x\n", pthread_self()); */ -} - -/* Caller holds allocation lock. */ -void GC_start_world() -{ - GC_thread p; - unsigned i; - - /* GC_printf0("World starting\n"); */ - for (i = 0; i < THREAD_TABLE_SZ; i++) { - for (p = GC_threads[i]; p != 0; p = p -> next) { - p -> stop = NOT_STOPPED; - } - } - pthread_mutex_lock(&GC_suspend_lock); - /* All other threads are at pthread_cond_wait in signal handler. */ - /* Otherwise we couldn't have acquired the lock. */ - pthread_mutex_unlock(&GC_suspend_lock); - pthread_cond_broadcast(&GC_continue_cv); -} - -# ifdef MMAP_STACKS ---> not really supported yet. 
-int GC_is_thread_stack(ptr_t addr) -{ - register int i; - register GC_thread p; - - for (i = 0; i < THREAD_TABLE_SZ; i++) { - for (p = GC_threads[i]; p != 0; p = p -> next) { - if (p -> stack_size != 0) { - if (p -> stack <= addr && - addr < p -> stack + p -> stack_size) - return 1; - } - } - } - return 0; -} -# endif - -/* We hold allocation lock. We assume the world is stopped. */ -void GC_push_all_stacks() -{ - register int i; - register GC_thread p; - register ptr_t sp = GC_approx_sp(); - register ptr_t lo, hi; - pthread_t me = pthread_self(); - - if (!GC_thr_initialized) GC_thr_init(); - /* GC_printf1("Pushing stacks from thread 0x%x\n", me); */ - for (i = 0; i < THREAD_TABLE_SZ; i++) { - for (p = GC_threads[i]; p != 0; p = p -> next) { - if (p -> flags & FINISHED) continue; - if (pthread_equal(p -> id, me)) { - lo = GC_approx_sp(); - } else { - lo = p -> stack_ptr; - } - if (p -> stack_size != 0) { - hi = p -> stack + p -> stack_size; - } else { - /* The original stack. */ - hi = GC_stackbottom; - } - GC_push_all_stack(lo, hi); - } - } -} - - -/* We hold the allocation lock. */ -void GC_thr_init() -{ - GC_thread t; - struct sigaction act; - - if (GC_thr_initialized) return; - GC_thr_initialized = TRUE; - GC_min_stack_sz = HBLKSIZE; - GC_page_sz = sysconf(_SC_PAGESIZE); - (void) sigaction(SIG_SUSPEND, 0, &act); - if (act.sa_handler != SIG_DFL) - ABORT("Previously installed SIG_SUSPEND handler"); - /* Install handler. */ - act.sa_handler = GC_suspend_handler; - act.sa_flags = SA_RESTART; - (void) sigemptyset(&act.sa_mask); - if (0 != sigaction(SIG_SUSPEND, &act, 0)) - ABORT("Failed to install SIG_SUSPEND handler"); - /* Add the initial thread, so we can stop it. 
*/ - t = GC_new_thread(pthread_self()); - t -> stack_size = 0; - t -> stack_ptr = (ptr_t)(&t); - t -> flags = DETACHED; -} - -int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) -{ - sigset_t fudged_set; - - if (set != NULL && (how == SIG_BLOCK || how == SIG_SETMASK)) { - fudged_set = *set; - sigdelset(&fudged_set, SIG_SUSPEND); - set = &fudged_set; - } - return(pthread_sigmask(how, set, oset)); -} - -struct start_info { - void *(*start_routine)(void *); - void *arg; - word flags; - ptr_t stack; - size_t stack_size; - sem_t registered; /* 1 ==> in our thread table, but */ - /* parent hasn't yet noticed. */ -}; - -void GC_thread_exit_proc(void *arg) -{ - GC_thread me; - - LOCK(); - me = GC_lookup_thread(pthread_self()); - if (me -> flags & DETACHED) { - GC_delete_thread(pthread_self()); - } else { - me -> flags |= FINISHED; - } - UNLOCK(); -} - -int GC_pthread_join(pthread_t thread, void **retval) -{ - int result; - GC_thread thread_gc_id; - - LOCK(); - thread_gc_id = GC_lookup_thread(thread); - /* This is guaranteed to be the intended one, since the thread id */ - /* cant have been recycled by pthreads. */ - UNLOCK(); - result = pthread_join(thread, retval); - /* Some versions of the Irix pthreads library can erroneously */ - /* return EINTR when the call succeeds. */ - if (EINTR == result) result = 0; - LOCK(); - /* Here the pthread thread id may have been recycled. */ - GC_delete_gc_thread(thread, thread_gc_id); - UNLOCK(); - return result; -} - -void * GC_start_routine(void * arg) -{ - struct start_info * si = arg; - void * result; - GC_thread me; - pthread_t my_pthread; - void *(*start)(void *); - void *start_arg; - - my_pthread = pthread_self(); - /* If a GC occurs before the thread is registered, that GC will */ - /* ignore this thread. That's fine, since it will block trying to */ - /* acquire the allocation lock, and won't yet hold interesting */ - /* pointers. 
*/ - LOCK(); - /* We register the thread here instead of in the parent, so that */ - /* we don't need to hold the allocation lock during pthread_create. */ - /* Holding the allocation lock there would make REDIRECT_MALLOC */ - /* impossible. It probably still doesn't work, but we're a little */ - /* closer ... */ - /* This unfortunately means that we have to be careful the parent */ - /* doesn't try to do a pthread_join before we're registered. */ - me = GC_new_thread(my_pthread); - me -> flags = si -> flags; - me -> stack = si -> stack; - me -> stack_size = si -> stack_size; - me -> stack_ptr = (ptr_t)si -> stack + si -> stack_size - sizeof(word); - UNLOCK(); - start = si -> start_routine; - start_arg = si -> arg; - sem_post(&(si -> registered)); - pthread_cleanup_push(GC_thread_exit_proc, 0); - result = (*start)(start_arg); - me -> status = result; - me -> flags |= FINISHED; - pthread_cleanup_pop(1); - /* This involves acquiring the lock, ensuring that we can't exit */ - /* while a collection that thinks we're alive is trying to stop */ - /* us. */ - return(result); -} - -int -GC_pthread_create(pthread_t *new_thread, - const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg) -{ - int result; - GC_thread t; - void * stack; - size_t stacksize; - pthread_attr_t new_attr; - int detachstate; - word my_flags = 0; - struct start_info * si = GC_malloc(sizeof(struct start_info)); - /* This is otherwise saved only in an area mmapped by the thread */ - /* library, which isn't visible to the collector. 
*/ - - if (0 == si) return(ENOMEM); - sem_init(&(si -> registered), 0, 0); - si -> start_routine = start_routine; - si -> arg = arg; - LOCK(); - if (!GC_thr_initialized) GC_thr_init(); - if (NULL == attr) { - stack = 0; - (void) pthread_attr_init(&new_attr); - } else { - new_attr = *attr; - pthread_attr_getstackaddr(&new_attr, &stack); - } - pthread_attr_getstacksize(&new_attr, &stacksize); - pthread_attr_getdetachstate(&new_attr, &detachstate); - if (stacksize < GC_min_stack_sz) ABORT("Stack too small"); - if (0 == stack) { - stack = (void *)GC_stack_alloc(&stacksize); - if (0 == stack) { - UNLOCK(); - return(ENOMEM); - } - pthread_attr_setstackaddr(&new_attr, stack); - } else { - my_flags |= CLIENT_OWNS_STACK; - } - if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED; - si -> flags = my_flags; - si -> stack = stack; - si -> stack_size = stacksize; - result = pthread_create(new_thread, &new_attr, GC_start_routine, si); - if (0 == new_thread && !(my_flags & CLIENT_OWNS_STACK)) { - GC_stack_free(stack, stacksize); - } - UNLOCK(); - /* Wait until child has been added to the thread table. */ - /* This also ensures that we hold onto si until the child is done */ - /* with it. Thus it doesn't matter whether it is otherwise */ - /* visible to the collector. */ - if (0 != sem_wait(&(si -> registered))) ABORT("sem_wait failed"); - sem_destroy(&(si -> registered)); - /* pthread_attr_destroy(&new_attr); */ - return(result); -} - -GC_bool GC_collecting = 0; /* A hint that we're in the collector and */ - /* holding the allocation lock for an */ - /* extended period. */ - -/* Reasonably fast spin locks. Basically the same implementation */ -/* as STL alloc.h. This isn't really the right way to do this. */ -/* but until the POSIX scheduling mess gets straightened out ... 
*/ - -unsigned long GC_allocate_lock = 0; - -#define SLEEP_THRESHOLD 3 - -void GC_lock() -{ -# define low_spin_max 30 /* spin cycles if we suspect uniprocessor */ -# define high_spin_max 1000 /* spin cycles for multiprocessor */ - static unsigned spin_max = low_spin_max; - unsigned my_spin_max; - static unsigned last_spins = 0; - unsigned my_last_spins; - volatile unsigned junk; -# define PAUSE junk *= junk; junk *= junk; junk *= junk; junk *= junk - int i; - - if (!GC_test_and_set(&GC_allocate_lock, 1)) { - return; - } - junk = 0; - my_spin_max = spin_max; - my_last_spins = last_spins; - for (i = 0; i < my_spin_max; i++) { - if (GC_collecting) goto yield; - if (i < my_last_spins/2 || GC_allocate_lock) { - PAUSE; - continue; - } - if (!GC_test_and_set(&GC_allocate_lock, 1)) { - /* - * got it! - * Spinning worked. Thus we're probably not being scheduled - * against the other process with which we were contending. - * Thus it makes sense to spin longer the next time. - */ - last_spins = i; - spin_max = high_spin_max; - return; - } - } - /* We are probably being scheduled against the other process. Sleep. */ - spin_max = low_spin_max; -yield: - for (i = 0;; ++i) { - if (!GC_test_and_set(&GC_allocate_lock, 1)) { - return; - } - if (i < SLEEP_THRESHOLD) { - sched_yield(); - } else { - struct timespec ts; - - if (i > 26) i = 26; - /* Don't wait for more than about 60msecs, even */ - /* under extreme contention. 
*/ - ts.tv_sec = 0; - ts.tv_nsec = 1 << i; - nanosleep(&ts, 0); - } - } -} - - - -# else - -#ifndef LINT - int GC_no_Irix_threads; -#endif - -# endif /* IRIX_THREADS */ - diff --git a/boehm-gc/linux_threads.c b/boehm-gc/linux_threads.c index 4d98062d11c..d6cab0af4ce 100644 --- a/boehm-gc/linux_threads.c +++ b/boehm-gc/linux_threads.c @@ -36,16 +36,26 @@ # if defined(LINUX_THREADS) # include <pthread.h> +# include <sched.h> # include <time.h> # include <errno.h> # include <unistd.h> # include <sys/mman.h> # include <sys/time.h> # include <semaphore.h> +# include <signal.h> + +#ifdef USE_LD_WRAP +# define WRAP_FUNC(f) __wrap_##f +# define REAL_FUNC(f) __real_##f +#else +# define WRAP_FUNC(f) GC_##f +# define REAL_FUNC(f) f +# undef pthread_create +# undef pthread_sigmask +# undef pthread_join +#endif -#undef pthread_create -#undef pthread_sigmask -#undef pthread_join void GC_thr_init(); @@ -86,8 +96,12 @@ typedef struct GC_Thread_Rep { # define DETACHED 2 /* Thread is intended to be detached. */ # define MAIN_THREAD 4 /* True for the original thread only. */ - ptr_t stack_end; - ptr_t stack_ptr; /* Valid only when stopped. */ + ptr_t stack_end; /* Cold end of the stack. */ + ptr_t stack_ptr; /* Valid only when stopped. */ +# ifdef IA64 + ptr_t backing_store_end; + ptr_t backing_store_ptr; +# endif int signal; void * status; /* The value returned from the thread. */ /* Used only to avoid premature */ @@ -138,6 +152,10 @@ static inline ptr_t GC_linux_thread_top_of_stack(void) return tos; } +#ifdef IA64 + extern word GC_save_regs_in_stack(); +#endif + void GC_suspend_handler(int sig) { int dummy; @@ -160,7 +178,9 @@ void GC_suspend_handler(int sig) /* to stop the world. Thus concurrent modification of the */ /* data structure is impossible. 
*/ me -> stack_ptr = (ptr_t)(&dummy); - me -> stack_end = GC_linux_thread_top_of_stack(); +# ifdef IA64 + me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack(); +# endif /* Tell the thread that wants to stop the world that this */ /* thread has been stopped. Note that sem_post() is */ @@ -173,11 +193,11 @@ void GC_suspend_handler(int sig) /* is no race. */ if (sigfillset(&mask) != 0) ABORT("sigfillset() failed"); if (sigdelset(&mask, SIG_RESTART) != 0) ABORT("sigdelset() failed"); -#ifdef NO_SIGNALS - if (sigdelset(&mask, SIGINT) != 0) ABORT("sigdelset() failed"); - if (sigdelset(&mask, SIGQUIT) != 0) ABORT("sigdelset() failed"); - if (sigdelset(&mask, SIGTERM) != 0) ABORT("sigdelset() failed"); -#endif +# ifdef NO_SIGNALS + if (sigdelset(&mask, SIGINT) != 0) ABORT("sigdelset() failed"); + if (sigdelset(&mask, SIGQUIT) != 0) ABORT("sigdelset() failed"); + if (sigdelset(&mask, SIGTERM) != 0) ABORT("sigdelset() failed"); +# endif do { me->signal = 0; sigsuspend(&mask); /* Wait for signal */ @@ -380,13 +400,21 @@ void GC_start_world() #endif } -/* We hold allocation lock. We assume the world is stopped. */ +# ifdef IA64 +# define IF_IA64(x) x +# else +# define IF_IA64(x) +# endif +/* We hold allocation lock. Should do exactly the right thing if the */ +/* world is stopped. Should not fail if it isn't. */ void GC_push_all_stacks() { - register int i; - register GC_thread p; - register ptr_t sp = GC_approx_sp(); - register ptr_t lo, hi; + int i; + GC_thread p; + ptr_t sp = GC_approx_sp(); + ptr_t lo, hi; + /* On IA64, we also need to scan the register backing store. 
*/ + IF_IA64(ptr_t bs_lo; ptr_t bs_hi;) pthread_t me = pthread_self(); if (!GC_thr_initialized) GC_thr_init(); @@ -398,25 +426,33 @@ void GC_push_all_stacks() if (p -> flags & FINISHED) continue; if (pthread_equal(p -> id, me)) { lo = GC_approx_sp(); + IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();) } else { lo = p -> stack_ptr; + IF_IA64(bs_hi = p -> backing_store_ptr;) } if ((p -> flags & MAIN_THREAD) == 0) { - if (pthread_equal(p -> id, me)) { - hi = GC_linux_thread_top_of_stack(); - } else { - hi = p -> stack_end; - } + hi = p -> stack_end; + IF_IA64(bs_lo = p -> backing_store_end); } else { /* The original stack. */ hi = GC_stackbottom; + IF_IA64(bs_lo = BACKING_STORE_BASE;) } #if DEBUG_THREADS GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n", (unsigned long) p -> id, (unsigned long) lo, (unsigned long) hi); #endif + if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n"); GC_push_all_stack(lo, hi); +# ifdef IA64 + if (pthread_equal(p -> id, me)) { + GC_push_all_eager(bs_lo, bs_hi); + } else { + GC_push_all_stack(bs_lo, bs_hi); + } +# endif } } } @@ -425,6 +461,7 @@ void GC_push_all_stacks() /* We hold the allocation lock. */ void GC_thr_init() { + int dummy; GC_thread t; struct sigaction act; @@ -439,19 +476,13 @@ void GC_thr_init() ABORT("sigfillset() failed"); } -#ifdef NO_SIGNALS - if (sigdelset(&act.sa_mask, SIGINT) != 0) { - ABORT("sigdelset() failed"); - } - - if (sigdelset(&act.sa_mask, SIGQUIT) != 0) { - ABORT("sigdelset() failed"); - } - - if (sigdelset(&act.sa_mask, SIGTERM) != 0) { - ABORT("sigdelset() failed"); - } -#endif +# ifdef NO_SIGNALS + if (sigdelset(&act.sa_mask, SIGINT) != 0 + || sigdelset(&act.sa_mask, SIGQUIT != 0) + || sigdelset(&act.sa_mask, SIGTERM != 0)) { + ABORT("sigdelset() failed"); + } +# endif /* SIG_RESTART is unmasked by the handler when necessary. */ act.sa_handler = GC_suspend_handler; @@ -466,11 +497,11 @@ void GC_thr_init() /* Add the initial thread, so we can stop it. 
*/ t = GC_new_thread(pthread_self()); - t -> stack_ptr = 0; + t -> stack_ptr = (ptr_t)(&dummy); t -> flags = DETACHED | MAIN_THREAD; } -int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) +int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset) { sigset_t fudged_set; @@ -479,7 +510,7 @@ int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) sigdelset(&fudged_set, SIG_SUSPEND); set = &fudged_set; } - return(pthread_sigmask(how, set, oset)); + return(REAL_FUNC(pthread_sigmask)(how, set, oset)); } struct start_info { @@ -503,10 +534,25 @@ void GC_thread_exit_proc(void *arg) } else { me -> flags |= FINISHED; } + if (GC_incremental && GC_collection_in_progress()) { + int old_gc_no = GC_gc_no; + + /* Make sure that no part of our stack is still on the mark stack, */ + /* since it's about to be unmapped. */ + while (GC_incremental && GC_collection_in_progress() + && old_gc_no == GC_gc_no) { + ENTER_GC(); + GC_collect_a_little_inner(1); + EXIT_GC(); + UNLOCK(); + sched_yield(); + LOCK(); + } + } UNLOCK(); } -int GC_pthread_join(pthread_t thread, void **retval) +int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval) { int result; GC_thread thread_gc_id; @@ -516,7 +562,7 @@ int GC_pthread_join(pthread_t thread, void **retval) /* This is guaranteed to be the intended one, since the thread id */ /* cant have been recycled by pthreads. */ UNLOCK(); - result = pthread_join(thread, retval); + result = REAL_FUNC(pthread_join)(thread, retval); LOCK(); /* Here the pthread thread id may have been recycled. 
*/ GC_delete_gc_thread(thread, thread_gc_id); @@ -526,6 +572,7 @@ int GC_pthread_join(pthread_t thread, void **retval) void * GC_start_routine(void * arg) { + int dummy; struct start_info * si = arg; void * result; GC_thread me; @@ -534,22 +581,45 @@ void * GC_start_routine(void * arg) void *start_arg; my_pthread = pthread_self(); +# ifdef DEBUG_THREADS + GC_printf1("Starting thread 0x%lx\n", my_pthread); + GC_printf1("pid = %ld\n", (long) getpid()); + GC_printf1("sp = 0x%lx\n", (long) &arg); +# endif LOCK(); me = GC_new_thread(my_pthread); me -> flags = si -> flags; me -> stack_ptr = 0; - me -> stack_end = 0; + /* me -> stack_end = GC_linux_stack_base(); -- currently (11/99) */ + /* doesn't work because the stack base in /proc/self/stat is the */ + /* one for the main thread. There is a strong argument that that's */ + /* a kernel bug, but a pervasive one. */ +# ifdef STACK_GROWS_DOWN + me -> stack_end = (ptr_t)(((word)(&dummy) + (GC_page_size - 1)) + & ~(GC_page_size - 1)); + me -> stack_ptr = me -> stack_end - 0x10; + /* Needs to be plausible, since an asynchronous stack mark */ + /* should not crash. */ +# else + me -> stack_end = (ptr_t)(((word)(&dummy) & ~(GC_page_size - 1)); + me -> stack_ptr = me -> stack_end + 0x10; +# endif + /* This is dubious, since we may be more than a page into the stack, */ + /* and hence skip some of it, though it's not clear that matters. */ +# ifdef IA64 + me -> backing_store_end = (ptr_t) + (GC_save_regs_in_stack() & ~(GC_page_size - 1)); + /* This is also < 100% convincing. We should also read this */ + /* from /proc, but the hook to do so isn't there yet. 
*/ +# endif /* IA64 */ UNLOCK(); start = si -> start_routine; - start_arg = si -> arg; - sem_post(&(si -> registered)); - pthread_cleanup_push(GC_thread_exit_proc, si); # ifdef DEBUG_THREADS - GC_printf1("Starting thread 0x%lx\n", pthread_self()); - GC_printf1("pid = %ld\n", (long) getpid()); - GC_printf1("sp = 0x%lx\n", (long) &arg); GC_printf1("start_routine = 0x%lx\n", start); # endif + start_arg = si -> arg; + sem_post(&(si -> registered)); + pthread_cleanup_push(GC_thread_exit_proc, si); result = (*start)(start_arg); #if DEBUG_THREADS GC_printf1("Finishing thread 0x%x\n", pthread_self()); @@ -564,7 +634,7 @@ void * GC_start_routine(void * arg) } int -GC_pthread_create(pthread_t *new_thread, +WRAP_FUNC(pthread_create)(pthread_t *new_thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { @@ -596,7 +666,14 @@ GC_pthread_create(pthread_t *new_thread, if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED; si -> flags = my_flags; UNLOCK(); - result = pthread_create(new_thread, &new_attr, GC_start_routine, si); +# ifdef DEBUG_THREADS + GC_printf1("About to start new thread from thread 0x%X\n", + pthread_self()); +# endif + result = REAL_FUNC(pthread_create)(new_thread, &new_attr, GC_start_routine, si); +# ifdef DEBUG_THREADS + GC_printf1("Started thread 0x%X\n", *new_thread); +# endif /* Wait until child has been added to the thread table. */ /* This also ensures that we hold onto si until the child is done */ /* with it. Thus it doesn't matter whether it is otherwise */ @@ -608,7 +685,9 @@ GC_pthread_create(pthread_t *new_thread, return(result); } -GC_bool GC_collecting = 0; +#if defined(USE_SPIN_LOCK) + +VOLATILE GC_bool GC_collecting = 0; /* A hint that we're in the collector and */ /* holding the allocation lock for an */ /* extended period. 
*/ @@ -681,5 +760,7 @@ yield: } } +#endif /* known architecture */ + # endif /* LINUX_THREADS */ diff --git a/boehm-gc/mach_dep.c b/boehm-gc/mach_dep.c index 52f86346761..12c3f07603f 100644 --- a/boehm-gc/mach_dep.c +++ b/boehm-gc/mach_dep.c @@ -80,6 +80,24 @@ void GC_push_regs() # ifdef RT register long TMP_SP; /* must be bound to r11 */ # endif + +# if defined(MIPS) && defined(LINUX) + /* I'm not sure whether this has actually been tested. */ +# define call_push(x) asm("move $4," x ";"); asm("jal GC_push_one") + call_push("$2"); + call_push("$3"); + call_push("$16"); + call_push("$17"); + call_push("$18"); + call_push("$19"); + call_push("$20"); + call_push("$21"); + call_push("$22"); + call_push("$23"); + call_push("$30"); +# undef call_push +# endif /* MIPS && LINUX */ + # ifdef VAX /* VAX - generic code below does not work under 4.2 */ /* r1 through r5 are caller save, and therefore */ @@ -199,10 +217,11 @@ void GC_push_regs() # endif /* __MWERKS__ */ # endif /* MACOS */ -# if defined(I386) &&!defined(OS2) &&!defined(SVR4) &&!defined(MSWIN32) \ +# if defined(I386) &&!defined(OS2) &&!defined(SVR4) \ + && (defined(__MINGW32__) || !defined(MSWIN32)) \ && !defined(SCO) && !defined(SCO_ELF) \ && !(defined(LINUX) && defined(__ELF__)) \ - && !(defined(__FreeBSD__) && defined(__ELF__)) \ + && !(defined(FREEBSD) && defined(__ELF__)) \ && !defined(DOS4GW) /* I386 code, generic code does not appear to work */ /* It does appear to work under OS2, and asms dont */ @@ -217,20 +236,25 @@ void GC_push_regs() # endif # if ( defined(I386) && defined(LINUX) && defined(__ELF__) ) \ - || ( defined(I386) && defined(__FreeBSD__) && defined(__ELF__) ) + || ( defined(I386) && defined(FREEBSD) && defined(__ELF__) ) /* This is modified for Linux with ELF (Note: _ELF_ only) */ /* This section handles FreeBSD with ELF. 
*/ - asm("pushl %eax"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %ecx"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %edx"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %ebp"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %esi"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %edi"); asm("call GC_push_one"); asm("addl $4,%esp"); - asm("pushl %ebx"); asm("call GC_push_one"); asm("addl $4,%esp"); + /* Eax is caller-save and dead here. Other caller-save */ + /* registers could also be skipped. We assume there are no */ + /* pointers in MMX registers, etc. */ + /* We combine instructions in a single asm to prevent gcc from */ + /* inserting code in the middle. */ + asm("pushl %ecx; call GC_push_one; addl $4,%esp"); + asm("pushl %edx; call GC_push_one; addl $4,%esp"); + asm("pushl %ebp; call GC_push_one; addl $4,%esp"); + asm("pushl %esi; call GC_push_one; addl $4,%esp"); + asm("pushl %edi; call GC_push_one; addl $4,%esp"); + asm("pushl %ebx; call GC_push_one; addl $4,%esp"); # endif -# if defined(I386) && defined(MSWIN32) && !defined(USE_GENERIC) +# if defined(I386) && defined(MSWIN32) && !defined(__MINGW32__) \ + && !defined(USE_GENERIC) /* I386 code, Microsoft variant */ __asm push eax __asm call GC_push_one @@ -274,11 +298,10 @@ void GC_push_regs() asm ("movd r7, tos"); asm ("bsr ?_GC_push_one"); asm ("adjspb $-4"); # endif -# if defined(SPARC) || defined(IA64) +# if defined(SPARC) { word GC_save_regs_in_stack(); - /* generic code will not work */ GC_save_regs_ret_val = GC_save_regs_in_stack(); } # endif @@ -351,8 +374,8 @@ void GC_push_regs() /* other machines... 
*/ # if !(defined M68K) && !(defined VAX) && !(defined RT) # if !(defined SPARC) && !(defined I386) && !(defined NS32K) -# if !defined(POWERPC) && !defined(UTS4) && !defined(IA64) -# if !defined(PJ) +# if !defined(POWERPC) && !defined(UTS4) +# if !defined(PJ) && !(defined(MIPS) && defined(LINUX)) --> bad news <-- # endif # endif @@ -379,11 +402,24 @@ ptr_t cold_gc_frame; for (; (char *)i < lim; i++) { *i = 0; } -# if defined(POWERPC) || defined(MSWIN32) || defined(UTS4) +# if defined(POWERPC) || defined(MSWIN32) || defined(UTS4) || defined(LINUX) (void) setjmp(regs); # else (void) _setjmp(regs); # endif +# if defined(SPARC) || defined(IA64) + /* On a register window machine, we need to save register */ + /* contents on the stack for this to work. The setjmp */ + /* is probably not needed on SPARC, since pointers are */ + /* only stored in windowed or scratch registers. It is */ + /* needed on IA64, since some non-windowed registers are */ + /* preserved. */ + { + word GC_save_regs_in_stack(); + + GC_save_regs_ret_val = GC_save_regs_in_stack(); + } +# endif GC_push_current_stack(cold_gc_frame); } } diff --git a/boehm-gc/malloc.c b/boehm-gc/malloc.c index 66e62d29694..a5a93ad8119 100644 --- a/boehm-gc/malloc.c +++ b/boehm-gc/malloc.c @@ -81,6 +81,10 @@ register ptr_t *opp; /* but that's benign. */ /* Volatile declarations may need to be added */ /* to prevent the compiler from breaking things.*/ + /* If we only execute the second of the */ + /* following assignments, we lose the free */ + /* list, but that should still be OK, at least */ + /* for garbage collected memory. */ *opp = obj_link(op); obj_link(op) = 0; } else { diff --git a/boehm-gc/mallocx.c b/boehm-gc/mallocx.c index 8c07fa98846..c842665237e 100644 --- a/boehm-gc/mallocx.c +++ b/boehm-gc/mallocx.c @@ -134,22 +134,14 @@ void GC_incr_mem_freed(size_t n) /* Analogous to the above, but assumes a small object size, and */ /* bypasses MERGE_SIZES mechanism. Used by gc_inline.h. 
*/ -#ifdef __STDC__ - ptr_t GC_generic_malloc_words_small(size_t lw, int k) -#else - ptr_t GC_generic_malloc_words_small(lw, k) - register word lw; - register int k; -#endif +ptr_t GC_generic_malloc_words_small_inner(lw, k) +register word lw; +register int k; { register ptr_t op; register ptr_t *opp; register struct obj_kind * kind = GC_obj_kinds + k; -DCL_LOCK_STATE; - GC_INVOKE_FINALIZERS(); - DISABLE_SIGNALS(); - LOCK(); opp = &(kind -> ok_freelist[lw]); if( (op = *opp) == 0 ) { if (!GC_is_initialized) { @@ -167,6 +159,26 @@ DCL_LOCK_STATE; *opp = obj_link(op); obj_link(op) = 0; GC_words_allocd += lw; + return((ptr_t)op); +} + +/* Analogous to the above, but assumes a small object size, and */ +/* bypasses MERGE_SIZES mechanism. Used by gc_inline.h. */ +#ifdef __STDC__ + ptr_t GC_generic_malloc_words_small(size_t lw, int k) +#else + ptr_t GC_generic_malloc_words_small(lw, k) + register word lw; + register int k; +#endif +{ +register ptr_t op; +DCL_LOCK_STATE; + + GC_INVOKE_FINALIZERS(); + DISABLE_SIGNALS(); + LOCK(); + op = GC_generic_malloc_words_small_inner(lw, k); UNLOCK(); ENABLE_SIGNALS(); return((ptr_t)op); diff --git a/boehm-gc/mark.c b/boehm-gc/mark.c index ef0e0c088e0..d164702bf1e 100644 --- a/boehm-gc/mark.c +++ b/boehm-gc/mark.c @@ -38,7 +38,7 @@ word x; /* mark_proc GC_mark_procs[MAX_MARK_PROCS] = {0} -- declared in gc_priv.h */ -word GC_n_mark_procs = 0; +word GC_n_mark_procs = GC_RESERVED_MARK_PROCS; /* Initialize GC_obj_kinds properly and standard free lists properly. */ /* This must be done statically since they may be accessed before */ @@ -365,20 +365,20 @@ GC_bool GC_mark_stack_empty() /* with IGNORE_OFF_PAGE set. 
*/ /*ARGSUSED*/ # ifdef PRINT_BLACK_LIST - word GC_find_start(current, hhdr, source) + ptr_t GC_find_start(current, hhdr, source) word source; # else - word GC_find_start(current, hhdr) + ptr_t GC_find_start(current, hhdr) # define source 0 # endif -register word current; +register ptr_t current; register hdr * hhdr; { # ifdef ALL_INTERIOR_POINTERS if (hhdr != 0) { - register word orig = current; + register ptr_t orig = current; - current = (word)HBLKPTR(current) + HDR_BYTES; + current = (ptr_t)HBLKPTR(current) + HDR_BYTES; do { current = current - HBLKSIZE*(word)hhdr; hhdr = HDR(current); @@ -429,6 +429,12 @@ mse * msp; * is never 0. A mark stack entry never has size 0. * We try to traverse on the order of a hblk of memory before we return. * Caller is responsible for calling this until the mark stack is empty. + * Note that this is the most performance critical routine in the + * collector. Hence it contains all sorts of ugly hacks to speed + * things up. In particular, we avoid procedure calls on the common + * path, we take advantage of peculiarities of the mark descriptor + * encoding, we optionally maintain a cache for the block address to + * header mapping, we prefetch when an object is "grayed", etc. */ void GC_mark_from_mark_stack() { @@ -443,9 +449,12 @@ void GC_mark_from_mark_stack() register word descr; register ptr_t greatest_ha = GC_greatest_plausible_heap_addr; register ptr_t least_ha = GC_least_plausible_heap_addr; + DECLARE_HDR_CACHE; + # define SPLIT_RANGE_WORDS 128 /* Must be power of 2. */ GC_objects_are_marked = TRUE; + INIT_HDR_CACHE; # ifdef OS2 /* Use untweaked version to circumvent compiler problem */ while (GC_mark_stack_top_reg >= GC_mark_stack_reg && credit >= 0) { # else @@ -453,8 +462,13 @@ void GC_mark_from_mark_stack() >= 0) { # endif current_p = GC_mark_stack_top_reg -> mse_start; - retry: descr = GC_mark_stack_top_reg -> mse_descr; + retry: + /* current_p and descr describe the current object. 
*/ + /* *GC_mark_stack_top_reg is vacant. */ + /* The following is 0 only for small objects described by a simple */ + /* length descriptor. For many applications this is the common */ + /* case, so we try to detect it quickly. */ if (descr & ((~(WORDS_TO_BYTES(SPLIT_RANGE_WORDS) - 1)) | DS_TAGS)) { word tag = descr & DS_TAGS; @@ -465,8 +479,8 @@ void GC_mark_from_mark_stack() /* stack. */ GC_mark_stack_top_reg -> mse_start = limit = current_p + SPLIT_RANGE_WORDS-1; - GC_mark_stack_top_reg -> mse_descr -= - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1); + GC_mark_stack_top_reg -> mse_descr = + descr - WORDS_TO_BYTES(SPLIT_RANGE_WORDS-1); /* Make sure that pointers overlapping the two ranges are */ /* considered. */ limit = (word *)((char *)limit + sizeof(word) - ALIGNMENT); @@ -479,8 +493,8 @@ void GC_mark_from_mark_stack() if ((signed_word)descr < 0) { current = *current_p; if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) { - PUSH_CONTENTS(current, GC_mark_stack_top_reg, mark_stack_limit, - current_p, exit1); + PUSH_CONTENTS((ptr_t)current, GC_mark_stack_top_reg, + mark_stack_limit, current_p, exit1); } } descr <<= 1; @@ -499,24 +513,94 @@ void GC_mark_from_mark_stack() mark_stack_limit, ENV(descr)); continue; case DS_PER_OBJECT: - GC_mark_stack_top_reg -> mse_descr = - *(word *)((ptr_t)current_p + descr - tag); + if ((signed_word)descr >= 0) { + /* Descriptor is in the object. */ + descr = *(word *)((ptr_t)current_p + descr - DS_PER_OBJECT); + } else { + /* Descriptor is in type descriptor pointed to by first */ + /* word in object. */ + ptr_t type_descr = *(ptr_t *)current_p; + /* type_descr is either a valid pointer to the descriptor */ + /* structure, or this object was on a free list. If it */ + /* it was anything but the last object on the free list, */ + /* we will misinterpret the next object on the free list as */ + /* the type descriptor, and get a 0 GC descriptor, which */ + /* is ideal. 
Unfortunately, we need to check for the last */ + /* object case explicitly. */ + if (0 == type_descr) { + /* Rarely executed. */ + GC_mark_stack_top_reg--; + continue; + } + descr = *(word *)(type_descr + - (descr - (DS_PER_OBJECT - INDIR_PER_OBJ_BIAS))); + } goto retry; } - } else { + } else /* Small object with length descriptor */ { GC_mark_stack_top_reg--; limit = (word *)(((ptr_t)current_p) + (word)descr); } /* The simple case in which we're scanning a range. */ credit -= (ptr_t)limit - (ptr_t)current_p; limit -= 1; - while (current_p <= limit) { - current = *current_p; - if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) { - PUSH_CONTENTS(current, GC_mark_stack_top_reg, - mark_stack_limit, current_p, exit2); + { +# define PREF_DIST 4 + +# ifndef SMALL_CONFIG + word deferred; + + /* Try to prefetch the next pointer to be examined asap. */ + /* Empirically, this also seems to help slightly without */ + /* prefetches, at least on linux/X86. Presumably this loop */ + /* ends up with less register pressure, and gcc thus ends up */ + /* generating slightly better code. Overall gcc code quality */ + /* for this loop is still not great. */ + for(;;) { + PREFETCH((ptr_t)limit - PREF_DIST*CACHE_LINE_SIZE); + deferred = *limit; + limit = (word *)((char *)limit - ALIGNMENT); + if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) { + PREFETCH(deferred); + break; + } + if (current_p > limit) goto next_object; + /* Unroll once, so we don't do too many of the prefetches */ + /* based on limit. */ + deferred = *limit; + limit = (word *)((char *)limit - ALIGNMENT); + if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) { + PREFETCH(deferred); + break; + } + if (current_p > limit) goto next_object; + } +# endif + + while (current_p <= limit) { + /* Empirically, unrolling this loop doesn't help a lot. */ + /* Since HC_PUSH_CONTENTS expands to a lot of code, */ + /* we don't. 
*/ + current = *current_p; + PREFETCH((ptr_t)current_p + PREF_DIST*CACHE_LINE_SIZE); + if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) { + /* Prefetch the contents of the object we just pushed. It's */ + /* likely we will need them soon. */ + PREFETCH(current); + HC_PUSH_CONTENTS((ptr_t)current, GC_mark_stack_top_reg, + mark_stack_limit, current_p, exit2); + } + current_p = (word *)((char *)current_p + ALIGNMENT); } - current_p = (word *)((char *)current_p + ALIGNMENT); + +# ifndef SMALL_CONFIG + /* We still need to mark the entry we previously prefetched. */ + /* We alrady know that it passes the preliminary pointer */ + /* validity test. */ + HC_PUSH_CONTENTS((ptr_t)deferred, GC_mark_stack_top_reg, + mark_stack_limit, current_p, exit4); + next_object:; +# endif } } GC_mark_stack_top = GC_mark_stack_top_reg; @@ -689,7 +773,7 @@ word p; return; } # endif - GC_PUSH_ONE_STACK(p, 0); + GC_PUSH_ONE_STACK(p, MARKED_FROM_REGISTER); } # ifdef __STDC__ diff --git a/boehm-gc/mark_rts.c b/boehm-gc/mark_rts.c index 0e84f2732fc..5bafd07ed89 100644 --- a/boehm-gc/mark_rts.c +++ b/boehm-gc/mark_rts.c @@ -412,9 +412,8 @@ ptr_t cold_gc_frame; if (0 == cold_gc_frame) return; # ifdef STACK_GROWS_DOWN GC_push_all_eager(GC_approx_sp(), cold_gc_frame); -# ifdef IA64 - --> fix this -# endif + /* For IA64, the register stack backing store is handled */ + /* in the thread-specific code. */ # else GC_push_all_eager( cold_gc_frame, GC_approx_sp() ); # endif @@ -505,6 +504,9 @@ ptr_t cold_gc_frame; /* In the USE_GENERIC_PUSH_REGS case, this is done inside */ /* GC_push_regs, so that we catch callee-save registers saved */ /* inside the GC_push_regs frame. */ + /* In the case of linux threads on Ia64, the hot section of */ + /* the main stack is marked here, but the register stack */ + /* backing store is handled in the threads-specific code. */ # endif if (GC_push_other_roots != 0) (*GC_push_other_roots)(); /* In the threads case, this also pushes thread stacks. 
*/ diff --git a/boehm-gc/misc.c b/boehm-gc/misc.c index 60505af16af..dd42961c4b3 100644 --- a/boehm-gc/misc.c +++ b/boehm-gc/misc.c @@ -42,11 +42,12 @@ # ifdef WIN32_THREADS GC_API CRITICAL_SECTION GC_allocate_ml; # else -# if defined(IRIX_THREADS) || defined(LINUX_THREADS) \ - || defined(IRIX_JDK_THREADS) +# if defined(IRIX_THREADS) || defined(IRIX_JDK_THREADS) \ + || (defined(LINUX_THREADS) && defined(USE_SPIN_LOCK)) pthread_t GC_lock_holder = NO_THREAD; # else -# if defined(HPUX_THREADS) +# if defined(HPUX_THREADS) \ + || defined(LINUX_THREADS) && !defined(USE_SPIN_LOCK) pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER; # else --> declare allocator lock here @@ -123,6 +124,15 @@ extern signed_word GC_mem_found; for (i = 8*sizeof(word) + 1; i <= 16 * sizeof(word); i++) { GC_size_map[i] = (ROUNDED_UP_WORDS(i) + 1) & (~1); } +# ifdef GC_GCJ_SUPPORT + /* Make all sizes up to 32 words predictable, so that a */ + /* compiler can statically perform the same computation, */ + /* or at least a computation that results in similar size */ + /* classes. */ + for (i = 16*sizeof(word) + 1; i <= 32 * sizeof(word); i++) { + GC_size_map[i] = (ROUNDED_UP_WORDS(i) + 3) & (~3); + } +# endif /* We leave the rest of the array to be filled in on demand. */ } @@ -443,7 +453,8 @@ void GC_init_inner() # ifdef MSWIN32 GC_init_win32(); # endif -# if defined(LINUX) && (defined(SPARC) || defined(IA64)) +# if defined(SEARCH_FOR_DATA_START) + /* This doesn't really work if the collector is in a shared library. */ GC_init_linux_data_start(); # endif # ifdef SOLARIS_THREADS @@ -819,6 +830,8 @@ struct callinfo info[NFRAMES]; #endif /* SAVE_CALL_CHAIN */ +/* Needed by SRC_M3, gcj, and should perhaps be the official interface */ +/* to GC_dont_gc. 
*/ void GC_enable() { GC_dont_gc--; diff --git a/boehm-gc/new_hblk.c b/boehm-gc/new_hblk.c index 9f32ae0dcb4..1e1273f854e 100644 --- a/boehm-gc/new_hblk.c +++ b/boehm-gc/new_hblk.c @@ -103,10 +103,10 @@ ptr_t ofl; p[3] = 0; p += 4; for (; p < lim; p += 4) { + PREFETCH_FOR_WRITE(p+64); p[0] = (word)(p-4); p[1] = 0; - p[2] = 0; - p[3] = 0; + CLEAR_DOUBLE(p+2); }; return((ptr_t)(p-4)); } @@ -141,6 +141,7 @@ ptr_t ofl; p[4] = (word)p; p += 8; for (; p < lim; p += 8) { + PREFETCH_FOR_WRITE(p+64); p[0] = (word)(p-4); p[4] = (word)p; }; @@ -179,6 +180,10 @@ int kind; /* Mark all objects if appropriate. */ if (IS_UNCOLLECTABLE(kind)) GC_set_hdr_marks(HDR(h)); + PREFETCH_FOR_WRITE((char *)h); + PREFETCH_FOR_WRITE((char *)h + 128); + PREFETCH_FOR_WRITE((char *)h + 256); + PREFETCH_FOR_WRITE((char *)h + 378); /* Handle small objects sizes more efficiently. For larger objects */ /* the difference is less significant. */ # ifndef SMALL_CONFIG diff --git a/boehm-gc/os_dep.c b/boehm-gc/os_dep.c index 5bc41f1ddf6..a972dec805c 100644 --- a/boehm-gc/os_dep.c +++ b/boehm-gc/os_dep.c @@ -66,7 +66,7 @@ # define NEED_FIND_LIMIT # endif -# if (defined(SUNOS4) & defined(DYNAMIC_LOADING)) && !defined(PCR) +# if (defined(SUNOS4) && defined(DYNAMIC_LOADING)) && !defined(PCR) # define NEED_FIND_LIMIT # endif @@ -75,7 +75,8 @@ # endif # if defined(LINUX) && \ - (defined(SPARC) || defined(IA64)) + (defined(POWERPC) || defined(SPARC) || defined(ALPHA) || defined(IA64) \ + || defined(MIPS)) # define NEED_FIND_LIMIT # endif @@ -142,7 +143,8 @@ # define OPT_PROT_EXEC 0 #endif -#if defined(LINUX) && (defined(SPARC) || defined(IA64)) +#if defined(SEARCH_FOR_DATA_START) + /* The following doesn't work if the GC is in a dynamic library. */ /* The I386 case can be handled without a search. The Alpha case */ /* used to be handled differently as well, but the rules changed */ /* for recent Linux versions. 
This seems to be the easiest way to */ @@ -641,19 +643,17 @@ ptr_t GC_get_stack_base() #ifdef LINUX_STACKBOTTOM # define STAT_SKIP 27 /* Number of fields preceding startstack */ - /* field in /proc/<pid>/stat */ + /* field in /proc/self/stat */ ptr_t GC_linux_stack_base(void) { - char buf[50]; FILE *f; char c; word result = 0; int i; - sprintf(buf, "/proc/%d/stat", getpid()); - f = fopen(buf, "r"); - if (NULL == f) ABORT("Couldn't open /proc/<pid>/stat"); + f = fopen("/proc/self/stat", "r"); + if (NULL == f) ABORT("Couldn't open /proc/self/stat"); c = getc(f); /* Skip the required number of fields. This number is hopefully */ /* constant across all Linux implementations. */ @@ -1874,6 +1874,9 @@ SIG_PF GC_old_segv_handler; /* Also old MSWIN32 ACCESS_VIOLATION filter */ # else # ifdef IA64 char * addr = si -> si_addr; + /* I believe this is claimed to work on all platforms for */ + /* Linux 2.3.47 and later. Hopefully we don't have to */ + /* worry about earlier kernels on IA64. */ # else # if defined(POWERPC) char * addr = (char *) (sc.regs->dar); @@ -2178,12 +2181,13 @@ word len; ((ptr_t)end_block - (ptr_t)start_block) + HBLKSIZE); } -#ifndef MSWIN32 +#if !defined(MSWIN32) && !defined(LINUX_THREADS) /* Replacement for UNIX system call. */ /* Other calls that write to the heap */ /* should be handled similarly. */ # if defined(__STDC__) && !defined(SUNOS4) # include <unistd.h> +# include <sys/uio.h> ssize_t read(int fd, void *buf, size_t nbyte) # else # ifndef LINT @@ -2200,10 +2204,12 @@ word len; GC_begin_syscall(); GC_unprotect_range(buf, (word)nbyte); -# ifdef IRIX5 +# if defined(IRIX5) || defined(LINUX_THREADS) /* Indirect system call may not always be easily available. */ /* We could call _read, but that would interfere with the */ /* libpthread interception of read. */ + /* On Linux, we have to be careful with the linuxthreads */ + /* read interception. 
*/ { struct iovec iov; @@ -2217,7 +2223,29 @@ word len; GC_end_syscall(); return(result); } -#endif /* !MSWIN32 */ +#endif /* !MSWIN32 && !LINUX */ + +#ifdef USE_LD_WRAP + /* We use the GNU ld call wrapping facility. */ + /* This requires that the linker be invoked with "--wrap read". */ + /* This can be done by passing -Wl,"--wrap read" to gcc. */ + /* I'm not sure that this actually wraps whatever version of read */ + /* is called by stdio. That code also mentions __read. */ +# include <unistd.h> + ssize_t __wrap_read(int fd, void *buf, size_t nbyte) + { + int result; + + GC_begin_syscall(); + GC_unprotect_range(buf, (word)nbyte); + result = __real_read(fd, buf, nbyte); + GC_end_syscall(); + return(result); + } + + /* We should probably also do this for __read, or whatever stdio */ + /* actually calls. */ +#endif /*ARGSUSED*/ GC_bool GC_page_was_ever_dirty(h) diff --git a/boehm-gc/reclaim.c b/boehm-gc/reclaim.c index 6e0f53bb058..1847e590aad 100644 --- a/boehm-gc/reclaim.c +++ b/boehm-gc/reclaim.c @@ -241,9 +241,18 @@ register word sz; /* Clear object, advance p to next object in the process */ q = p + sz; p++; /* Skip link field */ - while (p < q) { +# if defined(SMALL_CONFIG) && defined(ALIGN_DOUBLE) + /* We assert that sz must be even */ + *p++ = 0; + while (p < q) { + CLEAR_DOUBLE(p); + p += 2; + } +# else + while (p < q) { *p++ = 0; - } + } +# endif } word_no += sz; } @@ -321,8 +330,7 @@ register ptr_t list; p[start_displ] = (word)list; \ list = (ptr_t)(p+start_displ); \ p[start_displ+1] = 0; \ - p[start_displ+2] = 0; \ - p[start_displ+3] = 0; \ + CLEAR_DOUBLE(p + start_displ + 2); \ INCR_WORDS(4); \ } @@ -814,6 +822,12 @@ int report_if_found; /* Abort if a GC_reclaimable object is found */ /* Go through all heap blocks (in hblklist) and reclaim unmarked objects */ /* or enqueue the block for later processing. */ GC_apply_to_all_blocks(GC_reclaim_block, (word)report_if_found); + +# ifdef EAGER_SWEEP + /* This is a very stupid thing to do. 
We make it possible anyway, */ + /* so that you can convince yourself that it really is very stupid. */ + GC_reclaim_all((GC_stop_func)0, FALSE); +# endif } @@ -847,7 +861,7 @@ int kind; * Abort and return FALSE when/if (*stop_func)() returns TRUE. * If this returns TRUE, then it's safe to restart the world * with incorrectly cleared mark bits. - * If ignore_old is TRUE, then reclain only blocks that have been + * If ignore_old is TRUE, then reclaim only blocks that have been * recently reclaimed, and discard the rest. * Stop_func may be 0. */ diff --git a/boehm-gc/solaris_pthreads.c b/boehm-gc/solaris_pthreads.c index 91a0f01b33f..97ab13bbd46 100644 --- a/boehm-gc/solaris_pthreads.c +++ b/boehm-gc/solaris_pthreads.c @@ -76,14 +76,16 @@ GC_pthread_create(pthread_t *new_thread, pthread_attr_t attr; word my_flags = 0; int flag; - void * stack; - size_t stack_size; + void * stack = 0; + size_t stack_size = 0; int n; struct sched_param schedparam; - (void)pthread_attr_getstacksize(attr_in, &stack_size); - (void)pthread_attr_getstackaddr(attr_in, &stack); (void)pthread_attr_init(&attr); + if (attr_in != 0) { + (void)pthread_attr_getstacksize(attr_in, &stack_size); + (void)pthread_attr_getstackaddr(attr_in, &stack); + } LOCK(); if (!GC_thr_initialized) { @@ -93,7 +95,11 @@ GC_pthread_create(pthread_t *new_thread, if (stack == 0) { if (stack_size == 0) - stack_size = GC_min_stack_sz; + stack_size = 1048576; + /* ^-- 1 MB (this was GC_min_stack_sz, but that + * violates the pthread_create documentation which + * says the default value if none is supplied is + * 1MB) */ else stack_size += thr_min_stack(); @@ -109,20 +115,22 @@ GC_pthread_create(pthread_t *new_thread, } (void)pthread_attr_setstacksize(&attr, stack_size); (void)pthread_attr_setstackaddr(&attr, stack); - (void)pthread_attr_getscope(attr_in, &n); - (void)pthread_attr_setscope(&attr, n); - (void)pthread_attr_getschedparam(attr_in, &schedparam); - (void)pthread_attr_setschedparam(&attr, &schedparam); - 
(void)pthread_attr_getschedpolicy(attr_in, &n); - (void)pthread_attr_setschedpolicy(&attr, n); - (void)pthread_attr_getinheritsched(attr_in, &n); - (void)pthread_attr_setinheritsched(&attr, n); - - (void)pthread_attr_getdetachstate(attr_in, &flag); - if (flag == PTHREAD_CREATE_DETACHED) { - my_flags |= DETACHED; + if (attr_in != 0) { + (void)pthread_attr_getscope(attr_in, &n); + (void)pthread_attr_setscope(&attr, n); + (void)pthread_attr_getschedparam(attr_in, &schedparam); + (void)pthread_attr_setschedparam(&attr, &schedparam); + (void)pthread_attr_getschedpolicy(attr_in, &n); + (void)pthread_attr_setschedpolicy(&attr, n); + (void)pthread_attr_getinheritsched(attr_in, &n); + (void)pthread_attr_setinheritsched(&attr, n); + + (void)pthread_attr_getdetachstate(attr_in, &flag); + if (flag == PTHREAD_CREATE_DETACHED) { + my_flags |= DETACHED; + } + (void)pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); } - (void)pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); /* * thr_create can call malloc(), which if redirected will * attempt to acquire the allocation lock. diff --git a/boehm-gc/solaris_threads.c b/boehm-gc/solaris_threads.c index 65b2c6517b1..c3b0b15b97c 100644 --- a/boehm-gc/solaris_threads.c +++ b/boehm-gc/solaris_threads.c @@ -661,7 +661,8 @@ void GC_my_stack_limits() } -/* We hold allocation lock. We assume the world is stopped. */ +/* We hold allocation lock. Should do exactly the right thing if the */ +/* world is stopped. Should not fail if it isn't. 
*/ void GC_push_all_stacks() { register int i; @@ -900,7 +901,7 @@ GC_thr_create(void *stack_base, size_t stack_size, } GC_multithreaded++; if (stack == 0) { - if (stack_size == 0) stack_size = GC_min_stack_sz; + if (stack_size == 0) stack_size = 1024*1024; stack = (void *)GC_stack_alloc(&stack_size); if (stack == 0) { GC_multithreaded--; diff --git a/boehm-gc/test.c b/boehm-gc/test.c index 43b09010f80..96a54150a86 100644 --- a/boehm-gc/test.c +++ b/boehm-gc/test.c @@ -15,6 +15,8 @@ /* An incomplete test for the garbage collector. */ /* Some more obscure entry points are not tested at all. */ +# undef GC_BUILD + # if defined(mips) && defined(SYSTYPE_BSD43) /* MIPS RISCOS 4 */ # else @@ -147,7 +149,6 @@ sexpr y; register sexpr r; r = (sexpr) GC_MALLOC_UNCOLLECTABLE(sizeof(struct SEXPR)); -assert(GC_is_marked(r)); if (r == 0) { (void)GC_printf0("Out of memory\n"); exit(1); @@ -157,6 +158,76 @@ assert(GC_is_marked(r)); return(r); } +#ifdef GC_GCJ_SUPPORT + +#include "gc_mark.h" +#include "dbg_mlc.h" +#include "include/gc_gcj.h" + +/* The following struct emulates the vtable in gcj. */ +/* This assumes the default value of MARK_DESCR_OFFSET. */ +struct fake_vtable { + void * dummy; /* class pointer in real gcj. */ + size_t descr; +}; + +struct fake_vtable gcj_class_struct1 = { 0, sizeof(struct SEXPR) + + sizeof(struct fake_vtable *) }; + /* length based descriptor. */ +struct fake_vtable gcj_class_struct2 = + { 0, (3l << (CPP_WORDSZ - 3)) | DS_BITMAP}; + /* Bitmap based descriptor. */ + +struct ms_entry * fake_gcj_mark_proc(word * addr, + struct ms_entry *mark_stack_ptr, + struct ms_entry *mark_stack_limit, + word env ) +{ + sexpr x; + if (1 == env) { + /* Object allocated with debug allocator. */ + addr = (word *)USR_PTR_FROM_BASE(addr); + } + x = (sexpr)(addr + 1); /* Skip the vtable pointer. */ + /* We could just call PUSH_CONTENTS directly here. But any real */ + /* real client would try to filter out the obvious misses. 
*/ + if (0 != x -> sexpr_cdr) { + PUSH_CONTENTS((ptr_t)(x -> sexpr_cdr), mark_stack_ptr, + mark_stack_limit, &(x -> sexpr_cdr), exit1); + } + if ((ptr_t)(x -> sexpr_car) > GC_least_plausible_heap_addr) { + PUSH_CONTENTS((ptr_t)(x -> sexpr_car), mark_stack_ptr, + mark_stack_limit, &(x -> sexpr_car), exit2); + } + return(mark_stack_ptr); +} + +sexpr gcj_cons(x, y) +sexpr x; +sexpr y; +{ + GC_word * r; + sexpr result; + static int count = 0; + + if (++count & 1) { + r = (GC_word *) GC_GCJ_FAST_MALLOC(3, &gcj_class_struct1); + } else { + r = (GC_word *) GC_GCJ_MALLOC(sizeof(struct SEXPR) + + sizeof(struct fake_vtable*), + &gcj_class_struct2); + } + if (r == 0) { + (void)GC_printf0("Out of memory\n"); + exit(1); + } + result = (sexpr)(r + 1); + result -> sexpr_car = x; + result -> sexpr_cdr = y; + return(result); +} +#endif + /* Return reverse(x) concatenated with y */ sexpr reverse1(x, y) sexpr x, y; @@ -184,6 +255,35 @@ int low, up; } } +#ifdef GC_GCJ_SUPPORT +/* Return reverse(x) concatenated with y */ +sexpr gcj_reverse1(x, y) +sexpr x, y; +{ + if (is_nil(x)) { + return(y); + } else { + return( gcj_reverse1(cdr(x), gcj_cons(car(x), y)) ); + } +} + +sexpr gcj_reverse(x) +sexpr x; +{ + return( gcj_reverse1(x, nil) ); +} + +sexpr gcj_ints(low, up) +int low, up; +{ + if (low > up) { + return(nil); + } else { + return(gcj_cons(gcj_cons(INT_TO_SEXPR(low), nil), gcj_ints(low+1, up))); + } +} +#endif /* GC_GCJ_SUPPORT */ + /* To check uncollectable allocation we build lists with disguised cdr */ /* pointers, and make sure they don't go away. 
*/ sexpr uncollectable_ints(low, up) @@ -367,7 +467,12 @@ void reverse_test() g[799] = ints(1,18); h = (sexpr *)GC_MALLOC(1025 * sizeof(sexpr)); h = (sexpr *)GC_REALLOC((GC_PTR)h, 2000 * sizeof(sexpr)); - h[1999] = ints(1,19); +# ifdef GC_GCJ_SUPPORT + h[1999] = gcj_ints(1,200); + h[1999] = gcj_reverse(h[1999]); +# else + h[1999] = ints(1,200); +# endif /* Try to force some collections and reuse of small list elements */ for (i = 0; i < 10; i++) { (void)ints(1, BIG); @@ -412,7 +517,10 @@ void reverse_test() check_uncollectable_ints(d, 1, 100); check_ints(f[5], 1,17); check_ints(g[799], 1,18); - check_ints(h[1999], 1,19); +# ifdef GC_GCJ_SUPPORT + h[1999] = gcj_reverse(h[1999]); +# endif + check_ints(h[1999], 1,200); # ifndef THREADS a = 0; # endif @@ -759,6 +867,10 @@ void typed_test() old = 0; for (i = 0; i < 4000; i++) { new = (GC_word *) GC_malloc_explicitly_typed(4 * sizeof(GC_word), d1); + if (0 != new[0] || 0 != new[1]) { + GC_printf0("Bad initialization by GC_malloc_explicitly_typed\n"); + FAIL; + } new[0] = 17; new[1] = (GC_word)old; old = new; @@ -782,6 +894,10 @@ void typed_test() new = (GC_word *) GC_calloc_explicitly_typed(1001, 3 * sizeof(GC_word), d2); + if (0 != new[0] || 0 != new[1]) { + GC_printf0("Bad initialization by GC_malloc_explicitly_typed\n"); + FAIL; + } } new[0] = 17; new[1] = (GC_word)old; @@ -906,6 +1022,10 @@ void run_one_test() /* Test floating point alignment */ *(double *)GC_MALLOC(sizeof(double)) = 1.0; *(double *)GC_MALLOC(sizeof(double)) = 1.0; +# ifdef GC_GCJ_SUPPORT + GC_REGISTER_DISPLACEMENT(sizeof(struct fake_vtable *)); + GC_init_gcj_malloc(0, (void *)fake_gcj_mark_proc); +# endif /* Repeated list reversal test. 
*/ reverse_test(); # ifdef PRINTSTATS @@ -1032,7 +1152,7 @@ void SetMinimumStack(long minSize) #if !defined(PCR) && !defined(SOLARIS_THREADS) && !defined(WIN32_THREADS) \ && !defined(IRIX_THREADS) && !defined(LINUX_THREADS) \ && !defined(HPUX_THREADS) || defined(LINT) -#ifdef MSWIN32 +#if defined(MSWIN32) && !defined(__MINGW32__) int APIENTRY WinMain(HINSTANCE instance, HINSTANCE prev, LPSTR cmd, int n) #else int main() @@ -1114,19 +1234,24 @@ int APIENTRY WinMain(HINSTANCE instance, HINSTANCE prev, LPSTR cmd, int n) # endif InitializeCriticalSection(&incr_cs); (void) GC_set_warn_proc(warn_proc); - for (i = 0; i < NTEST; i++) { +# if NTEST > 0 + for (i = 0; i < NTEST; i++) { h[i] = (HANDLE)_beginthreadex(NULL, 0, thr_run_one_test, 0, 0, &thread_id); if (h[i] == (HANDLE)-1) { (void)GC_printf1("Thread creation failed %lu\n", (unsigned long)GetLastError()); FAIL; } - } + } +# endif /* NTEST > 0 */ run_one_test(); - for (i = 0; i < NTEST; i++) +# if NTEST > 0 + for (i = 0; i < NTEST; i++) { if (WaitForSingleObject(h[i], INFINITE) != WAIT_OBJECT_0) { (void)GC_printf1("Thread wait failed %lu\n", (unsigned long)GetLastError()); FAIL; } + } +# endif /* NTEST > 0 */ check_heap_stats(); (void)fflush(stdout); return(0); diff --git a/boehm-gc/test_cpp.cc b/boehm-gc/test_cpp.cc index 3160b0984bb..0d45077658f 100644 --- a/boehm-gc/test_cpp.cc +++ b/boehm-gc/test_cpp.cc @@ -37,6 +37,12 @@ extern "C" { #ifdef MSWIN32 # include <windows.h> #endif +#ifdef GC_NAME_CONFLICT +# define USE_GC UseGC + struct foo * GC; +#else +# define USE_GC GC +#endif #define my_assert( e ) \ @@ -214,7 +220,7 @@ int APIENTRY WinMain( for (i = 0; i < 1000; i++) { C* c = new C( 2 ); C c1( 2 ); /* stack allocation should work too */ - D* d = ::new (GC, D::CleanUp, (void*) i) D( i ); + D* d = ::new (USE_GC, D::CleanUp, (void*) i) D( i ); F* f = new F; if (0 == i % 10) delete c;} @@ -222,9 +228,9 @@ int APIENTRY WinMain( drop the references to them immediately, forcing many collections. 
*/ for (i = 0; i < 1000000; i++) { - A* a = new (GC) A( i ); + A* a = new (USE_GC) A( i ); B* b = new B( i ); - b = new (GC) B( i ); + b = new (USE_GC) B( i ); if (0 == i % 10) { B::Deleting( 1 ); delete b; diff --git a/boehm-gc/threadlibs.c b/boehm-gc/threadlibs.c index df4eb77bb1d..b2e6a10b0b9 100644 --- a/boehm-gc/threadlibs.c +++ b/boehm-gc/threadlibs.c @@ -3,7 +3,16 @@ int main() { -# if defined(IRIX_THREADS) || defined(LINUX_THREADS) +# if defined(LINUX_THREADS) +# ifdef USE_LD_WRAP + printf("-Wl,\"--wrap read\" -Wl,\"--wrap dlopen\" " + "-Wl,\"--wrap pthread_create\" -Wl,\"--wrap pthread_join\" " + "-Wl,\"--wrap pthread_sigmask\" -lpthread\n"); +# else + printf("-lpthread\n"); +# endif +# endif +# if defined(IRIX_THREADS) printf("-lpthread\n"); # endif # if defined(HPUX_THREADS) diff --git a/boehm-gc/typd_mlc.c b/boehm-gc/typd_mlc.c index 74f455d926c..ce769d60fec 100644 --- a/boehm-gc/typd_mlc.c +++ b/boehm-gc/typd_mlc.c @@ -430,7 +430,7 @@ word env; if (bm & 1) { current = *current_p; if ((ptr_t)current >= least_ha && (ptr_t)current <= greatest_ha) { - PUSH_CONTENTS(current, mark_stack_ptr, + PUSH_CONTENTS((ptr_t)current, mark_stack_ptr, mark_stack_limit, current_p, exit1); } } @@ -665,6 +665,7 @@ DCL_LOCK_STATE; # endif } else { *opp = obj_link(op); + obj_link(op) = 0; GC_words_allocd += lw; FASTUNLOCK(); } @@ -708,6 +709,7 @@ DCL_LOCK_STATE; # endif } else { *opp = obj_link(op); + obj_link(op) = 0; GC_words_allocd += lw; FASTUNLOCK(); } @@ -717,7 +719,7 @@ DCL_LOCK_STATE; lw = BYTES_TO_WORDS(GC_size(op)); } if (op != NULL) - ((word *)op)[lw - 1] = d; + ((word *)op)[lw - 1] = d; return((GC_PTR) op); } @@ -772,6 +774,7 @@ DCL_LOCK_STATE; # endif } else { *opp = obj_link(op); + obj_link(op) = 0; GC_words_allocd += lw; FASTUNLOCK(); } diff --git a/boehm-gc/version.h b/boehm-gc/version.h index df0770c9b04..c7095488bd5 100644 --- a/boehm-gc/version.h +++ b/boehm-gc/version.h @@ -1,9 +1,12 @@ #define GC_VERSION_MAJOR 5 #define GC_VERSION_MINOR 0 -#define 
GC_ALPHA_VERSION 4 +#define GC_ALPHA_VERSION 6 # define GC_NOT_ALPHA 0xff +/* This is really an unreleased version which doesn't have a real version */ +/* number. */ + #ifndef GC_NO_VERSION_VAR unsigned GC_version = ((GC_VERSION_MAJOR << 16) | (GC_VERSION_MINOR << 8) | GC_ALPHA_VERSION); diff --git a/boehm-gc/win32_threads.c b/boehm-gc/win32_threads.c index f6f74bd1111..469fd232003 100644 --- a/boehm-gc/win32_threads.c +++ b/boehm-gc/win32_threads.c @@ -2,8 +2,10 @@ #include "gc_priv.h" +#if 0 #define STRICT #include <windows.h> +#endif #define MAX_THREADS 64 @@ -61,7 +63,7 @@ ptr_t GC_current_stackbottom() ABORT("no thread table entry for current thread"); } -ptr_t GC_get_lo_stack_addr(ptr_t s) +static ptr_t GC_get_lo_stack_addr(ptr_t s) { ptr_t bottom; MEMORY_BASIC_INFORMATION info; @@ -81,7 +83,7 @@ void GC_push_all_stacks() if (thread_table[i].stack) { ptr_t bottom = GC_get_lo_stack_addr(thread_table[i].stack); if (thread_table[i].id == thread_id) - GC_push_all(&i, thread_table[i].stack); + GC_push_all_stack(&i, thread_table[i].stack); else { thread_table[i].context.ContextFlags = (CONTEXT_INTEGER|CONTEXT_CONTROL); |