summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile27
-rw-r--r--Makefile.direct27
-rw-r--r--Makefile.in1
-rw-r--r--NT_STATIC_THREADS_MAKEFILE4
-rw-r--r--allchblk.c12
-rw-r--r--alloc.c10
-rwxr-xr-xconfigure36
-rw-r--r--configure.ac18
-rw-r--r--doc/README2
-rw-r--r--doc/README.changes56
-rw-r--r--doc/README.linux8
-rw-r--r--doc/doc.am1
-rw-r--r--doc/gcdescr.html63
-rw-r--r--doc/overview.html446
-rw-r--r--headers.c2
-rw-r--r--include/gc.h29
-rw-r--r--include/gc_config_macros.h21
-rw-r--r--include/gc_inline.h15
-rw-r--r--include/private/gc_locks.h104
-rw-r--r--include/private/gc_pmark.h45
-rw-r--r--include/private/gc_priv.h78
-rw-r--r--include/private/gcconfig.h22
-rw-r--r--include/private/pthread_support.h5
-rw-r--r--include/private/thread_local_alloc.h58
-rw-r--r--mach_dep.c80
-rw-r--r--malloc.c16
-rw-r--r--mallocx.c79
-rw-r--r--mark.c168
-rw-r--r--mark_rts.c2
-rw-r--r--misc.c35
-rw-r--r--os_dep.c126
-rw-r--r--pthread_stop_world.c12
-rw-r--r--pthread_support.c54
-rw-r--r--reclaim.c11
-rw-r--r--setjmp_t.c26
-rw-r--r--sparc_mach_dep.S4
-rw-r--r--tests/leak_test.c4
-rw-r--r--tests/test.c100
-rw-r--r--tests/test_cpp.cc8
-rw-r--r--tests/thread_leak_test.c1
-rw-r--r--thread_local_alloc.c47
-rw-r--r--threadlibs.c6
-rw-r--r--typd_mlc.c24
-rw-r--r--version.h2
-rwxr-xr-xwin32_threads.c954
45 files changed, 1851 insertions, 998 deletions
diff --git a/Makefile b/Makefile
index 24326c00..78e81596 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ VPATH= $(srcdir)
# Atomic_ops installation directory. If this doesn't exist, we create
# it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
@@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \
DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
doc/README.amiga doc/README.cords doc/debugging.html \
- doc/porting.html \
+ doc/porting.html doc/overview.html \
doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
doc/README.sgi doc/README.solaris2 doc/README.uts \
doc/README.win32 doc/barrett_diagram doc/README \
@@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \
mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
$(srcdir)/include/private/gc_pmark.h
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
- $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+ $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+ $(srcdir)/include/private/thread_local_alloc.h
dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
@@ -465,6 +466,7 @@ tests:
base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
echo > base_lib
rm -f dont_ar_1
+ cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
./if_mach SPARC SUNOS5 touch dont_ar_1
./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
./if_mach M68K AMIGA touch dont_ar_1
@@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c
# SunOS5 shared library version of the collector
sunos5gc.so: $(OBJS) dyn_load_sunos53.o
- $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+ $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
ln sunos5gc.so libgc.so
# Alpha/OSF shared library version of the collector
@@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \
$(srcdir)/ia64_save_regs_in_stack.s \
$(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
rm -f mach_dep.o
- ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
- ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
- ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
- ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
- ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
- ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+ ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+ ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+ ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
@@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS)
# If an optimized setjmp_test generates a segmentation fault,
# odds are your compiler is broken. Gctest may still work.
# Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
$(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
test: KandRtest cord/cordtest
diff --git a/Makefile.direct b/Makefile.direct
index 24326c00..78e81596 100644
--- a/Makefile.direct
+++ b/Makefile.direct
@@ -32,7 +32,7 @@ VPATH= $(srcdir)
# Atomic_ops installation directory. If this doesn't exist, we create
# it from the included libatomic_ops distribution.
-AO_VERSION=1.0
+AO_VERSION=1.1
AO_SRC_DIR=$(srcdir)/libatomic_ops-$(AO_VERSION)
AO_INSTALL_DIR=$(srcdir)/libatomic_ops-install
@@ -349,7 +349,7 @@ SRCS= $(CSRCS) mips_sgi_mach_dep.s rs6000_mach_dep.s alpha_mach_dep.S \
DOC_FILES= README.QUICK doc/README.Mac doc/README.MacOSX doc/README.OS2 \
doc/README.amiga doc/README.cords doc/debugging.html \
- doc/porting.html \
+ doc/porting.html doc/overview.html \
doc/README.dj doc/README.hp doc/README.linux doc/README.rs6000 \
doc/README.sgi doc/README.solaris2 doc/README.uts \
doc/README.win32 doc/barrett_diagram doc/README \
@@ -450,8 +450,9 @@ $(OBJS) tests/test.o dyn_load.o dyn_load_sunos53.o: \
mark.o typd_mlc.o finalize.o ptr_chck.o: $(srcdir)/include/gc_mark.h \
$(srcdir)/include/private/gc_pmark.h
-specific.o pthread_support.o: $(srcdir)/include/private/specific.h \
- $(srcdir)/include/gc_inline.h
+specific.o pthread_support.o thread_local_alloc.o win32_threads.o: \
+ $(srcdir)/include/private/specific.h $(srcdir)/include/gc_inline.h \
+ $(srcdir)/include/private/thread_local_alloc.h
dbg_mlc.o gcj_mlc.o: $(srcdir)/include/private/dbg_mlc.h
@@ -465,6 +466,7 @@ tests:
base_lib gc.a: $(OBJS) dyn_load.o $(UTILS)
echo > base_lib
rm -f dont_ar_1
+ cp $(AO_INSTALL_DIR)/lib/libatomic_ops.a gc.a
./if_mach SPARC SUNOS5 touch dont_ar_1
./if_mach SPARC SUNOS5 $(AR) rus gc.a $(OBJS) dyn_load.o
./if_mach M68K AMIGA touch dont_ar_1
@@ -513,7 +515,7 @@ dyn_load_sunos53.o: dyn_load.c
# SunOS5 shared library version of the collector
sunos5gc.so: $(OBJS) dyn_load_sunos53.o
- $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o -ldl
+ $(CC) -G -o sunos5gc.so $(OBJS) dyn_load_sunos53.o $(AO_INSTALL_DIR)/lib/libatomic_ops.a -ldl
ln sunos5gc.so libgc.so
# Alpha/OSF shared library version of the collector
@@ -556,14 +558,11 @@ mach_dep.o: $(srcdir)/mach_dep.c $(srcdir)/mips_sgi_mach_dep.s \
$(srcdir)/ia64_save_regs_in_stack.s \
$(srcdir)/sparc_netbsd_mach_dep.s $(UTILS)
rm -f mach_dep.o
- ./if_mach MIPS IRIX5 $(CC) -c -o mach_dep.o $(srcdir)/mips_sgi_mach_dep.s
- ./if_mach MIPS RISCOS $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach MIPS ULTRIX $(AS) -o mach_dep.o $(srcdir)/mips_ultrix_mach_dep.s
- ./if_mach POWERPC DARWIN $(AS) -o mach_dep.o $(srcdir)/powerpc_darwin_mach_dep.s
- ./if_mach ALPHA LINUX $(CC) -c -o mach_dep.o $(srcdir)/alpha_mach_dep.S
- ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep.o $(srcdir)/sparc_mach_dep.S
- ./if_mach SPARC OPENBSD $(AS) -o mach_dep.o $(srcdir)/sparc_sunos4_mach_dep.s
- ./if_mach SPARC NETBSD $(AS) -o mach_dep.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC SUNOS5 $(CC) -c -o mach_dep2.o $(srcdir)/sparc_mach_dep.S
+ ./if_mach SPARC OPENBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_sunos4_mach_dep.s
+ ./if_mach SPARC NETBSD $(AS) -o mach_dep2.o $(srcdir)/sparc_netbsd_mach_dep.s
+ ./if_mach SPARC "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
+ ./if_mach SPARC "" ld -r -o mach_dep.o mach_dep1.o mach_dep2.o
./if_mach IA64 "" as $(AS_ABI_FLAG) -o ia64_save_regs_in_stack.o $(srcdir)/ia64_save_regs_in_stack.s
./if_mach IA64 "" $(CC) -c -o mach_dep1.o $(SPECIALCFLAGS) $(srcdir)/mach_dep.c
./if_mach IA64 "" ld -r -o mach_dep.o mach_dep1.o ia64_save_regs_in_stack.o
@@ -638,7 +637,7 @@ gctest: tests/test.o gc.a $(UTILS)
# If an optimized setjmp_test generates a segmentation fault,
# odds are your compiler is broken. Gctest may still work.
# Try compiling setjmp_t.c unoptimized.
-setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS)
+setjmp_test: $(srcdir)/setjmp_t.c $(srcdir)/include/gc.h $(UTILS) $(AO_INSTALL_DIR)
$(CC) $(CFLAGS) -o setjmp_test $(srcdir)/setjmp_t.c
test: KandRtest cord/cordtest
diff --git a/Makefile.in b/Makefile.in
index 0c881dbb..68708efa 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -527,6 +527,7 @@ dist_pkgdata_DATA = \
doc/README.solaris2 \
doc/README.uts \
doc/README.win32 \
+ doc/overview.html \
doc/tree.html \
doc/leak.html \
doc/gcinterface.html \
diff --git a/NT_STATIC_THREADS_MAKEFILE b/NT_STATIC_THREADS_MAKEFILE
index 91fb7f6b..f37d6d15 100644
--- a/NT_STATIC_THREADS_MAKEFILE
+++ b/NT_STATIC_THREADS_MAKEFILE
@@ -10,8 +10,8 @@ CPU=$(MY_CPU)
# should do, since we only need the headers.
# We assume this was manually unpacked, since I'm not sure there is
# a Windows standard command line tool to do this.
-AO_VERSION=0.6
-AO_SRC_DIR=$(srcdir)/atomic_ops-$(AO_VERSION)
+AO_VERSION=1.1
+AO_SRC_DIR=libatomic_ops-$(AO_VERSION)/src
AO_INCLUDE_DIR=$(AO_SRC_DIR)
OBJS= alloc.obj reclaim.obj allchblk.obj misc.obj mach_dep.obj os_dep.obj mark_rts.obj headers.obj mark.obj obj_map.obj blacklst.obj finalize.obj new_hblk.obj dbg_mlc.obj malloc.obj stubborn.obj dyn_load.obj typd_mlc.obj ptr_chck.obj gc_cpp.obj mallocx.obj win32_threads.obj
diff --git a/allchblk.c b/allchblk.c
index ad55beda..997580ac 100644
--- a/allchblk.c
+++ b/allchblk.c
@@ -560,7 +560,7 @@ int index; /* Index of free list */
/* free blocks in GC_add_to_fl. */
# endif
# ifdef USE_MUNMAP
- hhdr -> hb_last_reclaimed = GC_gc_no;
+ hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
hhdr -> hb_sz = h_size;
GC_add_to_fl(h, hhdr);
@@ -568,7 +568,7 @@ int index; /* Index of free list */
}
struct hblk *
-GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n);
+GC_allochblk_nth(size_t sz/* bytes */, int kind, unsigned char flags, int n);
/*
* Allocate (and return pointer to) a heap block
@@ -580,7 +580,7 @@ GC_allochblk_nth(word sz/* bytes */, int kind, unsigned char flags, int n);
* The client is responsible for clearing the block, if necessary.
*/
struct hblk *
-GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */)
+GC_allochblk(size_t sz, int kind, unsigned char flags/* IGNORE_OFF_PAGE or 0 */)
{
word blocks;
int start_list;
@@ -603,7 +603,7 @@ GC_allochblk(size_t sz, int kind, unsigned flags/* IGNORE_OFF_PAGE or 0 */)
* Unlike the above, sz is in bytes.
*/
struct hblk *
-GC_allochblk_nth(word sz, int kind, unsigned char flags, int n)
+GC_allochblk_nth(size_t sz, int kind, unsigned char flags, int n)
{
struct hblk *hbp;
hdr * hhdr; /* Header corr. to hbp */
@@ -822,7 +822,7 @@ signed_word size;
GC_remove_counts(hbp, (word)size);
hhdr->hb_sz = size;
# ifdef USE_MUNMAP
- hhdr -> hb_last_reclaimed = GC_gc_no;
+ hhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
/* Check for duplicate deallocation in the easy case */
@@ -849,7 +849,7 @@ signed_word size;
GC_remove_from_fl(prevhdr, FL_UNKNOWN);
prevhdr -> hb_sz += hhdr -> hb_sz;
# ifdef USE_MUNMAP
- prevhdr -> hb_last_reclaimed = GC_gc_no;
+ prevhdr -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
GC_remove_header(hbp);
hbp = prev;
diff --git a/alloc.c b/alloc.c
index 1be45164..0e292f7b 100644
--- a/alloc.c
+++ b/alloc.c
@@ -569,8 +569,16 @@ void GC_clear_fl_marks(ptr_t q)
}
bit_no = MARK_BIT_NO((ptr_t)p - (ptr_t)h, sz);
if (mark_bit_from_hdr(hhdr, bit_no)) {
+ int n_marks = hhdr -> hb_n_marks - 1;
clear_mark_bit_from_hdr(hhdr, bit_no);
- --hhdr -> hb_n_marks;
+# ifdef PARALLEL_MARK
+ /* Appr. count, don't decrement to zero! */
+ if (0 != n_marks) {
+ hhdr -> hb_n_marks = n_marks;
+ }
+# else
+ hhdr -> hb_n_marks = n_marks;
+# endif
}
GC_bytes_found -= sz;
}
diff --git a/configure b/configure
index 36c8a6f5..7e58904c 100755
--- a/configure
+++ b/configure
@@ -1,7 +1,7 @@
#! /bin/sh
# From configure.ac Revision: 1.2 .
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for gc 7.0alpha4.
+# Generated by GNU Autoconf 2.59 for gc 7.0alpha5.
#
# Report bugs to <Hans.Boehm@hp.com>.
#
@@ -429,8 +429,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='gc'
PACKAGE_TARNAME='gc'
-PACKAGE_VERSION='7.0alpha4'
-PACKAGE_STRING='gc 7.0alpha4'
+PACKAGE_VERSION='7.0alpha5'
+PACKAGE_STRING='gc 7.0alpha5'
PACKAGE_BUGREPORT='Hans.Boehm@hp.com'
ac_unique_file="gcj_mlc.c"
@@ -957,7 +957,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gc 7.0alpha4 to adapt to many kinds of systems.
+\`configure' configures gc 7.0alpha5 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1024,7 +1024,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gc 7.0alpha4:";;
+ short | recursive ) echo "Configuration of gc 7.0alpha5:";;
esac
cat <<\_ACEOF
@@ -1174,7 +1174,7 @@ fi
test -n "$ac_init_help" && exit 0
if $ac_init_version; then
cat <<\_ACEOF
-gc configure 7.0alpha4
+gc configure 7.0alpha5
generated by GNU Autoconf 2.59
Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1188,7 +1188,7 @@ cat >&5 <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gc $as_me 7.0alpha4, which was
+It was created by gc $as_me 7.0alpha5, which was
generated by GNU Autoconf 2.59. Invocation command line was
$ $0 $@
@@ -1960,7 +1960,7 @@ fi
# Define the identity of the package.
PACKAGE='gc'
- VERSION='7.0alpha4'
+ VERSION='7.0alpha5'
cat >>confdefs.h <<_ACEOF
@@ -4595,14 +4595,14 @@ echo $ECHO_N "checking which machine-dependent code should be used... $ECHO_C" >
machdep=
case "$host" in
alpha-*-openbsd*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
{ echo "$as_me:$LINENO: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&5
echo "$as_me: WARNING: OpenBSD/Alpha without dlopen(). Shared library support is disabled" >&2;}
fi
;;
alpha*-*-linux*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
;;
i?86-*-solaris2.[89] | i?86-*-solaris2.1?)
cat >>confdefs.h <<\_ACEOF
@@ -4611,7 +4611,7 @@ _ACEOF
;;
mipstx39-*-elf*)
- machdep="mips_ultrix_mach_dep.lo"
+ machdep="mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define STACKBASE __stackbase
_ACEOF
@@ -4622,31 +4622,31 @@ _ACEOF
;;
mips-dec-ultrix*)
- machdep="mips_ultrix_mach-dep.lo"
+ machdep="mach-dep.lo"
;;
mips-nec-sysv*|mips-unknown-sysv*)
;;
mips*-*-linux*)
;;
mips-*-*)
- machdep="mips_sgi_mach_dep.lo"
+ machdep="mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define NO_EXECUTE_PERMISSION 1
_ACEOF
;;
sparc-*-netbsd*)
- machdep="sparc_netbsd_mach_dep.lo"
+ machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
;;
sparc-sun-solaris2.3)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
cat >>confdefs.h <<\_ACEOF
#define SUNOS53_SHARED_LIB 1
_ACEOF
;;
sparc*-sun-solaris2.*)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
;;
ia64-*-*)
machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
@@ -10714,7 +10714,7 @@ _ASBOX
} >&5
cat >&5 <<_CSEOF
-This file was extended by gc $as_me 7.0alpha4, which was
+This file was extended by gc $as_me 7.0alpha5, which was
generated by GNU Autoconf 2.59. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -10772,7 +10772,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-gc config.status 7.0alpha4
+gc config.status 7.0alpha5
configured by $0, generated by GNU Autoconf 2.59,
with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
diff --git a/configure.ac b/configure.ac
index 9becbad7..34108046 100644
--- a/configure.ac
+++ b/configure.ac
@@ -17,7 +17,7 @@ dnl Process this file with autoconf to produce configure.
# Initialization
# ==============
-AC_INIT(gc,7.0alpha4,Hans.Boehm@hp.com)
+AC_INIT(gc,7.0alpha5,Hans.Boehm@hp.com)
## version must conform to [0-9]+[.][0-9]+(alpha[0-9]+)?
AC_CONFIG_SRCDIR(gcj_mlc.c)
AC_CANONICAL_TARGET
@@ -276,42 +276,42 @@ AC_MSG_CHECKING(which machine-dependent code should be used)
machdep=
case "$host" in
alpha-*-openbsd*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
if test x"${ac_cv_lib_dl_dlopen}" != xyes ; then
AC_MSG_WARN(OpenBSD/Alpha without dlopen(). Shared library support is disabled)
fi
;;
alpha*-*-linux*)
- machdep="alpha_mach_dep.lo"
+ machdep="mach_dep.lo"
;;
i?86-*-solaris2.[[89]] | i?86-*-solaris2.1?)
AC_DEFINE(SOLARIS25_PROC_VDB_BUG_FIXED)
;;
mipstx39-*-elf*)
- machdep="mips_ultrix_mach_dep.lo"
+ machdep="mach_dep.lo"
AC_DEFINE(STACKBASE, __stackbase)
AC_DEFINE(DATASTART_IS_ETEXT)
;;
mips-dec-ultrix*)
- machdep="mips_ultrix_mach-dep.lo"
+ machdep="mach-dep.lo"
;;
mips-nec-sysv*|mips-unknown-sysv*)
;;
mips*-*-linux*)
;;
mips-*-*)
- machdep="mips_sgi_mach_dep.lo"
+ machdep="mach_dep.lo"
AC_DEFINE(NO_EXECUTE_PERMISSION)
;;
sparc-*-netbsd*)
- machdep="sparc_netbsd_mach_dep.lo"
+ machdep="mach_dep.lo sparc_netbsd_mach_dep.lo"
;;
sparc-sun-solaris2.3)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
AC_DEFINE(SUNOS53_SHARED_LIB)
;;
sparc*-sun-solaris2.*)
- machdep="sparc_mach_dep.lo"
+ machdep="mach_dep.lo sparc_mach_dep.lo"
;;
ia64-*-*)
machdep="mach_dep.lo ia64_save_regs_in_stack.lo"
diff --git a/doc/README b/doc/README
index 2230f411..159fa89a 100644
--- a/doc/README
+++ b/doc/README
@@ -31,7 +31,7 @@ are GPL'ed, but with an exception that should cover all uses in the
collector. (If you are concerned about such things, I recommend you look
at the notice in config.guess or ltmain.sh.)
-This is version 7.0alpha2 of a conservative garbage collector for C and C++.
+This is version 7.0alpha5 of a conservative garbage collector for C and C++.
You might find a more recent version of this at
diff --git a/doc/README.changes b/doc/README.changes
index da79786d..25b61c4b 100644
--- a/doc/README.changes
+++ b/doc/README.changes
@@ -2236,8 +2236,21 @@ Since gc6.5:
there.
- More consistently define HBLKSIZE to 4096 on 64 bit architectures with
4K pages. (Thanks to Andrew Haley.)
-
-Since gc6.6:
+ - With win32 threads, GC_stop_world needs to acquire GC_write_cs. (Thanks
+ to Ben Hutchings for the observation and patch.)
+ - Move up struct callinfo declaration to make gcc 4.0.2 happy.
+
+Since 6.6:
+ - Add "int" to Solaris "end" and "etext" declaration in gc.h. Declared
+ the symbols with underscores and as arrays, since that's what's actually
+ used. Perhaps this could all just be removed? (Thanks to John Bowman.)
+ - Fixed ARM GC_test_and_set code. (Thanks to Kazu Hirata and Paul Brook.)
+ - Added casts for assignments to hb_last_reclaimed, which truncate the
+ value. Added a cast to GC_adj_words_allocd. Use GetModuleHandleA
+ when retrieving a handle to kernel32.dll under win32. (Thanks to the
+ Visual Prolog developers.)
+
+Since gc6.7:
- Remove GC_PROTO, VOLATILE, GC_PTR, and GC_CONST. Assume ANSI C compiler
and use ANSI constructs unconditionally.
- Introduce #elif and #error in some of the appropriate places.
@@ -2383,9 +2396,45 @@ Since gc7.0alpha3
- Added GC_getattr_np-based GC_get_stack_base (untested).
- Separated thread local allocation into a separate file and added the
beginning of win32 support for that.
+
+Since gc7.0alpha4
+ (more 6.6, 6.7 changes)
+ - Some Solaris fixes, including some more general changes in how
+ the assembly pieces of mach_dep.c are handled.
+ - Removed a lot of SOLARIS_THREADS-specific code that was only
+ needed with the old implementation. This included many (mostly no-op)
+ versions of GC_is_fresh.
+ - Don't use atomic_ops in gc_locks.h unless we need threads.
+ - Fixed USE_MARK_BITS, which is once again the default without PARALLEL_MARK.
+ - Removed Solaris GC_INIT hack. It's a workaround for a long dead bug,
+ and it seemed to be wrong anyway.
+ - Changed win32_threads.c to require preprocessor-based interception
+ of thread routines by default. A client call to GC_use_DllMain is
+ now required to get the old behavior in which DllMain is used to implicitly
+ register threads. This was done for uniformity with other platforms, and
+ because the DllMain solution seemed to require very tricky code which,
+ at least in the past, imposed hard bounds on the number of threads.
+ - Many small changes to make thread support work again on Cygwin.
+ - Moved definition of allocator lock etc. to pthread_support.c and
+ win32_threads.c for those two cases.
+ - Got rid of the FASTLOCK() machinery. It doesn't seem useful on modern
+ platforms.
+ - Cleaned up the uncollectable allocation routines, speeding up the
+ slower paths. The code did enough unnecessary work off the critical path
+ that the underlying logic was getting hard to extract.
+ - No longer turn off THREAD_LOCAL_ALLOC with DBG_HDRS_ALL. Indications
+ are it just works, and I think the reasons for it not working disappeared
+ a while ago.
+ - Fixed bugs in hb_n_marks calculation and assertion.
+ - Don't use __builtin_expect for pre-3.0 gcc.
+ - Define GWW_VDB only for recent Microsoft tool chains.
+ - Add overview.html to doc directory.
+ - Fix NT_STATIC_THREADS_MAKEFILE, various compiler warnings.
+ - Made thread local allocation sort of work with Cygwin. The code should
+ be there to deal with other Windows variants, but non-Cygwin Windows
+ threads need more bug fixes.
To do:
- - Fix USE_MARK_BITS.
- REDIRECT_MALLOC and threads combination is getting closer, but currently
usually fails because the DTV (dynamic thread vector) used to access
thread-local storage is referenced only from the base of a thread stack,
@@ -2394,7 +2443,6 @@ To do:
Typically large heap sections end up cleared.
- Clone marker inner loop to support arch-dependent prefetching,
and counting of objects marked for finalization.
- - function wrapping??
- The USE_MUNMAP code should really use a separate data structure
indexed by physical page to keep track of time since last use of
a page. Using hblk headers means we lose track of ages when
diff --git a/doc/README.linux b/doc/README.linux
index 1d0fd4c3..99f4bbcd 100644
--- a/doc/README.linux
+++ b/doc/README.linux
@@ -19,15 +19,15 @@ Linux threads. These should not be touched by the client program.
To use threads, you need to abide by the following requirements:
-1) You need to use LinuxThreads (which are included in libc6).
+1) You need to use LinuxThreads or NPTL (which are included in libc6).
The collector relies on some implementation details of the LinuxThreads
- package. It is unlikely that this code will work on other
+ package. This code may not work on other
pthread implementations (in particular it will *not* work with
MIT pthreads).
-2) You must compile the collector with -DGC_LINUX_THREADS and -D_REENTRANT
- specified in the Makefile.
+2) You must compile the collector with -DGC_LINUX_THREADS (or
+ just -DGC_THREADS) and -D_REENTRANT specified in the Makefile.
3a) Every file that makes thread calls should define GC_LINUX_THREADS and
_REENTRANT and then include gc.h. Gc.h redefines some of the
diff --git a/doc/doc.am b/doc/doc.am
index a90e05df..d95fa172 100644
--- a/doc/doc.am
+++ b/doc/doc.am
@@ -43,6 +43,7 @@ dist_pkgdata_DATA = \
doc/README.solaris2 \
doc/README.uts \
doc/README.win32 \
+ doc/overview.html \
doc/tree.html \
doc/leak.html \
doc/gcinterface.html \
diff --git a/doc/gcdescr.html b/doc/gcdescr.html
index cab6bde4..dc08470e 100644
--- a/doc/gcdescr.html
+++ b/doc/gcdescr.html
@@ -1,7 +1,7 @@
<HTML>
<HEAD>
<TITLE> Conservative GC Algorithmic Overview </TITLE>
- <AUTHOR> Hans-J. Boehm, HP Labs (Much of this was written at SGI)</author>
+ <AUTHOR> Hans-J. Boehm, HP Labs (Some of this was written at SGI)</author>
</HEAD>
<BODY>
<H1> <I>This is under construction, and may always be.</i> </h1>
@@ -549,6 +549,67 @@ by using ld's function call wrapping mechanism under Linux.
Recent versions of the collector support several facilites to enhance
the processor-scalability and thread performance of the collector.
These are discussed in more detail <A HREF="scale.html">here</a>.
+We briefly outline the data approach to thread-local allocation in the
+next section.
+<H2>Thread-local allocation</h2>
+If thread-local allocation is enabled, the collector keeps separate
+arrays of free lists for each thread. Thread-local allocation
+is currently only supported on a few platforms.
+<P>
+The free list arrays associated
+with each thread are only used to satisfy requests for objects that
+are both very small, and belong to one of a small number of well-known
+kinds. These currently include "normal" and pointer-free objects.
+Depending on the configuration, "gcj" objects may also be included.
+<P>
+Thread-local free list entries contain either a pointer to the first
+element of a free list, or they contain a counter of the number of
+allocation "granules" allocated so far. Initially they contain the
+value one, i.e. a small counter value.
+<P>
+Thread-local allocation allocates directly through the global
+allocator, if the object is of a size or kind not covered by the
+local free lists.
+<P>
+If there is an appropriate local free list, the allocator checks whether it
+contains a sufficiently small counter value. If so, the counter is simply
+incremented by the counter value, and the global allocator is used.
+In this way, the initial few allocations of a given size bypass the local
+allocator. A thread that only allocates a handful of objects of a given
+size will not build up its own free list for that size. This avoids
+wasting space for unpopular object sizes or kinds.
+<P>
+Once the counter passes a threshold, <TT>GC_malloc_many</tt> is called
+to allocate roughly <TT>HBLKSIZE</tt> space and put it on the corresponding
+local free list. Further allocations of that size and kind then use
+this free list, and no longer need to acquire the allocation lock.
+The allocation procedure is otherwise similar to the global free lists.
+The local free lists are also linked using the first word in the object.
+In most cases this means they require considerably less time.
+<P>
+Local free lists are treated by most of the rest of the collector
+as though they were in-use reachable data. This requires some care,
+since pointer-free objects are not normally traced, and hence a special
+tracing procedure is required to mark all objects on pointer-free and
+gcj local free lists.
+<P>
+On thread exit, any remaining thread-local free list entries are
+transferred back to the global free list.
+<P>
+Note that if the collector is configured for thread-local allocation,
+GC versions before 7 do not invoke the thread-local allocator by default.
+<TT>GC_malloc</tt> only uses thread-local allocation in version 7 and later.
+In earlier versions, <TT>GC_MALLOC</tt> (all caps) may be directed
+to use thread-local allocation by defining <TT>GC_REDIRECT_TO_LOCAL</tt>
+and then including <TT>gc_local_alloc.h</tt>.
+<P>
+For some more details see <A HREF="scale.html">here</a>, and the
+technical report entitled
+<A HREF="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+``Fast Multiprocessor Memory Allocation and Garbage Collection''
+</a>
+<P>
+<HR>
<P>
Comments are appreciated. Please send mail to
<A HREF="mailto:boehm@acm.org"><TT>boehm@acm.org</tt></a> or
diff --git a/doc/overview.html b/doc/overview.html
new file mode 100644
index 00000000..d31f9370
--- /dev/null
+++ b/doc/overview.html
@@ -0,0 +1,446 @@
+<!DOCTYPE HTML>
+<html><head><title>A garbage collector for C and C++</title></head>
+<body>
+<table bgcolor="#f0f0ff" cellpadding="10%">
+ <tbody><tr>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Interface Overview</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Tutorial Slides</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">FAQ</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">Example</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Download</a></td>
+ <td><a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">License</a></td>
+ </tr>
+</tbody></table>
+<h1>A garbage collector for C and C++</h1>
+<ul>
+<li><a href="#platforms">Platforms</a>
+</li><li><a href="#multiprocessors">Scalable multiprocessor versions</a>
+</li><li><a href="#details">Some collector details</a>
+</li><li><a href="#further">Further reading</a>
+</li><li><a href="#users">Current users</a>
+</li><li><a href="#collector">Local Links for this collector</a>
+</li><li><a href="#background">Local Background Links</a>
+</li><li><a href="#contacts">Contacts and Mailing List</a>
+</li></ul>
+[ This is an updated version of the page formerly at
+<tt>http://reality.sgi.com/boehm/gc.html</tt>
+and before that at
+<a href="ftp://parcftp.xerox.com/pub/gc/gc.html">
+<tt>ftp://parcftp.xerox.com/pub/gc/gc.html</tt></a>.]
+<p>
+The <a href="http://www.hpl.hp.com/personal/Hans_Boehm">Boehm</a>-<a href="http://www.cs.cornell.edu/annual_report/00-01/bios.htm#demers">Demers</a>-<a href="http://www-sul.stanford.edu/weiser/">Weiser</a>
+conservative garbage collector can
+be used as a garbage collecting
+replacement for C <tt>malloc</tt> or C++ <tt>new</tt>.
+It allows you to allocate memory basically as you normally would,
+without explicitly deallocating memory that is no longer useful.
+The collector automatically recycles memory when it determines
+that it can no longer be otherwise accessed.
+A simple example of such a use is given
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">here</a>.
+</p><p>
+The collector is also used by a number of programming language
+implementations that either use C as intermediate code, want
+to facilitate easier interoperation with C libraries, or
+just prefer the simple collector interface.
+For a more detailed description of the interface, see
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">here</a>.
+</p><p>
+Alternatively, the garbage collector may be used as
+a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">leak detector</a>
+for C or C++ programs, though that is not its primary goal.
+</p><p>
+Typically several versions will be available.
+Usually you should first try to use
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc.tar.gz"><tt>gc_source/gc.tar.gz</tt></a>,
+which is normally an older, more stable version.
+</p><p>
+If that fails, try the latest explicitly numbered version
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/">
+<tt>gc_source/</tt></a>.
+Later versions may contain additional features, platform support,
+or bug fixes, but are likely to be less well tested.
+Note that versions containing the letters <tt>alpha</tt> are even less
+well tested than others, especially on non-HP platforms.
+</p><p>
+A slightly older version of the garbage collector is now also
+included as part of the
+<a href="http://gcc.gnu.org/">GNU compiler</a>
+distribution. The source
+code for that version is available for browsing
+<a href="http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/boehm-gc/">here</a>.
+</p><p>
+The arguments for and against conservative garbage collection
+in C and C++ are briefly
+discussed in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>. The beginnings of
+a frequently-asked-questions list are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a>.
+</p><p>
+The garbage collector code is copyrighted by
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm">Hans-J. Boehm</a>,
+Alan J. Demers,
+<a href="http://www.xerox.com/">Xerox Corporation</a>,
+<a href="http://www.sgi.com/">Silicon Graphics</a>,
+and
+<a href="http://www.hp.com/">Hewlett-Packard Company</a>.
+It may be used and copied without payment of a fee under minimal restrictions.
+See the README file in the distribution or the
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/license.txt">license</a> for more details.
+<b>IT IS PROVIDED AS IS,
+WITH ABSOLUTELY NO WARRANTY EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK</b>.
+</p><p>
+Empirically, this collector works with most unmodified C programs,
+simply by replacing
+<tt>malloc</tt> with <tt>GC_malloc</tt> calls,
+replacing <tt>realloc</tt> with <tt>GC_realloc</tt> calls, and removing
+free calls. Exceptions are discussed
+in <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">issues.html</a>.
+</p><h2><a name="platforms">Platforms</a></h2>
+The collector is not completely portable, but the distribution
+includes ports to most standard PC and UNIX/Linux platforms.
+The collector should work on Linux, *BSD, recent Windows versions,
+MacOS X, HP/UX, Solaris,
+Tru64, Irix and a few other operating systems.
+Some ports are more polished than others.
+<p>
+Irix pthreads, Linux threads, Win32 threads, Solaris threads
+(old style and pthreads),
+HP/UX 11 pthreads, Tru64 pthreads, and MacOS X threads are supported
+in recent versions.
+</p><h3>Separately distributed ports</h3>
+For MacOS 9/Classic use, Patrick Beard's latest port is available from
+<a href="http://homepage.mac.com/pcbeard/gc/">
+<tt>http://homepage.mac.com/pcbeard/gc/</tt></a>.
+(Unfortunately, that's now quite dated.
+I'm not in a position to test under MacOS. Although I try to
+incorporate changes, it is impossible for
+me to update the project file.)
+<p>
+Precompiled versions of the collector for NetBSD are available
+<a href="ftp://ftp.netbsd.org/pub/NetBSD/packages/pkgsrc/devel/boehm-gc/README.html">here</a>
+or
+<a href="http://www.netbsd.org/packages/devel/boehm-gc/README.html">here</a>.
+</p><p>
+<a href="http://www.debian.org/">Debian Linux</a> includes prepackaged
+versions of the collector.
+</p><h2><a name="multiprocessors">Scalable multiprocessor versions</a></h2>
+Kenjiro Taura, Toshio Endo, and Akinori Yonezawa have made available
+a <a href="http://www.yl.is.s.u-tokyo.ac.jp/gc/">parallel collector</a>
+based on this one. Their collector takes advantage of multiple processors
+during a collection. Starting with collector version 6.0alpha1
+we also do this, though with more modest processor scalability goals.
+Our approach is discussed briefly in
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html"><tt>scale.html</tt></a>.
+<h2><a name="details">Some Collector Details</a></h2>
+The collector uses a <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">mark-sweep</a> algorithm.
+It provides incremental and generational
+collection under operating systems which provide the right kind of
+virtual memory support. (Currently this includes SunOS[45], IRIX,
+OSF/1, Linux, and Windows, with varying restrictions.)
+It allows <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html"><i>finalization</i></a> code
+to be invoked when an object is collected.
+It can take advantage of type information to locate pointers if such
+information is provided, but it is usually used without such information.
+See the README and
+<tt>gc.h</tt> files in the distribution for more details.
+<p>
+For an overview of the implementation, see <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.
+</p><p>
+The garbage collector distribution includes a C string
+(<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt"><i>cord</i></a>) package that provides
+for fast concatenation and substring operations on long strings.
+A simple curses- and win32-based editor that represents the entire file
+as a cord is included as a
+sample application.
+</p><p>
+Performance of the nonincremental collector is typically competitive
+with malloc/free implementations. Both space and time overhead are
+likely to be only slightly higher
+for programs written for malloc/free
+(see Detlefs, Dosser and Zorn's
+<a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.)
+For programs allocating primarily very small objects, the collector
+may be faster; for programs allocating primarily large objects it will
+be slower. If the collector is used in a multithreaded environment
+and configured for thread-local allocation, it may in some cases
+significantly outperform malloc/free allocation in time.
+</p><p>
+We also expect that in many cases any additional overhead
+will be more than compensated for by decreased copying etc.
+if programs are written
+and tuned for garbage collection.
+</p><h1><a name="further">Further Reading:</a></h1>
+<b>The beginnings of a frequently asked questions list for this
+collector are <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">here</a></b>.
+<p>
+<b>The following provide information on garbage collection in general</b>:
+</p><p>
+Paul Wilson's <a href="ftp://ftp.cs.utexas.edu/pub/garbage">garbage collection ftp archive</a> and <a href="ftp://ftp.cs.utexas.edu/pub/garbage/gcsurvey.ps">GC survey</a>.
+</p><p>
+The Ravenbrook <a href="http://www.memorymanagement.org/">
+Memory Management Reference</a>.
+</p><p>
+David Chase's
+<a href="http://www.iecc.com/gclist/GC-faq.html">GC FAQ</a>.
+</p><p>
+Richard Jones'
+<a href="http://www.ukc.ac.uk/computer_science/Html/Jones/gc.html">
+GC page</a> and
+<a href="http://www.cs.kent.ac.uk/people/staff/rej/gcbook/gcbook.html">
+his book</a>.
+</p><p>
+<b>The following papers describe the collector algorithms we use
+and the underlying design decisions at
+a higher level.</b>
+</p><p>
+(Some of the lower level details can be found
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">here</a>.)
+</p><p>
+The first one is not available
+electronically due to copyright considerations. Most of the others are
+subject to ACM copyright.
+</p><p>
+Boehm, H., "Dynamic Memory Allocation and Garbage Collection", <i>Computers in Physics
+9</i>, 3, May/June 1995, pp. 297-303. This is directed at an otherwise sophisticated
+audience unfamiliar with memory allocation issues. The algorithmic details differ
+from those in the implementation. There is a related letter to the editor and a minor
+correction in the next issue.
+</p><p>
+Boehm, H., and <a href="http://www.ubiq.com/hypertext/weiser/weiser.html">M. Weiser</a>,
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/spe_gc_paper">"Garbage Collection in an Uncooperative Environment"</a>,
+<i>Software Practice &amp; Experience</i>, September 1988, pp. 807-820.
+</p><p>
+Boehm, H., A. Demers, and S. Shenker, <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi91.ps.Z">"Mostly Parallel Garbage Collection"</a>, Proceedings
+of the ACM SIGPLAN '91 Conference on Programming Language Design and Implementation,
+<i>SIGPLAN Notices 26</i>, 6 (June 1991), pp. 157-164.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi93.ps.Z">"Space Efficient Conservative Garbage Collection"</a>, Proceedings of the ACM
+SIGPLAN '93 Conference on Programming Language Design and Implementation, <i>SIGPLAN
+Notices 28</i>, 6 (June 1993), pp. 197-206.
+</p><p>
+Boehm, H., "Reducing Garbage Collector Cache Misses",
+<i> Proceedings of the 2000 International Symposium on Memory Management </i>.
+<a href="http://portal.acm.org/citation.cfm?doid=362422.362438">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-99.html">
+Technical report version.</a> Describes the prefetch strategy
+incorporated into the collector for some platforms. Explains why
+the sweep phase of a "mark-sweep" collector should not really be
+a distinct phase.
+</p><p>
+M. Serrano, H. Boehm,
+"Understanding Memory Allocation of Scheme Programs",
+<i>Proceedings of the Fifth ACM SIGPLAN International Conference on
+Functional Programming</i>, 2000, Montreal, Canada, pp. 245-256.
+<a href="http://www.acm.org/pubs/citations/proceedings/fp/351240/p245-serrano/">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-62.html">
+Earlier Technical Report version.</a> Includes some discussion of the
+collector debugging facilities for identifying causes of memory retention.
+</p><p>
+Boehm, H.,
+"Fast Multiprocessor Memory Allocation and Garbage Collection",
+<a href="http://www.hpl.hp.com/techreports/2000/HPL-2000-165.html">
+HP Labs Technical Report HPL 2000-165</a>. Discusses the parallel
+collection algorithms, and presents some performance results.
+</p><p>
+Boehm, H., "Bounding Space Usage of Conservative Garbage Collectors",
+<i>Proceedings of the 2002 ACM SIGPLAN-SIGACT Symposium on Principles of
+Programming Languages</i>, Jan. 2002, pp. 93-100.
+<a href="http://portal.acm.org/citation.cfm?doid=503272.503282">
+Official version.</a>
+<a href="http://www.hpl.hp.com/techreports/2001/HPL-2001-251.html">
+Technical report version.</a>
+Includes a discussion of a collector facility to much more reliably test for
+the potential of unbounded heap growth.
+</p><p>
+<b>The following papers discuss language and compiler restrictions necessary to guarantee
+safety of conservative garbage collection.</b>
+</p><p>
+We thank John Levine and JCLT for allowing
+us to make the second paper available electronically, and providing PostScript for the final
+version.
+</p><p>
+Boehm, H., <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi96.ps.gz">``Simple
+Garbage-Collector-Safety''</a>, Proceedings
+of the ACM SIGPLAN '96 Conference on Programming Language Design
+and Implementation.
+</p><p>
+Boehm, H., and D. Chase, <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/boecha.ps.gz">
+``A Proposal for Garbage-Collector-Safe C Compilation''</a>,
+<i>Journal of C Language Translation 4</i>, 2 (December 1992), pp. 126-141.
+</p><p>
+<b>Other related information: </b>
+</p><p>
+Detlefs, Dosser and Zorn's <a href="ftp://ftp.cs.colorado.edu/pub/techreports/zorn/CU-CS-665-93.ps.Z">Memory Allocation Costs in Large C and C++ Programs</a>.
+ This is a performance comparison of the Boehm-Demers-Weiser collector to malloc/free,
+using programs written for malloc/free.
+</p><p>
+Joel Bartlett's <a href="ftp://ftp.digital.com/pub/DEC/CCgc">mostly copying conservative garbage collector for C++</a>.
+</p><p>
+John Ellis and David Detlefs' <a href="ftp://parcftp.xerox.com/pub/ellis/gc/gc.ps">Safe Efficient Garbage Collection for C++</a> proposal.
+</p><p>
+Henry Baker's <a href="http://home.pipeline.com/%7Ehbaker1/">paper collection</a>.
+</p><p>
+Slides for Hans Boehm's <a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Allocation and GC Myths</a> talk.
+</p><h1><a name="users">Current users:</a></h1>
+Known current users of some variant of this collector include:
+<p>
+The runtime system for <a href="http://gcc.gnu.org/java">GCJ</a>,
+the static GNU java compiler.
+</p><p>
+<a href="http://w3m.sourceforge.net/">W3m</a>, a text-based web browser.
+</p><p>
+Some versions of the Xerox DocuPrint printer software.
+</p><p>
+The <a href="http://www.mozilla.org/">Mozilla</a> project, as leak
+detector.
+</p><p>
+The <a href="http://www.go-mono.com/">Mono</a> project,
+an open source implementation of the .NET development framework.
+</p><p>
+The <a href="http://www.gnu.org/projects/dotgnu/">DotGNU Portable.NET
+project</a>, another open source .NET implementation.
+</p><p>
+The <a href="http://irssi.org/">Irssi IRC client</a>.
+</p><p>
+<a href="http://titanium.cs.berkeley.edu/">The Berkeley Titanium project</a>.
+</p><p>
+<a href="http://www.nag.co.uk/nagware_fortran_compilers.asp">The NAGWare f90 Fortran 90 compiler</a>.
+</p><p>
+Elwood Corporation's <a href="http://www.elwood.com/eclipse-info/index.htm">
+Eclipse</a> Common Lisp system, C library, and translator.
+</p><p>
+The <a href="http://www-sop.inria.fr/mimosa/fp/Bigloo/">Bigloo
+Scheme</a>
+and <a href="http://kaolin.unice.fr/%7Eserrano/camloo.html">Camloo ML
+compilers</a>
+written by Manuel Serrano and others.
+</p><p>
+Brent Benson's <a href="http://ftp.cs.indiana.edu/pub/scheme-repository/imp/">libscheme</a>.
+</p><p>
+The <a href="http://www.cs.rice.edu/CS/PLT/packages/mzscheme/index.html">MzScheme</a> scheme implementation.
+</p><p>
+The <a href="http://www.cs.washington.edu/research/projects/cecil/www/cecil-home.html">University of Washington Cecil Implementation</a>.
+</p><p>
+<a href="http://www.icsi.berkeley.edu/Sather/">The Berkeley Sather implementation</a>.
+</p><p>
+<a href="http://www.cs.berkeley.edu/%7Eharmonia/">The Berkeley Harmonia Project</a>.
+</p><p>
+The <a href="http://www.cs.arizona.edu/sumatra/toba/">Toba</a> Java Virtual
+Machine to C translator.
+</p><p>
+The <a href="http://www.gwydiondylan.org/">Gwydion Dylan compiler</a>.
+</p><p>
+The <a href="http://gcc.gnu.org/onlinedocs/gcc/Objective-C.html">
+GNU Objective C runtime</a>.
+</p><p>
+<a href="http://www.math.uiuc.edu/Macaulay2">Macaulay 2</a>, a system to support
+research in algebraic geometry and commutative algebra.
+</p><p>
+The <a href="http://www.vestasys.org/">Vesta</a> configuration management
+system.
+</p><p>
+<a href="http://www.visual-prolog.com/vip6">Visual Prolog 6</a>.
+</p><p>
+<a href="http://asymptote.sf.net/">Asymptote LaTeX-compatible
+vector graphics language.</a>
+
+</p><h1><a name="collector">More collector information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/simple_example.html">A simple illustration of how to build and
+use the collector.</a>
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcinterface.html">Description of alternate interfaces to the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/04tutorial.pdf">Slides from an ISMM 2004 tutorial about the GC.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/faq.html">A FAQ (frequently asked questions) list.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/leak.html">How to use the garbage collector as a leak detector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/debugging.html">Some hints on debugging garbage collected
+applications.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gcdescr.html">An overview of the implementation of the
+garbage collector.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/tree.html">The data structure used for fast pointer lookups.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/scale.html">Scalability of the collector to multiprocessors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source">Directory containing garbage collector source.</a>
+
+</p><h1><a name="background">More background information at this site</a></h1>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/bounds.html">An attempt to establish a bound on space usage of
+conservative garbage collectors.</a>
+<p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/complexity.html">Mark-sweep versus copying garbage collectors
+and their complexity.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/conservative.html">Pros and cons of conservative garbage collectors,
+in comparison to other collectors.
+</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/issues.html">Issues related to garbage collection vs.
+manual memory management in C/C++.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/example.html">An example of a case in which garbage collection
+results in a much faster implementation as a result of reduced
+synchronization.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/nonmoving">Slide set discussing performance of nonmoving
+garbage collectors.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/popl03/web">
+Slide set discussing <i>Destructors, Finalizers, and Synchronization</i>
+(POPL 2003).</a>
+</p><p>
+<a href="http://portal.acm.org/citation.cfm?doid=604131.604153">
+Paper corresponding to above slide set.</a>
+(<a href="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">
+Technical Report version</a>.)
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_bench.html">A Java/Scheme/C/C++ garbage collection benchmark.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/myths.ps">Slides for talk on memory allocation myths.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gctalk.ps">Slides for OOPSLA 98 garbage collection talk.</a>
+</p><p>
+<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers">Related papers.</a>
+</p><h1><a name="contacts">Contacts and Mailing List</a></h1>
+We have recently set up two mailing lists for collector announcements
+and discussions:
+<ul>
+<li><a href="mailto:gc-announce@linux.hpl.hp.com">gc-announce@linux.hpl.hp.com</a>
+is used for announcements of new versions. Postings are restricted.
+We expect this to always remain a very low volume list.
+</li><li><a href="mailto:gc@linux.hpl.hp.com">gc@linux.hpl.hp.com</a> is used for
+discussions, bug reports, and the like. Subscribers may post.
+On-topic posts by nonsubscribers will usually also be accepted, but
+it may take some time to review them.
+</li></ul>
+To subscribe to these lists, send a mail message containing the
+word "subscribe" to
+<a href="mailto:gc-announce-request@linux.hpl.hp.com?subject=subscribe">gc-announce-request@linux.hpl.hp.com</a>
+or to
+<a href="mailto:gc-request@linux.hpl.hp.com?subject=subscribe">gc-request@linux.hpl.hp.com</a>.
+(Please ignore the instructions about web-based subscription.
+The listed web site is behind the HP firewall.)
+<p>
+The archives for these lists appear
+<a href="http://www.hpl.hp.com/hosted/linux/mail-archives">here</a>.
+The gc list archive may also be read at
+<a href="http://dir.gmane.org/gmane.comp.programming.garbage-collection.boehmgc">gmane.org</a>.
+</p><p>
+Some prior discussion of the collector has taken place on the gcc
+java mailing list, whose archives appear
+<a href="http://gcc.gnu.org/ml/java/">here</a>, and also on
+<a href="http://lists.tunes.org/mailman/listinfo/gclist">gclist@iecc.com</a>.
+</p><p>
+Comments and bug reports may also be sent to
+(<a href="mailto:Hans_Boehm@hp.com">Hans.Boehm@hp.com</a>) or
+(<a href="mailto:boehm@acm.org">boehm@acm.org</a>), but the gc
+mailing list is usually preferred.
+
+</p></body></html>
diff --git a/headers.c b/headers.c
index 8b14b4be..1a0ce887 100644
--- a/headers.c
+++ b/headers.c
@@ -254,7 +254,7 @@ struct hblkhdr * GC_install_header(struct hblk *h)
result = alloc_hdr();
SET_HDR(h, result);
# ifdef USE_MUNMAP
- result -> hb_last_reclaimed = GC_gc_no;
+ result -> hb_last_reclaimed = (unsigned short)GC_gc_no;
# endif
return(result);
}
diff --git a/include/gc.h b/include/gc.h
index a47dc4a1..5f049c51 100644
--- a/include/gc.h
+++ b/include/gc.h
@@ -962,17 +962,7 @@ extern void GC_thr_init(void); /* Needed for Solaris/X86 */
* A GC_INIT call is required if the collector is built with THREAD_LOCAL_ALLOC
* defined and the initial allocation call is not to GC_malloc().
*/
-#if (defined(sparc) || defined(__sparc)) && defined(sun)
- /*
- * If you are planning on putting
- * the collector in a SunOS 5 dynamic library, you need to call GC_INIT()
- * from the statically loaded program section.
- * This circumvents a Solaris 2.X (X<=4) linker bug.
- */
-# define GC_INIT() { extern end, etext; \
- GC_noop(&end, &etext); \
- GC_init();}
-#elif defined(__CYGWIN32__) && defined(GC_DLL) || defined (_AIX)
+#if defined(__CYGWIN32__) || defined (_AIX)
/*
* Similarly gnu-win32 DLLs need explicit initialization from
* the main program, as does AIX.
@@ -984,15 +974,22 @@ extern void GC_thr_init(void); /* Needed for Solaris/X86 */
extern int _bss_end__[];
# define GC_MAX(x,y) ((x) > (y) ? (x) : (y))
# define GC_MIN(x,y) ((x) < (y) ? (x) : (y))
-# define GC_DATASTART ((GC_PTR) GC_MIN(_data_start__, _bss_start__))
-# define GC_DATAEND ((GC_PTR) GC_MAX(_data_end__, _bss_end__))
+# define GC_DATASTART ((void *) GC_MIN(_data_start__, _bss_start__))
+# define GC_DATAEND ((void *) GC_MAX(_data_end__, _bss_end__))
+# if defined(GC_DLL)
+# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
+# else
+ /* Main program init not required, but other defines needed for */
+ /* uniformity. */
+# define GC_INIT() { GC_init(); }
+# endif
# endif
# if defined(_AIX)
extern int _data[], _end[];
-# define GC_DATASTART ((GC_PTR)((ulong)_data))
-# define GC_DATAEND ((GC_PTR)((ulong)_end))
+# define GC_DATASTART ((void *)((ulong)_data))
+# define GC_DATAEND ((void *)((ulong)_end))
+# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
# endif
-# define GC_INIT() { GC_add_roots(GC_DATASTART, GC_DATAEND); GC_init(); }
#else
# define GC_INIT() { GC_init(); }
#endif
diff --git a/include/gc_config_macros.h b/include/gc_config_macros.h
index 2cfa6c22..f3b5ef48 100644
--- a/include/gc_config_macros.h
+++ b/include/gc_config_macros.h
@@ -5,12 +5,12 @@
* Some tests for old macros. These violate our namespace rules and will
* disappear shortly. Use the GC_ names.
*/
-#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS)
+#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS) \
+ || defined(_SOLARIS_PTHREADS) || defined(GC_SOLARIS_PTHREADS)
+ /* We no longer support old style Solaris threads. */
+ /* GC_SOLARIS_THREADS now means pthreads. */
# define GC_SOLARIS_THREADS
#endif
-#if defined(_SOLARIS_PTHREADS)
-# define GC_SOLARIS_PTHREADS
-#endif
#if defined(IRIX_THREADS)
# define GC_IRIX_THREADS
#endif
@@ -39,7 +39,6 @@
#endif
#if !defined(_REENTRANT) && (defined(GC_SOLARIS_THREADS) \
- || defined(GC_SOLARIS_PTHREADS) \
|| defined(GC_HPUX_THREADS) \
|| defined(GC_AIX_THREADS) \
|| defined(GC_LINUX_THREADS))
@@ -52,7 +51,7 @@
# define _POSIX4A_DRAFT10_SOURCE 1
#endif
-# if defined(GC_SOLARIS_PTHREADS) || defined(GC_FREEBSD_THREADS) || \
+# if defined(GC_SOLARIS_THREADS) || defined(GC_FREEBSD_THREADS) || \
defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) || \
defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || \
defined(GC_DGUX386_THREADS) || defined(GC_DARWIN_THREADS) || \
@@ -79,10 +78,12 @@
# define GC_IRIX_THREADS
# define GC_PTHREADS
# endif
-# if defined(__sparc) && !defined(__linux__)
-# define GC_SOLARIS_PTHREADS
+# if defined(__sparc) && !defined(__linux__) \
+ || defined(sun) && (defined(i386) || defined(__i386__))
+# define GC_SOLARIS_THREADS
# define GC_PTHREADS
# endif
+
# if defined(__APPLE__) && defined(__MACH__) && defined(__ppc__)
# define GC_DARWIN_THREADS
# define GC_PTHREADS
@@ -111,10 +112,6 @@
# endif
#endif
-#if defined(GC_SOLARIS_PTHREADS) && !defined(GC_SOLARIS_THREADS)
-# define GC_SOLARIS_THREADS
-#endif
-
# define __GC
# ifndef _WIN32_WCE
# include <stddef.h>
diff --git a/include/gc_inline.h b/include/gc_inline.h
index d2008cf6..5f6b6bb1 100644
--- a/include/gc_inline.h
+++ b/include/gc_inline.h
@@ -26,9 +26,12 @@
#include "gc.h"
#include "gc_tiny_fl.h"
-#ifndef __GNUC__
-# define __builtin_expect(x, y) (x)
-#endif
+#if __GNUC__ >= 3
+# define GC_EXPECT(expr, outcome) __builtin_expect(expr,outcome)
+ /* Equivalent to (expr), but predict that usually (expr)==outcome. */
+#else
+# define GC_EXPECT(expr, outcome) (expr)
+#endif /* __GNUC__ */
/* The ultimately general inline allocation macro. Allocate an object */
/* of size bytes, putting the resulting pointer in result. Tiny_fl is */
@@ -49,14 +52,14 @@
# define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct,\
kind,default_expr,init) \
{ \
- if (__builtin_expect(granules >= GC_TINY_FREELISTS,0)) { \
+ if (GC_EXPECT(granules >= GC_TINY_FREELISTS,0)) { \
result = default_expr; \
} else { \
void **my_fl = tiny_fl + granules; \
void *my_entry=*my_fl; \
void *next; \
\
- while (__builtin_expect((word)my_entry \
+ while (GC_EXPECT((word)my_entry \
<= num_direct + GC_TINY_FREELISTS + 1, 0)) { \
/* Entry contains counter or NULL */ \
if ((word)my_entry - 1 < num_direct) { \
@@ -81,7 +84,7 @@
init; \
PREFETCH_FOR_WRITE(next); \
GC_ASSERT(GC_size(result) >= bytes + EXTRA_BYTES); \
- GC_ASSERT(((word *)result)[1] == 0); \
+ GC_ASSERT((kind) == PTRFREE || ((word *)result)[1] == 0); \
out: ; \
} \
}
diff --git a/include/private/gc_locks.h b/include/private/gc_locks.h
index 4dcba2b2..5eecc501 100644
--- a/include/private/gc_locks.h
+++ b/include/private/gc_locks.h
@@ -18,22 +18,10 @@
#ifndef GC_LOCKS_H
#define GC_LOCKS_H
-#include <atomic_ops.h>
-
/*
* Mutual exclusion between allocator/collector routines.
* Needed if there is more than one allocator thread.
- * FASTLOCK() is assumed to try to acquire the lock in a cheap and
- * dirty way that is acceptable for a few instructions, e.g. by
- * inhibiting preemption. This is assumed to have succeeded only
- * if a subsequent call to FASTLOCK_SUCCEEDED() returns TRUE.
- * FASTUNLOCK() is called whether or not FASTLOCK_SUCCEEDED().
- * If signals cannot be tolerated with the FASTLOCK held, then
- * FASTLOCK should disable signals. The code executed under
- * FASTLOCK is otherwise immune to interruption, provided it is
- * not restarted.
- * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK
- * and/or FASTLOCK.
+ * DCL_LOCK_STATE declares any local variables needed by LOCK and UNLOCK.
*
* In the PARALLEL_MARK case, we also need to define a number of
* other inline finctions here:
@@ -44,21 +32,9 @@
*
*/
# ifdef THREADS
+# include <atomic_ops.h>
+
void GC_noop1(word);
-# ifdef PCR_OBSOLETE /* Faster, but broken with multiple lwp's */
-# include "th/PCR_Th.h"
-# include "th/PCR_ThCrSec.h"
- extern struct PCR_Th_MLRep GC_allocate_ml;
-# define DCL_LOCK_STATE PCR_sigset_t GC_old_sig_mask
-# define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
-# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define FASTLOCK() PCR_ThCrSec_EnterSys()
- /* Here we cheat (a lot): */
-# define FASTLOCK_SUCCEEDED() (*(int *)(&GC_allocate_ml) == 0)
- /* TRUE if nobody currently holds the lock */
-# define FASTUNLOCK() PCR_ThCrSec_ExitSys()
-# endif
# ifdef PCR
# include <base/PCR_Base.h>
# include <th/PCR_Th.h>
@@ -67,18 +43,37 @@
PCR_ERes GC_fastLockRes; PCR_sigset_t GC_old_sig_mask
# define LOCK() PCR_Th_ML_Acquire(&GC_allocate_ml)
# define UNLOCK() PCR_Th_ML_Release(&GC_allocate_ml)
-# define FASTLOCK() (GC_fastLockRes = PCR_Th_ML_Try(&GC_allocate_ml))
-# define FASTLOCK_SUCCEEDED() (GC_fastLockRes == PCR_ERes_okay)
-# define FASTUNLOCK() {\
- if( FASTLOCK_SUCCEEDED() ) PCR_Th_ML_Release(&GC_allocate_ml); }
# endif
# if !defined(AO_have_test_and_set_acquire)
# define USE_PTHREAD_LOCKS
# endif
+# if defined(GC_WIN32_THREADS) && defined(GC_PTHREADS)
+# define USE_PTHREAD_LOCKS
+# endif
-# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
+# if defined(GC_WIN32_THREADS) && !defined(USE_PTHREAD_LOCKS)
+# include <windows.h>
+# define NO_THREAD (DWORD)(-1)
+ extern DWORD GC_lock_holder;
+ extern CRITICAL_SECTION GC_allocate_ml;
+# ifdef GC_ASSERTIONS
+# define UNCOND_LOCK() \
+ { EnterCriticalSection(&GC_allocate_ml); \
+ SET_LOCK_HOLDER(); }
+# define UNCOND_UNLOCK() \
+ { GC_ASSERT(I_HOLD_LOCK()); UNSET_LOCK_HOLDER(); \
+ LeaveCriticalSection(&GC_allocate_ml); }
+# else
+# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
+# define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
+# endif /* !GC_ASSERTIONS */
+# define SET_LOCK_HOLDER() GC_lock_holder = GetCurrentThreadId()
+# define UNSET_LOCK_HOLDER() GC_lock_holder = NO_THREAD
+# define I_HOLD_LOCK() (!GC_need_to_lock \
+ || GC_lock_holder == GetCurrentThreadId())
+# elif defined(GC_PTHREADS)
# define NO_THREAD (pthread_t)(-1)
# include <pthread.h>
@@ -144,29 +139,16 @@
# endif
# endif /* GC_PTHREADS with linux_threads.c implementation */
-# if defined(GC_WIN32_THREADS)
-# if defined(GC_PTHREADS)
-# include <pthread.h>
- extern pthread_mutex_t GC_allocate_ml;
-# define UNCOND_LOCK() pthread_mutex_lock(&GC_allocate_ml)
-# define UNCOND_UNLOCK() pthread_mutex_unlock(&GC_allocate_ml)
-# else
-# include <windows.h>
- GC_API CRITICAL_SECTION GC_allocate_ml;
-# define UNCOND_LOCK() EnterCriticalSection(&GC_allocate_ml);
-# define UNCOND_UNLOCK() LeaveCriticalSection(&GC_allocate_ml);
-# endif
-# endif
-# ifndef SET_LOCK_HOLDER
-# define SET_LOCK_HOLDER()
-# define UNSET_LOCK_HOLDER()
-# define I_HOLD_LOCK() FALSE
- /* Used on platforms were locks can be reacquired, */
- /* so it doesn't matter if we lie. */
-# endif
+
# else /* !THREADS */
-# define LOCK()
-# define UNLOCK()
+# define LOCK()
+# define UNLOCK()
+# define SET_LOCK_HOLDER()
+# define UNSET_LOCK_HOLDER()
+# define I_HOLD_LOCK() TRUE
+ /* Used only in positive assertions or to test whether */
+ /* we still need to acquire the lock. TRUE works in */
+ /* either case. */
# endif /* !THREADS */
#if defined(UNCOND_LOCK) && !defined(LOCK)
@@ -176,14 +158,6 @@
# define UNLOCK() if (GC_need_to_lock) { UNCOND_UNLOCK(); }
#endif
-# ifndef SET_LOCK_HOLDER
-# define SET_LOCK_HOLDER()
-# define UNSET_LOCK_HOLDER()
-# define I_HOLD_LOCK() FALSE
- /* Used on platforms were locks can be reacquired, */
- /* so it doesn't matter if we lie. */
-# endif
-
# ifndef ENTER_GC
# define ENTER_GC()
# define EXIT_GC()
@@ -193,10 +167,4 @@
# define DCL_LOCK_STATE
# endif
-# ifndef FASTLOCK
-# define FASTLOCK() LOCK()
-# define FASTLOCK_SUCCEEDED() TRUE
-# define FASTUNLOCK() UNLOCK()
-# endif
-
#endif /* GC_LOCKS_H */
diff --git a/include/private/gc_pmark.h b/include/private/gc_pmark.h
index 8a79b9df..1e96f184 100644
--- a/include/private/gc_pmark.h
+++ b/include/private/gc_pmark.h
@@ -167,23 +167,26 @@ exit_label: ; \
/* Set mark bit, exit if it was already set. */
# ifdef USE_MARK_BITS
-/* FIXME: untested */
-# if defined(THREADS)
- /* Introduces a benign race as in the byte case. */
-# define OR_WORD_EXIT_IF_SET(addr, mask, label) \
- if (!(*(addr) & (mask))) { \
- AO_or((AO_t *)(addr), (mask); \
- } else { \
- goto label; \
- }
-# else /* !THREADS */
-# define OR_WORD_EXIT_IF_SET(addr, mask, label) \
- if (!(*(addr) & (mask))) { \
- *(addr) |= (mask); \
- } else { \
- goto label; \
- }
-# endif
+# ifdef PARALLEL_MARK
+ /* The following may fail to exit even if the bit was already set. */
+ /* For our uses, that's benign: */
+# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+ { \
+ if (!(*(addr) & (bits))) { \
+ AO_or((AO_t *)(addr), (AO_t)(bits)); \
+ } else { \
+ goto exit_label; \
+ } \
+ }
+# else
+# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
+ { \
+ word old = *(addr); \
+ word my_bits = (bits); \
+ if (old & my_bits) goto exit_label; \
+ *(addr) = (old | my_bits); \
+ }
+# endif /* !PARALLEL_MARK */
# define SET_MARK_BIT_EXIT_IF_SET(hhdr,bit_no,exit_label) \
{ \
word * mark_word_addr = hhdr -> hb_marks + divWORDSZ(bit_no); \
@@ -194,18 +197,19 @@ exit_label: ; \
# endif
-#if defined(I386) && defined(__GNUC__)
+#ifdef USE_MARK_BYTES
+# if defined(I386) && defined(__GNUC__)
# define LONG_MULT(hprod, lprod, x, y) { \
asm("mull %2" : "=a"(lprod), "=d"(hprod) : "g"(y), "0"(x)); \
}
-#else /* No in-line X86 assembly code */
+# else /* No in-line X86 assembly code */
# define LONG_MULT(hprod, lprod, x, y) { \
unsigned long long prod = (unsigned long long)x \
* (unsigned long long)y; \
hprod = prod >> 32; \
lprod = (unsigned32)prod; \
}
-#endif
+# endif
/* There is a race here, and we may set */
/* the bit twice in the concurrent case. This can result in the */
@@ -218,6 +222,7 @@ exit_label: ; \
if (mark_byte) goto exit_label; \
*mark_byte_addr = 1; \
}
+#endif /* USE_MARK_BYTES */
#ifdef PARALLEL_MARK
# define INCR_MARKS(hhdr) \
diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h
index d65a393d..b55a6738 100644
--- a/include/private/gc_priv.h
+++ b/include/private/gc_priv.h
@@ -63,9 +63,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add */
# ifndef GCCONFIG_H
# include "gcconfig.h"
-# ifndef USE_MARK_BYTES
-# define USE_MARK_BYTES
-# endif
# endif
# ifndef HEADERS_H
@@ -74,8 +71,8 @@ typedef char * ptr_t; /* A generic pointer to which we can add */
#if __GNUC__ >= 3
# define EXPECT(expr, outcome) __builtin_expect(expr,outcome)
-# define INLINE inline
/* Equivalent to (expr), but predict that usually (expr)==outcome. */
+# define INLINE inline
#else
# define EXPECT(expr, outcome) (expr)
# define INLINE
@@ -192,17 +189,6 @@ typedef char * ptr_t; /* A generic pointer to which we can add */
/* */
/*********************************/
-#ifdef SAVE_CALL_CHAIN
-
-/* Fill in the pc and argument information for up to NFRAMES of my */
-/* callers. Ignore my frame and my callers frame. */
-struct callinfo;
-void GC_save_callers(struct callinfo info[NFRAMES]);
-
-void GC_print_callers(struct callinfo info[NFRAMES]);
-
-#endif
-
#ifdef NEED_CALLINFO
struct callinfo {
word ci_pc; /* Caller, not callee, pc */
@@ -216,6 +202,16 @@ void GC_print_callers(struct callinfo info[NFRAMES]);
};
#endif
+#ifdef SAVE_CALL_CHAIN
+
+/* Fill in the pc and argument information for up to NFRAMES of my */
+/* callers. Ignore my frame and my callers frame. */
+void GC_save_callers(struct callinfo info[NFRAMES]);
+
+void GC_print_callers(struct callinfo info[NFRAMES]);
+
+#endif
+
/*********************************/
/* */
@@ -331,10 +327,10 @@ void GC_print_callers(struct callinfo info[NFRAMES]);
# define ABORT(s) PCR_Base_Panic(s)
# else
# ifdef SMALL_CONFIG
-# define ABORT(msg) abort();
+# define ABORT(msg) abort()
# else
GC_API void GC_abort(const char * msg);
-# define ABORT(msg) GC_abort(msg);
+# define ABORT(msg) GC_abort(msg)
# endif
# endif
@@ -660,10 +656,20 @@ struct hblkhdr {
counter_t hb_n_marks; /* Number of set mark bits, excluding */
/* the one always set at the end. */
/* Currently it is concurrently */
- /* updated and hence only a lower bound.*/
- /* But a zero value does gurantee that */
+ /* updated and hence only approximate. */
+ /* But a zero value does guarantee that */
/* the block contains no marked */
/* objects. */
+ /* Ensuring this property means that we */
+ /* never decrement it to zero during a */
+ /* collection, and hence the count may */
+ /* be one too high. Due to concurrent */
+ /* updates, an arbitrary number of */
+ /* increments, but not all of them (!) */
+ /* may be lost, hence it may in theory */
+ /* be much too low. */
+ /* Without parallel marking, the count */
+ /* is accurate. */
# ifdef USE_MARK_BYTES
union {
char _hb_marks[MARK_BITS_SZ];
@@ -676,12 +682,13 @@ struct hblkhdr {
word dummy; /* Force word alignment of mark bytes. */
} _mark_byte_union;
# define hb_marks _mark_byte_union._hb_marks
-# define ANY_INDEX 23 /* Random mark bit index for assertions */
# else
word hb_marks[MARK_BITS_SZ];
# endif /* !USE_MARK_BYTES */
};
+# define ANY_INDEX 23 /* "Random" mark bit index for assertions */
+
/* heap block body */
# define HBLK_WORDS (HBLKSIZE/sizeof(word))
@@ -1156,28 +1163,9 @@ extern long GC_large_alloc_warn_suppressed;
/* accessed. */
#ifdef PARALLEL_MARK
# define OR_WORD(addr, bits) \
- { word old; \
- do { \
- old = *((volatile word *)addr); \
- } while (!GC_compare_and_exchange((addr), old, old | (bits))); \
- }
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
- { word old; \
- word my_bits = (bits); \
- do { \
- old = *((volatile word *)addr); \
- if (old & my_bits) goto exit_label; \
- } while (!GC_compare_and_exchange((addr), old, old | my_bits)); \
- }
+ { AO_or((volatile AO_t *)(addr), (AO_t)(bits)); }
#else
# define OR_WORD(addr, bits) *(addr) |= (bits)
-# define OR_WORD_EXIT_IF_SET(addr, bits, exit_label) \
- { \
- word old = *(addr); \
- word my_bits = (bits); \
- if (old & my_bits) goto exit_label; \
- *(addr) = (old | my_bits); \
- }
#endif
/* Mark bit operations */
@@ -1338,7 +1326,7 @@ void GC_with_callee_saves_pushed(void (*fn)(ptr_t, void *),
# if defined(SPARC) || defined(IA64)
/* Cause all stacked registers to be saved in memory. Return a */
/* pointer to the top of the corresponding memory stack. */
- word GC_save_regs_in_stack(void);
+ ptr_t GC_save_regs_in_stack(void);
# endif
/* Push register contents onto mark stack. */
/* If NURSERY is defined, the default push */
@@ -1504,7 +1492,8 @@ ptr_t GC_build_fl(struct hblk *h, size_t words, GC_bool clear, ptr_t list);
/* called by GC_new_hblk, but also */
/* called explicitly without GC lock. */
-struct hblk * GC_allochblk (size_t size_in_bytes, int kind, unsigned flags);
+struct hblk * GC_allochblk (size_t size_in_bytes, int kind,
+ unsigned char flags);
/* Allocate a heap block, inform */
/* the marker that block is valid */
/* for objects of indicated size. */
@@ -1766,9 +1755,6 @@ GC_bool GC_page_was_dirty(struct hblk *h);
/* Read retrieved dirty bits. */
GC_bool GC_page_was_ever_dirty(struct hblk *h);
/* Could the page contain valid heap pointers? */
-void GC_is_fresh(struct hblk *h, word n);
- /* Assert the region currently contains no */
- /* valid pointers. */
void GC_remove_protection(struct hblk *h, word nblocks,
GC_bool pointerfree);
/* h is about to be writteni or allocated. Ensure */
@@ -1896,7 +1882,7 @@ void GC_err_puts(const char *s);
/* some other reason. */
# endif /* PARALLEL_MARK */
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS)
+# if defined(GC_PTHREADS)
/* We define the thread suspension signal here, so that we can refer */
/* to it in the dirty bit implementation, if necessary. Ideally we */
/* would allocate a (real-time ?) signal using the standard mechanism.*/
diff --git a/include/private/gcconfig.h b/include/private/gcconfig.h
index 9fe04198..9b80cbe3 100644
--- a/include/private/gcconfig.h
+++ b/include/private/gcconfig.h
@@ -854,6 +854,7 @@
# define ALIGNMENT 4 /* Required by hardware */
# define CPP_WORDSZ 32
# endif
+# define USE_ASM_PUSH_REGS
# ifdef SUNOS5
# define OS_TYPE "SUNOS5"
extern int _etext[];
@@ -1146,7 +1147,11 @@
# if !defined(__WATCOMC__) && !defined(GC_WIN32_THREADS)
# define MPROTECT_VDB
# endif
-# define GWW_VDB
+# if _MSC_VER >= 1300 /* .NET, i.e. > VisualStudio 6 */
+# define GWW_VDB
+# else
+# define MPROTECT_VDB
+# endif
# define DATAEND /* not needed */
# endif
# ifdef MSWINCE
@@ -1490,13 +1495,6 @@
# ifdef IA64
# define MACH_TYPE "IA64"
- /* We need to get preserved registers in addition to register */
- /* windows. That's easiest to do with setjmp. */
-# ifdef PARALLEL_MARK
-# define USE_MARK_BYTES
- /* Compare-and-exchange is too expensive to use for */
- /* setting mark bits. */
-# endif
# ifdef HPUX
# ifdef _ILP32
# define CPP_WORDSZ 32
@@ -1992,6 +1990,14 @@
# define THREADS
# endif
+# if !defined(USE_MARK_BITS) && !defined(USE_MARK_BYTES)
+# if defined(THREADS) && defined(PARALLEL_MARK)
+# define USE_MARK_BYTES
+# else
+# define USE_MARK_BITS
+# endif
+# endif
+
# if defined(MSWINCE)
# define NO_GETENV
# endif
diff --git a/include/private/pthread_support.h b/include/private/pthread_support.h
index b2ef68ea..77f1ad1a 100644
--- a/include/private/pthread_support.h
+++ b/include/private/pthread_support.h
@@ -3,8 +3,7 @@
# include "private/gc_priv.h"
-# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
- && !defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS)
#if defined(GC_DARWIN_THREADS)
# include "private/darwin_stop_world.h"
@@ -67,7 +66,7 @@ typedef struct GC_Thread_Rep {
# endif
} * GC_thread;
-# define THREAD_TABLE_SZ 128 /* Must be power of 2 */
+# define THREAD_TABLE_SZ 256 /* Must be power of 2 */
extern volatile GC_thread GC_threads[THREAD_TABLE_SZ];
extern GC_bool GC_thr_initialized;
diff --git a/include/private/thread_local_alloc.h b/include/private/thread_local_alloc.h
index 32cbb080..3416931d 100644
--- a/include/private/thread_local_alloc.h
+++ b/include/private/thread_local_alloc.h
@@ -19,6 +19,45 @@
/* implementation also exports GC_malloc and friends, which */
/* are declared in gc.h. */
+#include "private/gc_priv.h"
+
+#if defined(THREAD_LOCAL_ALLOC)
+
+#include "gc_inline.h"
+
+
+# if defined USE_HPUX_TLS
+# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
+# endif
+
+# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC) && \
+ !defined(USE_WIN32_COMPILER_TLS) && !defined(USE_COMPILER_TLS) && \
+ !defined(USE_CUSTOM_SPECIFIC)
+# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
+# if defined(__GNUC__) /* Fixed for versions past 2.95? */
+# define USE_WIN32_SPECIFIC
+# else
+# define USE_WIN32_COMPILER_TLS
+# endif /* !GNU */
+# elif defined(LINUX) && defined(__GNUC__)
+# define USE_COMPILER_TLS
+# elif (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
+ defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS))
+# define USE_PTHREAD_SPECIFIC
+# elif defined(GC_HPUX_THREADS)
+# ifdef __GNUC__
+# define USE_PTHREAD_SPECIFIC
+ /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */
+# else
+# define USE_COMPILER_TLS
+# endif
+# else
+# define USE_CUSTOM_SPECIFIC /* Use our own. */
+# endif
+# endif
+
+# include <stdlib.h>
+
/* One of these should be declared as the tlfs field in the */
/* structure pointed to by a GC_thread. */
typedef struct thread_local_freelists {
@@ -52,22 +91,27 @@ typedef struct thread_local_freelists {
# define GC_key_create pthread_key_create
# define GC_remove_specific() /* No need for cleanup on exit. */
typedef pthread_key_t GC_key_t;
-# elif defined(USE_COMPILER_TLS)
+# elif defined(USE_COMPILER_TLS) || defined(USE_WIN32_COMPILER_TLS)
# define GC_getspecific(x) (x)
# define GC_setspecific(key, v) ((key) = (v), 0)
# define GC_key_create(key, d) 0
# define GC_remove_specific() /* No need for cleanup on exit. */
typedef void * GC_key_t;
# elif defined(USE_WIN32_SPECIFIC)
+# include <windows.h>
# define GC_getspecific TlsGetValue
-# define GC_setspecific TlsSetValue
+# define GC_setspecific(key, v) !TlsSetValue(key, v)
+ /* We assume 0 == success, msft does the opposite. */
# define GC_key_create(key, d) \
((d) != 0? (ABORT("Destructor unsupported by TlsAlloc"),0) \
: (*(key) = TlsAlloc(), 0))
# define GC_remove_specific() /* No need for cleanup on thread exit. */
/* Need TlsFree on process exit/detach ? */
-# else
+ typedef DWORD GC_key_t;
+# elif defined(USE_CUSTOM_SPECIFIC)
# include "private/specific.h"
+# else
+# error implement me
# endif
@@ -86,14 +130,18 @@ void GC_destroy_thread_local(GC_tlfs p);
/* we take care of an individual thread freelist structure. */
void GC_mark_thread_local_fls_for(GC_tlfs p);
-#ifdef USE_COMPILER_TLS
+extern
+#if defined(USE_COMPILER_TLS)
__thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+ __declspec(thread) /* MSVC spelling of thread-local storage */
#endif
GC_key_t GC_thread_key;
+
/* This is set up by the thread_local_alloc implementation. But the */
/* thread support layer calls GC_remove_specific(GC_thread_key) */
/* before a thread exits. */
/* And the thread support layer makes sure that GC_thread_key is traced,*/
/* if necessary. */
-
+#endif /* THREAD_LOCAL_ALLOC */
diff --git a/mach_dep.c b/mach_dep.c
index 50b56652..ca1ace1b 100644
--- a/mach_dep.c
+++ b/mach_dep.c
@@ -65,7 +65,7 @@ asm static void PushMacRegisters()
# if defined(SPARC) || defined(IA64)
/* Value returned from register flushing routine; either sp (SPARC) */
/* or ar.bsp (IA64) */
- word GC_save_regs_ret_val;
+ ptr_t GC_save_regs_ret_val;
# endif
/* Routine to mark from registers that are preserved by the C compiler. */
@@ -265,88 +265,12 @@ ptr_t cold_gc_frame;
GC_with_callee_saves_pushed(GC_push_current_stack, cold_gc_frame);
}
-/* On register window machines, we need a way to force registers into */
-/* the stack. Return sp. */
-# ifdef SPARC
- asm(" .seg \"text\"");
-# if defined(SVR4) || defined(NETBSD) || defined(FREEBSD)
- asm(" .globl GC_save_regs_in_stack");
- asm("GC_save_regs_in_stack:");
- asm(" .type GC_save_regs_in_stack,#function");
-# else
- asm(" .globl _GC_save_regs_in_stack");
- asm("_GC_save_regs_in_stack:");
-# endif
-# if defined(__arch64__) || defined(__sparcv9)
- asm(" save %sp,-128,%sp");
- asm(" flushw");
- asm(" ret");
- asm(" restore %sp,2047+128,%o0");
-# else
- asm(" ta 0x3 ! ST_FLUSH_WINDOWS");
- asm(" retl");
- asm(" mov %sp,%o0");
-# endif
-# ifdef SVR4
- asm(" .GC_save_regs_in_stack_end:");
- asm(" .size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack");
-# endif
-# ifdef LINT
- word GC_save_regs_in_stack() { return(0 /* sp really */);}
-# endif
-# endif
-
-/* GC_clear_stack_inner(arg, limit) clears stack area up to limit and */
-/* returns arg. Stack clearing is crucial on SPARC, so we supply */
-/* an assembly version that's more careful. Assumes limit is hotter */
-/* than sp, and limit is 8 byte aligned. */
#if defined(ASM_CLEAR_CODE)
-#ifndef SPARC
- --> fix it
-#endif
- asm(".globl GC_clear_stack_inner");
- asm("GC_clear_stack_inner:");
- asm(".type GC_save_regs_in_stack,#function");
-#if defined(__arch64__) || defined(__sparcv9)
- asm("mov %sp,%o2"); /* Save sp */
- asm("add %sp,2047-8,%o3"); /* p = sp+bias-8 */
- asm("add %o1,-2047-192,%sp"); /* Move sp out of the way, */
- /* so that traps still work. */
- /* Includes some extra words */
- /* so we can be sloppy below. */
- asm("loop:");
- asm("stx %g0,[%o3]"); /* *(long *)p = 0 */
- asm("cmp %o3,%o1");
- asm("bgu,pt %xcc, loop"); /* if (p > limit) goto loop */
- asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */
- asm("retl");
- asm("mov %o2,%sp"); /* Restore sp., delay slot */
-#else
- asm("mov %sp,%o2"); /* Save sp */
- asm("add %sp,-8,%o3"); /* p = sp-8 */
- asm("clr %g1"); /* [g0,g1] = 0 */
- asm("add %o1,-0x60,%sp"); /* Move sp out of the way, */
- /* so that traps still work. */
- /* Includes some extra words */
- /* so we can be sloppy below. */
- asm("loop:");
- asm("std %g0,[%o3]"); /* *(long long *)p = 0 */
- asm("cmp %o3,%o1");
- asm("bgu loop "); /* if (p > limit) goto loop */
- asm("add %o3,-8,%o3"); /* p -= 8 (delay slot) */
- asm("retl");
- asm("mov %o2,%sp"); /* Restore sp., delay slot */
-#endif /* old SPARC */
- /* First argument = %o0 = return value */
-# ifdef SVR4
- asm(" .GC_clear_stack_inner_end:");
- asm(" .size GC_clear_stack_inner,.GC_clear_stack_inner_end-GC_clear_stack_inner");
-# endif
-
# ifdef LINT
/*ARGSUSED*/
ptr_t GC_clear_stack_inner(arg, limit)
ptr_t arg; word limit;
{ return(arg); }
+ /* The real version is in a .S file */
# endif
#endif /* ASM_CLEAR_CODE */
diff --git a/malloc.c b/malloc.c
index a36956ab..1513735f 100644
--- a/malloc.c
+++ b/malloc.c
@@ -215,14 +215,14 @@ void * GC_generic_malloc(size_t lb, int k)
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = &(GC_aobjfreelist[lg]);
- FASTLOCK();
- if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
- FASTUNLOCK();
+ LOCK();
+ if( EXPECT((op = *opp) == 0, 0) ) {
+ UNLOCK();
return(GENERAL_MALLOC((word)lb, PTRFREE));
}
*opp = obj_link(op);
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
return((void *) op);
} else {
return(GENERAL_MALLOC((word)lb, PTRFREE));
@@ -244,9 +244,9 @@ void * GC_generic_malloc(size_t lb, int k)
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = (void **)&(GC_objfreelist[lg]);
- FASTLOCK();
- if( EXPECT(!FASTLOCK_SUCCEEDED() || (op = *opp) == 0, 0) ) {
- FASTUNLOCK();
+ LOCK();
+ if( EXPECT((op = *opp) == 0, 0) ) {
+ UNLOCK();
return(GENERAL_MALLOC((word)lb, NORMAL));
}
/* See above comment on signals. */
@@ -258,7 +258,7 @@ void * GC_generic_malloc(size_t lb, int k)
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
return op;
} else {
return(GENERAL_MALLOC(lb, NORMAL));
diff --git a/mallocx.c b/mallocx.c
index 761514da..91e41d5e 100644
--- a/mallocx.c
+++ b/mallocx.c
@@ -451,8 +451,8 @@ void * GC_malloc_uncollectable(size_t lb)
/* collected anyway. */
lg = GC_size_map[lb];
opp = &(GC_uobjfreelist[lg]);
- FASTLOCK();
- if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+ LOCK();
+ if( (op = *opp) != 0 ) {
/* See above comment on signals. */
*opp = obj_link(op);
obj_link(op) = 0;
@@ -461,28 +461,31 @@ void * GC_malloc_uncollectable(size_t lb)
/* cleared only temporarily during a collection, as a */
/* result of the normal free list mark bit clearing. */
GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
- return((void *) op);
- }
- FASTUNLOCK();
- op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ UNLOCK();
+ } else {
+ UNLOCK();
+ op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ /* For small objects, the free lists are completely marked. */
+ }
+ GC_ASSERT(0 == op || GC_is_marked(op));
+ return((void *) op);
} else {
- op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
- }
- if (0 == op) return(0);
- /* We don't need the lock here, since we have an undisguised */
- /* pointer. We do need to hold the lock while we adjust */
- /* mark bits. */
- {
- register struct hblk * h;
size_t lb;
+ hdr * hhdr;
- h = HBLKPTR(op);
- lb = HDR(h) -> hb_sz;
+ op = (ptr_t)GC_generic_malloc((word)lb, UNCOLLECTABLE);
+ if (0 == op) return(0);
+ GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0); /* large block */
+ hhdr = HDR((struct hblk *)op); /* op is block-aligned (asserted above) */
+ /* We don't need the lock here, since we have an undisguised */
+ /* pointer. We do need to hold the lock while we adjust */
+ /* mark bits. */
+ lb = hhdr -> hb_sz;
LOCK();
- GC_set_mark_bit(op);
- GC_non_gc_bytes += lb;
+ set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+ GC_ASSERT(hhdr -> hb_n_marks == 0);
+ hhdr -> hb_n_marks = 1;
UNLOCK();
return((void *) op);
}
@@ -538,36 +541,36 @@ void * GC_malloc_atomic_uncollectable(size_t lb)
/* collected anyway. */
lg = GC_size_map[lg];
opp = &(GC_auobjfreelist[lg]);
- FASTLOCK();
- if( FASTLOCK_SUCCEEDED() && (op = *opp) != 0 ) {
+ LOCK();
+ if( (op = *opp) != 0 ) {
/* See above comment on signals. */
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
/* Mark bit was already set while object was on free list. */
GC_non_gc_bytes += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
- return((void *) op);
- }
- FASTUNLOCK();
- op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ UNLOCK();
+ } else {
+ UNLOCK();
+ op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ }
+ GC_ASSERT(0 == op || GC_is_marked(op));
+ return((void *) op);
} else {
- op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
- }
- if (0 == op) return(0);
- /* We don't need the lock here, since we have an undisguised */
- /* pointer. We do need to hold the lock while we adjust */
- /* mark bits. */
- {
- struct hblk * h;
size_t lb;
+ hdr * hhdr;
- h = HBLKPTR(op);
- lb = HDR(h) -> hb_sz;
+ op = (ptr_t)GC_generic_malloc(lb, AUNCOLLECTABLE);
+ if (0 == op) return(0);
+
+ GC_ASSERT(((word)op & (HBLKSIZE - 1)) == 0);
+ hhdr = HDR((struct hblk *)op); /* op is block-aligned (asserted above) */
+ lb = hhdr -> hb_sz;
LOCK();
- GC_set_mark_bit(op);
- GC_non_gc_bytes += lb;
+ set_mark_bit_from_hdr(hhdr, 0); /* Only object. */
+ GC_ASSERT(hhdr -> hb_n_marks == 0);
+ hhdr -> hb_n_marks = 1;
UNLOCK();
return((void *) op);
}
diff --git a/mark.c b/mark.c
index 641f0d6f..d46c1ac7 100644
--- a/mark.c
+++ b/mark.c
@@ -156,7 +156,7 @@ void GC_clear_hdr_marks(hdr *hhdr)
/* Set all mark bits in the header. Used for uncollectable blocks. */
void GC_set_hdr_marks(hdr *hhdr)
{
- int i;
+ unsigned i;
size_t sz = hhdr -> hb_sz;
int n_marks = FINAL_MARK_BIT(sz);
@@ -214,7 +214,7 @@ void GC_clear_mark_bit(ptr_t p)
int n_marks;
clear_mark_bit_from_hdr(hhdr, bit_no);
n_marks = hhdr -> hb_n_marks - 1;
-# ifdef THREADS
+# ifdef PARALLEL_MARK
if (n_marks != 0)
hhdr -> hb_n_marks = n_marks;
/* Don't decrement to zero. The counts are approximate due to */
@@ -1473,7 +1473,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top)
word * b = (word *)(((word) bottom + ALIGNMENT-1) & ~(ALIGNMENT-1));
word * t = (word *)(((word) top) & ~(ALIGNMENT-1));
register word *p;
- register word q;
+ register ptr_t q;
register word *lim;
register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
register ptr_t least_ha = GC_least_plausible_heap_addr;
@@ -1485,7 +1485,7 @@ void GC_push_all_eager(ptr_t bottom, ptr_t top)
/* to be valid. */
lim = t - 1 /* longword */;
for (p = b; p <= lim; p = (word *)(((ptr_t)p) + ALIGNMENT)) {
- q = *p;
+ q = (ptr_t)(*p);
GC_PUSH_ONE_STACK((ptr_t)q, p);
}
# undef GC_greatest_plausible_heap_addr
@@ -1508,7 +1508,6 @@ void GC_push_all_stack_partially_eager(ptr_t bottom, ptr_t top,
ptr_t cold_gc_frame)
{
if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
-# define EAGER_BYTES 1024
/* Push the hot end of the stack eagerly, so that register values */
/* saved inside GC frames are marked before they disappear. */
/* The rest of the marking can be deferred until later. */
@@ -1546,21 +1545,52 @@ void GC_push_all_stack(ptr_t bottom, ptr_t top)
# endif
}
-#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)
+#if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES) && \
+ defined(MARK_BIT_PER_GRANULE)
+# if GC_GRANULE_WORDS == 1
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); }
+# elif GC_GRANULE_WORDS == 2
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); \
+ qcontents = (ptr_t)((q)[1]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+1); }
+# elif GC_GRANULE_WORDS == 4
+# define USE_PUSH_MARKED_ACCELERATORS
+# define PUSH_GRANULE(q) \
+ { ptr_t qcontents = (ptr_t)((q)[0]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)); \
+ qcontents = (ptr_t)((q)[1]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+1); \
+ qcontents = (ptr_t)((q)[2]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+2); \
+ qcontents = (ptr_t)((q)[3]); \
+ GC_PUSH_ONE_HEAP(qcontents, (q)+3); }
+# endif
+#endif
+
+#ifdef USE_PUSH_MARKED_ACCELERATORS
/* Push all objects reachable from marked objects in the given block */
-/* of size 1 objects. */
+/* containing objects of size 1 granule. */
void GC_push_marked1(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ /* Allow registers to be used for some frequently accessed */
+ /* global variables. Otherwise aliasing issues are likely */
+ /* to prevent that. */
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
@@ -1572,21 +1602,22 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr)
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
+ PUSH_GRANULE(q);
}
- i++;
+ q += GC_GRANULE_WORDS;
mark_word >>= 1;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
+
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
+
GC_mark_stack_top = mark_stack_top;
}
@@ -1594,19 +1625,20 @@ void GC_push_marked1(struct hblk *h, hdr *hhdr)
#ifndef UNALIGNED
/* Push all objects reachable from marked objects in the given block */
-/* of size 2 objects. */
+/* of size 2 (granules) objects. */
void GC_push_marked2(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
+
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
@@ -1618,42 +1650,43 @@ void GC_push_marked2(struct hblk *h, hdr *hhdr)
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
- q = p[i+1];
- GC_PUSH_ONE_HEAP(q, p + i);
+ PUSH_GRANULE(q);
+ PUSH_GRANULE(q + GC_GRANULE_WORDS);
}
- i += 2;
+ q += 2 * GC_GRANULE_WORDS;
mark_word >>= 2;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
+
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
# undef GC_mark_stack_top
# undef GC_mark_stack_limit
+
GC_mark_stack_top = mark_stack_top;
}
+# if GC_GRANULE_WORDS < 4
/* Push all objects reachable from marked objects in the given block */
-/* of size 4 objects. */
+/* of size 4 (granules) objects. */
/* There is a risk of mark stack overflow here. But we handle that. */
/* And only unmarked objects get pushed, so it's not very likely. */
void GC_push_marked4(struct hblk *h, hdr *hhdr)
{
word * mark_word_addr = &(hhdr->hb_marks[0]);
- register word *p;
+ word *p;
word *plim;
- register int i;
- register word q;
- register word mark_word;
- register ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
- register ptr_t least_ha = GC_least_plausible_heap_addr;
- register mse * mark_stack_top = GC_mark_stack_top;
- register mse * mark_stack_limit = GC_mark_stack_limit;
+ word *q;
+ word mark_word;
+
+ ptr_t greatest_ha = GC_greatest_plausible_heap_addr;
+ ptr_t least_ha = GC_least_plausible_heap_addr;
+ mse * mark_stack_top = GC_mark_stack_top;
+ mse * mark_stack_limit = GC_mark_stack_limit;
# define GC_mark_stack_top mark_stack_top
# define GC_mark_stack_limit mark_stack_limit
# define GC_greatest_plausible_heap_addr greatest_ha
@@ -1665,22 +1698,18 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr)
/* go through all words in block */
while( p < plim ) {
mark_word = *mark_word_addr++;
- i = 0;
+ q = p;
while(mark_word != 0) {
if (mark_word & 1) {
- q = p[i];
- GC_PUSH_ONE_HEAP(q, p + i);
- q = p[i+1];
- GC_PUSH_ONE_HEAP(q, p + i + 1);
- q = p[i+2];
- GC_PUSH_ONE_HEAP(q, p + i + 2);
- q = p[i+3];
- GC_PUSH_ONE_HEAP(q, p + i + 3);
+ PUSH_GRANULE(q);
+ PUSH_GRANULE(q + GC_GRANULE_WORDS);
+ PUSH_GRANULE(q + 2*GC_GRANULE_WORDS);
+ PUSH_GRANULE(q + 3*GC_GRANULE_WORDS);
}
- i += 4;
+ q += 4 * GC_GRANULE_WORDS;
mark_word >>= 4;
}
- p += WORDSZ;
+ p += WORDSZ*GC_GRANULE_WORDS;
}
# undef GC_greatest_plausible_heap_addr
# undef GC_least_plausible_heap_addr
@@ -1689,9 +1718,11 @@ void GC_push_marked4(struct hblk *h, hdr *hhdr)
GC_mark_stack_top = mark_stack_top;
}
+#endif /* GC_GRANULE_WORDS < 4 */
+
#endif /* UNALIGNED */
-#endif /* SMALL_CONFIG */
+#endif /* USE_PUSH_MARKED_ACCELERATORS */
/* Push all objects reachable from marked objects in the given block */
void GC_push_marked(struct hblk *h, hdr *hhdr)
@@ -1715,20 +1746,21 @@ void GC_push_marked(struct hblk *h, hdr *hhdr)
lim = (h + 1)->hb_body - sz;
}
- switch(BYTES_TO_WORDS(sz)) {
-# if !defined(SMALL_CONFIG) && !defined(USE_MARK_BYTES)
+ switch(BYTES_TO_GRANULES(sz)) {
+# if defined(USE_PUSH_MARKED_ACCELERATORS)
case 1:
GC_push_marked1(h, hhdr);
break;
-# endif
-# if !defined(SMALL_CONFIG) && !defined(UNALIGNED) && \
- !defined(USE_MARK_BYTES)
- case 2:
- GC_push_marked2(h, hhdr);
- break;
- case 4:
- GC_push_marked4(h, hhdr);
- break;
+# if !defined(UNALIGNED)
+ case 2:
+ GC_push_marked2(h, hhdr);
+ break;
+# if GC_GRANULE_WORDS < 4
+ case 4:
+ GC_push_marked4(h, hhdr);
+ break;
+# endif
+# endif
# endif
default:
GC_mark_stack_top_reg = GC_mark_stack_top;
diff --git a/mark_rts.c b/mark_rts.c
index 19ea80a9..bd97c6ec 100644
--- a/mark_rts.c
+++ b/mark_rts.c
@@ -593,7 +593,7 @@ void GC_push_roots(GC_bool all, ptr_t cold_gc_frame)
/* If the world is not stopped, this is unsafe. It is */
/* also unnecessary, since we will do this again with the */
/* world stopped. */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
if (GC_world_stopped) GC_mark_thread_local_free_lists();
# endif
diff --git a/misc.c b/misc.c
index 70e37fbf..10bf5129 100644
--- a/misc.c
+++ b/misc.c
@@ -43,31 +43,12 @@
int GC_log; /* Forward decl, so we can set it. */
#endif
-# ifdef THREADS
-# ifdef PCR
-# include "il/PCR_IL.h"
- PCR_Th_ML GC_allocate_ml;
-# elif defined(GC_WIN32_THREADS)
-# if defined(GC_PTHREADS)
- pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
-# elif defined(GC_DLL)
- __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
-# else
- CRITICAL_SECTION GC_allocate_ml;
-# endif
-# elif defined(GC_PTHREADS)
-# if defined(USE_SPIN_LOCK)
- pthread_t GC_lock_holder = NO_THREAD;
-# else
- pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
- pthread_t GC_lock_holder = NO_THREAD;
- /* Used only for assertions, and to prevent */
- /* recursive reentry in the system call wrapper. */
-# endif
-# else
- --> declare allocator lock here
-# endif
-# endif
+#if defined(THREADS) && defined(PCR)
+# include "il/PCR_IL.h"
+ PCR_Th_ML GC_allocate_ml;
+#endif
+/* For other platforms with threads, the lock and possibly */
+/* GC_lock_holder variables are defined in the thread support code. */
#if defined(NOSYS) || defined(ECOS)
#undef STACKBASE
@@ -157,7 +138,7 @@ void * GC_project2(void *arg1, void *arg2)
/* quantization alogrithm (but we precompute it). */
void GC_init_size_map(void)
{
- register unsigned i;
+ int i;
/* Map size 0 to something bigger. */
/* This avoids problems at lower levels. */
@@ -423,7 +404,7 @@ void GC_init(void)
#if defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS)
if (!GC_is_initialized) {
BOOL (WINAPI *pfn) (LPCRITICAL_SECTION, DWORD) = NULL;
- HMODULE hK32 = GetModuleHandle("kernel32.dll");
+ HMODULE hK32 = GetModuleHandleA("kernel32.dll");
if (hK32)
(FARPROC) pfn = GetProcAddress(hK32,
"InitializeCriticalSectionAndSpinCount");
diff --git a/os_dep.c b/os_dep.c
index d78f8e95..e43062f2 100644
--- a/os_dep.c
+++ b/os_dep.c
@@ -80,10 +80,12 @@
# undef GC_AMIGA_DEF
#endif
-#if defined(MSWIN32) || defined(MSWINCE)
+#if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
# define WIN32_LEAN_AND_MEAN
# define NOSERVICE
# include <windows.h>
+ /* It's not clear this is completely kosher under Cygwin. But it */
+ /* allows us to get a working GC_get_stack_base. */
#endif
#ifdef MACOS
@@ -468,7 +470,7 @@ void GC_enable_signals(void)
&& !defined(MACOS) && !defined(DJGPP) && !defined(DOS4GW) \
&& !defined(NOSYS) && !defined(ECOS)
-# if defined(sigmask) && !defined(UTS4) && !defined(HURD)
+# if 0
/* Use the traditional BSD interface */
# define SIGSET_T int
# define SIG_DEL(set, signal) (set) &= ~(sigmask(signal))
@@ -477,14 +479,15 @@ void GC_enable_signals(void)
/* longjmp implementations. Most systems appear not to have */
/* a signal 32. */
# define SIGSETMASK(old, new) (old) = sigsetmask(new)
-# else
- /* Use POSIX/SYSV interface */
-# define SIGSET_T sigset_t
-# define SIG_DEL(set, signal) sigdelset(&(set), (signal))
-# define SIG_FILL(set) sigfillset(&set)
-# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
# endif
+ /* Use POSIX/SYSV interface */
+# define SIGSET_T sigset_t
+# define SIG_DEL(set, signal) sigdelset(&(set), (signal))
+# define SIG_FILL(set) sigfillset(&set)
+# define SIGSETMASK(old, new) sigprocmask(SIG_SETMASK, &(new), &(old))
+
+
static GC_bool mask_initialized = FALSE;
static SIGSET_T new_mask;
@@ -578,7 +581,7 @@ word GC_page_size;
* With threads, GC_mark_roots needs to know how to do this.
* Called with allocator lock held.
*/
-# if defined(MSWIN32) || defined(MSWINCE)
+# if defined(MSWIN32) || defined(MSWINCE) || defined(CYGWIN32)
# define is_writable(prot) ((prot) == PAGE_READWRITE \
|| (prot) == PAGE_WRITECOPY \
|| (prot) == PAGE_EXECUTE_READWRITE \
@@ -970,7 +973,8 @@ ptr_t GC_get_main_stack_base(void)
#endif /* FREEBSD_STACKBOTTOM */
#if !defined(BEOS) && !defined(AMIGA) && !defined(MSWIN32) \
- && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS)
+ && !defined(MSWINCE) && !defined(OS2) && !defined(NOSYS) && !defined(ECOS) \
+ && !defined(CYGWIN32)
ptr_t GC_get_main_stack_base(void)
{
@@ -2021,8 +2025,7 @@ void GC_default_push_other_roots(void)
# endif /* PCR */
-# if defined(GC_SOLARIS_THREADS) || defined(GC_PTHREADS) || \
- defined(GC_WIN32_THREADS)
+# if defined(GC_PTHREADS) || defined(GC_WIN32_THREADS)
extern void GC_push_all_stacks(void);
@@ -2031,7 +2034,7 @@ void GC_default_push_other_roots(void)
GC_push_all_stacks();
}
-# endif /* GC_SOLARIS_THREADS || GC_PTHREADS */
+# endif /* GC_WIN32_THREADS || GC_PTHREADS */
void (*GC_push_other_roots)(void) = GC_default_push_other_roots;
@@ -2182,8 +2185,6 @@ void GC_or_pages(page_hash_table pht1, page_hash_table pht2)
}
# ifndef MPROTECT_VDB
- void GC_is_fresh(struct hblk *h, word n)
- {}
void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
{}
# endif
@@ -2235,11 +2236,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
return(TRUE);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
/* A call that: */
/* I) hints that [h, h+nblocks) is about to be written. */
/* II) guarantees that protection is removed. */
@@ -2302,11 +2298,6 @@ void GC_dirty(ptr_t p)
async_set_pht_entry_from_index(GC_dirty_pages, index);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
/*ARGSUSED*/
void GC_remove_protection(struct hblk *h, word nblocks, GC_bool is_ptrfree)
{
@@ -2980,12 +2971,6 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
return(TRUE);
}
-/* Reset the n pages starting at h to "was never dirty" status. */
-/*ARGSUSED*/
-void GC_is_fresh(struct hblk *h, word n)
-{
-}
-
# endif /* MPROTECT_VDB */
# ifdef PROC_VDB
@@ -3013,23 +2998,6 @@ void GC_is_fresh(struct hblk *h, word n)
word GC_proc_buf_size = INITIAL_BUF_SZ;
char *GC_proc_buf;
-#ifdef GC_SOLARIS_THREADS
-/* We don't have exact sp values for threads. So we count on */
-/* occasionally declaring stack pages to be fresh. Thus we */
-/* need a real implementation of GC_is_fresh. We can't clear */
-/* entries in GC_written_pages, since that would declare all */
-/* pages with the given hash address to be fresh. */
-# define MAX_FRESH_PAGES 8*1024 /* Must be power of 2 */
- struct hblk ** GC_fresh_pages; /* A direct mapped cache. */
- /* Collisions are dropped. */
-
-# define FRESH_PAGE_SLOT(h) (divHBLKSZ((word)(h)) & (MAX_FRESH_PAGES-1))
-# define ADD_FRESH_PAGE(h) \
- GC_fresh_pages[FRESH_PAGE_SLOT(h)] = (h)
-# define PAGE_IS_FRESH(h) \
- (GC_fresh_pages[FRESH_PAGE_SLOT(h)] == (h) && (h) != 0)
-#endif
-
int GC_proc_fd;
void GC_dirty_init(void)
@@ -3060,15 +3028,6 @@ void GC_dirty_init(void)
ABORT("/proc ioctl failed");
}
GC_proc_buf = GC_scratch_alloc(GC_proc_buf_size);
-# ifdef GC_SOLARIS_THREADS
- GC_fresh_pages = (struct hblk **)
- GC_scratch_alloc(MAX_FRESH_PAGES * sizeof (struct hblk *));
- if (GC_fresh_pages == 0) {
- GC_err_printf("No space for fresh pages\n");
- EXIT();
- }
- BZERO(GC_fresh_pages, MAX_FRESH_PAGES * sizeof (struct hblk *));
-# endif
}
/* Ignore write hints. They don't help us here. */
@@ -3080,11 +3039,7 @@ GC_bool is_ptrfree;
{
}
-#ifdef GC_SOLARIS_THREADS
-# define READ(fd,buf,nbytes) syscall(SYS_read, fd, buf, nbytes)
-#else
-# define READ(fd,buf,nbytes) read(fd, buf, nbytes)
-#endif
+# define READ(fd,buf,nbytes) read(fd, buf, nbytes)
void GC_read_dirty(void)
{
@@ -3117,10 +3072,6 @@ void GC_read_dirty(void)
/* Punt: */
memset(GC_grungy_pages, 0xff, sizeof (page_hash_table));
memset(GC_written_pages, 0xff, sizeof(page_hash_table));
-# ifdef GC_SOLARIS_THREADS
- BZERO(GC_fresh_pages,
- MAX_FRESH_PAGES * sizeof (struct hblk *));
-# endif
return;
}
}
@@ -3147,15 +3098,6 @@ void GC_read_dirty(void)
register word index = PHT_HASH(h);
set_pht_entry_from_index(GC_grungy_pages, index);
-# ifdef GC_SOLARIS_THREADS
- {
- register int slot = FRESH_PAGE_SLOT(h);
-
- if (GC_fresh_pages[slot] == h) {
- GC_fresh_pages[slot] = 0;
- }
- }
-# endif
h++;
}
}
@@ -3165,30 +3107,16 @@ void GC_read_dirty(void)
}
/* Update GC_written_pages. */
GC_or_pages(GC_written_pages, GC_grungy_pages);
-# ifdef GC_SOLARIS_THREADS
- /* Make sure that old stacks are considered completely clean */
- /* unless written again. */
- GC_old_stacks_are_fresh();
-# endif
}
#undef READ
GC_bool GC_page_was_dirty(struct hblk *h)
-struct hblk *h;
{
register word index = PHT_HASH(h);
register GC_bool result;
result = get_pht_entry_from_index(GC_grungy_pages, index);
-# ifdef GC_SOLARIS_THREADS
- if (result && PAGE_IS_FRESH(h)) result = FALSE;
- /* This happens only if page was declared fresh since */
- /* the read_dirty call, e.g. because it's in an unused */
- /* thread stack. It's OK to treat it as clean, in */
- /* that case. And it's consistent with */
- /* GC_page_was_ever_dirty. */
-# endif
return(result);
}
@@ -3198,29 +3126,9 @@ GC_bool GC_page_was_ever_dirty(struct hblk *h)
register GC_bool result;
result = get_pht_entry_from_index(GC_written_pages, index);
-# ifdef GC_SOLARIS_THREADS
- if (result && PAGE_IS_FRESH(h)) result = FALSE;
-# endif
return(result);
}
-/* Caller holds allocation lock. */
-void GC_is_fresh(struct hblk *h, word n)
-{
-
- register word index;
-
-# ifdef GC_SOLARIS_THREADS
- register word i;
-
- if (GC_fresh_pages != 0) {
- for (i = 0; i < n; i++) {
- ADD_FRESH_PAGE(h + i);
- }
- }
-# endif
-}
-
# endif /* PROC_VDB */
diff --git a/pthread_stop_world.c b/pthread_stop_world.c
index bd1f67e8..33cc9e04 100644
--- a/pthread_stop_world.c
+++ b/pthread_stop_world.c
@@ -1,7 +1,7 @@
#include "private/pthread_support.h"
-#if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
- && !defined(GC_WIN32_THREADS) && !defined(GC_DARWIN_THREADS)
+#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \
+ !defined(GC_DARWIN_THREADS)
#include <signal.h>
#include <semaphore.h>
@@ -160,12 +160,12 @@ void GC_suspend_handler_inner(ptr_t sig_arg, void *context)
return;
}
# ifdef SPARC
- me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> stop_info.stack_ptr = GC_save_regs_in_stack();
# else
me -> stop_info.stack_ptr = (ptr_t)(&dummy);
# endif
# ifdef IA64
- me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> backing_store_ptr = GC_save_regs_in_stack();
# endif
/* Tell the thread that wants to stop the world that this */
@@ -282,6 +282,8 @@ void GC_push_all_stacks()
(unsigned)p -> id, bs_lo, bs_hi);
# endif
if (pthread_equal(p -> id, me)) {
+ /* FIXME: This may add an unbounded number of entries, */
+ /* and hence overflow the mark stack, which is bad. */
GC_push_all_eager(bs_lo, bs_hi);
} else {
GC_push_all_stack(bs_lo, bs_hi);
@@ -340,13 +342,13 @@ int GC_suspend_all()
return n_live_threads;
}
-/* Caller holds allocation lock. */
void GC_stop_world()
{
int i;
int n_live_threads;
int code;
+ GC_ASSERT(I_HOLD_LOCK());
#if DEBUG_THREADS
GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self());
#endif
diff --git a/pthread_support.c b/pthread_support.c
index 5b255254..a8c3c6b8 100644
--- a/pthread_support.c
+++ b/pthread_support.c
@@ -80,7 +80,15 @@
# include <sys/sysctl.h>
#endif /* GC_DARWIN_THREADS */
-
+/* Allocator lock definitions. */
+#if defined(USE_SPIN_LOCK)
+ pthread_t GC_lock_holder = NO_THREAD;
+#else
+ pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+ pthread_t GC_lock_holder = NO_THREAD;
+ /* Used only for assertions, and to prevent */
+ /* recursive reentry in the system call wrapper. */
+#endif
#if defined(GC_DGUX386_THREADS)
# include <sys/dg_sys_info.h>
@@ -241,7 +249,7 @@ void GC_mark_thread_local_free_lists(void)
GC_check_tls_for(&(p->tlfs));
}
}
-# if !defined(USE_COMPILER_TLS) && !defined(USE_PTHREAD_SPECIFIC)
+# if defined(USE_CUSTOM_SPECIFIC)
if (GC_thread_key != 0)
GC_check_tsd_marks(GC_thread_key);
# endif
@@ -346,16 +354,15 @@ volatile GC_thread GC_threads[THREAD_TABLE_SZ];
void GC_push_thread_structures(void)
{
+ GC_ASSERT(I_HOLD_LOCK());
GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
GC_push_all((ptr_t)(&GC_thread_key),
(ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
# endif
}
-#if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
-#endif /* THREAD_LOCAL_ALLOC */
-
+/* It may not be safe to allocate when we register the first thread. */
static struct GC_Thread_Rep first_thread;
/* Add a thread to GC_threads. We assume it wasn't already there. */
@@ -385,13 +392,13 @@ GC_thread GC_new_thread(pthread_t id)
/* Delete a thread from GC_threads. We assume it is there. */
/* (The code intentionally traps if it wasn't.) */
-/* Caller holds allocation lock. */
void GC_delete_thread(pthread_t id)
{
int hv = ((word)id) % THREAD_TABLE_SZ;
register GC_thread p = GC_threads[hv];
register GC_thread prev = 0;
+ GC_ASSERT(I_HOLD_LOCK());
while (!pthread_equal(p -> id, id)) {
prev = p;
p = p -> next;
@@ -408,12 +415,14 @@ void GC_delete_thread(pthread_t id)
/* been notified, then there may be more than one thread */
/* in the table with the same pthread id. */
/* This is OK, but we need a way to delete a specific one. */
-void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
+void GC_delete_gc_thread(GC_thread gc_id)
{
+ pthread_t id = gc_id -> id;
int hv = ((word)id) % THREAD_TABLE_SZ;
register GC_thread p = GC_threads[hv];
register GC_thread prev = 0;
+ GC_ASSERT(I_HOLD_LOCK());
while (p != gc_id) {
prev = p;
p = p -> next;
@@ -680,7 +689,8 @@ void GC_thr_init(void)
# if defined(GC_HPUX_THREADS)
GC_nprocs = pthread_num_processors_np();
# endif
-# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS)
+# if defined(GC_OSF1_THREADS) || defined(GC_AIX_THREADS) \
+ || defined(GC_SOLARIS_THREADS)
GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
if (GC_nprocs <= 0) GC_nprocs = 1;
# endif
@@ -742,7 +752,7 @@ void GC_thr_init(void)
/* may require allocation. */
/* Called without allocation lock. */
/* Must be called before a second thread is created. */
-/* Called without allocation lock. */
+/* To reiterate: the caller must NOT hold the allocation lock. */
void GC_init_parallel(void)
{
if (parallel_initialized) return;
@@ -751,7 +761,7 @@ void GC_init_parallel(void)
/* GC_init() calls us back, so set flag first. */
if (!GC_is_initialized) GC_init();
/* Initialize thread local free lists if used. */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
LOCK();
GC_init_thread_local(&(GC_lookup_thread(pthread_self())->tlfs));
UNLOCK();
@@ -789,12 +799,12 @@ static void GC_do_blocking_inner(ptr_t data, void * context) {
me = GC_lookup_thread(pthread_self());
GC_ASSERT(!(me -> thread_blocked));
# ifdef SPARC
- me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> stop_info.stack_ptr = GC_save_regs_in_stack();
# elif !defined(GC_DARWIN_THREADS)
- me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp();
+ me -> stop_info.stack_ptr = GC_approx_sp();
# endif
# ifdef IA64
- me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+ me -> backing_store_ptr = GC_save_regs_in_stack();
# endif
me -> thread_blocked = TRUE;
/* Save context here if we want to support precise stack marking */
@@ -830,13 +840,17 @@ int GC_unregister_my_thread(void)
/* complete before we remove this thread. */
GC_wait_for_gc_completion(FALSE);
me = GC_lookup_thread(pthread_self());
- GC_destroy_thread_local(&(me->tlfs));
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_destroy_thread_local(&(me->tlfs));
+# endif
if (me -> flags & DETACHED) {
GC_delete_thread(pthread_self());
} else {
me -> flags |= FINISHED;
}
- GC_remove_specific(GC_thread_key);
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_remove_specific();
+# endif
UNLOCK();
return GC_SUCCESS;
}
@@ -877,7 +891,7 @@ int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
if (result == 0) {
LOCK();
/* Here the pthread thread id may have been recycled. */
- GC_delete_gc_thread(thread, thread_gc_id);
+ GC_delete_gc_thread(thread_gc_id);
UNLOCK();
}
return result;
@@ -899,7 +913,7 @@ WRAP_FUNC(pthread_detach)(pthread_t thread)
thread_gc_id -> flags |= DETACHED;
/* Here the pthread thread id may have been recycled. */
if (thread_gc_id -> flags & FINISHED) {
- GC_delete_gc_thread(thread, thread_gc_id);
+ GC_delete_gc_thread(thread_gc_id);
}
UNLOCK();
}
@@ -913,7 +927,7 @@ GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
{
GC_thread me;
- GC_in_thread_creation = TRUE; /* OK to collect from unknow thread. */
+ GC_in_thread_creation = TRUE; /* OK to collect from unknown thread. */
me = GC_new_thread(my_pthread);
GC_in_thread_creation = FALSE;
# ifdef GC_DARWIN_THREADS
@@ -975,7 +989,7 @@ void * GC_inner_start_routine(struct GC_stack_base *sb, void * arg)
sem_post(&(si -> registered)); /* Last action on si. */
/* OK to deallocate. */
pthread_cleanup_push(GC_thread_exit_proc, 0);
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
LOCK();
GC_init_thread_local(&(me->tlfs));
UNLOCK();
diff --git a/reclaim.c b/reclaim.c
index 6cb8b47e..1149f496 100644
--- a/reclaim.c
+++ b/reclaim.c
@@ -289,7 +289,12 @@ void GC_reclaim_block(struct hblk *hbp, word report_if_found)
}
} else {
GC_bool empty = GC_block_empty(hhdr);
- GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+# ifdef PARALLEL_MARK
+ /* Count can be low or one too high. */
+ GC_ASSERT(hhdr -> hb_n_marks <= HBLKSIZE/sz + 1);
+# else
+ GC_ASSERT(sz * hhdr -> hb_n_marks <= HBLKSIZE);
+# endif
if (hhdr -> hb_descr != 0) {
GC_composite_in_use += sz * hhdr -> hb_n_marks;
} else {
@@ -387,7 +392,7 @@ int GC_n_set_marks(hdr *hhdr)
#endif /* !USE_MARK_BYTES */
/*ARGSUSED*/
-void GC_print_block_descr(struct hblk *h, word dummy)
+void GC_print_block_descr(struct hblk *h, word /* struct Print_stats * */ raw_ps)
{
hdr * hhdr = HDR(h);
unsigned bytes = hhdr -> hb_sz;
@@ -405,7 +410,7 @@ void GC_print_block_descr(struct hblk *h, word dummy)
bytes += HBLKSIZE-1;
bytes &= ~(HBLKSIZE-1);
- ps = (struct Print_stats *)dummy;
+ ps = (struct Print_stats *)raw_ps;
ps->total_bytes += bytes;
ps->number_of_blocks++;
}
diff --git a/setjmp_t.c b/setjmp_t.c
index 648c7127..9dc6bfc5 100644
--- a/setjmp_t.c
+++ b/setjmp_t.c
@@ -24,7 +24,7 @@
#include <stdio.h>
#include <setjmp.h>
#include <string.h>
-#include "private/gcconfig.h"
+#include "private/gc_priv.h"
#ifdef OS2
/* GETPAGESIZE() is set to getpagesize() by default, but that */
@@ -82,6 +82,9 @@ int main()
printf("A good guess for ALIGNMENT on this machine is %ld.\n",
(unsigned long)(&(a.a_b))-(unsigned long)(&a));
+ printf("The following is a very dubious test of one root marking"
+ " strategy.\n");
+ printf("Results may not be accurate/useful:\n");
/* Encourage the compiler to keep x in a callee-save register */
x = 2*x-1;
printf("");
@@ -107,6 +110,27 @@ int main()
y++;
x = 2;
if (y == 1) longjmp(b,1);
+ printf("Some GC internal configuration stuff: \n");
+ printf("\tWORDSZ = %d, ALIGNMENT = %d, GC_GRANULE_BYTES = %d\n",
+ WORDSZ, ALIGNMENT, GC_GRANULE_BYTES);
+ printf("\tUsing one mark ");
+# if defined(USE_MARK_BYTES)
+ printf("byte");
+# elif defined(USE_MARK_BITS)
+ printf("bit");
+# endif
+ printf(" per ");
+# if defined(MARK_BIT_PER_OBJ)
+ printf("object.\n");
+# elif defined(MARK_BIT_PER_GRANULE)
+ printf("granule.\n");
+# endif
+# ifdef THREAD_LOCAL_ALLOC
+ printf("Thread local allocation enabled.\n");
+# endif
+# ifdef PARALLEL_MARK
+ printf("Parallel marking enabled.\n");
+# endif
return(0);
}
diff --git a/sparc_mach_dep.S b/sparc_mach_dep.S
index 06a0f3b4..6997fa19 100644
--- a/sparc_mach_dep.S
+++ b/sparc_mach_dep.S
@@ -24,6 +24,10 @@ GC_push_regs:
.size GC_save_regs_in_stack,.GC_save_regs_in_stack_end-GC_save_regs_in_stack
+! GC_clear_stack_inner(arg, limit) clears stack area up to limit and
+! returns arg. Stack clearing is crucial on SPARC, so we supply
+! an assembly version that's more careful. Assumes limit is hotter
+! than sp, and limit is 8 byte aligned.
.globl GC_clear_stack_inner
GC_clear_stack_inner:
#if defined(__arch64__) || defined(__sparcv9)
diff --git a/tests/leak_test.c b/tests/leak_test.c
index 421d0c6c..d6a60d47 100644
--- a/tests/leak_test.c
+++ b/tests/leak_test.c
@@ -5,6 +5,9 @@ main() {
int i;
GC_find_leak = 1; /* for new collect versions not compiled */
/* with -DFIND_LEAK. */
+
+ GC_INIT(); /* Needed if thread-local allocation is enabled. */
+ /* FIXME: This is not ideal. */
for (i = 0; i < 10; ++i) {
p[i] = malloc(sizeof(int)+i);
}
@@ -18,4 +21,5 @@ main() {
CHECK_LEAKS();
CHECK_LEAKS();
CHECK_LEAKS();
+ return 0;
}
diff --git a/tests/test.c b/tests/test.c
index 076ce4b3..afe62f25 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -36,12 +36,10 @@
# else
# include <assert.h> /* Not normally used, but handy for debugging. */
# endif
-# include <assert.h> /* Not normally used, but handy for debugging. */
# include "gc.h"
# include "gc_typed.h"
# include "private/gc_priv.h" /* For output, locking, MIN_WORDS, */
- /* and some statistics. */
-# include "private/gcconfig.h"
+ /* and some statistics, and gcconfig.h. */
# if defined(MSWIN32) || defined(MSWINCE)
# include <windows.h>
@@ -53,11 +51,6 @@
# define GC_printf printf
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-# include <thread.h>
-# include <synch.h>
-# endif
-
# if defined(GC_PTHREADS)
# include <pthread.h>
# endif
@@ -507,8 +500,6 @@ void check_marks_int_list(sexpr x)
}
}
-/* # elif defined(GC_SOLARIS_THREADS) */
-
# else
# define fork_a_thread()
@@ -674,17 +665,11 @@ volatile int dropped_something = 0;
# ifdef PCR
PCR_ThCrSec_EnterSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- static mutex_t incr_lock;
- mutex_lock(&incr_lock);
-# endif
-# if defined(GC_PTHREADS)
+# if defined(GC_PTHREADS)
static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- EnterCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ EnterCriticalSection(&incr_cs);
# endif
if ((int)(GC_word)client_data != t -> level) {
(void)GC_printf("Wrong finalization data - collector is broken\n");
@@ -695,15 +680,10 @@ volatile int dropped_something = 0;
# ifdef PCR
PCR_ThCrSec_ExitSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- mutex_unlock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
pthread_mutex_unlock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- LeaveCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ LeaveCriticalSection(&incr_cs);
# endif
}
@@ -757,17 +737,11 @@ int n;
# ifdef PCR
PCR_ThCrSec_EnterSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- static mutex_t incr_lock;
- mutex_lock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
static pthread_mutex_t incr_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_lock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- EnterCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ EnterCriticalSection(&incr_cs);
# endif
/* Losing a count here causes erroneous report of failure. */
finalizable_count++;
@@ -775,15 +749,10 @@ int n;
# ifdef PCR
PCR_ThCrSec_ExitSys();
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- mutex_unlock(&incr_lock);
-# endif
# if defined(GC_PTHREADS)
pthread_mutex_unlock(&incr_lock);
-# else
-# ifdef GC_WIN32_THREADS
- LeaveCriticalSection(&incr_cs);
-# endif
+# elif defined(GC_WIN32_THREADS)
+ LeaveCriticalSection(&incr_cs);
# endif
}
@@ -841,48 +810,8 @@ int n;
chktree(t -> rchild, n-1);
}
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
-thread_key_t fl_key;
-void * alloc8bytes()
-{
-# if defined(SMALL_CONFIG) || defined(GC_DEBUG)
- collectable_count++;
- return(GC_MALLOC(8));
-# else
- void ** my_free_list_ptr;
- void * my_free_list;
-
- if (thr_getspecific(fl_key, (void **)(&my_free_list_ptr)) != 0) {
- (void)GC_printf("thr_getspecific failed\n");
- FAIL;
- }
- if (my_free_list_ptr == 0) {
- uncollectable_count++;
- my_free_list_ptr = GC_NEW_UNCOLLECTABLE(void *);
- if (thr_setspecific(fl_key, my_free_list_ptr) != 0) {
- (void)GC_printf("thr_setspecific failed\n");
- FAIL;
- }
- }
- my_free_list = *my_free_list_ptr;
- if (my_free_list == 0) {
- collectable_count++;
- my_free_list = GC_malloc_many(8);
- if (my_free_list == 0) {
- (void)GC_printf("alloc8bytes out of memory\n");
- FAIL;
- }
- }
- *my_free_list_ptr = GC_NEXT(my_free_list);
- GC_NEXT(my_free_list) = 0;
- return(my_free_list);
-# endif
-}
-
-#else
-
-# if defined(GC_PTHREADS)
+#if defined(GC_PTHREADS)
pthread_key_t fl_key;
void * alloc8bytes()
@@ -918,9 +847,8 @@ void * alloc8bytes()
# endif
}
-# else
+#else
# define alloc8bytes() GC_MALLOC_ATOMIC(8)
-# endif
#endif
void alloc_small(n)
@@ -1099,7 +1027,7 @@ static void uniq(void *p, ...) {
for (j=0; j<i; j++)
if (q[i] == q[j]) {
GC_printf(
- "Apparently failed to mark form some function arguments.\n"
+ "Apparently failed to mark from some function arguments.\n"
"Perhaps GC_push_regs was configured incorrectly?\n"
);
FAIL;
@@ -1389,7 +1317,7 @@ void SetMinimumStack(long minSize)
}
-#if !defined(PCR) && !defined(GC_SOLARIS_THREADS) \
+#if !defined(PCR) \
&& !defined(GC_WIN32_THREADS) && !defined(GC_PTHREADS) \
|| defined(LINT)
#if defined(MSWIN32) && !defined(__MINGW32__)
diff --git a/tests/test_cpp.cc b/tests/test_cpp.cc
index 6661e411..9a7af1cc 100644
--- a/tests/test_cpp.cc
+++ b/tests/test_cpp.cc
@@ -52,7 +52,7 @@ extern "C" {
#define my_assert( e ) \
if (! (e)) { \
- GC_printf1( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
+ GC_printf( "Assertion failure in " __FILE__ ", line %d: " #e "\n", \
__LINE__ ); \
exit( 1 ); }
@@ -216,11 +216,11 @@ int APIENTRY WinMain(
x = 0;
# endif
if (argc != 2 || (0 >= (n = atoi( argv[ 1 ] )))) {
- GC_printf0( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
+ GC_printf( "usage: test_cpp number-of-iterations\nAssuming 10 iters\n" );
n = 10;}
for (iters = 1; iters <= n; iters++) {
- GC_printf1( "Starting iteration %d\n", iters );
+ GC_printf( "Starting iteration %d\n", iters );
/* Allocate some uncollectable As and disguise their pointers.
Later we'll check to see if the objects are still there. We're
@@ -282,7 +282,7 @@ int APIENTRY WinMain(
x = *xptr;
# endif
my_assert (29 == x[0]);
- GC_printf0( "The test appears to have succeeded.\n" );
+ GC_printf( "The test appears to have succeeded.\n" );
return( 0 );}
diff --git a/tests/thread_leak_test.c b/tests/thread_leak_test.c
index 1174705e..5f183cfa 100644
--- a/tests/thread_leak_test.c
+++ b/tests/thread_leak_test.c
@@ -37,4 +37,5 @@ main() {
CHECK_LEAKS();
CHECK_LEAKS();
CHECK_LEAKS();
+ return 0;
}
diff --git a/thread_local_alloc.c b/thread_local_alloc.c
index 09617941..b3fe28cc 100644
--- a/thread_local_alloc.c
+++ b/thread_local_alloc.c
@@ -12,37 +12,17 @@
*/
#include "private/gc_priv.h"
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+# if defined(THREAD_LOCAL_ALLOC)
#include "private/thread_local_alloc.h"
#include "gc_inline.h"
-# if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \
- && !defined(USE_COMPILER_TLS)
-# ifdef __GNUC__
-# define USE_PTHREAD_SPECIFIC
- /* Empirically, as of gcc 3.3, USE_COMPILER_TLS doesn't work. */
-# else
-# define USE_COMPILER_TLS
-# endif
-# endif
-
-# if defined USE_HPUX_TLS
-# error USE_HPUX_TLS macro was replaced by USE_COMPILER_TLS
-# endif
-
-# if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
- defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)) \
- && !defined(USE_PTHREAD_SPECIFIC)
-# define USE_PTHREAD_SPECIFIC
-# endif
-
# include <stdlib.h>
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
-
-#ifdef USE_COMPILER_TLS
+#if defined(USE_COMPILER_TLS)
__thread
+#elif defined(USE_WIN32_COMPILER_TLS)
+ __declspec(thread) /* Microsoft spelling of the thread-local storage class. */
#endif
GC_key_t GC_thread_key;
@@ -56,7 +36,6 @@ static void return_freelists(void **fl, void **gfl)
void *q, **qptr;
for (i = 1; i < TINY_FREELISTS; ++i) {
-#if 0
if ((word)(fl[i]) >= HBLKSIZE) {
if (gfl[i] == 0) {
gfl[i] = fl[i];
@@ -71,7 +50,6 @@ static void return_freelists(void **fl, void **gfl)
gfl[i] = fl[i];
}
}
-#endif
/* Clear fl[i], since the thread structure may hang around. */
/* Do it in a way that is likely to trap if we access it. */
fl[i] = (ptr_t)HBLKSIZE;
@@ -134,11 +112,16 @@ void GC_destroy_thread_local(GC_tlfs p)
# endif
}
-#if defined(GC_ASSERTIONS) && defined(GC_LINUX_THREADS)
+#if defined(GC_ASSERTIONS) && defined(GC_PTHREADS) && !defined(CYGWIN32)
# include <pthread.h>
extern char * GC_lookup_thread(pthread_t id);
#endif
+#if defined(GC_ASSERTIONS) && defined(GC_WIN32_THREADS)
+# include <pthread.h>
+ extern char * GC_lookup_thread(int id);
+#endif
+
void * GC_malloc(size_t bytes)
{
size_t granules = ROUNDED_UP_GRANULES(bytes);
@@ -164,10 +147,14 @@ void * GC_malloc(size_t bytes)
# endif
# ifdef GC_ASSERTIONS
/* We can't check tsd correctly, since we don't have access to */
- /* the right declarations. But we cna check that it's close. */
+ /* the right declarations. But we can check that it's close. */
LOCK();
{
- char * me = GC_lookup_thread(pthread_self());
+# if defined(GC_WIN32_THREADS)
+ char * me = (char *)GC_lookup_thread_inner(GetCurrentThreadId());
+# else
+ char * me = GC_lookup_thread(pthread_self());
+# endif
GC_ASSERT((char *)tsd > me && (char *)tsd < me + 1000);
}
UNLOCK();
@@ -273,7 +260,7 @@ void GC_mark_thread_local_fls_for(GC_tlfs p)
}
#endif /* GC_ASSERTIONS */
-# else /* !THREAD_LOCAL_ALLOC && !DBG_HDRS_ALL */
+# else /* !THREAD_LOCAL_ALLOC */
# define GC_destroy_thread_local(t)
diff --git a/threadlibs.c b/threadlibs.c
index 178a7ec4..13096944 100644
--- a/threadlibs.c
+++ b/threadlibs.c
@@ -11,7 +11,6 @@ int main()
"-Wl,--wrap -Wl,pthread_sigmask -Wl,--wrap -Wl,sleep\n");
# endif
# if defined(GC_LINUX_THREADS) || defined(GC_IRIX_THREADS) \
- || defined(GC_SOLARIS_PTHREADS) \
|| defined(GC_DARWIN_THREADS) || defined(GC_AIX_THREADS)
# ifdef GC_USE_DLOPEN_WRAP
printf("-ldl ");
@@ -31,8 +30,9 @@ int main()
# if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
printf("-lpthread -lrt\n");
# endif
-# if defined(GC_SOLARIS_THREADS) && !defined(GC_SOLARIS_PTHREADS)
- printf("-lthread -ldl\n");
+# if defined(GC_SOLARIS_THREADS) || defined(GC_SOLARIS_PTHREADS)
+ printf("-lthread -lposix4\n");
+ /* Is this right for recent versions? */
# endif
# if defined(GC_WIN32_THREADS) && defined(CYGWIN32)
printf("-lpthread\n");
diff --git a/typd_mlc.c b/typd_mlc.c
index cdedf465..1124ff95 100644
--- a/typd_mlc.c
+++ b/typd_mlc.c
@@ -587,9 +587,9 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d)
if(SMALL_OBJ(lb)) {
lg = GC_size_map[lb];
opp = &(GC_eobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
if (0 == op) return 0;
lg = GC_size_map[lb]; /* May have been uninitialized. */
@@ -597,7 +597,7 @@ void * GC_malloc_explicitly_typed(size_t lb, GC_descr d)
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_explicit_kind);
@@ -620,16 +620,16 @@ DCL_LOCK_STATE;
if( SMALL_OBJ(lb) ) {
lg = GC_size_map[lb];
opp = &(GC_eobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
lg = GC_size_map[lb]; /* May have been uninitialized. */
} else {
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC_IOP(lb, GC_explicit_kind);
@@ -669,9 +669,9 @@ DCL_LOCK_STATE;
if( SMALL_OBJ(lb) ) {
lg = GC_size_map[lb];
opp = &(GC_arobjfreelist[lg]);
- FASTLOCK();
- if( !FASTLOCK_SUCCEEDED() || (op = *opp) == 0 ) {
- FASTUNLOCK();
+ LOCK();
+ if( (op = *opp) == 0 ) {
+ UNLOCK();
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
if (0 == op) return(0);
lg = GC_size_map[lb]; /* May have been uninitialized. */
@@ -679,7 +679,7 @@ DCL_LOCK_STATE;
*opp = obj_link(op);
obj_link(op) = 0;
GC_bytes_allocd += GRANULES_TO_BYTES(lg);
- FASTUNLOCK();
+ UNLOCK();
}
} else {
op = (ptr_t)GENERAL_MALLOC((word)lb, GC_array_kind);
diff --git a/version.h b/version.h
index 9c34ccc8..b71dd6b4 100644
--- a/version.h
+++ b/version.h
@@ -3,7 +3,7 @@
/* it to keep the old-style build process working. */
#define GC_TMP_VERSION_MAJOR 7
#define GC_TMP_VERSION_MINOR 0
-#define GC_TMP_ALPHA_VERSION 4
+#define GC_TMP_ALPHA_VERSION 5
#ifndef GC_NOT_ALPHA
# define GC_NOT_ALPHA 0xff
diff --git a/win32_threads.c b/win32_threads.c
index 8609900e..214d5c1e 100755
--- a/win32_threads.c
+++ b/win32_threads.c
@@ -4,6 +4,24 @@
#include <windows.h>
+#ifdef THREAD_LOCAL_ALLOC
+# include "private/thread_local_alloc.h"
+#endif /* THREAD_LOCAL_ALLOC */
+
+/* Allocation lock declarations. */
+#if !defined(USE_PTHREAD_LOCKS)
+# if defined(GC_DLL)
+ __declspec(dllexport) CRITICAL_SECTION GC_allocate_ml;
+# else
+ CRITICAL_SECTION GC_allocate_ml;
+# endif
+ DWORD GC_lock_holder = NO_THREAD;
+ /* Thread id for current holder of allocation lock */
+#else
+ pthread_mutex_t GC_allocate_ml = PTHREAD_MUTEX_INITIALIZER;
+ pthread_t GC_lock_holder = NO_THREAD;
+#endif
+
#ifdef CYGWIN32
# include <errno.h>
@@ -16,41 +34,102 @@
# define DEBUG_CYGWIN_THREADS 0
- void * GC_start_routine(void * arg);
+ void * GC_pthread_start(void * arg);
void GC_thread_exit_proc(void *arg);
# include <pthread.h>
#endif
+#if defined(GC_DLL) && !defined(MSWINCE)
+ static GC_bool GC_win32_dll_threads = FALSE;
+ /* This code operates in two distinct modes, depending on */
+ /* the setting of GC_win32_dll_threads. If */
+ /* GC_win32_dll_threads is set, all threads in the process */
+ /* are implicitly registered with the GC by DllMain. */
+ /* No explicit registration is required, and attempts at */
+ /* explicit registration are ignored. This mode is */
+ /* very different from the Posix operation of the collector. */
+ /* In this mode access to the thread table is lock-free. */
+ /* Hence there is a static limit on the number of threads. */
+
+ /* If GC_win32_dll_threads is FALSE, or the collector is */
+ /* built without GC_DLL defined, things operate in a way */
+ /* that is very similar to Posix platforms, and new threads */
+ /* must be registered with the collector, e.g. by using */
+ /* preprocessor-based interception of the thread primitives. */
+ /* In this case, we use a real data structure for the thread */
+ /* table. Note that there is no equivalent of linker-based */
+ /* call interception, since we don't have ELF-like */
+ /* facilities. The Windows analog appears to be "API */
+ /* hooking", which really seems to be a standard way to */
+ /* do minor binary rewriting (?). I'd prefer not to have */
+ /* the basic collector rely on such facilities, but an */
+ /* optional package that intercepts thread calls this way */
+ /* would probably be nice. */
+
+ /* GC_win32_dll_threads must be set at initialization time, */
+ /* i.e. before any collector or thread calls. We make it a */
+ /* "dynamic" option only to avoid multiple library versions. */
+#else
+# define GC_win32_dll_threads FALSE
+#endif
+
/* The type of the first argument to InterlockedExchange. */
/* Documented to be LONG volatile *, but at least gcc likes */
/* this better. */
typedef LONG * IE_t;
-#ifndef MAX_THREADS
-# define MAX_THREADS 256
- /* FIXME: */
- /* Things may get quite slow for large numbers of threads, */
- /* since we look them up with sequential search. */
-#endif
-
GC_bool GC_thr_initialized = FALSE;
+GC_bool GC_need_to_lock = FALSE;
+
+static GC_bool parallel_initialized = FALSE;
+
+void GC_init_parallel(void);
+
#ifdef GC_DLL
- GC_API GC_bool GC_need_to_lock = TRUE;
+ /* Turn on GC_win32_dll_threads */
+ GC_API void GC_use_DllMain(void)
+ {
+# ifdef THREAD_LOCAL_ALLOC
+ ABORT("Cannot use thread local allocation with DllMain-based "
+ "thread registration.");
+ /* Thread-local allocation really wants to lock at thread */
+ /* entry and exit. */
+# endif
+ GC_need_to_lock = TRUE;
/* Cannot intercept thread creation. */
+ GC_ASSERT(GC_gc_no == 0);
+ GC_win32_dll_threads = TRUE;
+ }
#else
- GC_bool GC_need_to_lock = FALSE;
+ GC_API void GC_use_DllMain(void)
+ {
+ ABORT("GC not configured as DLL");
+ }
#endif
DWORD GC_main_thread = 0;
-struct GC_thread_Rep {
- AO_t in_use; /* Updated without lock. */
- /* We assert that unused */
- /* entries have invalid ids of */
- /* zero and zero stack fields. */
+struct GC_Thread_Rep {
+ union {
+ AO_t tm_in_use; /* Updated without lock. */
+ /* We assert that unused */
+ /* entries have invalid ids of */
+ /* zero and zero stack fields. */
+ /* Used only with GC_win32_dll_threads. */
+ struct GC_Thread_Rep * tm_next;
+ /* Hash table link without */
+ /* GC_win32_dll_threads. */
+ /* More recently allocated threads */
+ /* with a given pthread id come */
+ /* first. (All but the first are */
+ /* guaranteed to be dead, but we may */
+ /* not yet have registered the join.) */
+ } table_management;
+# define in_use table_management.tm_in_use
+# define next table_management.tm_next
DWORD id;
HANDLE handle;
ptr_t stack_base; /* The cold end of the stack. */
@@ -65,9 +144,13 @@ struct GC_thread_Rep {
# define FINISHED 1 /* Thread has exited. */
# define DETACHED 2 /* Thread is intended to be detached. */
# endif
+# ifdef THREAD_LOCAL_ALLOC
+ struct thread_local_freelists tlfs;
+# endif
};
-typedef volatile struct GC_thread_Rep * GC_thread;
+typedef struct GC_Thread_Rep * GC_thread;
+
/*
* We assumed that volatile ==> memory ordering, at least among
@@ -76,36 +159,78 @@ typedef volatile struct GC_thread_Rep * GC_thread;
volatile GC_bool GC_please_stop = FALSE;
-/*
- * FIXME: At initialization time we should perhaps chose
- * between two different thread table representations. This simple
- * linear representation may be the best we can reliably do if we use
- * DllMain. By default we should probably rely on thread registration
- * as with the other platforms, and use a hash table or other real
- * data structure.
- */
-volatile struct GC_thread_Rep thread_table[MAX_THREADS];
+/* We have two versions of the thread table. Which one */
+/* we use depends on whether or not GC_win32_dll_threads */
+/* is set. The one complication is that at process */
+/* startup, we use both, since the client hasn't yet */
+/* had a chance to tell us which one (s)he wants. */
+static GC_bool client_has_run = FALSE;
+
+/* Thread table used if GC_win32_dll_threads is set. */
+/* This is a fixed size array. */
+/* Since we use runtime conditionals, both versions */
+/* are always defined. */
+# ifndef MAX_THREADS
+# define MAX_THREADS 512
+# endif
+ /* Things may get quite slow for large numbers of threads, */
+ /* since we look them up with sequential search. */
+
+ volatile struct GC_Thread_Rep dll_thread_table[MAX_THREADS];
+
+ volatile LONG GC_max_thread_index = 0;
+ /* Largest index in dll_thread_table */
+ /* that was ever used. */
+
+/* And now the version used if GC_win32_dll_threads is not set. */
+/* This is a chained hash table, with much of the code borrowed */
+/* from the Posix implementation. */
+# define THREAD_TABLE_SZ 256 /* Must be power of 2 */
+ volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+
-volatile LONG GC_max_thread_index = 0; /* Largest index in thread_table */
- /* that was ever used. */
+/* Add a thread to GC_threads. We assume it wasn't already there. */
+/* Caller holds allocation lock. */
+/* Unlike the pthreads version, the id field is set by the caller. */
+GC_thread GC_new_thread(DWORD id)
+{
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ GC_thread result;
+ /* It may not be safe to allocate when we register the first thread. */
+ static struct GC_Thread_Rep first_thread;
+ static GC_bool first_thread_used = FALSE;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ if (!first_thread_used) {
+ result = &first_thread;
+ first_thread_used = TRUE;
+ } else {
+ GC_ASSERT(!GC_win32_dll_threads);
+ result = (struct GC_Thread_Rep *)
+ GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
+ GC_ASSERT(result -> flags == 0);
+ }
+ if (result == 0) return(0);
+ /* result -> id = id; Done by caller. */
+ result -> next = GC_threads[hv];
+ GC_threads[hv] = result;
+ GC_ASSERT(result -> flags == 0 /* && result -> thread_blocked == 0 */);
+ return(result);
+}
extern LONG WINAPI GC_write_fault_handler(struct _EXCEPTION_POINTERS *exc_info);
/*
* This may be called from DllMain, and hence operates under unusual
- * constraints. In particular, it must be lock-free.
- * Always called from the thread being added.
+ * constraints. In particular, it must be lock-free if GC_win32_dll_threads
+ * is set. Always called from the thread being added.
+ * If GC_win32_dll_threads is not set, we already hold the allocation lock,
+ * except possibly during single-threaded start-up code.
*/
static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
DWORD thread_id)
{
- int i;
- /* It appears to be unsafe to acquire a lock here, since this */
- /* code is apparently not preeemptible on some systems. */
- /* (This is based on complaints, not on Microsoft's official */
- /* documentation, which says this should perform "only simple */
- /* initialization tasks".) */
- /* Hence we make do with nonblocking synchronization. */
+ volatile struct GC_Thread_Rep * me;
/* The following should be a noop according to the win32 */
/* documentation. There is empirical evidence that it */
@@ -113,39 +238,60 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
# if defined(MPROTECT_VDB)
if (GC_incremental) SetUnhandledExceptionFilter(GC_write_fault_handler);
# endif
+
+ if (GC_win32_dll_threads || !client_has_run) {
+ int i;
+ /* It appears to be unsafe to acquire a lock here, since this */
+ /* code is apparently not preemptible on some systems. */
+ /* (This is based on complaints, not on Microsoft's official */
+ /* documentation, which says this should perform "only simple */
+ /* initialization tasks".) */
+ /* Hence we make do with nonblocking synchronization. */
+ /* It has been claimed that DllMain is really only executed with */
+ /* a particular system lock held, and thus careful use of locking */
+ /* around code that doesn't call back into the system libraries */
+ /* might be OK. But this hasn't been tested across all win32 */
+ /* variants. */
/* cast away volatile qualifier */
- for (i = 0; InterlockedExchange((IE_t)&thread_table[i].in_use,1) != 0; i++) {
- /* Compare-and-swap would make this cleaner, but that's not */
- /* supported before Windows 98 and NT 4.0. In Windows 2000, */
- /* InterlockedExchange is supposed to be replaced by */
- /* InterlockedExchangePointer, but that's not really what I */
- /* want here. */
- /* FIXME: We should eventually declare Win95 dead and use AO_ */
- /* primitives here. */
- if (i == MAX_THREADS - 1)
- ABORT("too many threads");
- }
- /* Update GC_max_thread_index if necessary. The following is safe, */
- /* and unlike CompareExchange-based solutions seems to work on all */
- /* Windows95 and later platforms. */
- /* Unfortunately, GC_max_thread_index may be temporarily out of */
- /* bounds, so readers have to compensate. */
- while (i > GC_max_thread_index) {
- InterlockedIncrement((IE_t)&GC_max_thread_index);
+ for (i = 0; InterlockedExchange((IE_t)&dll_thread_table[i].in_use,1) != 0;
+ i++) {
+ /* Compare-and-swap would make this cleaner, but that's not */
+ /* supported before Windows 98 and NT 4.0. In Windows 2000, */
+ /* InterlockedExchange is supposed to be replaced by */
+ /* InterlockedExchangePointer, but that's not really what I */
+ /* want here. */
+ /* FIXME: We should eventually declare Win95 dead and use AO_ */
+ /* primitives here. */
+ if (i == MAX_THREADS - 1)
+ ABORT("too many threads");
+ }
+ /* Update GC_max_thread_index if necessary. The following is safe, */
+ /* and unlike CompareExchange-based solutions seems to work on all */
+ /* Windows95 and later platforms. */
+ /* Unfortunately, GC_max_thread_index may be temporarily out of */
+ /* bounds, so readers have to compensate. */
+ while (i > GC_max_thread_index) {
+ InterlockedIncrement((IE_t)&GC_max_thread_index);
+ }
+ if (GC_max_thread_index >= MAX_THREADS) {
+ /* We overshot due to simultaneous increments. */
+ /* Setting it to MAX_THREADS-1 is always safe. */
+ GC_max_thread_index = MAX_THREADS - 1;
+ }
+ me = dll_thread_table + i;
}
- if (GC_max_thread_index >= MAX_THREADS) {
- /* We overshot due to simultaneous increments. */
- /* Setting it to MAX_THREADS-1 is always safe. */
- GC_max_thread_index = MAX_THREADS - 1;
+ if (!GC_win32_dll_threads || !client_has_run) {
+ GC_ASSERT(I_HOLD_LOCK() || !client_has_run);
+ me = GC_new_thread(thread_id);
}
# ifdef CYGWIN32
- thread_table[i].pthread_id = pthread_self();
+ me -> pthread_id = pthread_self();
# endif
if (!DuplicateHandle(GetCurrentProcess(),
GetCurrentThread(),
GetCurrentProcess(),
- (HANDLE*)&thread_table[i].handle,
+ (HANDLE*)&(me -> handle),
0,
0,
DUPLICATE_SAME_ACCESS)) {
@@ -153,19 +299,25 @@ static GC_thread GC_register_my_thread_inner(struct GC_stack_base *sb,
GC_err_printf("Last error code: %d\n", last_error);
ABORT("DuplicateHandle failed");
}
- thread_table[i].stack_base = sb -> mem_base;
+ me -> stack_base = sb -> mem_base;
/* Up until this point, GC_push_all_stacks considers this thread */
/* invalid. */
- if (thread_table[i].stack_base == NULL)
- ABORT("Bad stack base in GC_register_my_thread");
+ if (me -> stack_base == NULL)
+ ABORT("Bad stack base in GC_register_my_thread_inner");
/* Up until this point, this entry is viewed as reserved but invalid */
/* by GC_delete_thread. */
- thread_table[i].id = thread_id;
+ me -> id = thread_id;
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_init_thread_local((GC_tlfs)(&(me->tlfs)));
+# endif
+ GC_ASSERT(!GC_please_stop || GC_win32_dll_threads);
+ /* Otherwise both we and the thread stopping code would be */
+ /* holding the allocation lock. */
/* If this thread is being created while we are trying to stop */
/* the world, wait here. Hopefully this can't happen on any */
/* systems that don't allow us to block here. */
while (GC_please_stop) Sleep(20);
- return thread_table + i;
+ return (GC_thread)(me);
}
/*
@@ -187,149 +339,326 @@ LONG GC_get_max_thread_index()
/* without a lock, but should be called in contexts in which the */
/* requested thread cannot be asynchronously deleted, e.g. from the */
/* thread itself. */
-static GC_thread GC_lookup_thread(DWORD thread_id) {
- int i;
- LONG my_max = GC_get_max_thread_index();
-
- for (i = 0;
+/* This version assumes that either GC_win32_dll_threads is set, or */
+/* we hold the allocator lock. */
+/* Also used (for assertion checking only) from thread_local_alloc.c. */
+GC_thread GC_lookup_thread_inner(DWORD thread_id) {
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
+ for (i = 0;
i <= my_max &&
- (!AO_load_acquire(&(thread_table[i].in_use))
- || thread_table[i].id != thread_id);
+ (!AO_load_acquire(&(dll_thread_table[i].in_use))
+ || dll_thread_table[i].id != thread_id);
/* Must still be in_use, since nobody else can store our thread_id. */
i++) {}
- if (i > my_max) {
- return 0;
+ if (i > my_max) {
+ return 0;
+ } else {
+ return (GC_thread)(dll_thread_table + i);
+ }
} else {
- return thread_table + i;
+ int hv = ((word)thread_id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p != 0 && p -> id != thread_id) p = p -> next;
+ return(p);
}
}
-int GC_register_my_thread(struct GC_stack_base *sb) {
- DWORD t = GetCurrentThreadId();
-
- if (0 == GC_lookup_thread(t)) {
- /* We lock here, since we want to wait for an ongoing GC. */
+/* A version of the above that acquires the lock if necessary. Note */
+/* that the identically named function for pthreads is different, and */
+/* just assumes we hold the lock. */
+/* Also used (for assertion checking only) from thread_local_alloc.c. */
+static GC_thread GC_lookup_thread(DWORD thread_id)
+{
+ if (GC_win32_dll_threads) {
+ return GC_lookup_thread_inner(thread_id);
+ } else {
+ GC_thread result;
LOCK();
- GC_register_my_thread_inner(sb, t);
+ result = GC_lookup_thread_inner(thread_id);
UNLOCK();
- return GC_SUCCESS;
- } else {
- return GC_DUPLICATE;
+ return result;
}
}
-/* This is intended to be lock-free. */
-/* It is either called synchronously from the thread being deleted, */
-/* or by the joining thread. */
-static void GC_delete_gc_thread(GC_thread thr)
+/* If a thread has been joined, but we have not yet */
+/* been notified, then there may be more than one thread */
+/* in the table with the same win32 id. */
+/* This is OK, but we need a way to delete a specific one. */
+/* Assumes we hold the allocation lock unless */
+/* GC_win32_dll_threads is set. */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted. */
+void GC_delete_gc_thread(GC_thread gc_id)
{
- CloseHandle(thr->handle);
+ if (GC_win32_dll_threads) {
+ /* This is intended to be lock-free. */
+ /* It is either called synchronously from the thread being deleted, */
+ /* or by the joining thread. */
+ CloseHandle(gc_id->handle);
/* cast away volatile qualifier */
- thr->stack_base = 0;
- thr->id = 0;
+ gc_id -> stack_base = 0;
+ gc_id -> id = 0;
# ifdef CYGWIN32
- thr->pthread_id = 0;
+ gc_id -> pthread_id = 0;
# endif /* CYGWIN32 */
- AO_store_release(&(thr->in_use), FALSE);
+ AO_store_release(&(gc_id->in_use), FALSE);
+ } else {
+ DWORD id = gc_id -> id;
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+ register GC_thread prev = 0;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p != gc_id) {
+ prev = p;
+ p = p -> next;
+ }
+ if (prev == 0) {
+ GC_threads[hv] = p -> next;
+ } else {
+ prev -> next = p -> next;
+ }
+ GC_INTERNAL_FREE(p);
+ }
}
+/* Delete a thread from GC_threads. We assume it is there. */
+/* (The code intentionally traps if it wasn't.) */
+/* Assumes we hold the allocation lock unless */
+/* GC_win32_dll_threads is set. */
+/* If GC_win32_dll_threads is set it should be called from the */
+/* thread being deleted. */
+void GC_delete_thread(DWORD id)
+{
+ if (GC_win32_dll_threads) {
+ GC_thread t = GC_lookup_thread_inner(id);
-static void GC_delete_thread(DWORD thread_id) {
- GC_thread t = GC_lookup_thread(thread_id);
+ if (0 == t) {
+ WARN("Removing nonexistent thread %ld\n", (GC_word)id);
+ } else {
+ GC_delete_gc_thread(t);
+ }
+ } else {
+ int hv = ((word)id) % THREAD_TABLE_SZ;
+ register GC_thread p = GC_threads[hv];
+ register GC_thread prev = 0;
+
+ GC_ASSERT(I_HOLD_LOCK());
+ while (p -> id != id) {
+ prev = p;
+ p = p -> next;
+ }
+ if (prev == 0) {
+ GC_threads[hv] = p -> next;
+ } else {
+ prev -> next = p -> next;
+ }
+ GC_INTERNAL_FREE(p);
+ }
+}
+
+int GC_register_my_thread(struct GC_stack_base *sb) {
+ DWORD t = GetCurrentThreadId();
- if (0 == t) {
- WARN("Removing nonexistent thread %ld\n", (GC_word)thread_id);
+ if (0 == GC_lookup_thread(t)) {
+ /* We lock here, since we want to wait for an ongoing GC. */
+ LOCK();
+ GC_register_my_thread_inner(sb, t);
+ UNLOCK();
+ return GC_SUCCESS;
} else {
- GC_delete_gc_thread(t);
+ return GC_DUPLICATE;
}
}
int GC_unregister_my_thread(void)
{
- GC_delete_thread(GetCurrentThreadId());
+ if (GC_win32_dll_threads) {
+ /* Should we just ignore this? */
+ GC_delete_thread(GetCurrentThreadId());
+ } else {
+ LOCK();
+ GC_delete_thread(GetCurrentThreadId());
+ UNLOCK();
+ }
+# if defined(THREAD_LOCAL_ALLOC)
+ LOCK();
+ {
+ GC_thread me = GC_lookup_thread_inner(GetCurrentThreadId());
+ GC_destroy_thread_local(&(me->tlfs));
+ }
+ UNLOCK();
+# endif
return GC_SUCCESS;
}
#ifdef CYGWIN32
+/* A quick-and-dirty cache of the mapping between pthread_t */
+/* and win32 thread id. */
+#define PTHREAD_MAP_SIZE 512
+DWORD GC_pthread_map_cache[PTHREAD_MAP_SIZE];
+#define HASH(pthread_id) ((((word)(pthread_id) >> 5)) % PTHREAD_MAP_SIZE)
+ /* It appears pthread_t is really a pointer type ... */
+#define SET_PTHREAD_MAP_CACHE(pthread_id, win32_id) \
+ GC_pthread_map_cache[HASH(pthread_id)] = (win32_id);
+#define GET_PTHREAD_MAP_CACHE(pthread_id) \
+ GC_pthread_map_cache[HASH(pthread_id)]
+
/* Return a GC_thread corresponding to a given pthread_t. */
/* Returns 0 if it's not there. */
/* We assume that this is only called for pthread ids that */
/* have not yet terminated or are still joinable, and */
/* cannot be concurrently terminated. */
+/* Assumes we do NOT hold the allocation lock. */
static GC_thread GC_lookup_pthread(pthread_t id)
{
- int i;
- LONG my_max = GC_get_max_thread_index();
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0;
- i <= my_max &&
- (!AO_load_acquire(&(thread_table[i].in_use))
- || thread_table[i].pthread_id != id);
+ for (i = 0;
+ i <= my_max &&
+ (!AO_load_acquire(&(dll_thread_table[i].in_use))
+ || dll_thread_table[i].pthread_id != id);
/* Must still be in_use, since nobody else can store our thread_id. */
i++);
- if (i > my_max) return 0;
- return thread_table + i;
+ if (i > my_max) return 0;
+ return (GC_thread)(dll_thread_table + i);
+ } else {
+ /* We first try the cache. If that fails, we use a very slow */
+ /* approach. */
+ int hv_guess = GET_PTHREAD_MAP_CACHE(id) % THREAD_TABLE_SZ;
+ int hv;
+ GC_thread p;
+
+ LOCK();
+ for (p = GC_threads[hv_guess]; 0 != p; p = p -> next) {
+ if (pthread_equal(p -> pthread_id, id))
+ goto foundit;
+ }
+ for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
+ for (p = GC_threads[hv]; 0 != p; p = p -> next) {
+ if (pthread_equal(p -> pthread_id, id))
+ goto foundit;
+ }
+ }
+ p = 0;
+ foundit:
+ UNLOCK();
+ return p;
+ }
}
#endif /* CYGWIN32 */
void GC_push_thread_structures(void)
{
+ GC_ASSERT(I_HOLD_LOCK());
+ if (GC_win32_dll_threads) {
/* Unlike the other threads implementations, the thread table here */
/* contains no pointers to the collectable heap. Thus we have */
/* no private structures we need to preserve. */
-# ifdef CYGWIN32
- { int i; /* pthreads may keep a pointer in the thread exit value */
- LONG my_max = GC_get_max_thread_index();
+# ifdef CYGWIN32
+ { int i; /* pthreads may keep a pointer in the thread exit value */
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].in_use)
- GC_push_all((ptr_t)&(thread_table[i].status),
- (ptr_t)(&(thread_table[i].status)+1));
+ for (i = 0; i <= my_max; i++)
+ if (dll_thread_table[i].in_use)
+ GC_push_all((ptr_t)&(dll_thread_table[i].status),
+ (ptr_t)(&(dll_thread_table[i].status)+1));
+ }
+# endif
+ } else {
+ GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
}
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_push_all((ptr_t)(&GC_thread_key),
+ (ptr_t)(&GC_thread_key)+sizeof(&GC_thread_key));
+ /* Just in case we ever use our own TLS implementation. */
# endif
}
+/* Suspend the given thread, if it's still active. */
+GC_suspend(GC_thread t)
+{
+# ifdef MSWINCE
+ /* SuspendThread will fail if thread is running kernel code */
+ while (SuspendThread(t -> handle) == (DWORD)-1)
+ Sleep(10);
+# else
+ /* Apparently the Windows 95 GetOpenFileName call creates */
+ /* a thread that does not properly get cleaned up, and */
+ /* SuspendThread on its descriptor may provoke a crash. */
+ /* This reduces the probability of that event, though it still */
+ /* appears there's a race here. */
+ DWORD exitCode;
+ if (GetExitCodeThread(t -> handle, &exitCode) &&
+ exitCode != STILL_ACTIVE) {
+ t -> stack_base = 0; /* prevent stack from being pushed */
+# ifndef CYGWIN32
+ /* this breaks pthread_join on Cygwin, which is guaranteed to */
+ /* only see user pthreads */
+ AO_store(&(t -> in_use), FALSE);
+ CloseHandle(t -> handle);
+# endif
+ return;
+ }
+ if (SuspendThread(t -> handle) == (DWORD)-1)
+ ABORT("SuspendThread failed");
+# endif
+ t -> suspended = TRUE;
+}
+
+/* Defined in misc.c */
+#ifndef CYGWIN32
+ extern CRITICAL_SECTION GC_write_cs;
+#endif
+
void GC_stop_world(void)
{
DWORD thread_id = GetCurrentThreadId();
int i;
if (!GC_thr_initialized) ABORT("GC_stop_world() called before GC_thr_init()");
+ GC_ASSERT(I_HOLD_LOCK());
GC_please_stop = TRUE;
- for (i = 0; i <= GC_get_max_thread_index(); i++)
- if (thread_table[i].stack_base != 0
- && thread_table[i].id != thread_id) {
-# ifdef MSWINCE
- /* SuspendThread will fail if thread is running kernel code */
- while (SuspendThread(thread_table[i].handle) == (DWORD)-1)
- Sleep(10);
-# else
- /* Apparently the Windows 95 GetOpenFileName call creates */
- /* a thread that does not properly get cleaned up, and */
- /* SuspendThread on its descriptor may provoke a crash. */
- /* This reduces the probability of that event, though it still */
- /* appears there's a race here. */
- DWORD exitCode;
- if (GetExitCodeThread(thread_table[i].handle,&exitCode) &&
- exitCode != STILL_ACTIVE) {
- thread_table[i].stack_base = 0; /* prevent stack from being pushed */
-# ifndef CYGWIN32
- /* this breaks pthread_join on Cygwin, which is guaranteed to */
- /* only see user pthreads */
- AO_store(&(thread_table[i].in_use), FALSE);
- CloseHandle(thread_table[i].handle);
-# endif
- continue;
- }
- if (SuspendThread(thread_table[i].handle) == (DWORD)-1)
- ABORT("SuspendThread failed");
-# endif
- thread_table[i].suspended = TRUE;
+# ifndef CYGWIN32
+ EnterCriticalSection(&GC_write_cs);
+# endif
+ if (GC_win32_dll_threads) {
+ /* Any threads being created during this loop will end up sleeping */
+ /* in the thread registration code until GC_please_stop becomes */
+ /* false. This is not ideal, but hopefully correct. */
+ for (i = 0; i <= GC_get_max_thread_index(); i++) {
+ volatile struct GC_Thread_Rep * t = dll_thread_table + i;
+ if (t -> stack_base != 0
+ && t -> id != thread_id) {
+ GC_suspend((GC_thread)t);
+ }
}
+ } else {
+ GC_thread t;
+ int i;
+
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ if (t -> stack_base != 0
+ && t -> id != thread_id) {
+ GC_suspend(t);
+ }
+ }
+ }
+ }
+# ifndef CYGWIN32
+ LeaveCriticalSection(&GC_write_cs);
+# endif
}
void GC_start_world(void)
@@ -338,33 +667,34 @@ void GC_start_world(void)
int i;
LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].stack_base != 0 && thread_table[i].suspended
- && thread_table[i].id != thread_id) {
- if (ResumeThread(thread_table[i].handle) == (DWORD)-1)
- ABORT("ResumeThread failed");
- thread_table[i].suspended = FALSE;
+ GC_ASSERT(I_HOLD_LOCK());
+ if (GC_win32_dll_threads) {
+ for (i = 0; i <= my_max; i++) {
+ GC_thread t = (GC_thread)(dll_thread_table + i);
+ if (t -> stack_base != 0 && t -> suspended
+ && t -> id != thread_id) {
+ if (ResumeThread(t -> handle) == (DWORD)-1)
+ ABORT("ResumeThread failed");
+ t -> suspended = FALSE;
+ }
}
- GC_please_stop = FALSE;
-}
-
-# ifdef _MSC_VER
-# pragma warning(disable:4715)
-# endif
-ptr_t GC_current_stackbottom(void)
-{
- DWORD thread_id = GetCurrentThreadId();
- int i;
- LONG my_max = GC_get_max_thread_index();
+ } else {
+ GC_thread t;
+ int i;
- for (i = 0; i <= my_max; i++)
- if (thread_table[i].stack_base && thread_table[i].id == thread_id)
- return thread_table[i].stack_base;
- ABORT("no thread table entry for current thread");
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ if (t -> stack_base != 0 && t -> suspended
+ && t -> id != thread_id) {
+ if (ResumeThread(t -> handle) == (DWORD)-1)
+ ABORT("ResumeThread failed");
+ t -> suspended = FALSE;
+ }
+ }
+ }
+ }
+ GC_please_stop = FALSE;
}
-# ifdef _MSC_VER
-# pragma warning(default:4715)
-# endif
# ifdef MSWINCE
/* The VirtualQuery calls below won't work properly on WinCE, but */
@@ -387,26 +717,19 @@ ptr_t GC_current_stackbottom(void)
}
# endif
-void GC_push_all_stacks(void)
+void GC_push_stack_for(GC_thread thread)
{
- DWORD thread_id = GetCurrentThreadId();
- GC_bool found_me = FALSE;
- int i;
- int dummy;
- ptr_t sp, stack_min;
- GC_thread thread;
- LONG my_max = GC_get_max_thread_index();
-
- for (i = 0; i <= my_max; i++) {
- thread = thread_table + i;
- if (thread -> in_use && thread -> stack_base) {
- if (thread -> id == thread_id) {
+ int dummy;
+ ptr_t sp, stack_min;
+ DWORD me = GetCurrentThreadId();
+
+ if (thread -> stack_base) {
+ if (thread -> id == me) {
sp = (ptr_t) &dummy;
- found_me = TRUE;
} else {
CONTEXT context;
context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL;
- if (!GetThreadContext(thread_table[i].handle, &context))
+ if (!GetThreadContext(thread -> handle, &context))
ABORT("GetThreadContext failed");
/* Push all registers that might point into the heap. Frame */
@@ -446,17 +769,49 @@ void GC_push_all_stacks(void)
# else
# error "architecture is not supported"
# endif
- }
+ } /* ! current thread */
stack_min = GC_get_stack_min(thread->stack_base);
- if (sp >= stack_min && sp < thread->stack_base)
+ if (sp >= stack_min && sp < thread->stack_base) {
+# if DEBUG_CYGWIN_THREADS
+ GC_printf("Pushing thread from %p to %p for %d from %d\n",
+ sp, thread -> stack_base, thread -> id, me);
+# endif
GC_push_all_stack(sp, thread->stack_base);
- else {
+ } else {
WARN("Thread stack pointer 0x%lx out of range, pushing everything\n",
(unsigned long)sp);
GC_push_all_stack(stack_min, thread->stack_base);
}
+ } /* thread looks live */
+}
+
+void GC_push_all_stacks(void)
+{
+ DWORD me = GetCurrentThreadId();
+ GC_bool found_me = FALSE;
+
+ if (GC_win32_dll_threads) {
+ int i;
+ LONG my_max = GC_get_max_thread_index();
+
+ for (i = 0; i <= my_max; i++) {
+ GC_thread t = (GC_thread)(dll_thread_table + i);
+ if (t -> in_use) {
+ GC_push_stack_for(t);
+ if (t -> id == me) found_me = TRUE;
+ }
+ }
+ } else {
+ GC_thread t;
+ int i;
+
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ GC_push_stack_for(t);
+ if (t -> id == me) found_me = TRUE;
+ }
}
}
if (!found_me) ABORT("Collecting from unknown thread.");
@@ -467,14 +822,29 @@ void GC_get_next_stack(char *start, char **lo, char **hi)
int i;
# define ADDR_LIMIT (char *)(-1L)
char * current_min = ADDR_LIMIT;
- LONG my_max = GC_get_max_thread_index();
+
+ if (GC_win32_dll_threads) {
+ LONG my_max = GC_get_max_thread_index();
- for (i = 0; i <= my_max; i++) {
- char * s = (char *)thread_table[i].stack_base;
+ for (i = 0; i <= my_max; i++) {
+ ptr_t s = (ptr_t)(dll_thread_table[i].stack_base);
if (0 != s && s > start && s < current_min) {
current_min = s;
}
+ }
+ } else {
+ for (i = 0; i < THREAD_TABLE_SZ; i++) {
+ GC_thread t;
+
+ for (t = GC_threads[i]; t != 0; t = t -> next) {
+ ptr_t s = (ptr_t)(t -> stack_base);
+
+ if (0 != s && s > start && s < current_min) {
+ current_min = s;
+ }
+ }
+ }
}
*hi = current_min;
if (current_min == ADDR_LIMIT) {
@@ -487,21 +857,6 @@ void GC_get_next_stack(char *start, char **lo, char **hi)
#if !defined(CYGWIN32)
-#if !defined(MSWINCE) && defined(GC_DLL)
-
-/* We register threads from DllMain */
-
-GC_API HANDLE WINAPI GC_CreateThread(
- LPSECURITY_ATTRIBUTES lpThreadAttributes,
- DWORD dwStackSize, LPTHREAD_START_ROUTINE lpStartAddress,
- LPVOID lpParameter, DWORD dwCreationFlags, LPDWORD lpThreadId )
-{
- return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
- lpParameter, dwCreationFlags, lpThreadId);
-}
-
-#else /* defined(MSWINCE) || !defined(GC_DLL)) */
-
/* We have no DllMain to take care of new threads. Thus we */
/* must properly intercept thread creation. */
@@ -521,37 +876,42 @@ GC_API HANDLE WINAPI GC_CreateThread(
thread_args *args;
- if (!GC_is_initialized) GC_init();
- /* make sure GC is initialized (i.e. main thread is attached) */
-
- args = GC_malloc_uncollectable(sizeof(thread_args));
+ if (!parallel_initialized) GC_init_parallel();
+ /* make sure GC is initialized (i.e. main thread is attached,
+ tls initialized) */
+
+ client_has_run = TRUE;
+ if (GC_win32_dll_threads) {
+ return CreateThread(lpThreadAttributes, dwStackSize, lpStartAddress,
+ lpParameter, dwCreationFlags, lpThreadId);
+ } else {
+ args = GC_malloc_uncollectable(sizeof(thread_args));
/* Handed off to and deallocated by child thread. */
- if (0 == args) {
+ if (0 == args) {
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
return NULL;
- }
+ }
- /* set up thread arguments */
+ /* set up thread arguments */
args -> start = lpStartAddress;
args -> param = lpParameter;
- GC_need_to_lock = TRUE;
- thread_h = CreateThread(lpThreadAttributes,
- dwStackSize, thread_start,
- args, dwCreationFlags,
- lpThreadId);
+ GC_need_to_lock = TRUE;
+ thread_h = CreateThread(lpThreadAttributes,
+ dwStackSize, GC_win32_start,
+ args, dwCreationFlags,
+ lpThreadId);
- return thread_h;
+ return thread_h;
+ }
}
-static DWORD WINAPI thread_start(LPVOID arg)
+void * GC_win32_start_inner(struct GC_stack_base *sb, LPVOID arg)
{
- DWORD ret = 0;
+ void * ret;
thread_args *args = (thread_args *)arg;
- struct GC_stack_base *sb;
- GC_get_stack_base(&sb);
- GC_register_my_thread(&sb); /* This waits for an in-progress GC. */
+ GC_register_my_thread(sb); /* This waits for an in-progress GC. */
/* Clear the thread entry even if we exit with an exception. */
/* This is probably pointless, since an uncaught exception is */
@@ -559,10 +919,15 @@ static DWORD WINAPI thread_start(LPVOID arg)
#ifndef __GNUC__
__try {
#endif /* __GNUC__ */
- ret = args->start (args->param);
+ ret = (void *)args->start (args->param);
#ifndef __GNUC__
} __finally {
#endif /* __GNUC__ */
+# if defined(THREAD_LOCAL_ALLOC)
+ LOCK();
+ GC_destroy_thread_local(&(me->tlfs));
+ UNLOCK();
+# endif
GC_free(args);
GC_delete_thread(GetCurrentThreadId());
#ifndef __GNUC__
@@ -571,8 +936,11 @@ static DWORD WINAPI thread_start(LPVOID arg)
return ret;
}
-#endif /* !defined(MSWINCE) && !(defined(__MINGW32__) && !defined(_DLL)) */
+/* Thread entry point handed to CreateThread() by GC_CreateThread(). */
+/* CreateThread() calls its start routine with exactly one LPVOID */
+/* argument, so no other parameter may be declared here. */
+/* GC_win32_start_inner() is expected to receive its stack base */
+/* argument from GC_call_with_stack_base(), not from this function. */
+/* The pointer result of the inner routine is narrowed to DWORD and */
+/* returned as the thread's exit code. */
+DWORD WINAPI GC_win32_start(LPVOID arg)
+{
+ return (DWORD)GC_call_with_stack_base(GC_win32_start_inner, arg);
+}
#endif /* !CYGWIN32 */
#ifdef MSWINCE
@@ -630,13 +998,16 @@ DWORD WINAPI main_thread_start(LPVOID arg)
/* Called by GC_init() - we hold the allocation lock. */
void GC_thr_init(void) {
struct GC_stack_base sb;
+ int sb_result;
+ GC_ASSERT(I_HOLD_LOCK());
if (GC_thr_initialized) return;
GC_main_thread = GetCurrentThreadId();
GC_thr_initialized = TRUE;
/* Add the initial thread, so we can stop it. */
- GC_get_stack_base(&sb);
+ sb_result = GC_get_stack_base(&sb);
+ GC_ASSERT(sb_result == GC_SUCCESS);
GC_register_my_thread(&sb);
}
@@ -658,6 +1029,7 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) {
(int)pthread_self(), GetCurrentThreadId(), (int)pthread_id);
# endif
+ client_has_run = TRUE;
/* Thread being joined might not have registered itself yet. */
/* After the join,thread id may have been recycled. */
/* FIXME: It would be better if this worked more like */
@@ -667,9 +1039,11 @@ int GC_pthread_join(pthread_t pthread_id, void **retval) {
result = pthread_join(pthread_id, retval);
- /* FIXME: This is an asynchronous deletion, which we said can't */
- /* happen? */
- GC_delete_gc_thread(joinee);
+ if (!GC_win32_dll_threads) {
+ LOCK();
+ GC_delete_gc_thread(joinee);
+ UNLOCK();
+ } /* otherwise dllmain handles it. */
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) completed join with thread 0x%x.\n",
@@ -690,8 +1064,12 @@ GC_pthread_create(pthread_t *new_thread,
int result;
struct start_info * si;
- if (!GC_is_initialized) GC_init();
+ if (!parallel_initialized) GC_init_parallel();
/* make sure GC is initialized (i.e. main thread is attached) */
+ client_has_run = TRUE;
+ if (GC_win32_dll_threads) {
+ return pthread_create(new_thread, attr, start_routine, arg);
+ }
/* This is otherwise saved only in an area mmapped by the thread */
/* library, which isn't visible to the collector. */
@@ -711,7 +1089,7 @@ GC_pthread_create(pthread_t *new_thread,
(int)pthread_self(), GetCurrentThreadId);
# endif
GC_need_to_lock = TRUE;
- result = pthread_create(new_thread, attr, GC_start_routine, si);
+ result = pthread_create(new_thread, attr, GC_pthread_start, si);
if (result) { /* failure */
GC_free(si);
@@ -720,24 +1098,24 @@ GC_pthread_create(pthread_t *new_thread,
return(result);
}
-void * GC_start_routine(void * arg)
+void * GC_pthread_start_inner(struct GC_stack_base *sb, void * arg)
{
struct start_info * si = arg;
void * result;
void *(*start)(void *);
void *start_arg;
- pthread_t pthread_id;
DWORD thread_id = GetCurrentThreadId();
+ pthread_t pthread_id = pthread_self();
GC_thread me;
GC_bool detached;
int i;
- struct GC_stack_base sb;
# if DEBUG_CYGWIN_THREADS
- GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_self(),
+ GC_printf("thread 0x%x(0x%x) starting...\n",(int)pthread_id,
thread_id);
# endif
+ GC_ASSERT(!GC_win32_dll_threads);
/* If a GC occurs before the thread is registered, that GC will */
/* ignore this thread. That's fine, since it will block trying to */
/* acquire the allocation lock, and won't yet hold interesting */
@@ -745,14 +1123,14 @@ void * GC_start_routine(void * arg)
LOCK();
/* We register the thread here instead of in the parent, so that */
/* we don't need to hold the allocation lock during pthread_create. */
- GC_get_stack_base(&sb);
- me = GC_register_my_thread_inner(&sb, thread_id);
+ me = GC_register_my_thread_inner(sb, thread_id);
+ SET_PTHREAD_MAP_CACHE(pthread_id, thread_id);
UNLOCK();
start = si -> start_routine;
start_arg = si -> arg;
if (si-> detached) me -> flags |= DETACHED;
- me -> pthread_id = pthread_id = pthread_self();
+ me -> pthread_id = pthread_id;
GC_free(si); /* was allocated uncollectable */
@@ -769,17 +1147,26 @@ void * GC_start_routine(void * arg)
return(result);
}
+/* Thread entry point passed to pthread_create() by GC_pthread_create(). */
+/* Runs GC_pthread_start_inner() through GC_call_with_stack_base(), */
+/* forwarding arg unchanged, and returns whatever that call returns. */
+/* Presumably GC_call_with_stack_base() supplies the struct */
+/* GC_stack_base * that GC_pthread_start_inner() expects -- confirm. */
+void * GC_pthread_start(void * arg)
+{
+ return GC_call_with_stack_base(GC_pthread_start_inner, arg);
+}
+
void GC_thread_exit_proc(void *arg)
{
GC_thread me = (GC_thread)arg;
int i;
+ GC_ASSERT(!GC_win32_dll_threads);
# if DEBUG_CYGWIN_THREADS
GC_printf("thread 0x%x(0x%x) called pthread_exit().\n",
(int)pthread_self(),GetCurrentThreadId());
# endif
LOCK();
+# if defined(THREAD_LOCAL_ALLOC)
+ GC_destroy_thread_local(&(me->tlfs));
+# endif
if (me -> flags & DETACHED) {
GC_delete_thread(GetCurrentThreadId());
} else {
@@ -791,6 +1178,7 @@ void GC_thread_exit_proc(void *arg)
/* nothing required here... */
int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset) {
+ client_has_run = TRUE;
return pthread_sigmask(how, set, oset);
}
@@ -799,6 +1187,7 @@ int GC_pthread_detach(pthread_t thread)
int result;
GC_thread thread_gc_id;
+ client_has_run = TRUE;
LOCK();
thread_gc_id = GC_lookup_pthread(thread);
UNLOCK();
@@ -827,6 +1216,9 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
{
struct GC_stack_base sb;
DWORD thread_id;
+ int sb_result;
+
+ if (client_has_run && !GC_win32_dll_threads) return TRUE;
switch (reason) {
case DLL_PROCESS_ATTACH:
@@ -837,28 +1229,32 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
thread_id = GetCurrentThreadId();
if (GC_main_thread != thread_id) {
/* Don't lock here. */
- GC_get_stack_base(&sb);
+ sb_result = GC_get_stack_base(&sb);
+ GC_ASSERT(sb_result == GC_SUCCESS);
+# ifdef THREAD_LOCAL_ALLOC
+ ABORT("Cannot initialize thread local cache from DllMain");
+# endif
GC_register_my_thread_inner(&sb, thread_id);
} /* o.w. we already did it during GC_thr_init(), called by GC_init() */
break;
case DLL_THREAD_DETACH:
- LOCK(); /* Safe? DllMain description is ambiguous. */
+ /* We are hopefully running in the context of the exiting thread. */
+ client_has_run = TRUE;
+ if (!GC_win32_dll_threads) return TRUE;
GC_delete_thread(GetCurrentThreadId());
- UNLOCK();
break;
case DLL_PROCESS_DETACH:
{
int i;
- LOCK();
+ if (!GC_win32_dll_threads) return TRUE;
for (i = 0; i <= GC_get_max_thread_index(); ++i)
{
- if (AO_load(&(thread_table[i].in_use)))
- GC_delete_gc_thread(thread_table + i);
+ if (AO_load(&(dll_thread_table[i].in_use)))
+ GC_delete_gc_thread(dll_thread_table + i);
}
- UNLOCK();
GC_deinit();
DeleteCriticalSection(&GC_allocate_ml);
@@ -873,12 +1269,80 @@ BOOL WINAPI DllMain(HINSTANCE inst, ULONG reason, LPVOID reserved)
# endif /* !MSWINCE */
-# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+/* Perform all initializations, including those that */
+/* may require allocation. */
+/* Called without allocation lock. */
+/* Must be called before a second thread is created. */
+/* Repeated calls are no-ops: the function returns immediately if */
+/* parallel_initialized is already set. */
+void GC_init_parallel(void)
+{
+ if (parallel_initialized) return;
+ parallel_initialized = TRUE;
+
+ /* GC_init() calls us back; the flag was set above so that the */
+ /* nested call returns immediately. */
+ if (!GC_is_initialized) GC_init();
+ /* Initialize thread local free lists if used. */
+ /* NOTE(review): the GC_lookup_thread() result is dereferenced */
+ /* without a null check; presumably the calling thread has already */
+ /* been registered by GC_init() at this point -- confirm. */
+# if defined(THREAD_LOCAL_ALLOC)
+ LOCK();
+ GC_init_thread_local(&(GC_lookup_thread(GetCurrentThreadId())->tlfs));
+ UNLOCK();
+# endif
+}
+
+#if defined(USE_PTHREAD_LOCKS)
+ /* Support for pthread locking code. */
+ /* pthread_mutex_trylock may not win here, */
+ /* due to built-in support for spinning first? */
+
+volatile GC_bool GC_collecting = 0;
+ /* A hint that we're in the collector and */
+ /* holding the allocation lock for an */
+ /* extended period. */
+
+/* Block until the mutex GC_allocate_ml has been acquired. */
+/* The return value of pthread_mutex_lock() is not checked. */
+void GC_lock(void)
+{
+ pthread_mutex_lock(&GC_allocate_ml);
+}
+#endif /* USE_PTHREAD ... */
-/* We don't really support thread-local allocation with DBG_HDRS_ALL */
+# if defined(THREAD_LOCAL_ALLOC)
/* Add thread-local allocation support. Microsoft uses __declspec(thread) */
+/* We must explicitly mark ptrfree and gcj free lists, since the free */
+/* list links wouldn't otherwise be found. We also set them in the */
+/* normal free lists, since that involves touching less memory than if */
+/* we scanned them normally. */
+/* Walks each chain GC_threads[0 .. THREAD_TABLE_SZ-1] and calls */
+/* GC_mark_thread_local_fls_for() on every thread's tlfs. */
+/* NOTE(review): only GC_threads is traversed, not dll_thread_table; */
+/* presumably thread-local allocation is never combined with */
+/* GC_win32_dll_threads -- confirm. */
+void GC_mark_thread_local_free_lists(void)
+{
+ int i;
+ GC_thread p;
+
+ for (i = 0; i < THREAD_TABLE_SZ; ++i) {
+ for (p = GC_threads[i]; 0 != p; p = p -> next) {
+ GC_mark_thread_local_fls_for(&(p->tlfs));
+ }
+ }
+}
+
+#if defined(GC_ASSERTIONS)
+ /* Check that all thread-local free-lists are completely marked. */
+ /* Also check that thread-specific-data structures are marked. */
+ /* Only compiled when GC_ASSERTIONS is defined. Calls */
+ /* GC_check_tls_for() on the tlfs of every thread reachable from */
+ /* GC_threads[0 .. THREAD_TABLE_SZ-1]. */
+ void GC_check_tls(void) {
+ int i;
+ GC_thread p;
+
+ for (i = 0; i < THREAD_TABLE_SZ; ++i) {
+ for (p = GC_threads[i]; 0 != p; p = p -> next) {
+ GC_check_tls_for(&(p->tlfs));
+ }
+ }
+ /* With USE_CUSTOM_SPECIFIC, GC_thread_key is checked only once */
+ /* it is non-zero. */
+# if defined(USE_CUSTOM_SPECIFIC)
+ if (GC_thread_key != 0)
+ GC_check_tsd_marks(GC_thread_key);
+# endif
+ }
+#endif /* GC_ASSERTIONS */
+
#endif /* THREAD_LOCAL_ALLOC ... */
#endif /* GC_WIN32_THREADS */