diff options
author | jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-07-28 03:46:07 +0000 |
---|---|---|
committer | jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-07-28 03:46:07 +0000 |
commit | 715e3bd3f343537e75f28ab83f73d8c20ed7929f (patch) | |
tree | c698ff9b5618f81b32dbda3a94016046ef2e1a48 /boehm-gc/doc | |
parent | 2f61b8cae7e804b377aede07f9d06291244ff64d (diff) | |
download | gcc-715e3bd3f343537e75f28ab83f73d8c20ed7929f.tar.gz |
Initial revision
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@69872 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'boehm-gc/doc')
-rw-r--r-- | boehm-gc/doc/Makefile.am | 27 | ||||
-rw-r--r-- | boehm-gc/doc/Makefile.in | 282 | ||||
-rw-r--r-- | boehm-gc/doc/README.DGUX386 | 215 | ||||
-rw-r--r-- | boehm-gc/doc/README.arm.cross | 68 | ||||
-rw-r--r-- | boehm-gc/doc/README.darwin | 106 | ||||
-rw-r--r-- | boehm-gc/doc/gcinterface.html | 203 | ||||
-rw-r--r-- | boehm-gc/doc/leak.html | 197 | ||||
-rw-r--r-- | boehm-gc/doc/scale.html | 210 |
8 files changed, 1308 insertions, 0 deletions
diff --git a/boehm-gc/doc/Makefile.am b/boehm-gc/doc/Makefile.am new file mode 100644 index 00000000000..91446305581 --- /dev/null +++ b/boehm-gc/doc/Makefile.am @@ -0,0 +1,27 @@ +# +# +# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED +# OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +# +# Permission is hereby granted to use or copy this program +# for any purpose, provided the above notices are retained on all copies. +# Permission to modify the code and to distribute modified code is granted, +# provided the above notices are retained, and a notice that the code was +# modified is included with the above copyright notice. +# +# Modified by: Grzegorz Jakacki <jakacki at acm dot org> + +## Process this file with automake to produce Makefile.in. + +# installed documentation +# +dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \ + gcdescr.html README README.amiga README.arm.cross \ + README.autoconf README.changes README.contributors \ + README.cords README.DGUX386 README.dj README.environment \ + README.ews4800 README.hp README.linux README.Mac \ + README.MacOSX README.macros README.OS2 README.rs6000 \ + README.sgi README.solaris2 README.uts README.win32 \ + tree.html leak.html gcinterface.html scale.html \ + README.darwin + diff --git a/boehm-gc/doc/Makefile.in b/boehm-gc/doc/Makefile.in new file mode 100644 index 00000000000..9bf1ff5fead --- /dev/null +++ b/boehm-gc/doc/Makefile.in @@ -0,0 +1,282 @@ +# Makefile.in generated by automake 1.6.3 from Makefile.am. +# @configure_input@ + +# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +# Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# +# +# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED +# OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +# +# Permission is hereby granted to use or copy this program +# for any purpose, provided the above notices are retained on all copies. +# Permission to modify the code and to distribute modified code is granted, +# provided the above notices are retained, and a notice that the code was +# modified is included with the above copyright notice. +# +# Modified by: Grzegorz Jakacki <jakacki at acm dot org> +SHELL = @SHELL@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. 
+ +ACLOCAL = @ACLOCAL@ +AUTOCONF = @AUTOCONF@ +AUTOMAKE = @AUTOMAKE@ +AUTOHEADER = @AUTOHEADER@ + +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_HEADER = $(INSTALL_DATA) +transform = @program_transform_name@ +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +host_alias = @host_alias@ +host_triplet = @host@ + +EXEEXT = @EXEEXT@ +OBJEXT = @OBJEXT@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AWK = @AWK@ +CC = @CC@ +CCAS = @CCAS@ +CCASFLAGS = @CCASFLAGS@ +CFLAGS = @CFLAGS@ +CXX = @CXX@ +CXXFLAGS = @CXXFLAGS@ +CXXINCLUDES = @CXXINCLUDES@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +ECHO = @ECHO@ +EXTRA_TEST_LIBS = @EXTRA_TEST_LIBS@ +GC_CFLAGS = @GC_CFLAGS@ +GC_VERSION = @GC_VERSION@ +INCLUDES = @INCLUDES@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +MAINT = @MAINT@ +MY_CFLAGS = @MY_CFLAGS@ +OBJDUMP = @OBJDUMP@ +PACKAGE = @PACKAGE@ +RANLIB = @RANLIB@ +STRIP = @STRIP@ +THREADLIBS = @THREADLIBS@ +VERSION = @VERSION@ +addincludes = @addincludes@ +addlibs = @addlibs@ +addobjs = @addobjs@ +addtests = @addtests@ +am__include = @am__include@ +am__quote = @am__quote@ +install_sh = @install_sh@ +target_all = @target_all@ + +# installed documentation +# +dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \ + gcdescr.html README README.amiga README.arm.cross \ + README.autoconf README.changes README.contributors \ + README.cords README.DGUX386 README.dj README.environment \ + README.ews4800 README.hp README.linux README.Mac \ + README.MacOSX README.macros README.OS2 README.rs6000 \ + README.sgi README.solaris2 README.uts README.win32 \ + tree.html leak.html gcinterface.html 
scale.html \ + README.darwin + +subdir = doc +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_CLEAN_FILES = +DIST_SOURCES = +DATA = $(dist_pkgdata_DATA) + +DIST_COMMON = README $(dist_pkgdata_DATA) Makefile.am Makefile.in +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu doc/Makefile +Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe) + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: +dist_pkgdataDATA_INSTALL = $(INSTALL_DATA) +install-dist_pkgdataDATA: $(dist_pkgdata_DATA) + @$(NORMAL_INSTALL) + $(mkinstalldirs) $(DESTDIR)$(pkgdatadir) + @list='$(dist_pkgdata_DATA)'; for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f"; \ + $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f; \ + done + +uninstall-dist_pkgdataDATA: + @$(NORMAL_UNINSTALL) + @list='$(dist_pkgdata_DATA)'; for p in $$list; do \ + f="`echo $$p | sed -e 's|^.*/||'`"; \ + echo " rm -f $(DESTDIR)$(pkgdatadir)/$$f"; \ + rm -f $(DESTDIR)$(pkgdatadir)/$$f; \ + done +tags: TAGS +TAGS: + +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) + +top_distdir = .. 
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION) + +distdir: $(DISTFILES) + @list='$(DISTFILES)'; for file in $$list; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkinstalldirs) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(DATA) + +installdirs: + $(mkinstalldirs) $(DESTDIR)$(pkgdatadir) + +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -rm -f Makefile $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + +distclean-am: clean-am distclean-generic distclean-libtool + +dvi: dvi-am + +dvi-am: + +info: info-am + +info-am: + +install-data-am: install-dist_pkgdataDATA + +install-exec-am: + +install-info: install-info-am + +install-man: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +uninstall-am: uninstall-dist_pkgdataDATA uninstall-info-am + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + distclean distclean-generic distclean-libtool distdir dvi \ + dvi-am info info-am install install-am install-data \ + install-data-am install-dist_pkgdataDATA install-exec \ + install-exec-am install-info install-info-am install-man \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool uninstall uninstall-am \ + uninstall-dist_pkgdataDATA uninstall-info-am + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/boehm-gc/doc/README.DGUX386 b/boehm-gc/doc/README.DGUX386 new file mode 100644 index 00000000000..9d6d84788ef --- /dev/null +++ b/boehm-gc/doc/README.DGUX386 @@ -0,0 +1,215 @@ + Garbage Collector (parallel version) for ix86 DG/UX Release R4.20MU07 + + + *READ* the file README.QUICK. + + You need the GCC-3.0.3 rev (DG/UX) compiler to build this tree. + This compiler has the new "dgux386" threads package implemented. + It also supports the switch "-pthread" needed to link correctly + the DG/UX's -lrte -lthread with -lgcc and the system's -lc. + Finally we support parallel-mark for the SMP DG/UX machines. 
+ To build the garbage collector do: + + ./configure --enable-parallel-mark + make + make gctest + + Before you run "gctest" you need to set your LD_LIBRARY_PATH + correctly so that "gctest" can find the shared library libgc. + Alternatively you can do a configuration + + ./configure --enable-parallel-mark --disable-shared + + to build only the static version of libgc. + + To enable debugging messages please do: + 1) Add the "--enable-full-debug" flag during configuration. + 2) Edit the file linux-threads.c and uncomment the line: + + /* #define DEBUG_THREADS 1 */ to ---> + + #define DEBUG_THREADS 1 + + Then give "make" as usual. + + In a machine with 4 CPUs (my own machine) the option parallel + mark (aka --enable-parallel-mark) makes a BIG difference. + + Takis Psarogiannakopoulos + University of Cambridge + Centre for Mathematical Sciences + Department of Pure Mathematics + Wilberforce Road + Cambridge CB3 0WB ,UK , <takis@XFree86.Org> + January 2002 + + +Note (HB): + The integration of this patch is currently not complete. + The following patches against 6.1alpha3 were hard to move + to alpha4, and are not integrated. There may also be minor + problems with stylistic corrections made by me. + + +--- ltconfig.ORIG Mon Jan 28 20:22:18 2002 ++++ ltconfig Mon Jan 28 20:44:00 2002 +@@ -689,6 +689,11 @@ + pic_flag=-Kconform_pic + fi + ;; ++ dgux*) ++ pic_flag='-fPIC' ++ link_static='-Bstatic' ++ wl='-Wl,' ++ ;; + *) + pic_flag='-fPIC' + ;; +@@ -718,6 +723,12 @@ + # We can build DLLs from non-PIC. + ;; + ++ dgux*) ++ pic_flag='-KPIC' ++ link_static='-Bstatic' ++ wl='-Wl,' ++ ;; ++ + osf3* | osf4* | osf5*) + # All OSF/1 code is PIC. + wl='-Wl,' +@@ -1154,6 +1165,22 @@ + fi + ;; + ++ dgux*) ++ ld_shlibs=yes ++ # For both C/C++ ommit the deplibs. 
This is because we relying on the fact ++ # that compilation of execitables will put them in correct order ++ # in any case and sometimes are wrong when listed as deplibs (or missing some deplibs) ++ # However when GNU ld and --whole-archive needs to be used we have the problem ++ # that if the -fPIC *_s.a archive is linked through deplibs list we ommiting crucial ++ # .lo/.o files from the created shared lib. This I think is not the case here. ++ archive_cmds='$CC -shared -h $soname -o $lib $libobjs $linkopts' ++ thread_safe_flag_spec='-pthread' ++ wlarc= ++ hardcode_libdir_flag_spec='-L$libdir' ++ hardcode_shlibpath_var=no ++ ac_cv_archive_cmds_needs_lc=no ++ ;; ++ + cygwin* | mingw*) + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. +@@ -1497,7 +1524,7 @@ + ;; + + dgux*) +- archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' ++ archive_cmds='$CC -shared -h $soname -o $lib $libobjs $linkopts' + hardcode_libdir_flag_spec='-L$libdir' + hardcode_shlibpath_var=no + ;; +@@ -2092,12 +2119,17 @@ + ;; + + dgux*) +- version_type=linux ++ version_type=dgux + need_lib_prefix=no + need_version=no +- library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' +- soname_spec='${libname}${release}.so$major' ++ library_names_spec='$libname.so$versuffix' ++ soname_spec='$libname.so$versuffix' + shlibpath_var=LD_LIBRARY_PATH ++ thread_safe_flag_spec='-pthread' ++ wlarc= ++ hardcode_libdir_flag_spec='-L$libdir' ++ hardcode_shlibpath_var=no ++ ac_cv_archive_cmds_needs_lc=no + ;; + + sysv4*MP*) + + +--- ltmain.sh.ORIG Mon Jan 28 20:31:18 2002 ++++ ltmain.sh Tue Jan 29 00:11:29 2002 +@@ -1072,11 +1072,38 @@ + esac + ;; + ++ -thread*) ++ # DG/UX GCC 2.95.x, 3.x.x rev (DG/UX) links -lthread ++ # with the switch -threads ++ if test "$arg" = "-threads"; then ++ case "$host" in ++ i[3456]86-*-dgux*) ++ deplibs="$deplibs $arg" ++ continue ++ ;; ++ esac ++ fi ++ ;; ++ ++ -pthread*) ++ # DG/UX 
GCC 2.95.x, 3.x.x rev (DG/UX) links -lthread ++ # with the switch -pthread ++ if test "$arg" = "-pthread"; then ++ case "$host" in ++ i[3456]86-*-dgux*) ++ deplibs="$deplibs $arg" ++ continue ++ ;; ++ esac ++ fi ++ ;; ++ + -l*) + if test "$arg" = "-lc"; then + case "$host" in +- *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) ++ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos* | i[3456]86-*-dgux*) + # These systems don't actually have c library (as such) ++ # It is wrong in DG/UX to add -lc when creating shared/dynamic objs/libs + continue + ;; + esac +@@ -1248,6 +1275,12 @@ + temp_deplibs= + for deplib in $dependency_libs; do + case "$deplib" in ++ -thread*) ++ temp_deplibs="$temp_deplibs $deplib" ++ ;; ++ -pthread) ++ temp_deplibs="$temp_deplibs $deplib" ++ ;; + -R*) temp_xrpath=`$echo "X$deplib" | $Xsed -e 's/^-R//'` + case " $rpath $xrpath " in + *" $temp_xrpath "*) ;; +@@ -1709,6 +1742,13 @@ + done + ;; + ++ dgux) ++ # Leave mostly blank for DG/UX ++ major= ++ versuffix=".$current.$revision"; ++ verstring= ++ ;; ++ + linux) + major=.`expr $current - $age` + versuffix="$major.$age.$revision" +@@ -1792,8 +1832,9 @@ + + dependency_libs="$deplibs" + case "$host" in +- *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*) ++ *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos* | i[3456]86-*-dgux*) + # these systems don't actually have a c library (as such)! ++ # It is wrong in DG/UX to add -lc when creating shared/dynamic objs/libs + ;; + *) + # Add libc to deplibs on all other systems. diff --git a/boehm-gc/doc/README.arm.cross b/boehm-gc/doc/README.arm.cross new file mode 100644 index 00000000000..96744edaf67 --- /dev/null +++ b/boehm-gc/doc/README.arm.cross @@ -0,0 +1,68 @@ +From: Margaret Fleck + +Here's the key details of what worked for me, in case anyone else needs them. +There may well be better ways to do some of this, but .... + -- Margaret + + +The badge4 has a StrongArm-1110 processor and a StrongArm-1111 coprocessor. 
+ +Assume that the garbage collector distribution is unpacked into /home/arm/gc6.0, +which is visible to both the ARM machine and a linux desktop (e.g. via NFS mounting). + +Assume that you have a file /home/arm/config.site with contents something like the +example attached below. Notice that our local ARM toolchain lives in +/skiff/local. + +Go to /home/arm/gc6.0 directory. Do + CONFIG_SITE=/home/arm/config.site ./configure --target=arm-linux +--prefix=/home/arm/gc6.0 + +On your desktop, do: + make + make install +The main garbage collector library should now be in ../gc6.0/lib/libgc.so. + +To test the garbage collector, first do the following on your desktop + make gctest + ./gctest +Then do the following on the ARM machine + cd .libs + ./lt-gctest + +Do not try to do "make test" (the usual way of running the test +program). This does not work and seems to erase some of the important +files. + +The gctest program claims to have succeeded. Haven't run any further tests +with it, though I'll be doing so in the near future. + +------------------------------- +# config.site for configure + +# Modified from the one provided by Bradley D. 
LaRonde +# Edited by Andrej Cedilnik <acedil1@csee.umbc.edu> +# Used some of solutions by Tilman Vogel <Tilman.Vogel@web.de> +# Ported for iPAQ Familiar by Oliver Kurth <oliver.kurth@innominate.com> +# Further modified by Margaret Fleck for the badge4 + +HOSTCC=gcc + +# Names of the cross-compilers +CC=/skiff/local/bin/arm-linux-gcc +CXX=/skiff/local/bin/arm-linux-gcc + +# The cross compiler specific options +CFLAGS="-O2 -fno-exceptions" +CXXFLAGS="-O2 -fno-exceptions" +CPPFLAGS="-O2 -fno-exceptions" +LDFLAGS="" + +# Some other programs +AR=/skiff/local/bin/arm-linux-ar +RANLIB=/skiff/local/bin/arm-linux-ranlib +NM=/skiff/local/bin/arm-linux-nm +ac_cv_path_NM=/skiff/local/bin/arm-linux-nm +ac_cv_func_setpgrp_void=yes +x_includes=/skiff/local/arm-linux/include/X11 +x_libraries=/skiff/local/arm-linux/lib/X11 diff --git a/boehm-gc/doc/README.darwin b/boehm-gc/doc/README.darwin new file mode 100644 index 00000000000..3cd1b818b19 --- /dev/null +++ b/boehm-gc/doc/README.darwin @@ -0,0 +1,106 @@ +Darwin/MacOSX Support - July 22, 2003 +==================================== + +Important Usage Notes +===================== + +GC_init() MUST be called before calling any other GC functions. This +is necessary to properly register segments in dynamic libraries. This +call is required even if your code does not use dynamic libraries as the +dyld code handles registering all data segments. + +When your use of the garbage collector is confined to dylibs and you +cannot call GC_init() before your libraries' static initializers have +run and perhaps called GC_malloc(), create an initialization routine +for each library to call GC_init(): + +#include <gc/gc.h> +void my_library_init() { GC_init(); } + +Compile this code into a my_library_init.o, and link it into your +dylib. When you link the dylib, pass the -init argument with +_my_library_init (e.g. gcc -dynamiclib -o my_library.dylib a.o b.o c.o +my_library_init.o -init _my_library_init). 
This causes +my_library_init() to be called before any static initializers, and +will initialize the garbage collector properly. + +Note: It doesn't hurt to call GC_init() more than once, so it's best, +if you have an application or set of libraries that all use the +garbage collector, to create an initialization routine for each of +them that calls GC_init(). Better safe than sorry. + +The incremental collector is still a bit flaky on darwin. It seems to +work reliably with workarounds for a few possible bugs in place; however, +these workarounds may not work correctly in all cases. There may also +be additional problems that I have not found. + +Implementation Information +========================== +Darwin/MacOSX support is nearly complete. Thread support is reliable on +Darwin 6.x (MacOSX 10.2) and there have been reports of success on older +Darwin versions (MacOSX 10.1). Shared library support has also been +added and the gc can be run from a shared library. There is currently only +support for Darwin/PPC although adding x86 support should be trivial. + +Thread support is implemented in terms of mach thread_suspend and +thread_resume calls. These provide a very clean interface to thread +suspension. This implementation doesn't rely on pthread_kill so the +code works on Darwin < 6.0 (MacOSX 10.1). All the code to stop the +world is located in darwin_stop_world.c. + +The original incremental collector support unfortunately no longer works +on recent Darwin versions. It also relied on some undocumented kernel +structures. Mach, however, does have a very clean interface to exception +handling. The current implementation uses Mach's exception handling. + +Much thanks goes to Andrew Stone, Dietmar Planitzer, Andrew Begel, +Jeff Sturm, and Jesse Rosenstock for all their work on the +Darwin/OS X port. 
+ +-Brian Alliet +brian@brianweb.net + + +Older Information (Most of this no longer applies to the current code) +====================================================================== + +While the GC should work on MacOS X Server, MacOS X and Darwin, I only tested +it on MacOS X Server. +I've added a PPC assembly version of GC_push_regs(), thus the setjmp() hack is +no longer necessary. Incremental collection is supported via mprotect/signal. +The current solution isn't really optimal because the signal handler must decode +the faulting PPC machine instruction in order to find the correct heap address. +Further, it must poke around in the register state which the kernel saved away +in some obscure register state structure before it calls the signal handler - +needless to say the layout of this structure is nowhere documented. +Threads and dynamic libraries are not yet supported (adding dynamic library +support via the low-level dyld API shouldn't be that hard). + +The original MacOS X port was brought to you by Andrew Stone. + + +June, 1 2000 + +Dietmar Planitzer +dave.pl@ping.at + +Note from Andrew Begel: + +One more fix to enable gc.a to link successfully into a shared library for +MacOS X. You have to add -fno-common to the CFLAGS in the Makefile. MacOSX +disallows common symbols in anything that eventually finds its way into a +shared library. (I don't completely understand why, but -fno-common seems to +work and doesn't mess up the garbage collector's functionality). + +Feb 26, 2003 + +Jeff Sturm and Jesse Rosenstock provided a patch that adds thread support. +GC_MACOSX_THREADS should be defined in the build and in clients. Real +dynamic library support is still missing, i.e. dynamic library data segments +are still not scanned. Code that stores pointers to the garbage collected +heap in statically allocated variables should not reside in a dynamic +library. This still doesn't appear to be 100% reliable. 
+ +Mar 10, 2003 +Brian Alliet contributed dynamic library support for MacOSX. It could also +use more testing. diff --git a/boehm-gc/doc/gcinterface.html b/boehm-gc/doc/gcinterface.html new file mode 100644 index 00000000000..7b336ec811b --- /dev/null +++ b/boehm-gc/doc/gcinterface.html @@ -0,0 +1,203 @@ +<!DOCTYPE HTML> +<HEAD> +<TITLE>Garbage Collector Interface</TITLE> +</HEAD> +<BODY> +<H1>C Interface</h1> +On many platforms, a single-threaded garbage collector library can be built +to act as a plug-in malloc replacement. (Build with -DREDIRECT_MALLOC=GC_malloc +-DIGNORE_FREE.) This is often the best way to deal with third-party libraries +which leak or prematurely free objects. -DREDIRECT_MALLOC is intended +primarily as an easy way to adapt old code, not for new development. +<P> +New code should use the interface discussed below. +<P> +Code must be linked against the GC library. On most UNIX platforms, +this will be gc.a. +<P> +The following describes the standard C interface to the garbage collector. +It is not a complete definition of the interface. It describes only the +most commonly used functionality, approximately in decreasing order of +frequency of use. The description assumes an ANSI C compiler. +The full interface is described in +<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> +or <TT>gc.h</tt> in the distribution. +<P> +Clients should include gc.h. +<P> +In the case of multithreaded code, +gc.h should be included after the threads header file, and +after defining the appropriate GC_XXXX_THREADS macro. +(For 6.2alpha4 and later, simply defining GC_THREADS should suffice.) +Gc.h must be included +in files that use either GC or threads primitives, since threads primitives +will be redefined to cooperate with the GC on many platforms. +<DL> +<DT> <B>void * GC_MALLOC(size_t <I>nbytes</i>)</b> +<DD> +Allocates and clears <I>nbytes</i> of storage. +Requires (amortized) time proportional to <I>nbytes</i>. 
+The resulting object will be automatically deallocated when unreferenced. +References from objects allocated with the system malloc are usually not +considered by the collector. (See GC_MALLOC_UNCOLLECTABLE, however.) +GC_MALLOC is a macro which invokes GC_malloc by default or, if GC_DEBUG +is defined before gc.h is included, a debugging version that checks +occasionally for overwrite errors, and the like. +<DT> <B>void * GC_MALLOC_ATOMIC(size_t <I>nbytes</i>)</b> +<DD> +Allocates <I>nbytes</i> of storage. +Requires (amortized) time proportional to <I>nbytes</i>. +The resulting object will be automatically deallocated when unreferenced. +The client promises that the resulting object will never contain any pointers. +The memory is not cleared. +This is the preferred way to allocate strings, floating point arrays, +bitmaps, etc. +More precise information about pointer locations can be communicated to the +collector using the interface in +<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_typedh.txt">gc_typed.h</a> in the distribution. +<DT> <B>void * GC_MALLOC_UNCOLLECTABLE(size_t <I>nbytes</i>)</b> +<DD> +Identical to GC_MALLOC, except that the resulting object is not automatically +deallocated. Unlike the system-provided malloc, the collector does +scan the object for pointers to garbage-collectable memory, even if the +block itself does not appear to be reachable. (Objects allocated in this way +are effectively treated as roots by the collector.) +<DT> <B> void * GC_REALLOC(void *old, size_t new_size) </b> +<DD> +Allocate a new object of the indicated size and copy (a prefix of) the +old object into the new object. The old object is reused in place if +convenient. If the original object was allocated with GC_malloc_atomic, +the new object is subject to the same constraints. If it was allocated +as an uncollectable object, then the new object is uncollectable, and +the old object (if different) is deallocated. 
+(Use GC_REALLOC with GC_MALLOC, etc.) +<DT> <B> void GC_FREE(void *dead) </b> +<DD> +Explicitly deallocate an object. Typically not useful for small +collectable objects. (Use GC_FREE with GC_MALLOC, etc.) +<DT> <B> void * GC_MALLOC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b> +<DD> +<DT> <B> void * GC_MALLOC_ATOMIC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b> +<DD> +Analogous to GC_MALLOC and GC_MALLOC_ATOMIC, except that the client +guarantees that as long +as the resulting object is of use, a pointer is maintained to someplace +inside the first 512 bytes of the object. This pointer should be declared +volatile to avoid interference from compiler optimizations. +(Other nonvolatile pointers to the object may exist as well.) +This is the +preferred way to allocate objects that are likely to be > 100KBytes in size. +It greatly reduces the risk that such objects will be accidentally retained +when they are no longer needed. Thus space usage may be significantly reduced. +<DT> <B> void GC_gcollect(void) </b> +<DD> +Explicitly force a garbage collection. +<DT> <B> void GC_enable_incremental(void) </b> +<DD> +Cause the garbage collector to perform a small amount of work +every few invocations of GC_malloc or the like, instead of performing +an entire collection at once. This is likely to increase total +running time. It will improve response on a platform that either has +suitable support in the garbage collector (Irix and most other Unix +versions, win32 if the collector was suitably built) or if "stubborn" +allocation is used (see <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a>). +On many platforms this interacts poorly with system calls +that write to the garbage collected heap. +<DT> <B> GC_warn_proc GC_set_warn_proc(GC_warn_proc p) </b> +<DD> +Replace the default procedure used by the collector to print warnings. 
+The collector +may otherwise write to stderr, most commonly because GC_malloc was used +in a situation in which GC_malloc_ignore_off_page would have been more +appropriate. See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details. +<DT> <B> void GC_register_finalizer(...) </b> +<DD> +Register a function to be called when an object becomes inaccessible. +This is often useful as a backup method for releasing system resources +(<I>e.g.</i> closing files) when the object referencing them becomes +inaccessible. +It is not an acceptable method to perform actions that must be performed +in a timely fashion. +See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details of the interface. +See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html">here</a> for a more detailed discussion +of the design. +<P> +Note that an object may become inaccessible before client code is done +operating on its fields. Suitable synchronization is usually required. +See <A HREF="http://portal.acm.org/citation.cfm?doid=604131.604153">here</a> +or <A HREF="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">here</a> +for details. +</dl> +<P> +If you are concerned with multiprocessor performance and scalability, +you should consider enabling and using thread local allocation (<I>e.g.</i> +GC_LOCAL_MALLOC, see <TT>gc_local_alloc.h</tt>). If your platform +supports it, you should build the collector with parallel marking support +(-DPARALLEL_MARK, or --enable-parallel-mark). +<P> +If the collector is used in an environment in which pointer location +information for heap objects is easily available, this can be passed on +to the collector using the interfaces in either <TT>gc_typed.h</tt> +or <TT>gc_gcj.h</tt>. +<P> +The collector distribution also includes a <B>string package</b> that takes +advantage of the collector. 
For details see +<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt">cord.h</a>. + +<H1>C++ Interface</h1> +There are three distinct ways to use the collector from C++: +<DL> +<DT> <B> STL allocators </b> +<DD> +Users of the <A HREF="http://www.sgi.com/tech/stl">SGI extended STL</a> +can include <TT>new_gc_alloc.h</tt> before including +STL header files. +(<TT>gc_alloc.h</tt> corresponds to now obsolete versions of the +SGI STL.) +This defines SGI-style allocators +<UL> +<LI> alloc +<LI> single_client_alloc +<LI> gc_alloc +<LI> single_client_gc_alloc +</ul> +which may be used either directly to allocate memory or to instantiate +container templates. The first two allocate uncollectable but traced +memory, while the second two allocate collectable memory. +The single_client versions are not safe for concurrent access by +multiple threads, but are faster. +<P> +For an example, click <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_alloc_exC.txt">here</a>. +<P> +Recent versions of the collector also include a more standard-conforming +allocator implementation in <TT>gc_allocator.h</tt>. It defines +<UL> +<LI> traceable_allocator +<LI> gc_allocator +</ul> +Again the former allocates uncollectable but traced memory. +This should work with any fully standard-conforming C++ compiler. +<DT> <B> Class inheritance based interface </b> +<DD> +Users may include gc_cpp.h and then cause members of certain classes to +be allocated in garbage collectable memory by inheriting from class gc. +For details see <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_cpph.txt">gc_cpp.h</a>. +<DT> <B> C interface </b> +<DD> +It is also possible to use the C interface from +<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> directly. +On platforms which use malloc to implement ::new, it should usually be possible +to use a version of the collector that has been compiled as a malloc +replacement. 
It is also possible to replace ::new and other allocation +functions suitably. +<P> +Note that user-implemented small-block allocation often works poorly with +an underlying garbage-collected large block allocator, since the collector +has to view all objects accessible from the user's free list as reachable. +This is likely to cause problems if GC_malloc is used with something like +the original HP version of STL. +This approach works with the SGI versions of the STL only if the +<TT>malloc_alloc</tt> allocator is used. +</dl> +</body> +</html> diff --git a/boehm-gc/doc/leak.html b/boehm-gc/doc/leak.html new file mode 100644 index 00000000000..91fa8ea8402 --- /dev/null +++ b/boehm-gc/doc/leak.html @@ -0,0 +1,197 @@ +<HTML> +<HEAD> +<TITLE>Using the Garbage Collector as Leak Detector</title> +</head> +<BODY> +<H1>Using the Garbage Collector as Leak Detector</h1> +The garbage collector may be used as a leak detector. +In this case, the primary function of the collector is to report +objects that were allocated (typically with <TT>GC_MALLOC</tt>), +not deallocated (normally with <TT>GC_FREE</tt>), but are +no longer accessible. Since the object is no longer accessible, +there is normally no way to deallocate the object at a later time; +thus it can safely be assumed that the object has been "leaked". +<P> +This is substantially different from counting leak detectors, +which simply verify that all allocated objects are eventually +deallocated. A garbage-collector based leak detector can provide +somewhat more precise information when an object was leaked. +More importantly, it does not report objects that are never +deallocated because they are part of "permanent" data structures. +Thus it does not require all objects to be deallocated at process +exit time, a potentially useless activity that often triggers +large amounts of paging. +<P> +All non-ancient versions of the garbage collector provide +leak detection support. 
Version 5.3 adds the following +features: +<OL> +<LI> Leak detection mode can be initiated at run-time by +setting GC_find_leak instead of building the collector with FIND_LEAK +defined. This variable should be set to a nonzero value +at program startup. +<LI> Leaked objects should be reported and then correctly garbage collected. +Prior versions either reported leaks or functioned as a garbage collector. +</ol> +For the rest of this description we will give instructions that work +with any reasonable version of the collector. +<P> +To use the collector as a leak detector, follow the following steps: +<OL> +<LI> Build the collector with -DFIND_LEAK. Otherwise use default +build options. +<LI> Change the program so that all allocation and deallocation goes +through the garbage collector. +<LI> Arrange to call <TT>GC_gcollect</tt> at appropriate points to check +for leaks. +(For sufficiently long running programs, this will happen implicitly, +but probably not with sufficient frequency.) +</ol> +The second step can usually be accomplished with the +<TT>-DREDIRECT_MALLOC=GC_malloc</tt> option when the collector is built, +or by defining <TT>malloc</tt>, <TT>calloc</tt>, +<TT>realloc</tt> and <TT>free</tt> +to call the corresponding garbage collector functions. +But this, by itself, will not yield very informative diagnostics, +since the collector does not keep track of information about +how objects were allocated. The error reports will include +only object addresses. +<P> +For more precise error reports, as much of the program as possible +should use the all uppercase variants of these functions, after +defining <TT>GC_DEBUG</tt>, and then including <TT>gc.h</tt>. +In this environment <TT>GC_MALLOC</tt> is a macro which causes +at least the file name and line number at the allocation point to +be saved as part of the object. Leak reports will then also include +this information. 
+<P> +Many collector features (<I>e.g.</i> stubborn objects, finalization, +and disappearing links) are less useful in this context, and are not +fully supported. Their use will usually generate additional bogus +leak reports, since the collector itself drops some associated objects. +<P> +The same is generally true of thread support. However, as of 6.0alpha4, +correct leak reports should be generated with linuxthreads. +<P> +On a few platforms (currently Solaris/SPARC, Irix, and, with -DSAVE_CALL_CHAIN, +Linux/X86), <TT>GC_MALLOC</tt> +also causes some more information about its call stack to be saved +in the object. Such information is reproduced in the error +reports in very non-symbolic form, but it can be very useful with the +aid of a debugger. +<H2>An Example</h2> +The following header file <TT>leak_detector.h</tt> is included in the +"include" subdirectory of the distribution: +<PRE> +#define GC_DEBUG +#include "gc.h" +#define malloc(n) GC_MALLOC(n) +#define calloc(m,n) GC_MALLOC((m)*(n)) +#define free(p) GC_FREE(p) +#define realloc(p,n) GC_REALLOC((p),(n)) +#define CHECK_LEAKS() GC_gcollect() +</pre> +<P> +Assume the collector has been built with -DFIND_LEAK. (For very +new versions of the collector, we could instead add the statement +<TT>GC_find_leak = 1</tt> as the first statement in <TT>main</tt>.) +<P> +The program to be tested for leaks can then look like: +<PRE> +#include "leak_detector.h" + +main() { + int *p[10]; + int i; + /* GC_find_leak = 1; for new collector versions not */ + /* compiled with -DFIND_LEAK. */ + for (i = 0; i < 10; ++i) { + p[i] = malloc(sizeof(int)+i); + } + for (i = 1; i < 10; ++i) { + free(p[i]); + } + for (i = 0; i < 9; ++i) { + p[i] = malloc(sizeof(int)+i); + } + CHECK_LEAKS(); +} +</pre> +<P> +On an Intel X86 Linux system this produces on the stderr stream: +<PRE> +Leaked composite object at 0x806dff0 (leak_test.c:8, sz=4) +</pre> +(On most unmentioned operating systems, the output is similar to this. 
+If the collector had been built on Linux/X86 with -DSAVE_CALL_CHAIN, +the output would be closer to the Solaris example. For this to work, +the program should not be compiled with -fomit-frame-pointer.) +<P> +On Irix it reports +<PRE> +Leaked composite object at 0x10040fe0 (leak_test.c:8, sz=4) + Caller at allocation: + ##PC##= 0x10004910 +</pre> +and on Solaris the error report is +<PRE> +Leaked composite object at 0xef621fc8 (leak_test.c:8, sz=4) + Call chain at allocation: + args: 4 (0x4), 200656 (0x30FD0) + ##PC##= 0x14ADC + args: 1 (0x1), -268436012 (0xEFFFFDD4) + ##PC##= 0x14A64 +</pre> +In the latter two cases some additional information is given about +how malloc was called when the leaked object was allocated. For +Solaris, the first line specifies the arguments to <TT>GC_debug_malloc</tt> +(the actual allocation routine), the second the program counter inside +main, the third the arguments to <TT>main</tt>, and finally the program +counter inside the caller to main (i.e. in the C startup code). +<P> +In the Irix case, only the address inside the caller to main is given. +<P> +In many cases, a debugger is needed to interpret the additional information. +On systems supporting the "adb" debugger, the <TT>callprocs</tt> script +can be used to replace program counter values with symbolic names. +As of version 6.1, the collector tries to generate symbolic names for +call stacks if it knows how to do so on the platform. This is true on +Linux/X86, but not on most other platforms. +<H2>Simplified leak detection under Linux</h2> +Since version 6.1, it should be possible to run the collector in leak +detection mode on a program a.out under Linux/X86 as follows: +<OL> +<LI> Ensure that a.out is a single-threaded executable. This doesn't yet work +for multithreaded programs. +<LI> If possible, ensure that the addr2line program is installed in +/usr/bin. (It comes with RedHat Linux.) +<LI> If possible, compile a.out with full debug information. 
+This will improve the quality of the leak reports. With this approach, it is +no longer necessary to call GC_ routines explicitly, though that can also +improve the quality of the leak reports. +<LI> Build the collector and install it in directory <I>foo</i> as follows: +<UL> +<LI> configure --prefix=<I>foo</i> --enable-full-debug --enable-redirect-malloc +--disable-threads +<LI> make +<LI> make install +</ul> +<LI> Set environment variables as follows: +<UL> +<LI> LD_PRELOAD=<I>foo</i>/lib/libgc.so +<LI> GC_FIND_LEAK +<LI> You may also want to set GC_PRINT_STATS (to confirm that the collector +is running) and/or GC_LOOP_ON_ABORT (to facilitate debugging from another +window if something goes wrong). +</ul> +<LI> Simply run a.out as you normally would. Note that if you run anything +else (<I>e.g.</i> your editor) with those environment variables set, +it will also be leak tested. This may or may not be useful and/or +embarrassing. It can generate +mountains of leak reports if the application wasn't designed to avoid leaks, +<I>e.g.</i> because it's always short-lived. +</ol> +This has not yet been thoroughly tested on large applications, but it's known +to do the right thing on at least some small ones. +</body> +</html> diff --git a/boehm-gc/doc/scale.html b/boehm-gc/doc/scale.html new file mode 100644 index 00000000000..2e70148dfb7 --- /dev/null +++ b/boehm-gc/doc/scale.html @@ -0,0 +1,210 @@ +<HTML> +<HEAD> +<TITLE>Garbage collector scalability</TITLE> +</HEAD> +<BODY> +<H1>Garbage collector scalability</h1> +In its default configuration, the Boehm-Demers-Weiser garbage collector +is not thread-safe. It can be made thread-safe for a number of environments +by building the collector with the appropriate +<TT>-D</tt><I>XXX</i><TT>_THREADS</tt> compilation +flag. This has primarily two effects: +<OL> +<LI> It causes the garbage collector to stop all other threads when +it needs to see a consistent memory state. 
+<LI> It causes the collector to acquire a lock around essentially all +allocation and garbage collection activity. +</ol> +Since a single lock is used for all allocation-related activity, only one +thread can be allocating or collecting at one point. This inherently +limits performance of multi-threaded applications on multiprocessors. +<P> +On most platforms, the allocator/collector lock is implemented as a +spin lock with exponential back-off. Longer wait times are implemented +by yielding and/or sleeping. If a collection is in progress, the pure +spinning stage is skipped. This has the advantage that uncontested and +thus most uniprocessor lock acquisitions are very cheap. It has the +disadvantage that the application may sleep for small periods of time +even when there is work to be done. And threads may be unnecessarily +woken up for short periods. Nonetheless, this scheme empirically +outperforms native queue-based mutual exclusion implementations in most +cases, sometimes drastically so. +<H2>Options for enhanced scalability</h2> +Version 6.0 of the collector adds two facilities to enhance collector +scalability on multiprocessors. As of 6.0alpha1, these are supported +only under Linux on X86 and IA64 processors, though ports to other +otherwise supported Pthreads platforms should be straightforward. +They are intended to be used together. +<UL> +<LI> +Building the collector with <TT>-DPARALLEL_MARK</tt> allows the collector to +run the mark phase in parallel in multiple threads, and thus on multiple +processors. The mark phase typically consumes the large majority of the +collection time. Thus this largely parallelizes the garbage collector +itself, though not the allocation process. Currently the marking is +performed by the thread that triggered the collection, together with +<I>N</i>-1 dedicated +threads, where <I>N</i> is the number of processors detected by the collector. +The dedicated threads are created once at initialization time. 
+<P> +A second effect of this flag is to switch to a more concurrent +implementation of <TT>GC_malloc_many</tt>, so that free lists can be +built, and memory can be cleared, by more than one thread concurrently. +<LI> +Building the collector with -DTHREAD_LOCAL_ALLOC adds support for thread +local allocation. It does not, by itself, cause thread local allocation +to be used. It simply allows the use of the interface in +<TT>gc_local_alloc.h</tt>. +<P> +Memory returned from thread-local allocators is completely interchangeable +with that returned by the standard allocators. It may be used by other +threads. The only difference is that, if the thread allocates enough +memory of a certain kind, it will build a thread-local free list for +objects of that kind, and allocate from that. This greatly reduces +locking. The thread-local free lists are refilled using +<TT>GC_malloc_many</tt>. +<P> +An important side effect of this flag is to replace the default +spin-then-sleep lock with a spin-then-queue based implementation. +This <I>reduces performance</i> for the standard allocation functions, +though it usually improves performance when thread-local allocation is +used heavily, and thus the number of short-duration lock acquisitions +is greatly reduced. +</ul> +<P> +The easiest way to switch an application to thread-local allocation is to +<OL> +<LI> Define the macro <TT>GC_REDIRECT_TO_LOCAL</tt>, +and then include the <TT>gc.h</tt> +header in each client source file. +<LI> Invoke <TT>GC_thr_init()</tt> before any allocation. +<LI> Allocate using <TT>GC_MALLOC</tt>, <TT>GC_MALLOC_ATOMIC</tt>, +and/or <TT>GC_GCJ_MALLOC</tt>. +</ol> +<H2>The Parallel Marking Algorithm</h2> +We use an algorithm similar to +<A HREF="http://www.yl.is.s.u-tokyo.ac.jp/gc/">that developed by +Endo, Taura, and Yonezawa</a> at the University of Tokyo. 
+However, the data structures and implementation are different, +and represent a smaller change to the original collector source, +probably at the expense of extreme scalability. Some of +the refinements they suggest, <I>e.g.</i> splitting large +objects, were also incorporated into our approach. +<P> +The global mark stack is transformed into a global work queue. +Unlike the usual case, it never shrinks during a mark phase. +The mark threads remove objects from the queue by copying them to a +local mark stack and changing the global descriptor to zero, indicating +that there is no more work to be done for this entry. +This removal +is done with no synchronization. Thus it is possible for more than +one worker to remove the same entry, resulting in some work duplication. +<P> +The global work queue grows only if a marker thread decides to +return some of its local mark stack to the global one. This +is done if the global queue appears to be running low, or if +the local stack is in danger of overflowing. It does require +synchronization, but should be relatively rare. +<P> +The sequential marking code is reused to process local mark stacks. +Hence the amount of additional code required for parallel marking +is minimal. +<P> +It should be possible to use generational collection in the presence of the +parallel collector, by calling <TT>GC_enable_incremental()</tt>. +This does not result in fully incremental collection, since parallel mark +phases cannot currently be interrupted, and doing so may be too +expensive. +<P> +Gcj-style mark descriptors do not currently mix with the combination +of local allocation and incremental collection. They should work correctly +with one or the other, but not both. +<P> +The number of marker threads is set on startup to the number of +available processors (or to the value of the <TT>GC_NPROCS</tt> +environment variable). If only a single processor is detected, +parallel marking is disabled. 
+<P> +Note that setting GC_NPROCS to 1 also causes some lock acquisitions inside +the collector to immediately yield the processor instead of busy waiting +first. In the case of a multiprocessor and a client with multiple +simultaneously runnable threads, this may have disastrous performance +consequences (e.g. a factor of 10 slowdown). +<H2>Performance</h2> +We conducted some simple experiments with a version of +<A HREF="gc_bench.html">our GC benchmark</a> that was slightly modified to +run multiple concurrent client threads in the same address space. +Each client thread does the same work as the original benchmark, but they share +a heap. +This benchmark involves very little work outside of memory allocation. +This was run with GC 6.0alpha3 on a dual processor Pentium III/500 machine +under Linux 2.2.12. +<P> +Running with a thread-unsafe collector, the benchmark ran in 9 +seconds. With the simple thread-safe collector, +built with <TT>-DLINUX_THREADS</tt>, the execution time +increased to 10.3 seconds, or 23.5 elapsed seconds with two clients. +(The times for the <TT>malloc</tt>/<TT>free</tt> version +with glibc <TT>malloc</tt> +are 10.51 (standard library, pthreads not linked), +20.90 (one thread, pthreads linked), +and 24.55 seconds respectively. The benchmark favors a +garbage collector, since most objects are small.) +<P> +The following table gives execution times for the collector built +with parallel marking and thread-local allocation support +(<TT>-DGC_LINUX_THREADS -DPARALLEL_MARK -DTHREAD_LOCAL_ALLOC</tt>). We tested +the client using either one or two marker threads, and running +one or two client threads. Note that the client uses thread local +allocation exclusively. With -DTHREAD_LOCAL_ALLOC the collector +switches to a locking strategy that is better tuned to less frequent +lock acquisition. The standard allocation primitives thus perform +slightly worse than without -DTHREAD_LOCAL_ALLOC, and should be +avoided in time-critical code. 
+<P> +(The results using <TT>pthread_mutex_lock</tt> +directly for allocation locking would have been worse still, at +least for older versions of linuxthreads. +With THREAD_LOCAL_ALLOC, we first repeatedly try to acquire the +lock with pthread_mutex_trylock(), busy-waiting between attempts. +After a fixed number of attempts, we use pthread_mutex_lock().) +<P> +These measurements do not use incremental collection, nor was prefetching +enabled in the marker. We used the C version of the benchmark. +All measurements are in elapsed seconds on an unloaded machine. +<P> +<TABLE BORDER ALIGN="CENTER"> +<TR><TH>Number of threads</th><TH>1 marker thread (secs.)</th> +<TH>2 marker threads (secs.)</th></tr> +<TR><TD>1 client</td><TD ALIGN="CENTER">10.45</td><TD ALIGN="CENTER">7.85</td> +<TR><TD>2 clients</td><TD ALIGN="CENTER">19.95</td><TD ALIGN="CENTER">12.3</td> +</table> +<P> +The execution time for the single threaded case is slightly worse than with +simple locking. However, even the single-threaded benchmark runs faster than +even the thread-unsafe version if a second processor is available. +The execution time for two clients with thread local allocation is +only 1.4 times the sequential execution time for a single thread in a +thread-unsafe environment, even though it involves twice the client work. +That represents close to a +factor of 2 improvement over the 2 client case with the old collector. +The old collector clearly +still suffered from some contention overhead, in spite of the fact that the +locking scheme had been fairly well tuned. +<P> +Full linear speedup (i.e. 
the same execution time for 1 client on one +processor as 2 clients on 2 processors) +is probably not achievable on this kind of +hardware even with such a small number of processors, +since the memory system is +a major constraint for the garbage collector, +the processors usually share a single memory bus, and thus +the aggregate memory bandwidth does not increase in +proportion to the number of processors. +<P> +These results are likely to be very sensitive to both hardware and OS +issues. Preliminary experiments with an older Pentium Pro machine running +an older kernel were far less encouraging. + +</body> +</html> |