/* * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #if defined(HAVE_CONFIG_H) # include "config.h" #endif #include #if defined(__vxworks) int main(void) { printf("test skipped\n"); return 0; } #else #if ((defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__CYGWIN__)) \ || defined(_MSC_VER) || defined(_WIN32_WINCE)) \ && !defined(AO_USE_WIN32_PTHREADS) # define USE_WINTHREADS #endif #ifdef USE_WINTHREADS # include #else # include #endif #include #include #include "atomic_ops_stack.h" /* includes atomic_ops.h as well */ #if (defined(_WIN32_WCE) || defined(__MINGW32CE__)) && !defined(AO_HAVE_abort) # define abort() _exit(-1) /* there is no abort() in WinCE */ #endif #ifndef MAX_NTHREADS # define MAX_NTHREADS 100 #endif #ifndef DEFAULT_NTHREADS # define DEFAULT_NTHREADS 16 /* must be <= MAX_NTHREADS */ #endif #ifdef NO_TIMES # define get_msecs() 0 #elif (defined(USE_WINTHREADS) || defined(AO_USE_WIN32_PTHREADS)) \ && !defined(CPPCHECK) # include unsigned long get_msecs(void) { struct timeb tb; ftime(&tb); return (unsigned long)tb.time * 1000 + tb.millitm; } #else /* Unix */ # include # include unsigned long get_msecs(void) { struct timeval tv; gettimeofday(&tv, 0); return (unsigned long)tv.tv_sec * 1000 + tv.tv_usec/1000; } #endif /* !NO_TIMES */ struct le { AO_t next; /* must be the first field */ int data; }; typedef union le_u { AO_t next; struct le e; } list_element; #if defined(CPPCHECK) AO_stack_t the_list; /* to test AO_stack_init() */ #else AO_stack_t the_list = AO_STACK_INITIALIZER; #endif /* Add elements from 1 to n to the list (1 is pushed first). */ /* This is called from a single thread only. */ void add_elements(int n) { list_element * le; if (n == 0) return; add_elements(n-1); le = (list_element *)malloc(sizeof(list_element)); if (le == 0) { fprintf(stderr, "Out of memory\n"); exit(2); } # if defined(CPPCHECK) le->e.next = 0; /* mark field as used */ # endif le->e.data = n; AO_stack_push(&the_list, &le->next); } #ifdef VERBOSE_STACK void print_list(void) { AO_t *p; for (p = AO_REAL_HEAD_PTR(the_list); p != 0; p = AO_REAL_NEXT_PTR(*p)) printf("%d\n", ((list_element*)p)->e.data); } #endif /* VERBOSE_STACK */ /* Check that the list contains only values from 1 to n, in any order, */ /* w/o duplications. Executed when the list mutation is finished. */ void check_list(int n) { AO_t *p; int i; int err_cnt = 0; char *marks = (char*)calloc(n + 1, 1); if (0 == marks) { fprintf(stderr, "Out of memory (marks)\n"); exit(2); } for (p = AO_REAL_HEAD_PTR(the_list); p != 0; p = AO_REAL_NEXT_PTR(*p)) { i = ((list_element*)p)->e.data; if (i > n || i <= 0) { fprintf(stderr, "Found erroneous list element %d\n", i); err_cnt++; } else if (marks[i] != 0) { fprintf(stderr, "Found duplicate list element %d\n", i); abort(); } else marks[i] = 1; } for (i = 1; i <= n; ++i) if (marks[i] != 1) { fprintf(stderr, "Missing list element %d\n", i); err_cnt++; } free(marks); if (err_cnt > 0) abort(); } volatile AO_t ops_performed = 0; #ifndef LIMIT /* Total number of push/pop ops in all threads per test. */ # ifdef AO_USE_PTHREAD_DEFS # define LIMIT 20000 # else # define LIMIT 1000000 # endif #endif #ifdef AO_HAVE_fetch_and_add # define fetch_then_add(addr, val) AO_fetch_and_add(addr, val) #else /* OK to perform it in two atomic steps, but really quite */ /* unacceptable for timing purposes. */ AO_INLINE AO_t fetch_then_add(volatile AO_t * addr, AO_t val) { AO_t result = AO_load(addr); AO_store(addr, result + val); return result; } #endif #ifdef USE_WINTHREADS DWORD WINAPI run_one_test(LPVOID arg) #else void * run_one_test(void * arg) #endif { AO_t *t[MAX_NTHREADS + 1]; unsigned index = (unsigned)(size_t)arg; unsigned i; # ifdef VERBOSE_STACK unsigned j = 0; printf("starting thread %u\n", index); # endif assert(index <= MAX_NTHREADS); while (fetch_then_add(&ops_performed, index + 1) + index + 1 < LIMIT) { /* Pop index+1 elements (where index is the thread one), then */ /* push them back (in the same order of operations). */ /* Note that this is done in parallel by many threads. */ for (i = 0; i <= index; ++i) { t[i] = AO_stack_pop(&the_list); if (0 == t[i]) { /* This should not happen as at most n*(n+1)/2 elements */ /* could be popped off at a time. */ fprintf(stderr, "Failed - nothing to pop\n"); abort(); } } for (i = 0; i <= index; ++i) { AO_stack_push(&the_list, t[i]); } # ifdef VERBOSE_STACK j += index + 1; # endif } /* Repeat until LIMIT push/pop operations are performed (by all */ /* the threads simultaneously). */ # ifdef VERBOSE_STACK printf("finished thread %u: %u total ops\n", index, j); # endif return 0; } #ifndef N_EXPERIMENTS # define N_EXPERIMENTS 1 #endif unsigned long times[MAX_NTHREADS + 1][N_EXPERIMENTS]; void run_one_experiment(int max_nthreads, int exper_n) { int nthreads; assert(max_nthreads <= MAX_NTHREADS); assert(exper_n < N_EXPERIMENTS); for (nthreads = 1; nthreads <= max_nthreads; ++nthreads) { unsigned i; # ifdef USE_WINTHREADS DWORD thread_id; HANDLE thread[MAX_NTHREADS]; # else pthread_t thread[MAX_NTHREADS]; # endif int list_length = nthreads*(nthreads+1)/2; unsigned long start_time; AO_t *le; # ifdef VERBOSE_STACK printf("Before add_elements: exper_n=%d, nthreads=%d," " max_nthreads=%d, list_length=%d\n", exper_n, nthreads, max_nthreads, list_length); # endif /* Create a list with n*(n+1)/2 elements. */ assert(0 == AO_REAL_HEAD_PTR(the_list)); add_elements(list_length); # ifdef VERBOSE_STACK printf("Initial list (nthreads = %d):\n", nthreads); print_list(); # endif ops_performed = 0; start_time = get_msecs(); /* Start n-1 threads to run_one_test in parallel. */ for (i = 1; (int)i < nthreads; ++i) { int code; # ifdef USE_WINTHREADS thread[i] = CreateThread(NULL, 0, run_one_test, (LPVOID)(size_t)i, 0, &thread_id); code = thread[i] != NULL ? 0 : (int)GetLastError(); # else code = pthread_create(&thread[i], 0, run_one_test, (void *)(size_t)i); # endif if (code != 0) { fprintf(stderr, "Thread creation failed %u\n", (unsigned)code); exit(3); } } /* We use the main thread to run one test. This allows */ /* gprof profiling to work, for example. */ run_one_test(0); /* Wait for all the threads to complete. */ for (i = 1; (int)i < nthreads; ++i) { int code; # ifdef USE_WINTHREADS code = WaitForSingleObject(thread[i], INFINITE) == WAIT_OBJECT_0 ? 0 : (int)GetLastError(); # else code = pthread_join(thread[i], 0); # endif if (code != 0) { fprintf(stderr, "Thread join failed %u\n", (unsigned)code); abort(); } } times[nthreads][exper_n] = get_msecs() - start_time; # ifdef VERBOSE_STACK printf("nthreads=%d, time_ms=%lu\n", nthreads, times[nthreads][exper_n]); printf("final list (should be reordered initial list):\n"); print_list(); # endif /* Ensure that no element is lost or duplicated. */ check_list(list_length); /* And, free the entire list. */ while ((le = AO_stack_pop(&the_list)) != 0) free(le); /* Retry with larger n values. */ } } void run_all_experiments(int max_nthreads) { int exper_n; for (exper_n = 0; exper_n < N_EXPERIMENTS; ++exper_n) run_one_experiment(max_nthreads, exper_n); } /* Output the performance statistic. */ void output_stat(int max_nthreads) { int nthreads; assert(max_nthreads <= MAX_NTHREADS); for (nthreads = 1; nthreads <= max_nthreads; ++nthreads) { # ifndef NO_TIMES int exper_n; unsigned long sum = 0; # endif printf("About %d pushes + %d pops in %d threads:", LIMIT, LIMIT, nthreads); # ifndef NO_TIMES for (exper_n = 0; exper_n < N_EXPERIMENTS; ++exper_n) { # ifdef VERBOSE_STACK printf(" [%lums]", times[nthreads][exper_n]); # endif sum += times[nthreads][exper_n]; } printf(" %lu msecs\n", (sum + N_EXPERIMENTS/2)/N_EXPERIMENTS); # else printf(" completed\n"); # endif } } int main(int argc, char **argv) { int max_nthreads = DEFAULT_NTHREADS; if (2 == argc) { max_nthreads = atoi(argv[1]); if (max_nthreads < 1 || max_nthreads > MAX_NTHREADS) { fprintf(stderr, "Invalid max # of threads argument\n"); exit(1); } } else if (argc > 2) { fprintf(stderr, "Usage: %s [max # of threads]\n", argv[0]); exit(1); } if (!AO_stack_is_lock_free()) printf("Use almost-lock-free implementation\n"); # if defined(CPPCHECK) AO_stack_init(&the_list); # endif run_all_experiments(max_nthreads); output_stat(max_nthreads); return 0; } #endif