root/threadwrappers.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. clone_start
  2. __clone
  3. pthread_start
  4. pthread_create
  5. pthread_exit
  6. pthread_join
  7. pthread_tryjoin_np
  8. pthread_timedjoin_np

   1 /****************************************************************************
   2  *   Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
   3  *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
   4  *                                                                          *
   5  *  This file is part of DMTCP.                                             *
   6  *                                                                          *
   7  *  DMTCP is free software: you can redistribute it and/or                  *
   8  *  modify it under the terms of the GNU Lesser General Public License as   *
   9  *  published by the Free Software Foundation, either version 3 of the      *
  10  *  License, or (at your option) any later version.                         *
  11  *                                                                          *
  12  *  DMTCP is distributed in the hope that it will be useful,                *
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
  15  *  GNU Lesser General Public License for more details.                     *
  16  *                                                                          *
  17  *  You should have received a copy of the GNU Lesser General Public        *
  18  *  License along with DMTCP:dmtcp/src.  If not, see                        *
  19  *  <http://www.gnu.org/licenses/>.                                         *
  20  ****************************************************************************/
  21 
  22 #include <sys/syscall.h>
  23 #include "constants.h"
  24 #include "dmtcpworker.h"
  25 #include "mtcpinterface.h"
  26 #include "syscallwrappers.h"
  27 #include "dmtcp.h"
  28 #include "uniquepid.h"
  29 #include "util.h"
  30 #include "../jalib/jassert.h"
  31 #include "../jalib/jalloc.h"
  32 #include "threadsync.h"
  33 #include "processinfo.h"
  34 #include "threadlist.h"
  35 #include "siginfo.h"
  36 
  37 using namespace dmtcp;
  38 
  39 struct ThreadArg {
  40   union {
  41     int (*fn) (void *arg);
  42     void * (*pthread_fn) (void *arg); // pthread_create calls fn -> void *
  43   };
  44   void *arg;
  45   void *mtcpArg;
  46   pid_t virtualTid;
  47 };
  48 
  49 // Invoked via __clone
  50 LIB_PRIVATE
  51 int clone_start(void *arg)
  52 {
  53   Thread *thread = (Thread*) arg;
  54 
  55   ThreadSync::initThread();
  56 
  57   ThreadList::updateTid(thread);
  58 
  59   DmtcpWorker::eventHook(DMTCP_EVENT_THREAD_START, NULL);
  60 
  61   /* Thread finished initialization.  It's now safe for this thread to
  62    * participate in checkpoint.  Decrement the uninitializedThreadCount in
  63    * DmtcpWorker.
  64    */
  65   ThreadSync::decrementUninitializedThreadCount();
  66 
  67   JTRACE("Calling user function") (dmtcp_gettid());
  68   int ret = thread->fn(thread->arg);
  69 
  70   ThreadList::threadExit();
  71   return ret;
  72 }
  73 
  74 /*****************************************************************************
  75  *
  76  *  This is our clone system call wrapper
  77  *
  78  *    Note:
  79  *
  80  *      pthread_create eventually calls __clone to create threads
  81  *      It uses flags = 0x3D0F00:
  82  *            CLONE_VM = VM shared between processes
  83  *            CLONE_FS = fs info shared between processes (root, cwd, umask)
  84  *         CLONE_FILES = open files shared between processes (fd table)
  85  *       CLONE_SIGHAND = signal handlers and blocked signals shared
  86  *                               (sigaction common to parent and child)
  87  *        CLONE_THREAD = add to same thread group
  88  *       CLONE_SYSVSEM = share system V SEM_UNDO semantics
  89  *        CLONE_SETTLS = create a new TLS for the child from newtls parameter
  90  *       CLONE_PARENT_SETTID = set the TID in the parent (before MM copy)
  91  *      CLONE_CHILD_CLEARTID = clear the TID in the child and do
  92  *                               futex wake at that address
  93  *            CLONE_DETACHED = create clone detached
  94  *
  95  *****************************************************************************/
  96 //need to forward user clone
  97 extern "C" int __clone(int (*fn) (void *arg), void *child_stack, int flags,
  98                        void *arg, int *ptid,
  99                        struct user_desc *tls, int *ctid)
 100 {
 101   WRAPPER_EXECUTION_DISABLE_CKPT();
 102   ThreadSync::incrementUninitializedThreadCount();
 103 
 104   Thread *thread = ThreadList::getNewThread();
 105   ThreadList::initThread(thread, fn, arg, flags, ptid, ctid);
 106 //  if (ckpthread == NULL) {
 107 //    ckptthread = thread;
 108 //    thread->stateInit(ST_CKPNTHREAD);
 109 //  }
 110 
 111   pid_t tid = _real_clone(clone_start, child_stack, flags, thread,
 112                           ptid, tls, ctid);
 113 
 114   if (tid == -1) {
 115     JTRACE("Clone call failed")(JASSERT_ERRNO);
 116     ThreadSync::decrementUninitializedThreadCount();
 117     delete thread;
 118   } else {
 119     DmtcpWorker::eventHook(DMTCP_EVENT_THREAD_CREATED, NULL);
 120   }
 121 
 122   WRAPPER_EXECUTION_ENABLE_CKPT();
 123   return tid;
 124 }
 125 #if 0
 126 #if defined(__i386__) || defined(__x86_64__)
 127 asm (".global clone ; .type clone,@function ; clone = __clone");
 128 #elif defined(__arm__)
 129 // In arm, '@' is a comment character;  Arm uses '%' in type directive
 130 asm (".global clone ; .type clone,%function ; clone = __clone");
 131 #else
 132 # error Not implemented on this architecture
 133 #endif
 134 #endif
 135 
 136 // Invoked via pthread_create as start_routine
 137 // On return, it calls mtcp_threadiszombie()
 138 static void *pthread_start(void *arg)
 139 {
 140   struct ThreadArg *threadArg = (struct ThreadArg*) arg;
 141   void *thread_arg = threadArg->arg;
 142   void * (*pthread_fn) (void *) = threadArg->pthread_fn;
 143   pid_t virtualTid = threadArg->virtualTid;
 144 
 145   JASSERT(pthread_fn != 0x0);
 146   JALLOC_HELPER_FREE(arg); // Was allocated in calling thread in pthread_create
 147 
 148   // Unblock ckpt signal (unblocking a non-blocked signal has no effect).
 149   // Normally, DMTCP wouldn't allow the ckpt signal to be blocked. However, in
 150   // some situations (e.g., timer_create), libc would internally block all
 151   // signals before calling pthread_create to create a helper thread.  Since,
 152   // the child threads inherit parent signal mask, the helper thread has all
 153   // signals blocked.
 154   sigset_t set;
 155   sigaddset(&set, SigInfo::ckptSignal());
 156   JASSERT(_real_pthread_sigmask(SIG_UNBLOCK, &set, NULL) == 0) (JASSERT_ERRNO);
 157 
 158   ThreadSync::threadFinishedInitialization();
 159   void *result = (*pthread_fn)(thread_arg);
 160   JTRACE("Thread returned") (virtualTid);
 161   WRAPPER_EXECUTION_DISABLE_CKPT();
 162   ThreadList::threadExit();
 163   /*
 164    * This thread has finished its execution, do some cleanup on our part.
 165    *  erasing the virtualTid entry from virtualpidtable
 166    *  FIXME: What if the process gets checkpointed after erase() but before the
 167    *  thread actually exits?
 168    */
 169   DmtcpWorker::eventHook(DMTCP_EVENT_PTHREAD_RETURN, NULL);
 170   WRAPPER_EXECUTION_ENABLE_CKPT();
 171   ThreadSync::unsetOkToGrabLock();
 172   return result;
 173 }
 174 
 175 
 176 extern "C" int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
 177                               void *(*start_routine)(void*), void *arg)
 178 {
 179   int retval;
 180   // We have to use DMTCP-specific memory allocator because using glibc:malloc
 181   // can interfere with user threads.
 182   // We use JALLOC_HELPER_FREE to free this memory in two places:
 183   // 1. near the beginning of pthread_start (wrapper for start_routine),
 184   //     providing that the __clone call succeeds with no tid conflict.
 185   // 2. if the call to _real_pthread_create fails, then free memory
 186   //     near the end of this function.
 187   // We use MALLOC/FREE so that pthread_create() can be called again, without
 188   // waiting for the new thread to give up the buffer in pthread_start().
 189   struct ThreadArg *threadArg =
 190     (struct ThreadArg *) JALLOC_HELPER_MALLOC (sizeof (struct ThreadArg));
 191   threadArg->pthread_fn = start_routine;
 192   threadArg->arg = arg;
 193 
 194   /* pthread_create() should acquire the thread-creation lock. Not doing so can
 195    * result in a deadlock in the following scenario:
 196    * 1. user thread: pthread_create() - acquire wrapper-execution lock
 197    * 2. ckpt-thread: SUSPEND msg received, wait on wrlock for wrapper-exection lock
 198    * 3. user thread: __clone() - try to acquire wrapper-execution lock
 199    *
 200    * We also need to increment the uninitialized-thread-count so that it is
 201    * safe to checkpoint the newly created thread.
 202    *
 203    * There is another possible deadlock situation if we do not grab the
 204    * thread-creation lock:
 205    * 1. user thread: pthread_create(): waiting on tbl_lock inside libpthread
 206    * 2. ckpt-thread: SUSPEND msg received, wait on wrlock for wrapper-exec lock
 207    * 3. uset thread: a. exiting after returning from user fn.
 208    *                 b. grabs tbl_lock()
 209    *                 c. tries to call free() to deallocate previously allocated
 210    *                 space (stack etc.). The free() wrapper requires
 211    *                 wrapper-exec lock, which is not available.
 212    */
 213   bool threadCreationLockAcquired = ThreadSync::threadCreationLockLock();
 214   ThreadSync::incrementUninitializedThreadCount();
 215   retval = _real_pthread_create(thread, attr, pthread_start, threadArg);
 216   if (threadCreationLockAcquired) {
 217     ThreadSync::threadCreationLockUnlock();
 218   }
 219   if (retval == 0) {
 220     ProcessInfo::instance().clearPthreadJoinState(*thread);
 221   } else { // if we failed to create new pthread
 222     JALLOC_HELPER_FREE(threadArg);
 223     ThreadSync::decrementUninitializedThreadCount();
 224   }
 225   return retval;
 226 }
 227 
 228 extern "C" void pthread_exit(void * retval)
 229 {
 230   WRAPPER_EXECUTION_DISABLE_CKPT();
 231   ThreadList::threadExit();
 232   DmtcpWorker::eventHook(DMTCP_EVENT_PTHREAD_EXIT, NULL);
 233   WRAPPER_EXECUTION_ENABLE_CKPT();
 234   ThreadSync::unsetOkToGrabLock();
 235   _real_pthread_exit(retval);
 236   for (;;); // To hide compiler warning about "noreturn" function
 237 }
 238 
 239 /*
 240  * pthread_join() is a blocking call that waits for the given thread to exit.
 241  * It examines the value of 'tid' field in 'struct pthread' of the given
 242  * thread. The kernel will write '0' to this field when the thread exits.
 243  *
 244  * In pthread_join(), the thread makes a futex call in the following fashion:
 245  *   _tid = pd->tid;
 246  *   while !succeeded
 247  *     futex(&pd->tid, FUTEX_WAIT, 0, _tid, ...)
 248  * As we can see, if the checkpoint is issued during pthread_join(), on
 249  * restart, the tid would have changed, but the call to futex would still used
 250  * the previously cached tid. This causes the caller to spin with 100% cpu
 251  * usage.
 252  *
 253  * The fix is to use the non blocking pthread_tryjoin_np function. To maintain
 254  * the semantics of pthread_join(), we need to ensure that only one thread is
 255  * allowed to wait on the given thread. This is done by keeping track of
 256  * threads that are being waited on by some other thread.
 257  *
 258  * Similar measures are taken for pthread_timedjoin_np().
 259  */
 260 static struct timespec ts_100ms = {0, 100 * 1000 * 1000};
 261 extern "C" int pthread_join(pthread_t thread, void **retval)
 262 {
 263   int ret;
 264   struct timespec ts;
 265   if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
 266     return EINVAL;
 267   }
 268 
 269   while (1) {
 270     WRAPPER_EXECUTION_DISABLE_CKPT();
 271     ThreadSync::unsetOkToGrabLock();
 272     JASSERT(clock_gettime(CLOCK_REALTIME, &ts) != -1);
 273     TIMESPEC_ADD(&ts, &ts_100ms, &ts);
 274     ret = _real_pthread_timedjoin_np(thread, retval, &ts);
 275     WRAPPER_EXECUTION_ENABLE_CKPT();
 276     ThreadSync::setOkToGrabLock();
 277     if (ret != ETIMEDOUT) {
 278       break;
 279     }
 280   }
 281 
 282   ProcessInfo::instance().endPthreadJoin(thread);
 283   return ret;
 284 }
 285 
 286 extern "C" int pthread_tryjoin_np(pthread_t thread, void **retval)
 287 {
 288   int ret;
 289   if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
 290     return EINVAL;
 291   }
 292 
 293   WRAPPER_EXECUTION_DISABLE_CKPT();
 294   ret = _real_pthread_tryjoin_np(thread, retval);
 295   WRAPPER_EXECUTION_ENABLE_CKPT();
 296 
 297   ProcessInfo::instance().endPthreadJoin(thread);
 298   return ret;
 299 }
 300 
 301 extern "C" int pthread_timedjoin_np(pthread_t thread, void **retval,
 302                                     const struct timespec *abstime)
 303 {
 304   int ret;
 305   struct timespec ts;
 306   if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
 307     return EINVAL;
 308   }
 309 
 310   /*
 311    * We continue to call pthread_tryjoin_np (and sleep) until we have gone past
 312    * the abstime provided by the caller
 313    */
 314   while (1) {
 315     WRAPPER_EXECUTION_DISABLE_CKPT();
 316     JASSERT(clock_gettime(CLOCK_REALTIME, &ts) != -1);
 317     if (TIMESPEC_CMP(&ts, abstime, <)) {
 318       TIMESPEC_ADD(&ts, &ts_100ms, &ts);
 319       ret = _real_pthread_timedjoin_np(thread, retval, &ts);
 320     } else {
 321       ret = ETIMEDOUT;
 322     }
 323     WRAPPER_EXECUTION_ENABLE_CKPT();
 324 
 325     if (ret == EBUSY || ret == 0) {
 326       break;
 327     }
 328     if (TIMESPEC_CMP(&ts, abstime, >=)) {
 329       ret = ETIMEDOUT;
 330       break;
 331     }
 332   }
 333 
 334   ProcessInfo::instance().endPthreadJoin(thread);
 335   return ret;
 336 }

/* [<][>][^][v][top][bottom][index][help] */