root/threadlist.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. lock_threads
  2. unlk_threads
  3. save_sp
  4. resetOnFork
  5. init
  6. initThread
  7. threadExit
  8. updateTid
  9. killCkpthread
  10. prepareMtcpHeader
  11. checkpointhread
  12. suspendThreads
  13. stopthisthread
  14. waitForAllRestored
  15. postRestart
  16. restarthread
  17. Thread_UpdateState
  18. Thread_SaveSigState
  19. Thread_RestoreSigState
  20. addToActiveList
  21. threadIsDead
  22. getNewThread
  23. emptyFreeList

   1 #include <pthread.h>
   2 #include <signal.h>
   3 #include <sys/types.h>
   4 #include <sys/syscall.h>
   5 #include <unistd.h>
   6 #include <semaphore.h>
   7 #include <sys/resource.h>
   8 #include <linux/version.h>
   9 #include "config.h"
  10 #ifdef HAS_PR_SET_PTRACER
  11 # include <sys/prctl.h>
  12 #endif
  13 #include "threadlist.h"
  14 #include "siginfo.h"
  15 #include "dmtcpalloc.h"
  16 #include "syscallwrappers.h"
  17 #include "mtcpinterface.h"
  18 #include "ckptserializer.h"
  19 #include "uniquepid.h"
  20 #include "jalloc.h"
  21 #include "jassert.h"
  22 #include "util.h"
  23 #include "mtcp/mtcp_header.h"
  24 
  25 // For i386 and x86_64, SETJMP currently has bugs.  Don't turn this
  26 //   on for them until they are debugged.
  27 // Default is to use  setcontext/getcontext.
  28 #if defined(__arm__) || defined(__aarch64__)
  29 # define SETJMP /* setcontext/getcontext not defined for ARM glibc */
  30 #endif
  31 
  32 #ifdef SETJMP
  33 # include <setjmp.h>
  34 #else
  35 # include <ucontext.h>
  36 #endif
  37 
  38 
  39 using namespace dmtcp;
  40 
//Globals

// Set to 1 by ThreadList::postRestart() and cleared at the top of every
// iteration of the checkpoint thread's loop.  stopthisthread() reads it to
// decide between the "suspend and resume" path and the "restart" path.
volatile int restoreInProgress = 0;
// Descriptor of the thread that called ThreadList::init().
Thread *motherofall = NULL;
// Address of motherofall->saved_sp (assigned in ThreadList::init()).
void **motherofall_saved_sp = NULL;
// Address of motherofall->tlsInfo (assigned in ThreadList::init()).
ThreadTLSInfo *motherofall_tlsInfo = NULL;
// THREAD_REAL_TID() of the thread that ran init(); refreshed in postRestart().
pid_t motherpid = 0;
// Computed in postRestart() as the intersection of every active thread's
// saved pending-signal set; re-sent with kill() in waitForAllRestored().
sigset_t sigpending_global;
// Head of the list of thread descriptors, linked through Thread::next.
// Traversals in this file hold threadlistLock (see lock_threads()).
Thread *activeThreads = NULL;
// Value of TLSInfo_GetThreadSysinfo() captured by the checkpoint thread
// before each checkpoint and re-applied in restarthread().
void *saved_sysinfo;
// Copied into MtcpHeader::myinfo_gs by prepareMtcpHeader().
MYINFO_GS_T myinfo_gs __attribute__ ((visibility ("hidden")));


// NOTE(review): not referenced in this part of the file; presumably the pool
// of recycled descriptors used by getNewThread()/emptyFreeList() -- confirm.
static Thread *threads_freelist = NULL;
// Taken by lock_threads() / released by unlk_threads().
static pthread_mutex_t threadlistLock = PTHREAD_MUTEX_INITIALIZER;
// Serializes the compare-and-set of Thread::state in Thread_UpdateState().
static pthread_mutex_t threadStateLock = PTHREAD_MUTEX_INITIALIZER;

// Points at the rwlock that the checkpoint thread write-locks for the
// duration of a checkpoint; suspended user threads block on a read-lock of
// it in stopthisthread() until the checkpoint thread unlocks.
static pthread_rwlock_t *threadResumeLock = NULL;

// Per-thread pointer to the calling thread's descriptor (set in updateTid()).
static __thread Thread *curThread = NULL;
// Descriptor of the checkpoint thread (set in checkpointhread()).
static Thread *ckptThread = NULL;
// Number of threads found in ST_SUSPINPROG/ST_SUSPENDED by suspendThreads().
static int numUserThreads = 0;
// 1 from init() until the checkpoint thread first passes its restart point;
// lets checkpointhread() tell first launch apart from a restart.
static int originalstartup;

// Posted by the checkpoint thread once it has initialized; init() waits on it.
static sem_t sem_start;
// Posted by each user thread when it is suspended (or, on restart, restored);
// the checkpoint thread waits on it numUserThreads times.
static sem_t semNotifyCkptThread;
// On restart, user threads wait on this until the checkpoint thread posts it
// once per user thread in waitForAllRestored().
static sem_t semWaitForCkptThreadSignal;
  67 
  68 static void *checkpointhread (void *dummy);
  69 static void suspendThreads();
  70 static void stopthisthread(int sig);
  71 static int restarthread(void *threadv);
  72 static int Thread_UpdateState(Thread *th,
  73                               ThreadState newval,
  74                               ThreadState oldval);
  75 static void Thread_SaveSigState(Thread *th);
  76 static void Thread_RestoreSigState(Thread *th);
  77 
  78 /*****************************************************************************
  79  *
  80  * Lock and unlock the 'activeThreads' list
  81  *
  82  *****************************************************************************/
// Acquire threadlistLock through the _real_ (unwrapped) mutex call and
// assert that the call reported success (return value 0).
static void lock_threads (void) {
  JASSERT(_real_pthread_mutex_lock(&threadlistLock) == 0) (JASSERT_ERRNO);
}
// Release threadlistLock through the _real_ (unwrapped) mutex call and
// assert that the call reported success (return value 0).
static void unlk_threads (void) {
  JASSERT(_real_pthread_mutex_unlock(&threadlistLock) == 0) (JASSERT_ERRNO);
}
  89 
  90 /*****************************************************************************
  91  *
  92  * We will use the region beyond the end of stack for our temporary stack.
  93  * glibc sigsetjmp will mangle pointers;  We need the unmangled pointer.
  94  * So, we can't rely on parsing the jmpbuf for the saved sp.
  95  *
  96  *****************************************************************************/
// Store the current value of the stack-pointer register into *sp.
//   sp: out-parameter that receives the raw stack pointer.
// The value is read directly with inline assembly; architectures other than
// x86/x86_64/ARM/AArch64 fail at compile time via #error.
static void save_sp(void **sp)
{
#if defined(__i386__) || defined(__x86_64__)
  // NOTE(review): CLEAN_FOR_64_BIT is defined elsewhere; presumably it
  // rewrites the 32-bit register name for 64-bit builds -- confirm.
  asm volatile (CLEAN_FOR_64_BIT(mov %%esp,%0)
                : "=g" (*sp)
                : : "memory");
#elif defined(__arm__) || defined(__aarch64__)
  // Copy the 'sp' register into the output operand.
  asm volatile ("mov %0,sp"
                : "=r" (*sp)
                : : "memory");
#else
# error "assembly instruction not translated"
#endif
}
 111 
 112 /*****************************************************************************
 113  *
 114  * Get _real_ tid/pid
 115  *
 116  *****************************************************************************/
 117 
 118 /*****************************************************************************
 119  *
 120  * New process. Empty the activeThreads list
 121  *
 122  *****************************************************************************/
 123 void ThreadList::resetOnFork()
 124 {
 125   lock_threads();
 126   while (activeThreads != NULL) {
 127     ThreadList::threadIsDead(activeThreads); // takes care of updating "activeThreads" ptr.
 128   }
 129   unlk_threads();
 130 }
 131 
 132 /*****************************************************************************
 133  *
 134  *  This routine must be called at startup time to initiate checkpointing
 135  *
 136  *****************************************************************************/
 137 void ThreadList::init()
 138 {
 139   /* Save this process's pid.  Then verify that the TLS has it where it should
 140    * be. When we do a restore, we will have to modify each thread's TLS with the
 141    * new motherpid. We also assume that GS uses the first GDT entry for its
 142    * descriptor.
 143    */
 144 
 145   /* libc/getpid can lie if we had used kernel fork() instead of libc fork(). */
 146   motherpid = THREAD_REAL_TID();
 147   TLSInfo_VerifyPidTid(motherpid, motherpid);
 148 
 149   SigInfo::setupCkptSigHandler(&stopthisthread);
 150 
 151   /* Set up caller as one of our threads so we can work on it */
 152   motherofall = ThreadList::getNewThread();
 153   motherofall_saved_sp = &motherofall->saved_sp;
 154   motherofall_tlsInfo = &motherofall->tlsInfo;
 155   updateTid(motherofall);
 156 
 157   sem_init(&sem_start, 0, 0);
 158   sem_init(&semNotifyCkptThread, 0, 0);
 159   sem_init(&semWaitForCkptThreadSignal, 0, 0);
 160 
 161   originalstartup = 1;
 162   pthread_t checkpointhreadid;
 163   /* Spawn off a thread that will perform the checkpoints from time to time */
 164   JASSERT(pthread_create(&checkpointhreadid, NULL, checkpointhread, NULL) == 0);
 165 
 166   /* Stop until checkpoint thread has finished initializing.
 167    * Some programs (like gcl) implement their own glibc functions in
 168    * a non-thread-safe manner.  In case we're using non-thread-safe glibc,
 169    * don't run the checkpoint thread and user thread at the same time.
 170    */
 171   errno = 0;
 172   while (-1 == sem_wait(&sem_start) && errno == EINTR)
 173     errno = 0;
 174   sem_destroy(&sem_start);
 175 }
 176 
 177 /*****************************************************************************
 178  *
 179  *****************************************************************************/
 180 void ThreadList::initThread(Thread* th, int (*fn)(void*), void *arg, int flags,
 181                             int *ptid, int *ctid)
 182 {
 183   /* Save exactly what the caller is supplying */
 184   th->fn    = fn;
 185   th->arg   = arg;
 186   th->flags = flags;
 187   th->ptid  = ptid;
 188   th->ctid  = ctid;
 189   th->next  = NULL;
 190   th->state = ST_RUNNING;
 191 
 192   /* libpthread may recycle the thread stacks after the thread exits (due to
 193    * return, pthread_exit, or pthread_cancel) by reusing them for a different
 194    * thread created by a subsequent call to pthread_create().
 195    *
 196    * Part of thread-stack also contains the "struct pthread" with pid and tid
 197    * as member fields. While reusing the stack for the new thread, the tid
 198    * field is reset but the pid field is left unchanged (under the assumption
 199    * that pid never changes). This causes a problem if the thread exited before
 200    * checkpoint and the new thread is created after restart and hence the pid
 201    * field contains the wrong value (pre-ckpt pid as opposed to current-pid).
 202    *
 203    * The solution is to put the motherpid in the pid slot every time a new
 204    * thread is created to make sure that struct pthread has the correct value.
 205    */
 206   TLSInfo_UpdatePid();
 207 }
 208 
 209 /*****************************************************************************
 210  *
 211  * Thread exited/exiting.
 212  *
 213  *****************************************************************************/
// Mark the calling thread's descriptor as ST_ZOMBIE.  suspendThreads()
// probes zombie descriptors and drops them once the tid no longer exists.
// NOTE(review): the state is written directly, without threadStateLock and
// without a NULL check on curThread -- confirm both are intended.
void ThreadList::threadExit()
{
  curThread->state = ST_ZOMBIE;
}
 218 
 219 /*****************************************************************************
 220  *
 221  *****************************************************************************/
 222 void ThreadList::updateTid(Thread *th)
 223 {
 224   if (curThread == NULL)
 225     curThread = th;
 226   th->tid = THREAD_REAL_TID();
 227   th->virtual_tid = dmtcp_gettid();
 228   JTRACE("starting thread") (th->tid) (th->virtual_tid);
 229   // Check and remove any thread descriptor which has the same tid as ours.
 230   // Also, remove any dead threads from the list.
 231   ThreadList::addToActiveList();
 232 }
 233 
 234 /*************************************************************************
 235  *
 236  *  Send a signal to ckpt-thread to wake it up from select call and exit.
 237  *
 238  *************************************************************************/
// Send the checkpoint signal to the checkpoint thread (thread-directed via
// THREAD_TGKILL).  The handler, stopthisthread(), returns immediately when
// it runs on the checkpoint thread, so the only effect is the interruption.
// NOTE(review): ckptThread is dereferenced without a NULL check -- confirm
// this is only called after the checkpoint thread has started.
void ThreadList::killCkpthread()
{
  JTRACE("Kill checkpinthread") (ckptThread->tid);
  THREAD_TGKILL(motherpid, ckptThread->tid, SigInfo::ckptSignal());
}
 244 
 245 /*************************************************************************
 246  *
 247  *  Prepare MTCP Header
 248  *
 249  *************************************************************************/
 250 static void prepareMtcpHeader(MtcpHeader *mtcpHdr)
 251 {
 252   memset(mtcpHdr, 0, sizeof(*mtcpHdr));
 253   strncpy(mtcpHdr->signature, MTCP_SIGNATURE, strlen(MTCP_SIGNATURE) + 1);
 254   mtcpHdr->saved_brk = sbrk(0);
 255   // TODO: Now that we have a separate mtcp dir, the code dealing with
 256   // restoreBuf should go in there.
 257   mtcpHdr->restore_addr = (void*) ProcessInfo::instance().restoreBufAddr();
 258   mtcpHdr->restore_size = ProcessInfo::instance().restoreBufLen();
 259 
 260   mtcpHdr->vdsoStart = (void*) ProcessInfo::instance().vdsoStart();
 261   mtcpHdr->vdsoEnd = (void*) ProcessInfo::instance().vdsoEnd();
 262   mtcpHdr->vvarStart = (void*) ProcessInfo::instance().vvarStart();
 263   mtcpHdr->vvarEnd = (void*) ProcessInfo::instance().vvarEnd();
 264 
 265   mtcpHdr->post_restart = &ThreadList::postRestart;
 266   memcpy(&mtcpHdr->motherofall_tls_info,
 267          &motherofall->tlsInfo,
 268          sizeof(motherofall->tlsInfo));
 269   mtcpHdr->tls_pid_offset = TLSInfo_GetPidOffset();
 270   mtcpHdr->tls_tid_offset = TLSInfo_GetTidOffset();
 271   mtcpHdr->myinfo_gs = myinfo_gs;
 272 }
 273 
 274 /*************************************************************************
 275  *
 276  *  This executes as a thread.  It sleeps for the checkpoint interval
 277  *    seconds, then wakes to write the checkpoint file.
 278  *
 279  *************************************************************************/
 280 static void *checkpointhread (void *dummy)
 281 {
 282   /* This is the start function of the checkpoint thread.
 283    * We also call sigsetjmp/getcontext to get a snapshot of this call frame,
 284    * since we will never exit this call frame.  We always return
 285    * to this call frame at time of startup, on restart.  Hence, restart
 286    * will forget any modifications to our local variables since restart.
 287    */
 288 
 289   ckptThread = curThread;
 290   ckptThread->state = ST_CKPNTHREAD;
 291 
 292   /* For checkpoint thread, we want to block delivery of all but some special
 293    * signals
 294    */
 295   {
 296     /*
 297      * For the checkpoint thread, we should not block SIGSETXID which is used
 298      * by the setsid family of system calls to change the session leader. Glibc
 299      * uses this signal to notify the process threads of the change in session
 300      * leader information. This signal is not documented and is used internally
 301      * by glibc. It is defined in <glibc-src-root>/nptl/pthreadP.h
 302      * screen was getting affected by this since it used setsid to change the
 303      * session leaders.
 304      * Similarly, SIGCANCEL/SIGTIMER is undocumented, but used by glibc.
 305      */
 306 #define SIGSETXID (__SIGRTMIN + 1)
 307 #define SIGCANCEL (__SIGRTMIN) /* aka SIGTIMER */
 308     sigset_t set;
 309 
 310     sigfillset(&set);
 311     sigdelset(&set, SIGSETXID);
 312     sigdelset(&set, SIGCANCEL);
 313 
 314     JASSERT(pthread_sigmask(SIG_SETMASK, &set, NULL) == 0);
 315   }
 316 
 317   Thread_SaveSigState(ckptThread);
 318   TLSInfo_SaveTLSState(&ckptThread->tlsInfo);
 319   /* Release user thread after we've initialized. */
 320   sem_post(&sem_start);
 321 
 322   /* Set up our restart point.  I.e., we get jumped to here after a restore. */
 323 #ifdef SETJMP
 324   JASSERT(sigsetjmp(ckptThread->jmpbuf, 1) >= 0) (JASSERT_ERRNO);
 325 #else
 326   JASSERT(getcontext(&ckptThread->savctx) == 0) (JASSERT_ERRNO);
 327 #endif
 328   save_sp(&ckptThread->saved_sp);
 329   JTRACE("after sigsetjmp/getcontext")
 330     (curThread->tid) (curThread->virtual_tid) (curThread->saved_sp);
 331 
 332   if (originalstartup) {
 333     originalstartup = 0;
 334   } else {
 335     /* We are being restored.  Wait for all other threads to finish being
 336      * restored before resuming checkpointing.
 337      */
 338     JTRACE("waiting for other threads after restore");
 339     ThreadList::waitForAllRestored(ckptThread);
 340     JTRACE("resuming after restore");
 341   }
 342 
 343   /* This is a sleep-checkpoint-resume loop by the checkpoint thread.
 344    * On restart, we arrive back at getcontext, above, and then re-enter the loop.
 345    */
 346   while (1) {
 347     /* Wait a while between writing checkpoint files */
 348     JTRACE("before callbackSleepBetweenCheckpoint(0)");
 349     callbackSleepBetweenCheckpoint(0);
 350 
 351     restoreInProgress = 0;
 352 
 353     // We need to reinitialize the lock.
 354     pthread_rwlock_t rwLock = PTHREAD_RWLOCK_INITIALIZER;
 355     threadResumeLock = &rwLock;
 356     JASSERT(_real_pthread_rwlock_wrlock(threadResumeLock) == 0) (JASSERT_ERRNO);
 357 
 358     suspendThreads();
 359     SigInfo::saveSigHandlers();
 360     /* Do this once, same for all threads.  But restore for each thread. */
 361     if (TLSInfo_HaveThreadSysinfoOffset())
 362       saved_sysinfo = TLSInfo_GetThreadSysinfo();
 363 
 364     /* All other threads halted in 'stopthisthread' routine (they are all
 365      * in state ST_SUSPENDED).  It's safe to write checkpoint file now.
 366      */
 367 
 368     // Update generation, in case user callback calls dmtcp_get_generation().
 369     uint32_t computation_generation =
 370                SharedData::getCompId()._computation_generation;
 371     ProcessInfo::instance().set_generation(computation_generation);
 372 
 373     JTRACE("before callbackSleepBetweenCheckpoint(0)");
 374     callbackPreCheckpoint();
 375 
 376     // Remove stale threads from activeThreads list.
 377     ThreadList::emptyFreeList();
 378 
 379     MtcpHeader mtcpHdr;
 380     prepareMtcpHeader(&mtcpHdr);
 381     /* That's it, folks.  We just did the checkpoint.  After this, we will meet
 382      *   on the flip side of checkpoint.
 383      */
 384     CkptSerializer::writeCkptImage(&mtcpHdr, sizeof(mtcpHdr));
 385 
 386     /* NOTE: This code is only for the checkpoint thread.  If you're looking for
 387      *      what the user threads do at checkpoint time, see:  stopthisthread()
 388      *
 389      * There are two ways for the checkpoint thread to return from a checkpoint:
 390      *                 resume and restart
 391      * If we're here, we just resume'd after checkpoint.  It's the same process.
 392      * If we chose checkpoint, 'bin/mtcp_restart' created a new process.  The
 393      *   source code is in 'src/mtcp'.  The program 'bin/mtcp_restart' will map
 394      *   our memory into the new process, and then meet us back here by calling
 395      *   the function specified by 'mtcpHdr->post_restart':
 396      *                                        ThreadList::postRestart().
 397      *   Actually, postRestart() will start the user threads and then call
 398      *   restarthread() for the 'motherofall' thread.  Then, restarthread()
 399      *   will call setcontext(), in order to arrive back at getcontext() here
 400      *   in this function, just before the 'while(1)' loop.
 401      * FIXME:  The 'motherofall' thread is the primary thread of the process.
 402      *   On launch, 'motherofall' was the user thread exeicuting main().
 403      *   and the checkpoint thread was the second thread.  But now,
 404      *   motherofall will be the checkpoint thread.  Why do we switch at the
 405      *   time of restart?  Should we fix this?
 406      */
 407     JTRACE("before callbackPostCheckpoint(0, NULL)");
 408     callbackPostCheckpoint(0, NULL);
 409 
 410     /* Resume all threads. */
 411     JTRACE("resuming everything");
 412     JASSERT(_real_pthread_rwlock_unlock(threadResumeLock) == 0) (JASSERT_ERRNO);
 413     JTRACE("everything resumed");
 414   }
 415   return NULL;
 416 }
 417 
 418 static void suspendThreads()
 419 {
 420   int needrescan;
 421   Thread *thread;
 422   Thread *next;
 423 
 424   /* Halt all other threads - force them to call stopthisthread
 425    * If any have blocked checkpointing, wait for them to unblock before
 426    * signalling
 427    */
 428   lock_threads();
 429   do {
 430     needrescan = 0;
 431     numUserThreads = 0;
 432     for (thread = activeThreads; thread != NULL; thread = next) {
 433       next = thread->next;
 434       int ret;
 435       /* Do various things based on thread's state */
 436       switch (thread->state) {
 437 
 438         case ST_RUNNING:
 439           /* Thread is running. Send it a signal so it will call stopthisthread.
 440            * We will need to rescan (hopefully it will be suspended by then)
 441            */
 442           if (Thread_UpdateState(thread, ST_SIGNALED, ST_RUNNING)) {
 443             if (THREAD_TGKILL(motherpid, thread->tid, SigInfo::ckptSignal()) < 0) {
 444               JASSERT(errno == ESRCH) (JASSERT_ERRNO) (thread->tid)
 445                 .Text("error signalling thread");
 446               ThreadList::threadIsDead(thread);
 447             } else {
 448               needrescan = 1;
 449             }
 450           }
 451           break;
 452 
 453         case ST_ZOMBIE:
 454           ret = THREAD_TGKILL(motherpid, thread->tid, 0);
 455           JASSERT(ret == 0 || errno == ESRCH);
 456           if (ret == -1 && errno == ESRCH) {
 457             ThreadList::threadIsDead(thread);
 458           }
 459           break;
 460 
 461         case ST_SIGNALED:
 462           if (THREAD_TGKILL(motherpid, thread->tid, 0) == -1 && errno == ESRCH) {
 463             ThreadList::threadIsDead(thread);
 464           } else {
 465             needrescan = 1;
 466           }
 467           break;
 468 
 469         case ST_SUSPINPROG:
 470           numUserThreads++;
 471           break;
 472 
 473         case ST_SUSPENDED:
 474           numUserThreads++;
 475           break;
 476 
 477         case ST_CKPNTHREAD:
 478           break;
 479 
 480         default:
 481           JASSERT(false);
 482       }
 483     }
 484     if (needrescan) usleep(10);
 485   } while (needrescan);
 486   unlk_threads();
 487 
 488   for (int i = 0; i < numUserThreads; i++) {
 489     sem_wait(&semNotifyCkptThread);
 490   }
 491 
 492   JASSERT(activeThreads != NULL);
 493   JTRACE("everything suspended") (numUserThreads);
 494 }
 495 
 496 /*************************************************************************
 497  *
 498  *  Signal handler for user threads.
 499  *
 500  *************************************************************************/
// Handler for the checkpoint signal (installed by ThreadList::init()).
//   signum: the delivered signal number; not used in the body.
//
// On a user thread it records the thread's signal state, TLS state, context
// and stack pointer, tells the checkpoint thread it is suspended, and then
// blocks on threadResumeLock until the checkpoint thread releases it.
// After a restart, execution re-enters just after the sigsetjmp/getcontext
// call with restoreInProgress set and takes the restore branch instead.
// On the checkpoint thread the handler returns immediately.
void stopthisthread (int signum)
{
  // If this is checkpoint thread, exit immediately
  if (curThread == ckptThread) return;

  /* Possible state change scenarios:
   * 1. STOPSIGNAL received from ckpt-thread. In this case, the ckpt-thread
   * already changed the state to ST_SIGNALED. No need to check for locks.
   * Proceed normally.
   *
   * 2. STOPSIGNAL received from Superior thread. In this case we change the
   * state to ST_SIGNALED, if currently in ST_RUNNING. If we are holding
   * any locks (callback_holds_any_locks), we return from the signal handler.
   *
   * 3. STOPSIGNAL raised by this thread itself, after releasing all the locks.
   * In this case, we had already changed the state to ST_SIGNALED as a
   * result of step (2), so the ckpt-thread will never send us a signal.
   *
   * 4. STOPSIGNAL received from Superior thread. Ckpt-threads sends a signal
   * before we had a chance to change state from ST_RUNNING ->
   * ST_SIGNALED. This puts the STOPSIGNAL in the queue. The ckpt-thread will
   * later call sigaction(STOPSIGNAL, SIG_IGN) followed by
   * sigaction(STOPSIGNAL, stopthisthread) to discard all pending signals.
   */
  if (Thread_UpdateState(curThread, ST_SIGNALED, ST_RUNNING)) {
    // NOTE(review): retval is not initialized here; this relies on
    // callbackHoldsAnyLocks() always writing it -- confirm.
    int retval;
    callbackHoldsAnyLocks(&retval);
    if (retval) return;
  }

  // make sure we don't get called twice for same thread
  if (Thread_UpdateState(curThread, ST_SUSPINPROG, ST_SIGNALED)) {
    // NOTE(review): Thread_SaveSigState() as defined in this file only
    // reads the current mask and pending set; it does not itself block
    // signal delivery, despite what the trailing comment says.
    Thread_SaveSigState(curThread); // save sig state (and block sig delivery)
    TLSInfo_SaveTLSState(&curThread->tlsInfo); // save thread local storage state

    /* Set up our restart point, ie, we get jumped to here after a restore */
#ifdef SETJMP
    JASSERT(sigsetjmp(curThread->jmpbuf, 1) >= 0);
#else
    JASSERT(getcontext(&curThread->savctx) == 0);
#endif
    save_sp(&curThread->saved_sp);

    JTRACE("Thread after sigsetjmp/getcontext")
      (curThread->tid) (curThread->virtual_tid)
      (curThread->saved_sp) (__builtin_return_address(0));

    // restoreInProgress is 0 during a normal checkpoint and is set to 1 by
    // postRestart() before threads are jumped back to the point above.
    if (!restoreInProgress) {
      /* We are a user thread and all context is saved.
       * Wait for ckpt thread to write ckpt, and resume.
       */

      /* This sets a static variable in dmtcp.  It must be passed
       * from this user thread to ckpt thread before writing ckpt image
       */
      // When the ptrace hook is absent the callback runs before the
      // checkpoint thread is notified; otherwise see the call further down.
      if (dmtcp_ptrace_enabled == NULL) {
        callbackPreSuspendUserThread();
      }

      /* Tell the checkpoint thread that we're all saved away */
      JASSERT(Thread_UpdateState(curThread, ST_SUSPENDED, ST_SUSPINPROG));
      sem_post(&semNotifyCkptThread);

      /* This sets a static variable in dmtcp.  It must be passed
       * from this user thread to ckpt thread before writing ckpt image
       */
      if (dmtcp_ptrace_enabled != NULL && dmtcp_ptrace_enabled()) {
        callbackPreSuspendUserThread();
      }

      /* Then wait for the ckpt thread to write the ckpt file then wake us up */
      JTRACE("User thread suspended") (curThread->tid);

      // We can't use sem_wait here because sem_wait registers a cleanup
      // handler before going into blocking wait. The handler is popped before
      // returning from it. However, on restart, the thread will do a longjump
      // and thus will never come out of the sem_wait, thus the handler is
      // never popped. This causes a problem later on during pthread_exit. The
      // pthread_exit routine executes all registered cleanup handlers.
      // However, the sem_wait cleanup handler is now invalid and thus we get a
      // segfault.
      // The change in sem_wait behavior was first introduce in glibc 2.21.
      //
      // The checkpoint thread holds the write lock for the whole checkpoint,
      // so this read lock blocks until it unlocks; the lock is then released
      // again straight away.
      JASSERT(_real_pthread_rwlock_rdlock(threadResumeLock) == 0)
        (JASSERT_ERRNO);
      JASSERT(_real_pthread_rwlock_unlock(threadResumeLock) == 0)
        (JASSERT_ERRNO);

      JTRACE("User thread resuming") (curThread->tid);
    } else {
      /* Else restoreinprog >= 1;  This stuff executes to do a restart */
      ThreadList::waitForAllRestored(curThread);
      JTRACE("User thread restored") (curThread->tid);
    }

    // Both branches arrive here in ST_SUSPENDED (on restart, that is the
    // state captured in the checkpoint image).
    JASSERT(Thread_UpdateState(curThread, ST_RUNNING, ST_SUSPENDED));


    callbackPreResumeUserThread(restoreInProgress);
    JTRACE("User thread returning to user code")
      (curThread->tid) (__builtin_return_address(0));
  }
}
 603 
 604 /*****************************************************************************
 605  *
 606  *  Wait for all threads to finish restoring their context, then release them
 607  *  all to continue on their way.
 608  *
 609  *****************************************************************************/
 610 void ThreadList::waitForAllRestored(Thread *thread)
 611 {
 612   if (thread == ckptThread) {
 613     int i;
 614     for (i = 0; i < numUserThreads; i++) {
 615       sem_wait(&semNotifyCkptThread);
 616     }
 617 
 618     JTRACE("before callback_post_ckpt(1=restarting)");
 619     callbackPostCheckpoint(1, NULL); //mtcp_restoreargv_start_addr);
 620     JTRACE("after callback_post_ckpt(1=restarting)");
 621 
 622     SigInfo::restoreSigHandlers();
 623 
 624     /* raise the signals which were pending for the entire process at the time
 625      * of checkpoint. It is assumed that if a signal is pending for all threads
 626      * including the ckpt-thread, then it was sent to the process as opposed to
 627      * sent to individual threads.
 628      */
 629     for (i = SIGRTMAX; i > 0; --i) {
 630       if (sigismember(&sigpending_global, i) == 1) {
 631         kill(getpid(), i);
 632       }
 633     }
 634 
 635     // if this was last of all, wake everyone up
 636     for (i = 0; i < numUserThreads; i++) {
 637       sem_post(&semWaitForCkptThreadSignal);
 638     }
 639   } else {
 640     sem_post(&semNotifyCkptThread);
 641     sem_wait(&semWaitForCkptThreadSignal);
 642     Thread_RestoreSigState(thread);
 643   }
 644 }
 645 
 646 /*****************************************************************************
 647  *
 648  *****************************************************************************/
// Restart entry point; prepareMtcpHeader() stores its address in
// MtcpHeader::post_restart.  It refreshes motherpid, computes the set of
// signals that were pending process-wide, re-creates every thread on the
// active list other than 'motherofall' with clone(), and finally restores
// 'motherofall' on the calling thread.  Not expected to return, since
// restarthread() jumps into a saved context.
void ThreadList::postRestart(void)
{
  Thread *thread;
  sigset_t tmp;

  /* If MTCP_RESTART_PAUSE or DMTCP_RESTART_PAUSE is set, sleep 15 seconds
   * and allow gdb attach.
   */
  if (getenv("MTCP_RESTART_PAUSE") || getenv("DMTCP_RESTART_PAUSE")) {
#ifdef HAS_PR_SET_PTRACER
    prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); // Allow 'gdb attach'
#endif
    struct timespec delay = {15, 0}; /* 15 seconds */
    printf("Pausing 15 seconds. Do:  gdb <PROGNAME> %ld\n",
           (long)THREAD_REAL_TID());
    nanosleep(&delay, NULL);
#ifdef HAS_PR_SET_PTRACER
    prctl(PR_SET_PTRACER, 0, 0, 0, 0); ; // Revert permission to default.
#endif
  }

  /* Fill in the new mother process id */
  motherpid = THREAD_REAL_TID();
  motherofall->tid = motherpid;

  // From here on, threads that re-enter stopthisthread() take the restore
  // branch instead of the suspend branch.
  restoreInProgress = 1;

  // 'tmp' starts as the full set and is intersected with each thread's
  // pending set in turn, so after the loop sigpending_global holds only the
  // signals that were pending on every thread in the list.
  sigfillset(&tmp);
  for (thread = activeThreads; thread != NULL; thread = thread->next) {
    struct MtcpRestartThreadArg mtcpRestartThreadArg;
    sigandset(&sigpending_global, &tmp, &(thread->sigpending));
    tmp = sigpending_global;

    // motherofall is restored on the calling thread after the loop.
    if (thread == motherofall) continue;

    /* DMTCP needs to know virtual_tid of the thread being recreated by the
     *  following clone() call.
     *
     * Threads are created by using syscall which is intercepted by DMTCP and
     *  the virtual_tid is sent to DMTCP as a field of MtcpRestartThreadArg
     *  structure. DMTCP will automatically extract the actual argument
     *  (clonearg->arg) from clone_arg and will pass it on to the real
     *  clone call.
     */
    // NOTE(review): mtcpRestartThreadArg lives on this loop iteration's
    // stack; presumably it is consumed before the iteration ends -- confirm.
    void *clonearg = thread;
    if (dmtcp_real_to_virtual_pid != NULL) {
      mtcpRestartThreadArg.arg = thread;
      mtcpRestartThreadArg.virtualTid = thread->virtual_tid;
      clonearg = &mtcpRestartThreadArg;
    }

    /* Create the thread so it can finish restoring itself. */
    // The new thread starts in restarthread() on a stack placed just below
    // the stack pointer that was saved at checkpoint time.
    pid_t tid = _real_clone(restarthread,
                            // -128 for red zone
                            (void*)((char*)thread->saved_sp - 128),
                            /* Don't do CLONE_SETTLS (it'll puke).  We do it
                             * later via restoreTLSState. */
                            thread->flags & ~CLONE_SETTLS,
                            clonearg, thread->ptid, NULL, thread->ctid);

    JASSERT (tid > 0); // (JASSERT_ERRNO) .Text("Error recreating thread");
    JTRACE("Thread recreated") (thread->tid) (tid);
  }
  // Restore the primary thread on the calling thread.
  restarthread (motherofall);
}
 712 
 713 /*****************************************************************************
 714  *
 715  *****************************************************************************/
 716 static int restarthread (void *threadv)
 717 {
 718   Thread *thread = (Thread*) threadv;
 719   thread->tid = THREAD_REAL_TID();
 720   // This function and related ones are defined in src/mtcp/restore_libc.c
 721   TLSInfo_RestoreTLSState(&thread->tlsInfo);
 722 
 723   if (TLSInfo_HaveThreadSysinfoOffset())
 724     TLSInfo_SetThreadSysinfo(saved_sysinfo);
 725 
 726   /* Jump to the stopthisthread routine just after sigsetjmp/getcontext call.
 727    * Note that if this is the restored checkpointhread, it jumps to the
 728    * checkpointhread routine
 729    */
 730   JTRACE("calling siglongjmp/setcontext") (thread->tid) (thread->virtual_tid);
 731 #ifdef SETJMP
 732   siglongjmp(thread->jmpbuf, 1); /* Shouldn't return */
 733 #else
 734   setcontext(&thread->savctx); /* Shouldn't return */
 735 #endif
 736   JASSERT(false);
 737   return (0); /* NOTREACHED : stop compiler warning */
 738 }
 739 
 740 /*****************************************************************************
 741  *
 742  *****************************************************************************/
 743 int Thread_UpdateState(Thread *th, ThreadState newval, ThreadState oldval)
 744 {
 745   int res = 0;
 746   JASSERT(_real_pthread_mutex_lock(&threadStateLock) == 0);
 747   if (oldval == th->state) {;
 748     th->state = newval;
 749     res = 1;
 750   }
 751   JASSERT(_real_pthread_mutex_unlock(&threadStateLock) == 0);
 752   return res;
 753 }
 754 
 755 /*****************************************************************************
 756  *
 757  *  Save signal mask and list of pending signals delivery
 758  *
 759  *****************************************************************************/
// Capture the calling thread's signal state into 'th':
//   th->sigblockmask  receives the current signal mask
//   th->sigpending    receives the set of currently pending signals
// Both calls act on the calling thread, so 'th' should be the caller's own
// descriptor.
void Thread_SaveSigState(Thread *th)
{
  // Save signal block mask
  // (the 'set' argument is NULL, so the mask is only read, never changed)
  JASSERT(pthread_sigmask (SIG_SETMASK, NULL, &th->sigblockmask) == 0);

  // Save pending signals
  // NOTE(review): the return value of sigpending() is not checked.
  sigpending(&th->sigpending);
}
 768 
 769 /*****************************************************************************
 770  *
 771  *  Restore signal mask and all pending signals
 772  *
 773  *****************************************************************************/
 774 void Thread_RestoreSigState (Thread *th)
 775 {
 776   int i;
 777   JTRACE("restoring signal mask for thread") (th->virtual_tid);
 778   JASSERT(pthread_sigmask (SIG_SETMASK, &th->sigblockmask, NULL) == 0);
 779 
 780   // Raise the signals which were pending for only this thread at the time of
 781   // checkpoint.
 782   for (i = SIGRTMAX; i > 0; --i) {
 783     if (sigismember(&th->sigpending, i)  == 1  &&
 784         sigismember(&th->sigblockmask, i) == 1 &&
 785         sigismember(&sigpending_global, i) == 0 &&
 786         i != dmtcp_get_ckpt_signal()) {
 787       if (i != SIGCHLD) {
 788         JNOTE("\n*** WARNING:  SIGCHLD was delivered prior to ckpt.\n"
 789                "*** Will raise it on restart.  If not desired, change\n"
 790                "*** this line raising SIGCHLD.");
 791       }
 792       raise(i);
 793     }
 794   }
 795 }
 796 
 797 
 798 /*****************************************************************************
 799  *
 800  * If there is a thread descriptor with the same tid, it must be from a dead
 801  * thread. Remove it now.
 802  *
 803  *****************************************************************************/
 804 void ThreadList::addToActiveList()
 805 {
 806   int tid;
 807   Thread *thread;
 808   Thread *next_thread;
 809 
 810   lock_threads();
 811 
 812   tid = curThread->tid;
 813   JASSERT (tid != 0);
 814 
 815   // First remove duplicate descriptors.
 816   for (thread = activeThreads; thread != NULL; thread = next_thread) {
 817     next_thread = thread->next;
 818     if (thread != curThread && thread->tid == tid) {
 819       JTRACE("Removing duplicate thread descriptor")
 820         (thread->tid) (thread->virtual_tid);
 821       // There will be at most one duplicate descriptor.
 822       threadIsDead(thread);
 823       continue;
 824     }
 825     /* NOTE:  ST_ZOMBIE is used only for the sake of efficiency.  We
 826      *   test threads in state ST_ZOMBIE using tgkill to remove them
 827      *   early (before reaching a checkpoint) so that the
 828      *   threadrdescriptor list does not grow too long.
 829      */
 830     if (thread->state == ST_ZOMBIE) {
 831       /* if no thread with this tid, then we can remove zombie descriptor */
 832       if (-1 == THREAD_TGKILL(motherpid, thread->tid, 0)) {
 833         JTRACE("Killing zombie thread") (thread->tid);
 834         threadIsDead(thread);
 835       }
 836     }
 837   }
 838 
 839   curThread->next = activeThreads;
 840   curThread->prev = NULL;
 841   if (activeThreads != NULL) {
 842     activeThreads->prev = curThread;
 843   }
 844   activeThreads = curThread;
 845 
 846   unlk_threads();
 847   return;
 848 }
 849 
 850 /*****************************************************************************
 851  *
 852  *  Thread has exited - move it from activeThreads list to freelist.
 853  *
 854  *  threadisdead() used to free() the Thread struct before returning. However,
 855  *  if we do that while in the middle of a checkpoint, the call to free() might
 856  *  deadlock in JAllocator. For this reason, we put the to-be-removed threads
 857  *  on this threads_freelist and call free() only when it is safe to do so.
 858  *
 859  *  This has an added benefit of reduced number of calls to malloc() as the
 860  *  Thread structs in the freelist can be recycled.
 861  *
 862  *****************************************************************************/
 863 void ThreadList::threadIsDead (Thread *thread)
 864 {
 865   JASSERT(thread != NULL);
 866   JTRACE("Putting thread on freelist") (thread->tid);
 867 
 868   /* Remove thread block from 'threads' list */
 869   if (thread->prev != NULL) {
 870     thread->prev->next = thread->next;
 871   }
 872   if (thread->next != NULL) {
 873     thread->next->prev = thread->prev;
 874   }
 875   if (thread == activeThreads) {
 876     activeThreads = activeThreads->next;
 877   }
 878 
 879   thread->next = threads_freelist;
 880   threads_freelist = thread;
 881 }
 882 
 883 /*****************************************************************************
 884  *
 885  * Return thread from freelist.
 886  *
 887  *****************************************************************************/
 888 Thread *ThreadList::getNewThread()
 889 {
 890   Thread *thread;
 891 
 892   lock_threads();
 893   if (threads_freelist == NULL) {
 894     thread = (Thread*) JALLOC_HELPER_MALLOC(sizeof(Thread));
 895     JASSERT(thread != NULL);
 896   } else {
 897     thread = threads_freelist;
 898     threads_freelist = threads_freelist->next;
 899   }
 900   unlk_threads();
 901   memset(thread, 0, sizeof (*thread));
 902   return thread;
 903 }
 904 
 905 /*****************************************************************************
 906  *
 907  * Call free() on all threads_freelist items
 908  *
 909  *****************************************************************************/
 910 void ThreadList::emptyFreeList()
 911 {
 912   lock_threads();
 913 
 914   while (threads_freelist != NULL) {
 915     Thread *thread = threads_freelist;
 916     threads_freelist = threads_freelist->next;
 917     JALLOC_HELPER_FREE(thread);
 918   }
 919 
 920   unlk_threads();
 921 }
 922 

/* [<][>][^][v][top][bottom][index][help] */