root/threadlist.cpp
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- lock_threads
- unlk_threads
- save_sp
- resetOnFork
- init
- initThread
- threadExit
- updateTid
- killCkpthread
- prepareMtcpHeader
- checkpointhread
- suspendThreads
- stopthisthread
- waitForAllRestored
- postRestart
- restarthread
- Thread_UpdateState
- Thread_SaveSigState
- Thread_RestoreSigState
- addToActiveList
- threadIsDead
- getNewThread
- emptyFreeList
1 #include <pthread.h>
2 #include <signal.h>
3 #include <sys/types.h>
4 #include <sys/syscall.h>
5 #include <unistd.h>
6 #include <semaphore.h>
7 #include <sys/resource.h>
8 #include <linux/version.h>
9 #include "config.h"
10 #ifdef HAS_PR_SET_PTRACER
11 # include <sys/prctl.h>
12 #endif
13 #include "threadlist.h"
14 #include "siginfo.h"
15 #include "dmtcpalloc.h"
16 #include "syscallwrappers.h"
17 #include "mtcpinterface.h"
18 #include "ckptserializer.h"
19 #include "uniquepid.h"
20 #include "jalloc.h"
21 #include "jassert.h"
22 #include "util.h"
23 #include "mtcp/mtcp_header.h"
24
25 // For i386 and x86_64, SETJMP currently has bugs. Don't turn this
26 // on for them until they are debugged.
27 // Default is to use setcontext/getcontext.
28 #if defined(__arm__) || defined(__aarch64__)
29 # define SETJMP /* setcontext/getcontext not defined for ARM glibc */
30 #endif
31
32 #ifdef SETJMP
33 # include <setjmp.h>
34 #else
35 # include <ucontext.h>
36 #endif
37
38
39 using namespace dmtcp;
40
41 //Globals
42 volatile int restoreInProgress = 0;
43 Thread *motherofall = NULL;
44 void **motherofall_saved_sp = NULL;
45 ThreadTLSInfo *motherofall_tlsInfo = NULL;
46 pid_t motherpid = 0;
47 sigset_t sigpending_global;
48 Thread *activeThreads = NULL;
49 void *saved_sysinfo;
50 MYINFO_GS_T myinfo_gs __attribute__ ((visibility ("hidden")));
51
52
53 static Thread *threads_freelist = NULL;
54 static pthread_mutex_t threadlistLock = PTHREAD_MUTEX_INITIALIZER;
55 static pthread_mutex_t threadStateLock = PTHREAD_MUTEX_INITIALIZER;
56
57 static pthread_rwlock_t *threadResumeLock = NULL;
58
59 static __thread Thread *curThread = NULL;
60 static Thread *ckptThread = NULL;
61 static int numUserThreads = 0;
62 static int originalstartup;
63
64 static sem_t sem_start;
65 static sem_t semNotifyCkptThread;
66 static sem_t semWaitForCkptThreadSignal;
67
68 static void *checkpointhread (void *dummy);
69 static void suspendThreads();
70 static void stopthisthread(int sig);
71 static int restarthread(void *threadv);
72 static int Thread_UpdateState(Thread *th,
73 ThreadState newval,
74 ThreadState oldval);
75 static void Thread_SaveSigState(Thread *th);
76 static void Thread_RestoreSigState(Thread *th);
77
78 /*****************************************************************************
79 *
80 * Lock and unlock the 'activeThreads' list
81 *
82 *****************************************************************************/
83 static void lock_threads (void) {
84 JASSERT(_real_pthread_mutex_lock(&threadlistLock) == 0) (JASSERT_ERRNO);
85 }
86 static void unlk_threads (void) {
87 JASSERT(_real_pthread_mutex_unlock(&threadlistLock) == 0) (JASSERT_ERRNO);
88 }
89
90 /*****************************************************************************
91 *
92 * We will use the region beyond the end of stack for our temporary stack.
93 * glibc sigsetjmp will mangle pointers; We need the unmangled pointer.
94 * So, we can't rely on parsing the jmpbuf for the saved sp.
95 *
96 *****************************************************************************/
97 static void save_sp(void **sp)
98 {
99 #if defined(__i386__) || defined(__x86_64__)
100 asm volatile (CLEAN_FOR_64_BIT(mov %%esp,%0)
101 : "=g" (*sp)
102 : : "memory");
103 #elif defined(__arm__) || defined(__aarch64__)
104 asm volatile ("mov %0,sp"
105 : "=r" (*sp)
106 : : "memory");
107 #else
108 # error "assembly instruction not translated"
109 #endif
110 }
111
112 /*****************************************************************************
113 *
114 * Get _real_ tid/pid
115 *
116 *****************************************************************************/
117
118 /*****************************************************************************
119 *
120 * New process. Empty the activeThreads list
121 *
122 *****************************************************************************/
123 void ThreadList::resetOnFork()
124 {
125 lock_threads();
126 while (activeThreads != NULL) {
127 ThreadList::threadIsDead(activeThreads); // takes care of updating "activeThreads" ptr.
128 }
129 unlk_threads();
130 }
131
132 /*****************************************************************************
133 *
134 * This routine must be called at startup time to initiate checkpointing
135 *
136 *****************************************************************************/
137 void ThreadList::init()
138 {
139 /* Save this process's pid. Then verify that the TLS has it where it should
140 * be. When we do a restore, we will have to modify each thread's TLS with the
141 * new motherpid. We also assume that GS uses the first GDT entry for its
142 * descriptor.
143 */
144
145 /* libc/getpid can lie if we had used kernel fork() instead of libc fork(). */
146 motherpid = THREAD_REAL_TID();
147 TLSInfo_VerifyPidTid(motherpid, motherpid);
148
149 SigInfo::setupCkptSigHandler(&stopthisthread);
150
151 /* Set up caller as one of our threads so we can work on it */
152 motherofall = ThreadList::getNewThread();
153 motherofall_saved_sp = &motherofall->saved_sp;
154 motherofall_tlsInfo = &motherofall->tlsInfo;
155 updateTid(motherofall);
156
157 sem_init(&sem_start, 0, 0);
158 sem_init(&semNotifyCkptThread, 0, 0);
159 sem_init(&semWaitForCkptThreadSignal, 0, 0);
160
161 originalstartup = 1;
162 pthread_t checkpointhreadid;
163 /* Spawn off a thread that will perform the checkpoints from time to time */
164 JASSERT(pthread_create(&checkpointhreadid, NULL, checkpointhread, NULL) == 0);
165
166 /* Stop until checkpoint thread has finished initializing.
167 * Some programs (like gcl) implement their own glibc functions in
168 * a non-thread-safe manner. In case we're using non-thread-safe glibc,
169 * don't run the checkpoint thread and user thread at the same time.
170 */
171 errno = 0;
172 while (-1 == sem_wait(&sem_start) && errno == EINTR)
173 errno = 0;
174 sem_destroy(&sem_start);
175 }
176
177 /*****************************************************************************
178 *
179 *****************************************************************************/
180 void ThreadList::initThread(Thread* th, int (*fn)(void*), void *arg, int flags,
181 int *ptid, int *ctid)
182 {
183 /* Save exactly what the caller is supplying */
184 th->fn = fn;
185 th->arg = arg;
186 th->flags = flags;
187 th->ptid = ptid;
188 th->ctid = ctid;
189 th->next = NULL;
190 th->state = ST_RUNNING;
191
192 /* libpthread may recycle the thread stacks after the thread exits (due to
193 * return, pthread_exit, or pthread_cancel) by reusing them for a different
194 * thread created by a subsequent call to pthread_create().
195 *
196 * Part of thread-stack also contains the "struct pthread" with pid and tid
197 * as member fields. While reusing the stack for the new thread, the tid
198 * field is reset but the pid field is left unchanged (under the assumption
199 * that pid never changes). This causes a problem if the thread exited before
200 * checkpoint and the new thread is created after restart and hence the pid
201 * field contains the wrong value (pre-ckpt pid as opposed to current-pid).
202 *
203 * The solution is to put the motherpid in the pid slot every time a new
204 * thread is created to make sure that struct pthread has the correct value.
205 */
206 TLSInfo_UpdatePid();
207 }
208
209 /*****************************************************************************
210 *
211 * Thread exited/exiting.
212 *
213 *****************************************************************************/
214 void ThreadList::threadExit()
215 {
216 curThread->state = ST_ZOMBIE;
217 }
218
219 /*****************************************************************************
220 *
221 *****************************************************************************/
222 void ThreadList::updateTid(Thread *th)
223 {
224 if (curThread == NULL)
225 curThread = th;
226 th->tid = THREAD_REAL_TID();
227 th->virtual_tid = dmtcp_gettid();
228 JTRACE("starting thread") (th->tid) (th->virtual_tid);
229 // Check and remove any thread descriptor which has the same tid as ours.
230 // Also, remove any dead threads from the list.
231 ThreadList::addToActiveList();
232 }
233
234 /*************************************************************************
235 *
236 * Send a signal to ckpt-thread to wake it up from select call and exit.
237 *
238 *************************************************************************/
239 void ThreadList::killCkpthread()
240 {
241 JTRACE("Kill checkpinthread") (ckptThread->tid);
242 THREAD_TGKILL(motherpid, ckptThread->tid, SigInfo::ckptSignal());
243 }
244
245 /*************************************************************************
246 *
247 * Prepare MTCP Header
248 *
249 *************************************************************************/
250 static void prepareMtcpHeader(MtcpHeader *mtcpHdr)
251 {
252 memset(mtcpHdr, 0, sizeof(*mtcpHdr));
253 strncpy(mtcpHdr->signature, MTCP_SIGNATURE, strlen(MTCP_SIGNATURE) + 1);
254 mtcpHdr->saved_brk = sbrk(0);
255 // TODO: Now that we have a separate mtcp dir, the code dealing with
256 // restoreBuf should go in there.
257 mtcpHdr->restore_addr = (void*) ProcessInfo::instance().restoreBufAddr();
258 mtcpHdr->restore_size = ProcessInfo::instance().restoreBufLen();
259
260 mtcpHdr->vdsoStart = (void*) ProcessInfo::instance().vdsoStart();
261 mtcpHdr->vdsoEnd = (void*) ProcessInfo::instance().vdsoEnd();
262 mtcpHdr->vvarStart = (void*) ProcessInfo::instance().vvarStart();
263 mtcpHdr->vvarEnd = (void*) ProcessInfo::instance().vvarEnd();
264
265 mtcpHdr->post_restart = &ThreadList::postRestart;
266 memcpy(&mtcpHdr->motherofall_tls_info,
267 &motherofall->tlsInfo,
268 sizeof(motherofall->tlsInfo));
269 mtcpHdr->tls_pid_offset = TLSInfo_GetPidOffset();
270 mtcpHdr->tls_tid_offset = TLSInfo_GetTidOffset();
271 mtcpHdr->myinfo_gs = myinfo_gs;
272 }
273
274 /*************************************************************************
275 *
276 * This executes as a thread. It sleeps for the checkpoint interval
277 * seconds, then wakes to write the checkpoint file.
278 *
279 *************************************************************************/
280 static void *checkpointhread (void *dummy)
281 {
282 /* This is the start function of the checkpoint thread.
283 * We also call sigsetjmp/getcontext to get a snapshot of this call frame,
284 * since we will never exit this call frame. We always return
285 * to this call frame at time of startup, on restart. Hence, restart
286 * will forget any modifications to our local variables since restart.
287 */
288
289 ckptThread = curThread;
290 ckptThread->state = ST_CKPNTHREAD;
291
292 /* For checkpoint thread, we want to block delivery of all but some special
293 * signals
294 */
295 {
296 /*
297 * For the checkpoint thread, we should not block SIGSETXID which is used
298 * by the setsid family of system calls to change the session leader. Glibc
299 * uses this signal to notify the process threads of the change in session
300 * leader information. This signal is not documented and is used internally
301 * by glibc. It is defined in <glibc-src-root>/nptl/pthreadP.h
302 * screen was getting affected by this since it used setsid to change the
303 * session leaders.
304 * Similarly, SIGCANCEL/SIGTIMER is undocumented, but used by glibc.
305 */
306 #define SIGSETXID (__SIGRTMIN + 1)
307 #define SIGCANCEL (__SIGRTMIN) /* aka SIGTIMER */
308 sigset_t set;
309
310 sigfillset(&set);
311 sigdelset(&set, SIGSETXID);
312 sigdelset(&set, SIGCANCEL);
313
314 JASSERT(pthread_sigmask(SIG_SETMASK, &set, NULL) == 0);
315 }
316
317 Thread_SaveSigState(ckptThread);
318 TLSInfo_SaveTLSState(&ckptThread->tlsInfo);
319 /* Release user thread after we've initialized. */
320 sem_post(&sem_start);
321
322 /* Set up our restart point. I.e., we get jumped to here after a restore. */
323 #ifdef SETJMP
324 JASSERT(sigsetjmp(ckptThread->jmpbuf, 1) >= 0) (JASSERT_ERRNO);
325 #else
326 JASSERT(getcontext(&ckptThread->savctx) == 0) (JASSERT_ERRNO);
327 #endif
328 save_sp(&ckptThread->saved_sp);
329 JTRACE("after sigsetjmp/getcontext")
330 (curThread->tid) (curThread->virtual_tid) (curThread->saved_sp);
331
332 if (originalstartup) {
333 originalstartup = 0;
334 } else {
335 /* We are being restored. Wait for all other threads to finish being
336 * restored before resuming checkpointing.
337 */
338 JTRACE("waiting for other threads after restore");
339 ThreadList::waitForAllRestored(ckptThread);
340 JTRACE("resuming after restore");
341 }
342
343 /* This is a sleep-checkpoint-resume loop by the checkpoint thread.
344 * On restart, we arrive back at getcontext, above, and then re-enter the loop.
345 */
346 while (1) {
347 /* Wait a while between writing checkpoint files */
348 JTRACE("before callbackSleepBetweenCheckpoint(0)");
349 callbackSleepBetweenCheckpoint(0);
350
351 restoreInProgress = 0;
352
353 // We need to reinitialize the lock.
354 pthread_rwlock_t rwLock = PTHREAD_RWLOCK_INITIALIZER;
355 threadResumeLock = &rwLock;
356 JASSERT(_real_pthread_rwlock_wrlock(threadResumeLock) == 0) (JASSERT_ERRNO);
357
358 suspendThreads();
359 SigInfo::saveSigHandlers();
360 /* Do this once, same for all threads. But restore for each thread. */
361 if (TLSInfo_HaveThreadSysinfoOffset())
362 saved_sysinfo = TLSInfo_GetThreadSysinfo();
363
364 /* All other threads halted in 'stopthisthread' routine (they are all
365 * in state ST_SUSPENDED). It's safe to write checkpoint file now.
366 */
367
368 // Update generation, in case user callback calls dmtcp_get_generation().
369 uint32_t computation_generation =
370 SharedData::getCompId()._computation_generation;
371 ProcessInfo::instance().set_generation(computation_generation);
372
373 JTRACE("before callbackSleepBetweenCheckpoint(0)");
374 callbackPreCheckpoint();
375
376 // Remove stale threads from activeThreads list.
377 ThreadList::emptyFreeList();
378
379 MtcpHeader mtcpHdr;
380 prepareMtcpHeader(&mtcpHdr);
381 /* That's it, folks. We just did the checkpoint. After this, we will meet
382 * on the flip side of checkpoint.
383 */
384 CkptSerializer::writeCkptImage(&mtcpHdr, sizeof(mtcpHdr));
385
386 /* NOTE: This code is only for the checkpoint thread. If you're looking for
387 * what the user threads do at checkpoint time, see: stopthisthread()
388 *
389 * There are two ways for the checkpoint thread to return from a checkpoint:
390 * resume and restart
391 * If we're here, we just resume'd after checkpoint. It's the same process.
392 * If we chose checkpoint, 'bin/mtcp_restart' created a new process. The
393 * source code is in 'src/mtcp'. The program 'bin/mtcp_restart' will map
394 * our memory into the new process, and then meet us back here by calling
395 * the function specified by 'mtcpHdr->post_restart':
396 * ThreadList::postRestart().
397 * Actually, postRestart() will start the user threads and then call
398 * restarthread() for the 'motherofall' thread. Then, restarthread()
399 * will call setcontext(), in order to arrive back at getcontext() here
400 * in this function, just before the 'while(1)' loop.
401 * FIXME: The 'motherofall' thread is the primary thread of the process.
402 * On launch, 'motherofall' was the user thread exeicuting main().
403 * and the checkpoint thread was the second thread. But now,
404 * motherofall will be the checkpoint thread. Why do we switch at the
405 * time of restart? Should we fix this?
406 */
407 JTRACE("before callbackPostCheckpoint(0, NULL)");
408 callbackPostCheckpoint(0, NULL);
409
410 /* Resume all threads. */
411 JTRACE("resuming everything");
412 JASSERT(_real_pthread_rwlock_unlock(threadResumeLock) == 0) (JASSERT_ERRNO);
413 JTRACE("everything resumed");
414 }
415 return NULL;
416 }
417
418 static void suspendThreads()
419 {
420 int needrescan;
421 Thread *thread;
422 Thread *next;
423
424 /* Halt all other threads - force them to call stopthisthread
425 * If any have blocked checkpointing, wait for them to unblock before
426 * signalling
427 */
428 lock_threads();
429 do {
430 needrescan = 0;
431 numUserThreads = 0;
432 for (thread = activeThreads; thread != NULL; thread = next) {
433 next = thread->next;
434 int ret;
435 /* Do various things based on thread's state */
436 switch (thread->state) {
437
438 case ST_RUNNING:
439 /* Thread is running. Send it a signal so it will call stopthisthread.
440 * We will need to rescan (hopefully it will be suspended by then)
441 */
442 if (Thread_UpdateState(thread, ST_SIGNALED, ST_RUNNING)) {
443 if (THREAD_TGKILL(motherpid, thread->tid, SigInfo::ckptSignal()) < 0) {
444 JASSERT(errno == ESRCH) (JASSERT_ERRNO) (thread->tid)
445 .Text("error signalling thread");
446 ThreadList::threadIsDead(thread);
447 } else {
448 needrescan = 1;
449 }
450 }
451 break;
452
453 case ST_ZOMBIE:
454 ret = THREAD_TGKILL(motherpid, thread->tid, 0);
455 JASSERT(ret == 0 || errno == ESRCH);
456 if (ret == -1 && errno == ESRCH) {
457 ThreadList::threadIsDead(thread);
458 }
459 break;
460
461 case ST_SIGNALED:
462 if (THREAD_TGKILL(motherpid, thread->tid, 0) == -1 && errno == ESRCH) {
463 ThreadList::threadIsDead(thread);
464 } else {
465 needrescan = 1;
466 }
467 break;
468
469 case ST_SUSPINPROG:
470 numUserThreads++;
471 break;
472
473 case ST_SUSPENDED:
474 numUserThreads++;
475 break;
476
477 case ST_CKPNTHREAD:
478 break;
479
480 default:
481 JASSERT(false);
482 }
483 }
484 if (needrescan) usleep(10);
485 } while (needrescan);
486 unlk_threads();
487
488 for (int i = 0; i < numUserThreads; i++) {
489 sem_wait(&semNotifyCkptThread);
490 }
491
492 JASSERT(activeThreads != NULL);
493 JTRACE("everything suspended") (numUserThreads);
494 }
495
496 /*************************************************************************
497 *
498 * Signal handler for user threads.
499 *
500 *************************************************************************/
501 void stopthisthread (int signum)
502 {
503 // If this is checkpoint thread, exit immediately
504 if (curThread == ckptThread) return;
505
506 /* Possible state change scenarios:
507 * 1. STOPSIGNAL received from ckpt-thread. In this case, the ckpt-thread
508 * already changed the state to ST_SIGNALED. No need to check for locks.
509 * Proceed normally.
510 *
511 * 2. STOPSIGNAL received from Superior thread. In this case we change the
512 * state to ST_SIGNALED, if currently in ST_RUNNING. If we are holding
513 * any locks (callback_holds_any_locks), we return from the signal handler.
514 *
515 * 3. STOPSIGNAL raised by this thread itself, after releasing all the locks.
516 * In this case, we had already changed the state to ST_SIGNALED as a
517 * result of step (2), so the ckpt-thread will never send us a signal.
518 *
519 * 4. STOPSIGNAL received from Superior thread. Ckpt-threads sends a signal
520 * before we had a chance to change state from ST_RUNNING ->
521 * ST_SIGNALED. This puts the STOPSIGNAL in the queue. The ckpt-thread will
522 * later call sigaction(STOPSIGNAL, SIG_IGN) followed by
523 * sigaction(STOPSIGNAL, stopthisthread) to discard all pending signals.
524 */
525 if (Thread_UpdateState(curThread, ST_SIGNALED, ST_RUNNING)) {
526 int retval;
527 callbackHoldsAnyLocks(&retval);
528 if (retval) return;
529 }
530
531 // make sure we don't get called twice for same thread
532 if (Thread_UpdateState(curThread, ST_SUSPINPROG, ST_SIGNALED)) {
533 Thread_SaveSigState(curThread); // save sig state (and block sig delivery)
534 TLSInfo_SaveTLSState(&curThread->tlsInfo); // save thread local storage state
535
536 /* Set up our restart point, ie, we get jumped to here after a restore */
537 #ifdef SETJMP
538 JASSERT(sigsetjmp(curThread->jmpbuf, 1) >= 0);
539 #else
540 JASSERT(getcontext(&curThread->savctx) == 0);
541 #endif
542 save_sp(&curThread->saved_sp);
543
544 JTRACE("Thread after sigsetjmp/getcontext")
545 (curThread->tid) (curThread->virtual_tid)
546 (curThread->saved_sp) (__builtin_return_address(0));
547
548 if (!restoreInProgress) {
549 /* We are a user thread and all context is saved.
550 * Wait for ckpt thread to write ckpt, and resume.
551 */
552
553 /* This sets a static variable in dmtcp. It must be passed
554 * from this user thread to ckpt thread before writing ckpt image
555 */
556 if (dmtcp_ptrace_enabled == NULL) {
557 callbackPreSuspendUserThread();
558 }
559
560 /* Tell the checkpoint thread that we're all saved away */
561 JASSERT(Thread_UpdateState(curThread, ST_SUSPENDED, ST_SUSPINPROG));
562 sem_post(&semNotifyCkptThread);
563
564 /* This sets a static variable in dmtcp. It must be passed
565 * from this user thread to ckpt thread before writing ckpt image
566 */
567 if (dmtcp_ptrace_enabled != NULL && dmtcp_ptrace_enabled()) {
568 callbackPreSuspendUserThread();
569 }
570
571 /* Then wait for the ckpt thread to write the ckpt file then wake us up */
572 JTRACE("User thread suspended") (curThread->tid);
573
574 // We can't use sem_wait here because sem_wait registers a cleanup
575 // handler before going into blocking wait. The handler is popped before
576 // returning from it. However, on restart, the thread will do a longjump
577 // and thus will never come out of the sem_wait, thus the handler is
578 // never popped. This causes a problem later on during pthread_exit. The
579 // pthread_exit routine executes all registered cleanup handlers.
580 // However, the sem_wait cleanup handler is now invalid and thus we get a
581 // segfault.
582 // The change in sem_wait behavior was first introduce in glibc 2.21.
583 JASSERT(_real_pthread_rwlock_rdlock(threadResumeLock) == 0)
584 (JASSERT_ERRNO);
585 JASSERT(_real_pthread_rwlock_unlock(threadResumeLock) == 0)
586 (JASSERT_ERRNO);
587
588 JTRACE("User thread resuming") (curThread->tid);
589 } else {
590 /* Else restoreinprog >= 1; This stuff executes to do a restart */
591 ThreadList::waitForAllRestored(curThread);
592 JTRACE("User thread restored") (curThread->tid);
593 }
594
595 JASSERT(Thread_UpdateState(curThread, ST_RUNNING, ST_SUSPENDED));
596
597
598 callbackPreResumeUserThread(restoreInProgress);
599 JTRACE("User thread returning to user code")
600 (curThread->tid) (__builtin_return_address(0));
601 }
602 }
603
604 /*****************************************************************************
605 *
606 * Wait for all threads to finish restoring their context, then release them
607 * all to continue on their way.
608 *
609 *****************************************************************************/
610 void ThreadList::waitForAllRestored(Thread *thread)
611 {
612 if (thread == ckptThread) {
613 int i;
614 for (i = 0; i < numUserThreads; i++) {
615 sem_wait(&semNotifyCkptThread);
616 }
617
618 JTRACE("before callback_post_ckpt(1=restarting)");
619 callbackPostCheckpoint(1, NULL); //mtcp_restoreargv_start_addr);
620 JTRACE("after callback_post_ckpt(1=restarting)");
621
622 SigInfo::restoreSigHandlers();
623
624 /* raise the signals which were pending for the entire process at the time
625 * of checkpoint. It is assumed that if a signal is pending for all threads
626 * including the ckpt-thread, then it was sent to the process as opposed to
627 * sent to individual threads.
628 */
629 for (i = SIGRTMAX; i > 0; --i) {
630 if (sigismember(&sigpending_global, i) == 1) {
631 kill(getpid(), i);
632 }
633 }
634
635 // if this was last of all, wake everyone up
636 for (i = 0; i < numUserThreads; i++) {
637 sem_post(&semWaitForCkptThreadSignal);
638 }
639 } else {
640 sem_post(&semNotifyCkptThread);
641 sem_wait(&semWaitForCkptThreadSignal);
642 Thread_RestoreSigState(thread);
643 }
644 }
645
646 /*****************************************************************************
647 *
648 *****************************************************************************/
649 void ThreadList::postRestart(void)
650 {
651 Thread *thread;
652 sigset_t tmp;
653
654 /* If DMTCP_RESTART_PAUSE set, sleep 15 seconds and allow gdb attach. */
655 if (getenv("MTCP_RESTART_PAUSE") || getenv("DMTCP_RESTART_PAUSE")) {
656 #ifdef HAS_PR_SET_PTRACER
657 prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0); // Allow 'gdb attach'
658 #endif
659 struct timespec delay = {15, 0}; /* 15 seconds */
660 printf("Pausing 15 seconds. Do: gdb <PROGNAME> %ld\n",
661 (long)THREAD_REAL_TID());
662 nanosleep(&delay, NULL);
663 #ifdef HAS_PR_SET_PTRACER
664 prctl(PR_SET_PTRACER, 0, 0, 0, 0); ; // Revert permission to default.
665 #endif
666 }
667
668 /* Fill in the new mother process id */
669 motherpid = THREAD_REAL_TID();
670 motherofall->tid = motherpid;
671
672 restoreInProgress = 1;
673
674 sigfillset(&tmp);
675 for (thread = activeThreads; thread != NULL; thread = thread->next) {
676 struct MtcpRestartThreadArg mtcpRestartThreadArg;
677 sigandset(&sigpending_global, &tmp, &(thread->sigpending));
678 tmp = sigpending_global;
679
680 if (thread == motherofall) continue;
681
682 /* DMTCP needs to know virtual_tid of the thread being recreated by the
683 * following clone() call.
684 *
685 * Threads are created by using syscall which is intercepted by DMTCP and
686 * the virtual_tid is sent to DMTCP as a field of MtcpRestartThreadArg
687 * structure. DMTCP will automatically extract the actual argument
688 * (clonearg->arg) from clone_arg and will pass it on to the real
689 * clone call.
690 */
691 void *clonearg = thread;
692 if (dmtcp_real_to_virtual_pid != NULL) {
693 mtcpRestartThreadArg.arg = thread;
694 mtcpRestartThreadArg.virtualTid = thread->virtual_tid;
695 clonearg = &mtcpRestartThreadArg;
696 }
697
698 /* Create the thread so it can finish restoring itself. */
699 pid_t tid = _real_clone(restarthread,
700 // -128 for red zone
701 (void*)((char*)thread->saved_sp - 128),
702 /* Don't do CLONE_SETTLS (it'll puke). We do it
703 * later via restoreTLSState. */
704 thread->flags & ~CLONE_SETTLS,
705 clonearg, thread->ptid, NULL, thread->ctid);
706
707 JASSERT (tid > 0); // (JASSERT_ERRNO) .Text("Error recreating thread");
708 JTRACE("Thread recreated") (thread->tid) (tid);
709 }
710 restarthread (motherofall);
711 }
712
713 /*****************************************************************************
714 *
715 *****************************************************************************/
716 static int restarthread (void *threadv)
717 {
718 Thread *thread = (Thread*) threadv;
719 thread->tid = THREAD_REAL_TID();
720 // This function and related ones are defined in src/mtcp/restore_libc.c
721 TLSInfo_RestoreTLSState(&thread->tlsInfo);
722
723 if (TLSInfo_HaveThreadSysinfoOffset())
724 TLSInfo_SetThreadSysinfo(saved_sysinfo);
725
726 /* Jump to the stopthisthread routine just after sigsetjmp/getcontext call.
727 * Note that if this is the restored checkpointhread, it jumps to the
728 * checkpointhread routine
729 */
730 JTRACE("calling siglongjmp/setcontext") (thread->tid) (thread->virtual_tid);
731 #ifdef SETJMP
732 siglongjmp(thread->jmpbuf, 1); /* Shouldn't return */
733 #else
734 setcontext(&thread->savctx); /* Shouldn't return */
735 #endif
736 JASSERT(false);
737 return (0); /* NOTREACHED : stop compiler warning */
738 }
739
740 /*****************************************************************************
741 *
742 *****************************************************************************/
743 int Thread_UpdateState(Thread *th, ThreadState newval, ThreadState oldval)
744 {
745 int res = 0;
746 JASSERT(_real_pthread_mutex_lock(&threadStateLock) == 0);
747 if (oldval == th->state) {;
748 th->state = newval;
749 res = 1;
750 }
751 JASSERT(_real_pthread_mutex_unlock(&threadStateLock) == 0);
752 return res;
753 }
754
755 /*****************************************************************************
756 *
757 * Save signal mask and list of pending signals delivery
758 *
759 *****************************************************************************/
760 void Thread_SaveSigState(Thread *th)
761 {
762 // Save signal block mask
763 JASSERT(pthread_sigmask (SIG_SETMASK, NULL, &th->sigblockmask) == 0);
764
765 // Save pending signals
766 sigpending(&th->sigpending);
767 }
768
769 /*****************************************************************************
770 *
771 * Restore signal mask and all pending signals
772 *
773 *****************************************************************************/
774 void Thread_RestoreSigState (Thread *th)
775 {
776 int i;
777 JTRACE("restoring signal mask for thread") (th->virtual_tid);
778 JASSERT(pthread_sigmask (SIG_SETMASK, &th->sigblockmask, NULL) == 0);
779
780 // Raise the signals which were pending for only this thread at the time of
781 // checkpoint.
782 for (i = SIGRTMAX; i > 0; --i) {
783 if (sigismember(&th->sigpending, i) == 1 &&
784 sigismember(&th->sigblockmask, i) == 1 &&
785 sigismember(&sigpending_global, i) == 0 &&
786 i != dmtcp_get_ckpt_signal()) {
787 if (i != SIGCHLD) {
788 JNOTE("\n*** WARNING: SIGCHLD was delivered prior to ckpt.\n"
789 "*** Will raise it on restart. If not desired, change\n"
790 "*** this line raising SIGCHLD.");
791 }
792 raise(i);
793 }
794 }
795 }
796
797
798 /*****************************************************************************
799 *
800 * If there is a thread descriptor with the same tid, it must be from a dead
801 * thread. Remove it now.
802 *
803 *****************************************************************************/
804 void ThreadList::addToActiveList()
805 {
806 int tid;
807 Thread *thread;
808 Thread *next_thread;
809
810 lock_threads();
811
812 tid = curThread->tid;
813 JASSERT (tid != 0);
814
815 // First remove duplicate descriptors.
816 for (thread = activeThreads; thread != NULL; thread = next_thread) {
817 next_thread = thread->next;
818 if (thread != curThread && thread->tid == tid) {
819 JTRACE("Removing duplicate thread descriptor")
820 (thread->tid) (thread->virtual_tid);
821 // There will be at most one duplicate descriptor.
822 threadIsDead(thread);
823 continue;
824 }
825 /* NOTE: ST_ZOMBIE is used only for the sake of efficiency. We
826 * test threads in state ST_ZOMBIE using tgkill to remove them
827 * early (before reaching a checkpoint) so that the
828 * threadrdescriptor list does not grow too long.
829 */
830 if (thread->state == ST_ZOMBIE) {
831 /* if no thread with this tid, then we can remove zombie descriptor */
832 if (-1 == THREAD_TGKILL(motherpid, thread->tid, 0)) {
833 JTRACE("Killing zombie thread") (thread->tid);
834 threadIsDead(thread);
835 }
836 }
837 }
838
839 curThread->next = activeThreads;
840 curThread->prev = NULL;
841 if (activeThreads != NULL) {
842 activeThreads->prev = curThread;
843 }
844 activeThreads = curThread;
845
846 unlk_threads();
847 return;
848 }
849
850 /*****************************************************************************
851 *
852 * Thread has exited - move it from activeThreads list to freelist.
853 *
854 * threadisdead() used to free() the Thread struct before returning. However,
855 * if we do that while in the middle of a checkpoint, the call to free() might
856 * deadlock in JAllocator. For this reason, we put the to-be-removed threads
857 * on this threads_freelist and call free() only when it is safe to do so.
858 *
859 * This has an added benefit of reduced number of calls to malloc() as the
860 * Thread structs in the freelist can be recycled.
861 *
862 *****************************************************************************/
863 void ThreadList::threadIsDead (Thread *thread)
864 {
865 JASSERT(thread != NULL);
866 JTRACE("Putting thread on freelist") (thread->tid);
867
868 /* Remove thread block from 'threads' list */
869 if (thread->prev != NULL) {
870 thread->prev->next = thread->next;
871 }
872 if (thread->next != NULL) {
873 thread->next->prev = thread->prev;
874 }
875 if (thread == activeThreads) {
876 activeThreads = activeThreads->next;
877 }
878
879 thread->next = threads_freelist;
880 threads_freelist = thread;
881 }
882
883 /*****************************************************************************
884 *
885 * Return thread from freelist.
886 *
887 *****************************************************************************/
888 Thread *ThreadList::getNewThread()
889 {
890 Thread *thread;
891
892 lock_threads();
893 if (threads_freelist == NULL) {
894 thread = (Thread*) JALLOC_HELPER_MALLOC(sizeof(Thread));
895 JASSERT(thread != NULL);
896 } else {
897 thread = threads_freelist;
898 threads_freelist = threads_freelist->next;
899 }
900 unlk_threads();
901 memset(thread, 0, sizeof (*thread));
902 return thread;
903 }
904
905 /*****************************************************************************
906 *
907 * Call free() on all threads_freelist items
908 *
909 *****************************************************************************/
910 void ThreadList::emptyFreeList()
911 {
912 lock_threads();
913
914 while (threads_freelist != NULL) {
915 Thread *thread = threads_freelist;
916 threads_freelist = threads_freelist->next;
917 JALLOC_HELPER_FREE(thread);
918 }
919
920 unlk_threads();
921 }
922