root/threadwrappers.cpp
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- clone_start
- __clone
- pthread_start
- pthread_create
- pthread_exit
- pthread_join
- pthread_tryjoin_np
- pthread_timedjoin_np
1 /****************************************************************************
2 * Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3 * jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu *
4 * *
5 * This file is part of DMTCP. *
6 * *
7 * DMTCP is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 #include <sys/syscall.h>
23 #include "constants.h"
24 #include "dmtcpworker.h"
25 #include "mtcpinterface.h"
26 #include "syscallwrappers.h"
27 #include "dmtcp.h"
28 #include "uniquepid.h"
29 #include "util.h"
30 #include "../jalib/jassert.h"
31 #include "../jalib/jalloc.h"
32 #include "threadsync.h"
33 #include "processinfo.h"
34 #include "threadlist.h"
35 #include "siginfo.h"
36
37 using namespace dmtcp;
38
// Start-up record handed from a thread-creating wrapper to the trampoline
// that runs first in the new thread.
struct ThreadArg {
  // Entry point supplied by the caller.  Both members share storage; which
  // one is meaningful depends on the API the thread was created through.
  union {
    int   (*fn)(void *arg);          // entry point returning int
    void *(*pthread_fn)(void *arg);  // pthread_create-style entry point
  };
  void  *arg;         // opaque argument forwarded to the entry point
  void  *mtcpArg;     // not referenced by the wrappers visible in this file
  pid_t  virtualTid;  // presumably DMTCP's virtual thread id; only traced here
};
48
49 // Invoked via __clone
50 LIB_PRIVATE
51 int clone_start(void *arg)
52 {
53 Thread *thread = (Thread*) arg;
54
55 ThreadSync::initThread();
56
57 ThreadList::updateTid(thread);
58
59 DmtcpWorker::eventHook(DMTCP_EVENT_THREAD_START, NULL);
60
61 /* Thread finished initialization. It's now safe for this thread to
62 * participate in checkpoint. Decrement the uninitializedThreadCount in
63 * DmtcpWorker.
64 */
65 ThreadSync::decrementUninitializedThreadCount();
66
67 JTRACE("Calling user function") (dmtcp_gettid());
68 int ret = thread->fn(thread->arg);
69
70 ThreadList::threadExit();
71 return ret;
72 }
73
74 /*****************************************************************************
75 *
76 * This is our clone system call wrapper
77 *
78 * Note:
79 *
80 * pthread_create eventually calls __clone to create threads
81 * It uses flags = 0x3D0F00:
82 * CLONE_VM = VM shared between processes
83 * CLONE_FS = fs info shared between processes (root, cwd, umask)
84 * CLONE_FILES = open files shared between processes (fd table)
85 * CLONE_SIGHAND = signal handlers and blocked signals shared
86 * (sigaction common to parent and child)
87 * CLONE_THREAD = add to same thread group
88 * CLONE_SYSVSEM = share system V SEM_UNDO semantics
89 * CLONE_SETTLS = create a new TLS for the child from newtls parameter
90 * CLONE_PARENT_SETTID = set the TID in the parent (before MM copy)
91 * CLONE_CHILD_CLEARTID = clear the TID in the child and do
92 * futex wake at that address
93 * CLONE_DETACHED = create clone detached
94 *
95 *****************************************************************************/
96 //need to forward user clone
97 extern "C" int __clone(int (*fn) (void *arg), void *child_stack, int flags,
98 void *arg, int *ptid,
99 struct user_desc *tls, int *ctid)
100 {
101 WRAPPER_EXECUTION_DISABLE_CKPT();
102 ThreadSync::incrementUninitializedThreadCount();
103
104 Thread *thread = ThreadList::getNewThread();
105 ThreadList::initThread(thread, fn, arg, flags, ptid, ctid);
106 // if (ckpthread == NULL) {
107 // ckptthread = thread;
108 // thread->stateInit(ST_CKPNTHREAD);
109 // }
110
111 pid_t tid = _real_clone(clone_start, child_stack, flags, thread,
112 ptid, tls, ctid);
113
114 if (tid == -1) {
115 JTRACE("Clone call failed")(JASSERT_ERRNO);
116 ThreadSync::decrementUninitializedThreadCount();
117 delete thread;
118 } else {
119 DmtcpWorker::eventHook(DMTCP_EVENT_THREAD_CREATED, NULL);
120 }
121
122 WRAPPER_EXECUTION_ENABLE_CKPT();
123 return tid;
124 }
125 #if 0
126 #if defined(__i386__) || defined(__x86_64__)
127 asm (".global clone ; .type clone,@function ; clone = __clone");
128 #elif defined(__arm__)
129 // In arm, '@' is a comment character; Arm uses '%' in type directive
130 asm (".global clone ; .type clone,%function ; clone = __clone");
131 #else
132 # error Not implemented on this architecture
133 #endif
134 #endif
135
136 // Invoked via pthread_create as start_routine
137 // On return, it calls mtcp_threadiszombie()
138 static void *pthread_start(void *arg)
139 {
140 struct ThreadArg *threadArg = (struct ThreadArg*) arg;
141 void *thread_arg = threadArg->arg;
142 void * (*pthread_fn) (void *) = threadArg->pthread_fn;
143 pid_t virtualTid = threadArg->virtualTid;
144
145 JASSERT(pthread_fn != 0x0);
146 JALLOC_HELPER_FREE(arg); // Was allocated in calling thread in pthread_create
147
148 // Unblock ckpt signal (unblocking a non-blocked signal has no effect).
149 // Normally, DMTCP wouldn't allow the ckpt signal to be blocked. However, in
150 // some situations (e.g., timer_create), libc would internally block all
151 // signals before calling pthread_create to create a helper thread. Since,
152 // the child threads inherit parent signal mask, the helper thread has all
153 // signals blocked.
154 sigset_t set;
155 sigaddset(&set, SigInfo::ckptSignal());
156 JASSERT(_real_pthread_sigmask(SIG_UNBLOCK, &set, NULL) == 0) (JASSERT_ERRNO);
157
158 ThreadSync::threadFinishedInitialization();
159 void *result = (*pthread_fn)(thread_arg);
160 JTRACE("Thread returned") (virtualTid);
161 WRAPPER_EXECUTION_DISABLE_CKPT();
162 ThreadList::threadExit();
163 /*
164 * This thread has finished its execution, do some cleanup on our part.
165 * erasing the virtualTid entry from virtualpidtable
166 * FIXME: What if the process gets checkpointed after erase() but before the
167 * thread actually exits?
168 */
169 DmtcpWorker::eventHook(DMTCP_EVENT_PTHREAD_RETURN, NULL);
170 WRAPPER_EXECUTION_ENABLE_CKPT();
171 ThreadSync::unsetOkToGrabLock();
172 return result;
173 }
174
175
176 extern "C" int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
177 void *(*start_routine)(void*), void *arg)
178 {
179 int retval;
180 // We have to use DMTCP-specific memory allocator because using glibc:malloc
181 // can interfere with user threads.
182 // We use JALLOC_HELPER_FREE to free this memory in two places:
183 // 1. near the beginning of pthread_start (wrapper for start_routine),
184 // providing that the __clone call succeeds with no tid conflict.
185 // 2. if the call to _real_pthread_create fails, then free memory
186 // near the end of this function.
187 // We use MALLOC/FREE so that pthread_create() can be called again, without
188 // waiting for the new thread to give up the buffer in pthread_start().
189 struct ThreadArg *threadArg =
190 (struct ThreadArg *) JALLOC_HELPER_MALLOC (sizeof (struct ThreadArg));
191 threadArg->pthread_fn = start_routine;
192 threadArg->arg = arg;
193
194 /* pthread_create() should acquire the thread-creation lock. Not doing so can
195 * result in a deadlock in the following scenario:
196 * 1. user thread: pthread_create() - acquire wrapper-execution lock
197 * 2. ckpt-thread: SUSPEND msg received, wait on wrlock for wrapper-exection lock
198 * 3. user thread: __clone() - try to acquire wrapper-execution lock
199 *
200 * We also need to increment the uninitialized-thread-count so that it is
201 * safe to checkpoint the newly created thread.
202 *
203 * There is another possible deadlock situation if we do not grab the
204 * thread-creation lock:
205 * 1. user thread: pthread_create(): waiting on tbl_lock inside libpthread
206 * 2. ckpt-thread: SUSPEND msg received, wait on wrlock for wrapper-exec lock
207 * 3. uset thread: a. exiting after returning from user fn.
208 * b. grabs tbl_lock()
209 * c. tries to call free() to deallocate previously allocated
210 * space (stack etc.). The free() wrapper requires
211 * wrapper-exec lock, which is not available.
212 */
213 bool threadCreationLockAcquired = ThreadSync::threadCreationLockLock();
214 ThreadSync::incrementUninitializedThreadCount();
215 retval = _real_pthread_create(thread, attr, pthread_start, threadArg);
216 if (threadCreationLockAcquired) {
217 ThreadSync::threadCreationLockUnlock();
218 }
219 if (retval == 0) {
220 ProcessInfo::instance().clearPthreadJoinState(*thread);
221 } else { // if we failed to create new pthread
222 JALLOC_HELPER_FREE(threadArg);
223 ThreadSync::decrementUninitializedThreadCount();
224 }
225 return retval;
226 }
227
228 extern "C" void pthread_exit(void * retval)
229 {
230 WRAPPER_EXECUTION_DISABLE_CKPT();
231 ThreadList::threadExit();
232 DmtcpWorker::eventHook(DMTCP_EVENT_PTHREAD_EXIT, NULL);
233 WRAPPER_EXECUTION_ENABLE_CKPT();
234 ThreadSync::unsetOkToGrabLock();
235 _real_pthread_exit(retval);
236 for (;;); // To hide compiler warning about "noreturn" function
237 }
238
239 /*
240 * pthread_join() is a blocking call that waits for the given thread to exit.
241 * It examines the value of 'tid' field in 'struct pthread' of the given
242 * thread. The kernel will write '0' to this field when the thread exits.
243 *
244 * In pthread_join(), the thread makes a futex call in the following fashion:
245 * _tid = pd->tid;
246 * while !succeeded
247 * futex(&pd->tid, FUTEX_WAIT, 0, _tid, ...)
248 * As we can see, if the checkpoint is issued during pthread_join(), on
249 * restart, the tid would have changed, but the call to futex would still used
250 * the previously cached tid. This causes the caller to spin with 100% cpu
251 * usage.
252 *
253 * The fix is to use the non blocking pthread_tryjoin_np function. To maintain
254 * the semantics of pthread_join(), we need to ensure that only one thread is
255 * allowed to wait on the given thread. This is done by keeping track of
256 * threads that are being waited on by some other thread.
257 *
258 * Similar measures are taken for pthread_timedjoin_np().
259 */
260 static struct timespec ts_100ms = {0, 100 * 1000 * 1000};
261 extern "C" int pthread_join(pthread_t thread, void **retval)
262 {
263 int ret;
264 struct timespec ts;
265 if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
266 return EINVAL;
267 }
268
269 while (1) {
270 WRAPPER_EXECUTION_DISABLE_CKPT();
271 ThreadSync::unsetOkToGrabLock();
272 JASSERT(clock_gettime(CLOCK_REALTIME, &ts) != -1);
273 TIMESPEC_ADD(&ts, &ts_100ms, &ts);
274 ret = _real_pthread_timedjoin_np(thread, retval, &ts);
275 WRAPPER_EXECUTION_ENABLE_CKPT();
276 ThreadSync::setOkToGrabLock();
277 if (ret != ETIMEDOUT) {
278 break;
279 }
280 }
281
282 ProcessInfo::instance().endPthreadJoin(thread);
283 return ret;
284 }
285
286 extern "C" int pthread_tryjoin_np(pthread_t thread, void **retval)
287 {
288 int ret;
289 if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
290 return EINVAL;
291 }
292
293 WRAPPER_EXECUTION_DISABLE_CKPT();
294 ret = _real_pthread_tryjoin_np(thread, retval);
295 WRAPPER_EXECUTION_ENABLE_CKPT();
296
297 ProcessInfo::instance().endPthreadJoin(thread);
298 return ret;
299 }
300
301 extern "C" int pthread_timedjoin_np(pthread_t thread, void **retval,
302 const struct timespec *abstime)
303 {
304 int ret;
305 struct timespec ts;
306 if (!ProcessInfo::instance().beginPthreadJoin(thread)) {
307 return EINVAL;
308 }
309
310 /*
311 * We continue to call pthread_tryjoin_np (and sleep) until we have gone past
312 * the abstime provided by the caller
313 */
314 while (1) {
315 WRAPPER_EXECUTION_DISABLE_CKPT();
316 JASSERT(clock_gettime(CLOCK_REALTIME, &ts) != -1);
317 if (TIMESPEC_CMP(&ts, abstime, <)) {
318 TIMESPEC_ADD(&ts, &ts_100ms, &ts);
319 ret = _real_pthread_timedjoin_np(thread, retval, &ts);
320 } else {
321 ret = ETIMEDOUT;
322 }
323 WRAPPER_EXECUTION_ENABLE_CKPT();
324
325 if (ret == EBUSY || ret == 0) {
326 break;
327 }
328 if (TIMESPEC_CMP(&ts, abstime, >=)) {
329 ret = ETIMEDOUT;
330 break;
331 }
332 }
333
334 ProcessInfo::instance().endPthreadJoin(thread);
335 return ret;
336 }