root/processinfo.cpp
/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- _do_lock_tbl
- _do_unlock_tbl
- dmtcp_ProcessInfo_EventHook
- instance
- growStack
- init
- updateCkptDirFileSubdir
- postExec
- resetOnFork
- restoreHeap
- restart
- restoreProcessGroupInfo
- insertChild
- eraseChild
- isChild
- beginPthreadJoin
- clearPthreadJoinState
- endPthreadJoin
- setCkptFilename
- setCkptDir
- refresh
- refreshChildTable
- serialize
1 /****************************************************************************
2 * Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3 * jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu *
4 * *
5 * This file is part of DMTCP. *
6 * *
7 * DMTCP is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 #include <fenv.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <sys/syscall.h>
26 #include <sys/time.h>
27 #include <sys/resource.h>
28 #include "util.h"
29 #include "syscallwrappers.h"
30 #include "uniquepid.h"
31 #include "processinfo.h"
32 #include "procselfmaps.h"
33 #include "coordinatorapi.h"
34 #include "../jalib/jconvert.h"
35 #include "../jalib/jfilesystem.h"
36
37 using namespace dmtcp;
38
// File-wide mutex taken via _do_lock_tbl()/_do_unlock_tbl() around accesses to
// _childTable and _pthreadJoinId in the ProcessInfo accessor methods below.
39 static pthread_mutex_t tblLock = PTHREAD_MUTEX_INITIALIZER;
40
// Floating-point rounding mode: captured with fegetround() on
// DMTCP_EVENT_THREADS_SUSPEND and re-applied with fesetround() on
// DMTCP_EVENT_RESTART (see dmtcp_ProcessInfo_EventHook).
41 static int roundingMode;
42
43 static void _do_lock_tbl()
44 {
45 JASSERT(_real_pthread_mutex_lock(&tblLock) == 0) (JASSERT_ERRNO);
46 }
47
48 static void _do_unlock_tbl()
49 {
50 JASSERT(_real_pthread_mutex_unlock(&tblLock) == 0) (JASSERT_ERRNO);
51 }
52
53 void dmtcp_ProcessInfo_EventHook(DmtcpEvent_t event, DmtcpEventData_t *data)
54 {
55 switch (event) {
56 case DMTCP_EVENT_INIT:
57 ProcessInfo::instance().init();
58 break;
59
60 case DMTCP_EVENT_PRE_EXEC:
61 {
62 jalib::JBinarySerializeWriterRaw wr("", data->serializerInfo.fd);
63 ProcessInfo::instance().refresh();
64 ProcessInfo::instance().serialize(wr);
65 }
66 break;
67
68 case DMTCP_EVENT_POST_EXEC:
69 {
70 jalib::JBinarySerializeReaderRaw rd("", data->serializerInfo.fd);
71 ProcessInfo::instance().serialize(rd);
72 ProcessInfo::instance().postExec();
73 }
74 break;
75
76 case DMTCP_EVENT_DRAIN:
77 ProcessInfo::instance().refresh();
78 break;
79
80 case DMTCP_EVENT_RESTART:
81 fesetround(roundingMode);
82 ProcessInfo::instance().restart();
83 break;
84
85 case DMTCP_EVENT_REFILL:
86 if (data->refillInfo.isRestart) {
87 ProcessInfo::instance().restoreProcessGroupInfo();
88 }
89 break;
90
91 case DMTCP_EVENT_THREADS_SUSPEND:
92 roundingMode = fegetround();
93 break;
94
95 case DMTCP_EVENT_THREADS_RESUME:
96 if (data->refillInfo.isRestart) {
97 _real_close(PROTECTED_ENVIRON_FD);
98 }
99 break;
100
101 default:
102 break;
103 }
104 }
105
106 ProcessInfo::ProcessInfo()
107 {
108 char buf[PATH_MAX];
109 _do_lock_tbl();
110 _pid = -1;
111 _ppid = -1;
112 _gid = -1;
113 _sid = -1;
114 _isRootOfProcessTree = false;
115 _noCoordinator = false;
116 _generation = 0;
117 // _generation, above, is per-process.
118 // This constrasts with DmtcpUniqueProcessId:_computation_generation, which is
119 // shared among all process on a node; used in variable sharedDataHeader.
120 // _generation is updated when _this_ process begins its checkpoint.
121 _childTable.clear();
122 _pthreadJoinId.clear();
123 _procSelfExe = jalib::Filesystem::ResolveSymlink("/proc/self/exe");
124 _uppid = UniquePid();
125 JASSERT(getcwd(buf, sizeof buf) != NULL);
126 _launchCWD = buf;
127 #ifdef CONFIG_M32
128 _elfType = Elf_32;
129 #else
130 _elfType = Elf_64;
131 #endif
132 _restoreBufLen = RESTORE_TOTAL_SIZE;
133 _restoreBufAddr = 0;
134 _do_unlock_tbl();
135 }
136
137 static ProcessInfo *pInfo = NULL;
138 ProcessInfo& ProcessInfo::instance()
139 {
140 if (pInfo == NULL) {
141 pInfo = new ProcessInfo();
142 }
143 return *pInfo;
144 }
145
146 void ProcessInfo::growStack()
147 {
148 /* Grow the stack to the stack limit */
149 struct rlimit rlim;
150 size_t stackSize;
151 const rlim_t eightMB = 8 * MB;
152 JASSERT(getrlimit(RLIMIT_STACK, &rlim) == 0) (JASSERT_ERRNO);
153 if (rlim.rlim_cur == RLIM_INFINITY) {
154 if (rlim.rlim_max == RLIM_INFINITY) {
155 stackSize = 8 * 1024 * 1024;
156 } else {
157 stackSize = MIN(rlim.rlim_max, eightMB);
158 }
159 } else {
160 stackSize = rlim.rlim_cur;
161 }
162
163 // Find the current stack area, heap, stack, vDSO and vvar areas.
164 ProcMapsArea area;
165 ProcMapsArea stackArea = {0};
166 size_t allocSize;
167 void *tmpbuf;
168 ProcSelfMaps procSelfMaps;
169 while (procSelfMaps.getNextArea(&area)) {
170 if (strcmp(area.name, "[heap]") == 0) {
171 // Record start of heap which will later be used to restore heap
172 _savedHeapStart = (unsigned long) area.addr;
173 } else if (strcmp(area.name, "[vdso]") == 0) {
174 _vdsoStart = (unsigned long) area.addr;
175 _vdsoEnd = (unsigned long) area.endAddr;
176 } else if (strcmp(area.name, "[vvar]") == 0) {
177 _vvarStart = (unsigned long) area.addr;
178 _vvarEnd = (unsigned long) area.endAddr;
179 } else if ((VA) &area >= area.addr && (VA) &area < area.endAddr) {
180 JTRACE("Original stack area") ((void*)area.addr) (area.size);
181 stackArea = area;
182 /*
183 * When using Matlab with dmtcp_launch, sometimes the bottom most
184 * page of stack (the page with highest address) which contains the
185 * environment strings and the argv[] was not shown in /proc/self/maps.
186 * This is arguably a bug in the Linux kernel as of version 2.6.32, etc.
187 * This happens on some odd combination of environment passed on to
188 * Matlab process. As a result, the page was not checkpointed and hence
189 * the process segfaulted on restart. The fix is to try to mprotect this
190 * page with RWX permission to make the page visible again. This call
191 * will fail if no stack page was invisible to begin with.
192 */
193 // FIXME : If the area following the stack is not empty, don't
194 // exercise this path.
195 int ret = mprotect(area.addr + area.size, 0x1000,
196 PROT_READ | PROT_WRITE | PROT_EXEC);
197 if (ret == 0) {
198 JNOTE("bottom-most page of stack (page with highest address) was\n"
199 " invisible in /proc/self/maps. It is made visible again now.");
200 }
201 }
202 }
203 JASSERT(stackArea.addr != NULL);
204
205 // Grow the stack
206 {
207 allocSize = stackSize - stackArea.size - 4095;
208 tmpbuf = alloca(allocSize);
209 JASSERT(tmpbuf != NULL) (JASSERT_ERRNO);
210 memset(tmpbuf, 0, allocSize);
211 }
212
213 #ifdef DEBUG
214 {
215 ProcSelfMaps maps;
216 while (maps.getNextArea(&area)) {
217 if ((VA)&area >= area.addr && (VA)&area < area.endAddr) { // Stack found
218 JTRACE("New stack size") ((void*)area.addr) (area.size);
219 break;
220 }
221 }
222 }
223 #endif
224 }
225
226 void ProcessInfo::init()
227 {
228 if (_pid == -1) {
229 // This is a brand new process.
230 _pid = getpid();
231 _ppid = getppid();
232 _isRootOfProcessTree = true;
233 _uppid = UniquePid();
234 _procSelfExe = jalib::Filesystem::ResolveSymlink("/proc/self/exe");
235 }
236
237 #ifdef CONFIG_M32
238 _elfType = Elf_32;
239 #else
240 _elfType = Elf_64;
241 #endif
242
243 _vdsoStart = _vdsoEnd = _vvarStart = _vvarEnd = 0;
244
245 growStack();
246
247 // Reserve space for restoreBuf
248 _restoreBufLen = RESTORE_TOTAL_SIZE;
249 // Allocate two extra pages -- one at the start, one at the end -- to work as
250 // guard pages for the restore area.
251 void *addr = mmap(NULL, _restoreBufLen + (2 * 4096), PROT_READ,
252 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
253 JASSERT(addr != MAP_FAILED) (JASSERT_ERRNO);
254 _restoreBufAddr = (uint64_t) addr + 4096;
255 JASSERT(mprotect((void*)_restoreBufAddr, _restoreBufLen, PROT_NONE) == 0)
256 ((void*)_restoreBufAddr) (_restoreBufLen) (JASSERT_ERRNO);
257
258 if (_ckptDir.empty()) {
259 updateCkptDirFileSubdir();
260 }
261 }
262
263 void ProcessInfo::updateCkptDirFileSubdir(string newCkptDir)
264 {
265 if (newCkptDir != "") {
266 _ckptDir = newCkptDir;
267 }
268
269 if (_ckptDir.empty()) {
270 const char *dir = getenv(ENV_VAR_CHECKPOINT_DIR);
271 if (dir == NULL) {
272 dir = ".";
273 }
274 _ckptDir = dir;
275 }
276
277 ostringstream o;
278 o << _ckptDir << "/"
279 << CKPT_FILE_PREFIX
280 << jalib::Filesystem::GetProgramName()
281 << '_' << UniquePid::ThisProcess();
282
283 _ckptFileName = o.str() + CKPT_FILE_SUFFIX;
284 _ckptFilesSubDir = o.str() + CKPT_FILES_SUBDIR_SUFFIX;
285 }
286
287 void ProcessInfo::postExec()
288 {
289 _procname = jalib::Filesystem::GetProgramName();
290 _upid = UniquePid::ThisProcess();
291 _uppid = UniquePid::ParentProcess();
292 updateCkptDirFileSubdir();
293 }
294
295 void ProcessInfo::resetOnFork()
296 {
297 pthread_mutex_t newlock = PTHREAD_MUTEX_INITIALIZER;
298 tblLock = newlock;
299 _ppid = _pid;
300 _pid = getpid();
301 _isRootOfProcessTree = false;
302 _childTable.clear();
303 _pthreadJoinId.clear();
304 _ckptFileName.clear();
305 _ckptFilesSubDir.clear();
306 updateCkptDirFileSubdir();
307 }
308
309 void ProcessInfo::restoreHeap()
310 {
311 /* If the original start of heap is lower than the current end of heap, we
312 * want to mmap the area between _savedBrk and current break. This
313 * happens when the size of checkpointed program is smaller then the size of
314 * mtcp_restart program.
315 */
316 uint64_t curBrk = (uint64_t) sbrk(0);
317 if (curBrk > _savedBrk) {
318 JNOTE("Area between saved_break and curr_break not mapped, mapping it now")
319 (_savedBrk) (curBrk);
320 size_t oldsize = _savedBrk - _savedHeapStart;
321 size_t newsize = curBrk - _savedHeapStart;
322
323 JASSERT(mremap((void*) _savedHeapStart, oldsize, newsize, 0) != NULL)
324 (_savedBrk) (curBrk)
325 .Text("mremap failed to map area between saved break and current break");
326 } else if (curBrk < _savedBrk) {
327 if (brk((void*)_savedBrk) != 0) {
328 JNOTE("Failed to restore area between saved_break and curr_break.")
329 (_savedBrk) (curBrk) (JASSERT_ERRNO);
330 }
331 }
332 }
333
334 void ProcessInfo::restart()
335 {
336 JASSERT(mprotect((void*)_restoreBufAddr, _restoreBufLen, PROT_NONE) == 0)
337 ((void*)_restoreBufAddr) (_restoreBufLen) (JASSERT_ERRNO);
338
339 restoreHeap();
340
341 // Update the ckptDir
342 string ckptDir = jalib::Filesystem::GetDeviceName(PROTECTED_CKPT_DIR_FD);
343 JASSERT(ckptDir.length() > 0);
344 _real_close(PROTECTED_CKPT_DIR_FD);
345 updateCkptDirFileSubdir(ckptDir);
346
347 if (_launchCWD != _ckptCWD) {
348 string rpath = "";
349 size_t llen = _launchCWD.length();
350 if (Util::strStartsWith(_ckptCWD.c_str(), _launchCWD.c_str()) &&
351 _ckptCWD[llen] == '/') {
352 // _launchCWD = "/A/B"; _ckptCWD = "/A/B/C" -> rpath = "./c"
353 rpath = "./" + _ckptCWD.substr(llen + 1);
354 if (chdir(rpath.c_str()) == 0) {
355 JTRACE("Changed cwd") (_launchCWD) (_ckptCWD) (_launchCWD + rpath);
356 } else {
357 JWARNING(chdir(_ckptCWD.c_str()) == 0) (_ckptCWD) (_launchCWD)
358 (JASSERT_ERRNO) .Text("Failed to change directory to _ckptCWD");
359 }
360 }
361 }
362 }
363
364 void ProcessInfo::restoreProcessGroupInfo()
365 {
366 // Restore group assignment
367 if (dmtcp_virtual_to_real_pid && dmtcp_virtual_to_real_pid(_gid) != _gid) {
368 pid_t cgid = getpgid(0);
369 // Group ID is known inside checkpointed processes
370 if (_gid != cgid) {
371 JTRACE("Restore Group Assignment")
372 (_gid) (_fgid) (cgid) (_pid) (_ppid) (getppid());
373 JWARNING(setpgid(0, _gid) == 0) (_gid) (JASSERT_ERRNO)
374 .Text("Cannot change group information");
375 } else {
376 JTRACE("Group is already assigned") (_gid) (cgid);
377 }
378 } else {
379 JTRACE("SKIP Group information, GID unknown");
380 }
381 }
382
383 void ProcessInfo::insertChild(pid_t pid, UniquePid uniquePid)
384 {
385 _do_lock_tbl();
386 iterator i = _childTable.find(pid);
387 JWARNING(i == _childTable.end()) (pid) (uniquePid) (i->second)
388 .Text("child pid already exists!");
389
390 _childTable[pid] = uniquePid;
391 _do_unlock_tbl();
392
393 JTRACE("Creating new virtualPid -> realPid mapping.") (pid) (uniquePid);
394 }
395
396 void ProcessInfo::eraseChild(pid_t virtualPid)
397 {
398 _do_lock_tbl();
399 iterator i = _childTable.find(virtualPid);
400 if (i != _childTable.end())
401 _childTable.erase(virtualPid);
402 _do_unlock_tbl();
403 }
404
405 bool ProcessInfo::isChild(const UniquePid& upid)
406 {
407 bool res = false;
408 _do_lock_tbl();
409 for (iterator i = _childTable.begin(); i != _childTable.end(); i++) {
410 if (i->second == upid) {
411 res = true;
412 break;
413 }
414 }
415 _do_unlock_tbl();
416 return res;
417 }
418
419 bool ProcessInfo::beginPthreadJoin(pthread_t thread)
420 {
421 bool res = false;
422 _do_lock_tbl();
423 map<pthread_t, pthread_t>::iterator i = _pthreadJoinId.find(thread);
424 if (i == _pthreadJoinId.end()) {
425 _pthreadJoinId[thread] = pthread_self();
426 res = true;
427 }
428 _do_unlock_tbl();
429 return res;
430 }
431
432 void ProcessInfo::clearPthreadJoinState(pthread_t thread)
433 {
434 _do_lock_tbl();
435 if (_pthreadJoinId.find(thread) != _pthreadJoinId.end()) {
436 _pthreadJoinId.erase(thread);
437 }
438 _do_unlock_tbl();
439 }
440
441 void ProcessInfo::endPthreadJoin(pthread_t thread)
442 {
443 _do_lock_tbl();
444 if (_pthreadJoinId.find(thread) != _pthreadJoinId.end() &&
445 pthread_equal(_pthreadJoinId[thread], pthread_self())) {
446 _pthreadJoinId.erase(thread);
447 }
448 _do_unlock_tbl();
449 }
450
451 void ProcessInfo::setCkptFilename(const char *filename)
452 {
453 JASSERT(filename != NULL);
454 if (filename[0] == '/') {
455 _ckptDir = jalib::Filesystem::DirName(filename);
456 _ckptFileName = filename;
457 } else {
458 _ckptFileName = _ckptDir + "/" + filename;
459 }
460
461 if (Util::strEndsWith(_ckptFileName, CKPT_FILE_SUFFIX)) {
462 string ckptFileBaseName =
463 _ckptFileName.substr(0, _ckptFileName.length() - CKPT_FILE_SUFFIX_LEN);
464 _ckptFilesSubDir = ckptFileBaseName +CKPT_FILES_SUBDIR_SUFFIX;
465 } else {
466 _ckptFilesSubDir = _ckptFileName + CKPT_FILES_SUBDIR_SUFFIX;
467 }
468 }
469
470
471 void ProcessInfo::setCkptDir(const char *dir)
472 {
473 JASSERT(dir != NULL);
474 _ckptDir = dir;
475 _ckptFileName = _ckptDir + "/" + jalib::Filesystem::BaseName(_ckptFileName);
476 _ckptFilesSubDir = _ckptDir + "/" + jalib::Filesystem::BaseName(_ckptFilesSubDir);
477
478 JTRACE("setting ckptdir") (_ckptDir) (_ckptFilesSubDir);
479 //JASSERT(access(_ckptDir.c_str(), X_OK|W_OK) == 0) (_ckptDir)
480 //.Text("Missing execute- or write-access to checkpoint dir.");
481 }
482
483 void ProcessInfo::refresh()
484 {
485 JASSERT(_pid == getpid()) (_pid) (getpid());
486
487 _gid = getpgid(0);
488 _sid = getsid(0);
489
490 _fgid = -1;
491 // Try to open the controlling terminal
492 int tfd = _real_open("/dev/tty", O_RDWR);
493 if (tfd != -1) {
494 _fgid = tcgetpgrp(tfd);
495 _real_close(tfd);
496 }
497
498 if (_ppid != getppid()) {
499 // Our original parent died; we are the root of the process tree now.
500 //
501 // On older systems, a process is inherited by init (pid = 1) after its
502 // parent dies. However, with the new per-user init process, the parent
503 // pid is no longer "1"; it's the pid of the user-specific init process.
504 _ppid = getppid();
505 _isRootOfProcessTree = true;
506 _uppid = UniquePid();
507 } else {
508 _uppid = UniquePid::ParentProcess();
509 }
510
511 _procname = jalib::Filesystem::GetProgramName();
512 _hostname = jalib::Filesystem::GetCurrentHostname();
513 _upid = UniquePid::ThisProcess();
514 _noCoordinator = dmtcp_no_coordinator();
515
516 char buf[PATH_MAX];
517 JASSERT(getcwd(buf, sizeof buf) != NULL);
518 _ckptCWD = buf;
519
520 _sessionIds.clear();
521 refreshChildTable();
522
523 JTRACE("CHECK GROUP PID")(_gid)(_fgid)(_ppid)(_pid);
524 }
525
526 void ProcessInfo::refreshChildTable()
527 {
528 iterator i = _childTable.begin();
529 while (i != _childTable.end()) {
530 pid_t pid = i->first;
531 iterator j = i++;
532 /* Check to see if the child process is alive*/
533 if (kill(pid, 0) == -1 && errno == ESRCH) {
534 _childTable.erase(j);
535 } else {
536 _sessionIds[pid] = getsid(pid);
537 }
538 }
539 }
540
// Pushes this object's state through the serializer `o`.
//
// The same routine is used in both directions: dmtcp_ProcessInfo_EventHook
// calls it with a JBinarySerializeWriterRaw on PRE_EXEC and with a
// JBinarySerializeReaderRaw on POST_EXEC.
// NOTE(review): because reader and writer share this code, the order of the
// `o & field` statements presumably defines the on-disk layout -- do not
// reorder them without confirming against the serializer.
541 void ProcessInfo::serialize(jalib::JBinarySerializer& o)
542 {
543 JSERIALIZE_ASSERT_POINT("ProcessInfo:");
// Capture the current program break before the fields are processed;
// restoreHeap() later compares _savedBrk against the break at restart.
544 _savedBrk = (uint64_t) sbrk(0);
545
// Scalar and string fields, in a fixed order.
546 o & _elfType;
547 o & _isRootOfProcessTree & _pid & _sid & _ppid & _gid & _fgid & _generation;
548 o & _procname & _hostname & _launchCWD & _ckptCWD & _upid & _uppid;
549 o & _compGroup & _numPeers & _noCoordinator & _argvSize & _envSize;
550 o & _restoreBufAddr & _savedHeapStart & _savedBrk;
551 o & _vdsoStart & _vdsoEnd & _vvarStart & _vvarEnd;
552 o & _ckptDir & _ckptFileName & _ckptFilesSubDir;
553
554 JTRACE("Serialized process information")
555 (_sid) (_ppid) (_gid) (_fgid) (_isRootOfProcessTree)
556 (_procname) (_hostname) (_launchCWD) (_ckptCWD) (_upid) (_uppid)
557 (_compGroup) (_numPeers) (_noCoordinator) (_argvSize) (_envSize) (_elfType);
558
// Sanity check: running without a coordinator implies exactly one peer.
559 JASSERT(!_noCoordinator || _numPeers == 1) (_noCoordinator) (_numPeers);
560
561 if (_isRootOfProcessTree) {
562 JTRACE("This process is Root of Process Tree");
563 }
564
// The child table goes last, followed by the end marker.
565 JTRACE("Serializing ChildPid Table") (_childTable.size()) (o.filename());
566 o.serializeMap(_childTable);
567
568 JSERIALIZE_ASSERT_POINT("EOF");
569 }