root/processinfo.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. _do_lock_tbl
  2. _do_unlock_tbl
  3. dmtcp_ProcessInfo_EventHook
  4. instance
  5. growStack
  6. init
  7. updateCkptDirFileSubdir
  8. postExec
  9. resetOnFork
  10. restoreHeap
  11. restart
  12. restoreProcessGroupInfo
  13. insertChild
  14. eraseChild
  15. isChild
  16. beginPthreadJoin
  17. clearPthreadJoinState
  18. endPthreadJoin
  19. setCkptFilename
  20. setCkptDir
  21. refresh
  22. refreshChildTable
  23. serialize

   1 /****************************************************************************
   2  *   Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
   3  *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
   4  *                                                                          *
   5  *  This file is part of DMTCP.                                             *
   6  *                                                                          *
   7  *  DMTCP is free software: you can redistribute it and/or                  *
   8  *  modify it under the terms of the GNU Lesser General Public License as   *
   9  *  published by the Free Software Foundation, either version 3 of the      *
  10  *  License, or (at your option) any later version.                         *
  11  *                                                                          *
  12  *  DMTCP is distributed in the hope that it will be useful,                *
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
  15  *  GNU Lesser General Public License for more details.                     *
  16  *                                                                          *
  17  *  You should have received a copy of the GNU Lesser General Public        *
  18  *  License along with DMTCP:dmtcp/src.  If not, see                        *
  19  *  <http://www.gnu.org/licenses/>.                                         *
  20  ****************************************************************************/
  21 
  22 #include <fenv.h>
  23 #include <fcntl.h>
  24 #include <unistd.h>
  25 #include <sys/syscall.h>
  26 #include <sys/time.h>
  27 #include <sys/resource.h>
  28 #include "util.h"
  29 #include "syscallwrappers.h"
  30 #include "uniquepid.h"
  31 #include "processinfo.h"
  32 #include "procselfmaps.h"
  33 #include "coordinatorapi.h"
  34 #include  "../jalib/jconvert.h"
  35 #include  "../jalib/jfilesystem.h"
  36 
  37 using namespace dmtcp;
  38 
  39 static pthread_mutex_t tblLock = PTHREAD_MUTEX_INITIALIZER;
  40 
  41 static int roundingMode;
  42 
  43 static void _do_lock_tbl()
  44 {
  45   JASSERT(_real_pthread_mutex_lock(&tblLock) == 0) (JASSERT_ERRNO);
  46 }
  47 
  48 static void _do_unlock_tbl()
  49 {
  50   JASSERT(_real_pthread_mutex_unlock(&tblLock) == 0) (JASSERT_ERRNO);
  51 }
  52 
  53 void dmtcp_ProcessInfo_EventHook(DmtcpEvent_t event, DmtcpEventData_t *data)
  54 {
  55   switch (event) {
  56     case DMTCP_EVENT_INIT:
  57       ProcessInfo::instance().init();
  58       break;
  59 
  60     case DMTCP_EVENT_PRE_EXEC:
  61       {
  62         jalib::JBinarySerializeWriterRaw wr("", data->serializerInfo.fd);
  63         ProcessInfo::instance().refresh();
  64         ProcessInfo::instance().serialize(wr);
  65       }
  66       break;
  67 
  68     case DMTCP_EVENT_POST_EXEC:
  69       {
  70         jalib::JBinarySerializeReaderRaw rd("", data->serializerInfo.fd);
  71         ProcessInfo::instance().serialize(rd);
  72         ProcessInfo::instance().postExec();
  73       }
  74       break;
  75 
  76     case DMTCP_EVENT_DRAIN:
  77       ProcessInfo::instance().refresh();
  78       break;
  79 
  80     case DMTCP_EVENT_RESTART:
  81       fesetround(roundingMode);
  82       ProcessInfo::instance().restart();
  83       break;
  84 
  85     case DMTCP_EVENT_REFILL:
  86       if (data->refillInfo.isRestart) {
  87         ProcessInfo::instance().restoreProcessGroupInfo();
  88       }
  89       break;
  90 
  91     case DMTCP_EVENT_THREADS_SUSPEND:
  92       roundingMode = fegetround();
  93       break;
  94 
  95     case DMTCP_EVENT_THREADS_RESUME:
  96       if (data->refillInfo.isRestart) {
  97         _real_close(PROTECTED_ENVIRON_FD);
  98       }
  99       break;
 100 
 101     default:
 102       break;
 103   }
 104 }
 105 
 106 ProcessInfo::ProcessInfo()
 107 {
 108   char buf[PATH_MAX];
 109   _do_lock_tbl();
 110   _pid = -1;
 111   _ppid = -1;
 112   _gid = -1;
 113   _sid = -1;
 114   _isRootOfProcessTree = false;
 115   _noCoordinator = false;
 116   _generation = 0;
 117     // _generation, above, is per-process.
 118     // This constrasts with DmtcpUniqueProcessId:_computation_generation, which is
 119     //   shared among all process on a node; used in variable sharedDataHeader.
 120     // _generation is updated when _this_ process begins its checkpoint.
 121   _childTable.clear();
 122   _pthreadJoinId.clear();
 123   _procSelfExe = jalib::Filesystem::ResolveSymlink("/proc/self/exe");
 124   _uppid = UniquePid();
 125   JASSERT(getcwd(buf, sizeof buf) != NULL);
 126   _launchCWD = buf;
 127 #ifdef CONFIG_M32
 128   _elfType = Elf_32;
 129 #else
 130   _elfType = Elf_64;
 131 #endif
 132   _restoreBufLen = RESTORE_TOTAL_SIZE;
 133   _restoreBufAddr = 0;
 134   _do_unlock_tbl();
 135 }
 136 
 137 static ProcessInfo *pInfo = NULL;
 138 ProcessInfo& ProcessInfo::instance()
 139 {
 140   if (pInfo == NULL) {
 141     pInfo = new ProcessInfo();
 142   }
 143   return *pInfo;
 144 }
 145 
 146 void ProcessInfo::growStack()
 147 {
 148   /* Grow the stack to the stack limit */
 149   struct rlimit rlim;
 150   size_t stackSize;
 151   const rlim_t eightMB = 8 * MB;
 152   JASSERT(getrlimit(RLIMIT_STACK, &rlim) == 0) (JASSERT_ERRNO);
 153   if (rlim.rlim_cur == RLIM_INFINITY) {
 154     if (rlim.rlim_max == RLIM_INFINITY) {
 155       stackSize = 8 * 1024 * 1024;
 156     } else {
 157       stackSize = MIN(rlim.rlim_max, eightMB);
 158     }
 159   } else {
 160     stackSize = rlim.rlim_cur;
 161   }
 162 
 163   // Find the current stack area, heap, stack, vDSO and vvar areas.
 164   ProcMapsArea area;
 165   ProcMapsArea stackArea = {0};
 166   size_t allocSize;
 167   void *tmpbuf;
 168   ProcSelfMaps procSelfMaps;
 169   while (procSelfMaps.getNextArea(&area)) {
 170     if (strcmp(area.name, "[heap]") == 0) {
 171       // Record start of heap which will later be used to restore heap
 172       _savedHeapStart = (unsigned long) area.addr;
 173     } else if (strcmp(area.name, "[vdso]") == 0) {
 174       _vdsoStart = (unsigned long) area.addr;
 175       _vdsoEnd = (unsigned long) area.endAddr;
 176     } else if (strcmp(area.name, "[vvar]") == 0) {
 177       _vvarStart = (unsigned long) area.addr;
 178       _vvarEnd = (unsigned long) area.endAddr;
 179     } else if ((VA) &area >= area.addr && (VA) &area < area.endAddr) {
 180       JTRACE("Original stack area") ((void*)area.addr) (area.size);
 181       stackArea = area;
 182       /*
 183        * When using Matlab with dmtcp_launch, sometimes the bottom most
 184        * page of stack (the page with highest address) which contains the
 185        * environment strings and the argv[] was not shown in /proc/self/maps.
 186        * This is arguably a bug in the Linux kernel as of version 2.6.32, etc.
 187        * This happens on some odd combination of environment passed on to
 188        * Matlab process. As a result, the page was not checkpointed and hence
 189        * the process segfaulted on restart. The fix is to try to mprotect this
 190        * page with RWX permission to make the page visible again. This call
 191        * will fail if no stack page was invisible to begin with.
 192        */
 193       // FIXME : If the area following the stack is not empty, don't
 194       //         exercise this path.
 195       int ret = mprotect(area.addr + area.size, 0x1000,
 196                          PROT_READ | PROT_WRITE | PROT_EXEC);
 197       if (ret == 0) {
 198         JNOTE("bottom-most page of stack (page with highest address) was\n"
 199               "  invisible in /proc/self/maps. It is made visible again now.");
 200       }
 201     }
 202   }
 203   JASSERT(stackArea.addr != NULL);
 204 
 205   // Grow the stack
 206   {
 207     allocSize = stackSize - stackArea.size - 4095;
 208     tmpbuf = alloca(allocSize);
 209     JASSERT(tmpbuf != NULL) (JASSERT_ERRNO);
 210     memset(tmpbuf, 0, allocSize);
 211   }
 212 
 213 #ifdef DEBUG
 214   {
 215     ProcSelfMaps maps;
 216     while (maps.getNextArea(&area)) {
 217       if ((VA)&area >= area.addr && (VA)&area < area.endAddr) { // Stack found
 218         JTRACE("New stack size") ((void*)area.addr) (area.size);
 219         break;
 220       }
 221     }
 222   }
 223 #endif
 224 }
 225 
 226 void ProcessInfo::init()
 227 {
 228   if (_pid == -1) {
 229     // This is a brand new process.
 230     _pid = getpid();
 231     _ppid = getppid();
 232     _isRootOfProcessTree = true;
 233     _uppid = UniquePid();
 234     _procSelfExe = jalib::Filesystem::ResolveSymlink("/proc/self/exe");
 235   }
 236 
 237 #ifdef CONFIG_M32
 238   _elfType = Elf_32;
 239 #else
 240   _elfType = Elf_64;
 241 #endif
 242 
 243   _vdsoStart = _vdsoEnd = _vvarStart = _vvarEnd = 0;
 244 
 245   growStack();
 246 
 247   // Reserve space for restoreBuf
 248   _restoreBufLen = RESTORE_TOTAL_SIZE;
 249   // Allocate two extra pages -- one at the start, one at the end -- to work as
 250   // guard pages for the restore area.
 251   void *addr =  mmap(NULL, _restoreBufLen + (2 * 4096), PROT_READ,
 252                      MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 253   JASSERT(addr != MAP_FAILED) (JASSERT_ERRNO);
 254   _restoreBufAddr = (uint64_t) addr + 4096;
 255   JASSERT(mprotect((void*)_restoreBufAddr, _restoreBufLen, PROT_NONE) == 0)
 256     ((void*)_restoreBufAddr) (_restoreBufLen) (JASSERT_ERRNO);
 257 
 258   if (_ckptDir.empty()) {
 259     updateCkptDirFileSubdir();
 260   }
 261 }
 262 
 263 void ProcessInfo::updateCkptDirFileSubdir(string newCkptDir)
 264 {
 265   if (newCkptDir != "") {
 266     _ckptDir = newCkptDir;
 267   }
 268 
 269   if (_ckptDir.empty()) {
 270     const char *dir = getenv(ENV_VAR_CHECKPOINT_DIR);
 271     if (dir == NULL) {
 272       dir = ".";
 273     }
 274     _ckptDir = dir;
 275   }
 276 
 277   ostringstream o;
 278   o << _ckptDir << "/"
 279     << CKPT_FILE_PREFIX
 280     << jalib::Filesystem::GetProgramName()
 281     << '_' << UniquePid::ThisProcess();
 282 
 283   _ckptFileName = o.str() + CKPT_FILE_SUFFIX;
 284   _ckptFilesSubDir = o.str() + CKPT_FILES_SUBDIR_SUFFIX;
 285 }
 286 
 287 void ProcessInfo::postExec()
 288 {
 289   _procname   = jalib::Filesystem::GetProgramName();
 290   _upid       = UniquePid::ThisProcess();
 291   _uppid      = UniquePid::ParentProcess();
 292   updateCkptDirFileSubdir();
 293 }
 294 
 295 void ProcessInfo::resetOnFork()
 296 {
 297   pthread_mutex_t newlock = PTHREAD_MUTEX_INITIALIZER;
 298   tblLock = newlock;
 299   _ppid = _pid;
 300   _pid = getpid();
 301   _isRootOfProcessTree = false;
 302   _childTable.clear();
 303   _pthreadJoinId.clear();
 304   _ckptFileName.clear();
 305   _ckptFilesSubDir.clear();
 306   updateCkptDirFileSubdir();
 307 }
 308 
 309 void ProcessInfo::restoreHeap()
 310 {
 311   /* If the original start of heap is lower than the current end of heap, we
 312    * want to mmap the area between _savedBrk and current break. This
 313    * happens when the size of checkpointed program is smaller then the size of
 314    * mtcp_restart program.
 315    */
 316   uint64_t curBrk = (uint64_t) sbrk(0);
 317   if (curBrk > _savedBrk) {
 318     JNOTE("Area between saved_break and curr_break not mapped, mapping it now")
 319       (_savedBrk) (curBrk);
 320     size_t oldsize = _savedBrk - _savedHeapStart;
 321     size_t newsize = curBrk - _savedHeapStart;
 322 
 323     JASSERT(mremap((void*) _savedHeapStart, oldsize, newsize, 0) != NULL)
 324       (_savedBrk) (curBrk)
 325       .Text("mremap failed to map area between saved break and current break");
 326   } else if (curBrk < _savedBrk) {
 327     if (brk((void*)_savedBrk) != 0) {
 328       JNOTE("Failed to restore area between saved_break and curr_break.")
 329         (_savedBrk) (curBrk) (JASSERT_ERRNO);
 330     }
 331   }
 332 }
 333 
 334 void ProcessInfo::restart()
 335 {
 336   JASSERT(mprotect((void*)_restoreBufAddr, _restoreBufLen, PROT_NONE) == 0)
 337     ((void*)_restoreBufAddr) (_restoreBufLen) (JASSERT_ERRNO);
 338 
 339   restoreHeap();
 340 
 341   // Update the ckptDir
 342   string ckptDir = jalib::Filesystem::GetDeviceName(PROTECTED_CKPT_DIR_FD);
 343   JASSERT(ckptDir.length() > 0);
 344   _real_close(PROTECTED_CKPT_DIR_FD);
 345   updateCkptDirFileSubdir(ckptDir);
 346 
 347   if (_launchCWD != _ckptCWD) {
 348     string rpath = "";
 349     size_t llen = _launchCWD.length();
 350     if (Util::strStartsWith(_ckptCWD.c_str(), _launchCWD.c_str()) &&
 351         _ckptCWD[llen] == '/') {
 352       // _launchCWD = "/A/B"; _ckptCWD = "/A/B/C" -> rpath = "./c"
 353       rpath = "./" + _ckptCWD.substr(llen + 1);
 354       if (chdir(rpath.c_str()) == 0) {
 355         JTRACE("Changed cwd") (_launchCWD) (_ckptCWD) (_launchCWD + rpath);
 356       } else {
 357         JWARNING(chdir(_ckptCWD.c_str()) == 0) (_ckptCWD) (_launchCWD)
 358           (JASSERT_ERRNO) .Text("Failed to change directory to _ckptCWD");
 359       }
 360     }
 361   }
 362 }
 363 
 364 void ProcessInfo::restoreProcessGroupInfo()
 365 {
 366   // Restore group assignment
 367   if (dmtcp_virtual_to_real_pid && dmtcp_virtual_to_real_pid(_gid) != _gid) {
 368     pid_t cgid = getpgid(0);
 369     // Group ID is known inside checkpointed processes
 370     if (_gid != cgid) {
 371       JTRACE("Restore Group Assignment")
 372         (_gid) (_fgid) (cgid) (_pid) (_ppid) (getppid());
 373       JWARNING(setpgid(0, _gid) == 0) (_gid) (JASSERT_ERRNO)
 374         .Text("Cannot change group information");
 375     } else {
 376       JTRACE("Group is already assigned") (_gid) (cgid);
 377     }
 378   } else {
 379     JTRACE("SKIP Group information, GID unknown");
 380   }
 381 }
 382 
 383 void ProcessInfo::insertChild(pid_t pid, UniquePid uniquePid)
 384 {
 385   _do_lock_tbl();
 386   iterator i = _childTable.find(pid);
 387   JWARNING(i == _childTable.end()) (pid) (uniquePid) (i->second)
 388     .Text("child pid already exists!");
 389 
 390   _childTable[pid] = uniquePid;
 391   _do_unlock_tbl();
 392 
 393   JTRACE("Creating new virtualPid -> realPid mapping.") (pid) (uniquePid);
 394 }
 395 
 396 void ProcessInfo::eraseChild(pid_t virtualPid)
 397 {
 398   _do_lock_tbl();
 399   iterator i = _childTable.find(virtualPid);
 400   if (i != _childTable.end())
 401     _childTable.erase(virtualPid);
 402   _do_unlock_tbl();
 403 }
 404 
 405 bool ProcessInfo::isChild(const UniquePid& upid)
 406 {
 407   bool res = false;
 408   _do_lock_tbl();
 409   for (iterator i = _childTable.begin(); i != _childTable.end(); i++) {
 410     if (i->second == upid) {
 411       res = true;
 412       break;
 413     }
 414   }
 415   _do_unlock_tbl();
 416   return res;
 417 }
 418 
 419 bool ProcessInfo::beginPthreadJoin(pthread_t thread)
 420 {
 421   bool res = false;
 422   _do_lock_tbl();
 423   map<pthread_t, pthread_t>::iterator i = _pthreadJoinId.find(thread);
 424   if (i == _pthreadJoinId.end()) {
 425     _pthreadJoinId[thread] = pthread_self();
 426     res = true;
 427   }
 428   _do_unlock_tbl();
 429   return res;
 430 }
 431 
 432 void ProcessInfo::clearPthreadJoinState(pthread_t thread)
 433 {
 434   _do_lock_tbl();
 435   if (_pthreadJoinId.find(thread) != _pthreadJoinId.end()) {
 436     _pthreadJoinId.erase(thread);
 437   }
 438   _do_unlock_tbl();
 439 }
 440 
 441 void ProcessInfo::endPthreadJoin(pthread_t thread)
 442 {
 443   _do_lock_tbl();
 444   if (_pthreadJoinId.find(thread) != _pthreadJoinId.end() &&
 445       pthread_equal(_pthreadJoinId[thread], pthread_self())) {
 446     _pthreadJoinId.erase(thread);
 447   }
 448   _do_unlock_tbl();
 449 }
 450 
 451 void ProcessInfo::setCkptFilename(const char *filename)
 452 {
 453   JASSERT(filename != NULL);
 454   if (filename[0] == '/') {
 455     _ckptDir = jalib::Filesystem::DirName(filename);
 456     _ckptFileName = filename;
 457   } else {
 458     _ckptFileName = _ckptDir + "/" + filename;
 459   }
 460 
 461   if (Util::strEndsWith(_ckptFileName, CKPT_FILE_SUFFIX)) {
 462     string ckptFileBaseName =
 463       _ckptFileName.substr(0, _ckptFileName.length() - CKPT_FILE_SUFFIX_LEN);
 464     _ckptFilesSubDir = ckptFileBaseName +CKPT_FILES_SUBDIR_SUFFIX;
 465   } else {
 466     _ckptFilesSubDir = _ckptFileName + CKPT_FILES_SUBDIR_SUFFIX;
 467   }
 468 }
 469 
 470 
 471 void ProcessInfo::setCkptDir(const char *dir)
 472 {
 473   JASSERT(dir != NULL);
 474   _ckptDir = dir;
 475   _ckptFileName = _ckptDir + "/" + jalib::Filesystem::BaseName(_ckptFileName);
 476   _ckptFilesSubDir = _ckptDir + "/" + jalib::Filesystem::BaseName(_ckptFilesSubDir);
 477 
 478   JTRACE("setting ckptdir") (_ckptDir) (_ckptFilesSubDir);
 479   //JASSERT(access(_ckptDir.c_str(), X_OK|W_OK) == 0) (_ckptDir)
 480     //.Text("Missing execute- or write-access to checkpoint dir.");
 481 }
 482 
 483 void ProcessInfo::refresh()
 484 {
 485   JASSERT(_pid == getpid()) (_pid) (getpid());
 486 
 487   _gid = getpgid(0);
 488   _sid = getsid(0);
 489 
 490   _fgid = -1;
 491   // Try to open the controlling terminal
 492   int tfd = _real_open("/dev/tty", O_RDWR);
 493   if (tfd != -1) {
 494     _fgid = tcgetpgrp(tfd);
 495     _real_close(tfd);
 496   }
 497 
 498   if (_ppid != getppid()) {
 499     // Our original parent died; we are the root of the process tree now.
 500     //
 501     // On older systems, a process is inherited by init (pid = 1) after its
 502     // parent dies. However, with the new per-user init process, the parent
 503     // pid is no longer "1"; it's the pid of the user-specific init process.
 504     _ppid = getppid();
 505     _isRootOfProcessTree = true;
 506     _uppid = UniquePid();
 507   } else {
 508     _uppid = UniquePid::ParentProcess();
 509   }
 510 
 511   _procname = jalib::Filesystem::GetProgramName();
 512   _hostname = jalib::Filesystem::GetCurrentHostname();
 513   _upid = UniquePid::ThisProcess();
 514   _noCoordinator = dmtcp_no_coordinator();
 515 
 516   char buf[PATH_MAX];
 517   JASSERT(getcwd(buf, sizeof buf) != NULL);
 518   _ckptCWD = buf;
 519 
 520   _sessionIds.clear();
 521   refreshChildTable();
 522 
 523   JTRACE("CHECK GROUP PID")(_gid)(_fgid)(_ppid)(_pid);
 524 }
 525 
 526 void ProcessInfo::refreshChildTable()
 527 {
 528   iterator i = _childTable.begin();
 529   while (i != _childTable.end()) {
 530     pid_t pid = i->first;
 531     iterator j = i++;
 532     /* Check to see if the child process is alive*/
 533     if (kill(pid, 0) == -1 && errno == ESRCH) {
 534       _childTable.erase(j);
 535     } else {
 536       _sessionIds[pid] = getsid(pid);
 537     }
 538   }
 539 }
 540 
 541 void ProcessInfo::serialize(jalib::JBinarySerializer& o)
 542 {
 543   JSERIALIZE_ASSERT_POINT("ProcessInfo:");
 544   _savedBrk = (uint64_t) sbrk(0);
 545 
 546   o & _elfType;
 547   o & _isRootOfProcessTree & _pid & _sid & _ppid & _gid & _fgid & _generation;
 548   o & _procname & _hostname & _launchCWD & _ckptCWD & _upid & _uppid;
 549   o & _compGroup & _numPeers & _noCoordinator & _argvSize & _envSize;
 550   o & _restoreBufAddr & _savedHeapStart & _savedBrk;
 551   o & _vdsoStart & _vdsoEnd & _vvarStart & _vvarEnd;
 552   o & _ckptDir & _ckptFileName & _ckptFilesSubDir;
 553 
 554   JTRACE("Serialized process information")
 555     (_sid) (_ppid) (_gid) (_fgid) (_isRootOfProcessTree)
 556     (_procname) (_hostname) (_launchCWD) (_ckptCWD) (_upid) (_uppid)
 557     (_compGroup) (_numPeers) (_noCoordinator) (_argvSize) (_envSize) (_elfType);
 558 
 559   JASSERT(!_noCoordinator || _numPeers == 1) (_noCoordinator) (_numPeers);
 560 
 561   if (_isRootOfProcessTree) {
 562     JTRACE("This process is Root of Process Tree");
 563   }
 564 
 565   JTRACE("Serializing ChildPid Table") (_childTable.size()) (o.filename());
 566   o.serializeMap(_childTable);
 567 
 568   JSERIALIZE_ASSERT_POINT("EOF");
 569 }

/* [<][>][^][v][top][bottom][index][help] */