root/plugin/ipc/file/fileconnlist.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. dmtcp_FileConnList_EventHook
  2. dmtcp_FileConn_ProcessFdEvent
  3. instance
  4. preLockSaveOptions
  5. drain
  6. postRestart
  7. refill
  8. resume
  9. prepareShmList
  10. recreateShmFileAndMap
  11. restoreShmArea
  12. remapShmMaps
  13. scanForPreExisting
  14. findDuplication
  15. processFileConnection
  16. createDummyConnection

   1 /****************************************************************************
   2  *   Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
   3  *   jansel@csail.mit.edu, kapil@ccs.neu.edu, and gene@ccs.neu.edu          *
   4  *                                                                          *
   5  *   This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src).  *
   6  *                                                                          *
   7  *  DMTCP:dmtcp/src is free software: you can redistribute it and/or        *
   8  *  modify it under the terms of the GNU Lesser General Public License as   *
   9  *  published by the Free Software Foundation, either version 3 of the      *
  10  *  License, or (at your option) any later version.                         *
  11  *                                                                          *
  12  *  DMTCP:dmtcp/src is distributed in the hope that it will be useful,      *
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
  15  *  GNU Lesser General Public License for more details.                     *
  16  *                                                                          *
  17  *  You should have received a copy of the GNU Lesser General Public        *
  18  *  License along with DMTCP:dmtcp/src.  If not, see                        *
  19  *  <http://www.gnu.org/licenses/>.                                         *
  20  ****************************************************************************/
  21 
  22 /*
  23  * Ckpt policy for handling files and shared memory segments.
  24  *
  25  * TODO(kapil): Fill the holes in this policy.
  26  *
  27  * Regular File:
  28  * - Ckpt file descriptor
  29  * - Leader election
  30  * - Ckpt file based on heuristics
  31  * Unlinked File:
  32  * - Ckpt file descriptor
  33  * - Leader election
  34  * - Ckpt file
  35  *
  36  * Shared-memory area with regular file:
  37  * - TODO(kapil): Any file descriptor with the file? If yes, use that to
  38  *   ckpt-file.
  39  * - Open a file descriptor
  40  * - Ckpt file based on heuristics
  41  * - recreate file on restart
  42  * - close fd on restart.
  43  *
  44  * Shared-memory area with unlinked file:
  45  * + Ckpt:
  46  *   - TODO(kapil): Any file descriptor pointing to the file? If yes, delegate
  47  *     ckpt to the file descriptor.
  48  *   - everyone saves the contents of the shared-area.
  49  * + Restart
  50  *   - File already exists: verify that the file is at least as large as
  51  *     (area.offset+area.size).
  52  *   - File doesn't exist: try to recreate the file; write content
  53  * - on restart, everyone tries to recreate the file and write their data
  54  *   (offset, length) to the file.
  55  * - everyone tries to unlink the file in a subsequent barrier.
  56  */
  57 
  58 // THESE INCLUDES ARE IN RANDOM ORDER.  LET'S CLEAN IT UP AFTER RELEASE. - Gene
  59 #include <sys/types.h>
  60 #include <sys/stat.h>
  61 #include <sys/types.h>
  62 #include <sys/socket.h>
  63 #include <sys/syscall.h>
  64 #include <sys/mman.h>
  65 #include <unistd.h>
  66 #include <mqueue.h>
  67 #include <stdint.h>
  68 #include <signal.h>
  69 #include "procselfmaps.h"
  70 #include "util.h"
  71 #include "shareddata.h"
  72 #include "jfilesystem.h"
  73 #include "jbuffer.h"
  74 #include "jconvert.h"
  75 #include "fileconnection.h"
  76 #include "fileconnlist.h"
  77 #include "filewrappers.h"
  78 
  79 using namespace dmtcp;
  80 
  81 void dmtcp_FileConnList_EventHook(DmtcpEvent_t event, DmtcpEventData_t *data)
  82 {
  83   FileConnList::instance().eventHook(event, data);
  84 }
  85 
  86 static vector<ProcMapsArea> shmAreas;
  87 static vector<ProcMapsArea> unlinkedShmAreas;
  88 static vector<ProcMapsArea> missingUnlinkedShmFiles;
  89 static vector<FileConnection*> shmAreaConn;
  90 
  91 void dmtcp_FileConn_ProcessFdEvent(int event, int arg1, int arg2)
  92 {
  93   if (event == SYS_close) {
  94     FileConnList::instance().processClose(arg1);
  95   } else if (event == SYS_dup) {
  96     FileConnList::instance().processDup(arg1, arg2);
  97   } else {
  98     JASSERT(false);
  99   }
 100 }
 101 
 102 static FileConnList *fileConnList = NULL;
 103 FileConnList& FileConnList::instance()
 104 {
 105   if (fileConnList == NULL) {
 106     fileConnList = new FileConnList();
 107   }
 108   return *fileConnList;
 109 }
 110 
 111 void FileConnList::preLockSaveOptions()
 112 {
 113   // Now create a list of all shared-memory areas.
 114   prepareShmList();
 115 
 116   ConnectionList::preLockSaveOptions();
 117 }
 118 
 119 void FileConnList::drain()
 120 {
 121   ConnectionList::drain();
 122 
 123   vector<SharedData::InodeConnIdMap> inodeConnIdMaps;
 124   for (iterator i = begin(); i != end(); ++i) {
 125     Connection* con =  i->second;
 126     if (con->hasLock() && con->conType() == Connection::FILE) {
 127       FileConnection *fileCon = (FileConnection*) con;
 128       if (fileCon->checkpointed() == true) {
 129         SharedData::InodeConnIdMap map;
 130         map.devnum = fileCon->devnum();
 131         map.inode = fileCon->inode();
 132         memcpy(map.id, &i->first, sizeof (i->first));
 133         inodeConnIdMaps.push_back(map);
 134       }
 135     }
 136   }
 137   if (inodeConnIdMaps.size() > 0) {
 138     SharedData::insertInodeConnIdMaps(inodeConnIdMaps);
 139   }
 140 }
 141 
 142 void FileConnList::postRestart()
 143 {
 144   /* It is possible to have two different connection-ids for a pre-existing
 145    * CTTY in two or more different process trees. In this case, only one of the
 146    * several process trees would be able to acquire a lock on the underlying
 147    * fd.  The send-receive fd logic fails in this case due to different
 148    * connection-ids.  Therefore, we let every process do a postRestart to
 149    * reopen the CTTY.
 150    *
 151    * TODO: A better fix would be to have a unique connection-id for each
 152    * pre-existing CTTY that is then used by all process trees.  It can be
 153    * implemented by using the SharedData area.
 154    */
 155   for (iterator i = begin(); i != end(); ++i) {
 156     Connection* con =  i->second;
 157     if (!con->hasLock() && con->conType() == Connection::PTY &&
 158         con->isPreExistingCTTY()) {
 159       PtyConnection *pcon = (PtyConnection*) con;
 160       pcon->postRestart();
 161     }
 162   }
 163 
 164   /* Try to map the file as is, if it already exists on the disk.
 165    */
 166   for (size_t i = 0; i < unlinkedShmAreas.size(); i++) {
 167     if (jalib::Filesystem::FileExists(unlinkedShmAreas[i].name)) {
 168       // TODO(kapil): Verify the file contents.
 169       JWARNING(false) (unlinkedShmAreas[i].name)
 170         .Text("File was unlinked at ckpt but is currently present on disk; "
 171               "remove it and try again.");
 172       restoreShmArea(unlinkedShmAreas[i]);
 173     } else {
 174       missingUnlinkedShmFiles.push_back(unlinkedShmAreas[i]);
 175     }
 176   }
 177 
 178   ConnectionList::postRestart();
 179 }
 180 
 181 void FileConnList::refill(bool isRestart)
 182 {
 183   // Check comments in PtyConnection::preRefill()/refill()
 184   for (iterator i = begin(); i != end(); ++i) {
 185     Connection* con =  i->second;
 186     if (con->hasLock() && con->conType() == Connection::PTY) {
 187       PtyConnection *pcon = (PtyConnection*) con;
 188       pcon->preRefill(isRestart);
 189     }
 190   }
 191 
 192   if (isRestart) {
 193     // The backing file will be created as a result of restoreShmArea. We need
 194     // to unlink all such files in the resume() call below.
 195     for (size_t i = 0; i < missingUnlinkedShmFiles.size(); i++) {
 196       recreateShmFileAndMap(missingUnlinkedShmFiles[i]);
 197     }
 198   }
 199 
 200   ConnectionList::refill(isRestart);
 201 }
 202 
 203 void FileConnList::resume(bool isRestart)
 204 {
 205   ConnectionList::resume(isRestart);
 206   remapShmMaps();
 207 
 208   if (isRestart) {
 209     // Now unlink the files that we created as a side-effect of restoreShmArea.
 210     for (size_t i = 0; i < missingUnlinkedShmFiles.size(); i++) {
 211       JWARNING(unlink(missingUnlinkedShmFiles[i].name) != -1)
 212         (missingUnlinkedShmFiles[i].name) (JASSERT_ERRNO)
 213         .Text("The file was unlinked at the time of checkpoint. "
 214               "Unlinking it after restart failed");
 215     }
 216   }
 217 }
 218 
 219 void FileConnList::prepareShmList()
 220 {
 221   ProcSelfMaps procSelfMaps;
 222   ProcMapsArea area;
 223 
 224   shmAreas.clear();
 225   unlinkedShmAreas.clear();
 226   missingUnlinkedShmFiles.clear();
 227   shmAreaConn.clear();
 228   while (procSelfMaps.getNextArea(&area)) {
 229     if ((area.flags & MAP_SHARED) && area.prot != 0) {
 230       if (strstr(area.name, "ptraceSharedInfo") != NULL ||
 231           strstr(area.name, "dmtcpPidMap") != NULL ||
 232           strstr(area.name, "dmtcpSharedArea") != NULL ||
 233           strstr(area.name, "dmtcpSharedArea") != NULL ||
 234           strstr(area.name, "synchronization-log") != NULL ||
 235           strstr(area.name, "infiniband") != NULL ||
 236           strstr(area.name, "synchronization-read-log") != NULL) {
 237         continue;
 238       }
 239 
 240       if (Util::isNscdArea(area) ||
 241           Util::isIBShmArea(area) ||
 242           Util::isSysVShmArea(area)) {
 243         continue;
 244       }
 245 
 246       /* Invalidate shared memory pages so that the next read to it (when we are
 247        * writing them to ckpt file) will cause them to be reloaded from the
 248        * disk.
 249        */
 250       JWARNING(msync(area.addr, area.size, MS_INVALIDATE) == 0)
 251         (area.addr) (area.size) (area.name) (area.offset) (JASSERT_ERRNO);
 252 
 253       if (jalib::Filesystem::FileExists(area.name)) {
 254         if (_real_access(area.name, W_OK) == 0) {
 255           JTRACE("Will checkpoint shared memory area") (area.name);
 256           int flags = Util::memProtToOpenFlags(area.prot);
 257           int fd = _real_open(area.name, flags, 0);
 258           JASSERT(fd != -1) (JASSERT_ERRNO) (area.name);
 259           FileConnection *fileConn =
 260             new FileConnection(area.name, flags, 0, FileConnection::FILE_SHM);
 261           add(fd, fileConn);
 262           shmAreas.push_back(area);
 263           shmAreaConn.push_back(fileConn);
 264           /* Instead of unmapping the shared memory area, we make it
 265            * non-readable. This way mtcp will skip the region while at the same
 266            * time, we prevent JALLOC arena to grow over it.
 267            *
 268            * By munmapping the area, a bug was observed on CCIS linux with
 269            * 'make check-java'. Once the region was unmapped, the JALLOC arena
 270            * grew over it. During restart, the JALLOC'd area was reclaimed for
 271            * remapping the shm file without informing JALLOC. Finally, during
 272            * the second checkpoint cycle, the area was again unmapped and later
 273            * JALLOC tried to access it, causing a SIGSEGV.
 274            */
 275           JASSERT(_real_mmap(area.addr, area.size, PROT_NONE,
 276                              MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
 277                              -1, 0) != MAP_FAILED) (JASSERT_ERRNO);
 278         } else {
 279           JTRACE("Will not checkpoint shared memory area") (area.name);
 280         }
 281       } else {
 282         // TODO: Shared memory areas with unlinked backing files.
 283         JASSERT(Util::strEndsWith(area.name, DELETED_FILE_SUFFIX)) (area.name);
 284         if (Util::strStartsWith(area.name, DEV_ZERO_DELETED_STR) ||
 285             Util::strStartsWith(area.name, DEV_NULL_DELETED_STR)) {
 286           JWARNING(false) (area.name)
 287             .Text("Ckpt/Restart of anonymous shared memory not supported.");
 288         } else {
 289           JTRACE("Will recreate shm file on restart.") (area.name);
 290 
 291           // Remove the DELETED suffix.
 292           area.name[strlen(area.name) - strlen(DELETED_FILE_SUFFIX)] = '\0';
 293           unlinkedShmAreas.push_back(area);
 294         }
 295       }
 296     }
 297   }
 298 }
 299 
 300 void FileConnList::recreateShmFileAndMap(const ProcMapsArea& area)
 301 {
 302   // TODO(kapil): Handle /dev/zero, /dev/random, etc.
 303   // Recreate file in dmtcp-tmpdir;
 304   string filename = Util::removeSuffix(area.name, DELETED_FILE_SUFFIX);
 305   JASSERT(Util::createDirectoryTree(area.name)) (area.name)
 306     .Text("Unable to create directory in File Path");
 307 
 308   /* Now try to create the file with O_EXCL. If we fail with EEXIST, there
 309    * are two possible scenarios:
 310    * - The file was created by a different restarting process with data from
 311    *   checkpointed copy. It is possible that the data is "in flight", so we
 312    *   should wait until the next barrier to compare the data from our copy.
 313    * - The file existed before restart. After the next barrier, abort if the
 314    *   contents differ from our checkpointed copy.
 315    */
 316   int fd = _real_open(area.name, O_CREAT | O_EXCL | O_RDWR,
 317                       S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 318   JASSERT(fd != -1 || errno == EEXIST) (area.name);
 319 
 320   if (fd == -1) {
 321     fd = _real_open(area.name, O_RDWR);
 322     JASSERT(fd != -1) (JASSERT_ERRNO);
 323   }
 324 
 325   // Get to the correct offset.
 326   JASSERT(lseek(fd, area.offset, SEEK_SET) == area.offset) (JASSERT_ERRNO);
 327   // Now populate file contents from memory.
 328   JASSERT(Util::writeAll(fd, area.addr, area.size) == (ssize_t)area.size)
 329     (JASSERT_ERRNO);
 330   restoreShmArea(area, fd);
 331 }
 332 
 333 void FileConnList::restoreShmArea(const ProcMapsArea& area, int fd)
 334 {
 335   if (fd == -1) {
 336     fd = _real_open(area.name, Util::memProtToOpenFlags(area.prot));
 337   }
 338 
 339   JASSERT(fd != -1) (area.name) (JASSERT_ERRNO);
 340 
 341   JTRACE("Restoring shared memory area") (area.name) ((void*)area.addr);
 342   void *addr = _real_mmap(area.addr, area.size, area.prot,
 343                           MAP_FIXED | area.flags, fd, area.offset);
 344   JASSERT(addr != MAP_FAILED) (area.flags) (area.prot) (JASSERT_ERRNO)
 345     .Text("mmap failed");
 346   _real_close(fd);
 347 }
 348 
 349 void FileConnList::remapShmMaps()
 350 {
 351   for (size_t i = 0; i < shmAreas.size(); i++) {
 352     ProcMapsArea *area = &shmAreas[i];
 353     FileConnection *fileCon = shmAreaConn[i];
 354     int fd = fileCon->getFds()[0];
 355     JTRACE("Restoring shared memory area") (area->name) ((void*)area->addr);
 356     void *addr = _real_mmap(area->addr, area->size, area->prot,
 357                             MAP_FIXED | area->flags,
 358                             fd, area->offset);
 359     JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed");
 360     _real_close(fd);
 361     processClose(fd);
 362   }
 363   shmAreas.clear();
 364   shmAreaConn.clear();
 365 }
 366 
 367 //examine /proc/self/fd for unknown connections
 368 void FileConnList::scanForPreExisting()
 369 {
 370   // FIXME: Detect stdin/out/err fds to detect duplicates.
 371   vector<int> fds = jalib::Filesystem::ListOpenFds();
 372   string ctty = jalib::Filesystem::GetControllingTerm();
 373   string parentCtty = jalib::Filesystem::GetControllingTerm(getppid());
 374   for (size_t i = 0; i < fds.size(); ++i) {
 375     int fd = fds[i];
 376     if (!Util::isValidFd(fd)) continue;
 377     if (dmtcp_is_protected_fd(fd)) continue;
 378     struct stat statbuf;
 379     JASSERT(fstat(fd, &statbuf) == 0);
 380     bool isRegularFile = (S_ISREG(statbuf.st_mode) || S_ISCHR(statbuf.st_mode) ||
 381                           S_ISDIR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode));
 382 
 383     string device = jalib::Filesystem::GetDeviceName(fd);
 384 
 385     JTRACE("scanning pre-existing device") (fd) (device);
 386     if (device == ctty || device == parentCtty) {
 387       // Search if this is duplicate connection
 388       iterator conit;
 389       uint32_t cttyType = (device == ctty) ? PtyConnection::PTY_CTTY
 390                                       : PtyConnection::PTY_PARENT_CTTY;
 391       for (conit = begin(); conit != end(); conit++) {
 392         Connection *c = conit->second;
 393         if (c->subType() == cttyType &&
 394             ((PtyConnection*)c)->ptsName() == device) {
 395           processDup(c->getFds()[0], fd);
 396           break;
 397         }
 398       }
 399       if (conit == end()) {
 400         // FIXME: Merge this code with the code in processFileConnection
 401         PtyConnection *con = new PtyConnection(fd, (const char*) device.c_str(),
 402                                                -1, -1, cttyType);
 403         // Check comments in FileConnList::postRestart() for the explanation
 404         // about isPreExistingCTTY.
 405         con->markPreExistingCTTY();
 406         add(fd, (Connection*)con);
 407       }
 408     } else if(dmtcp_is_bq_file && dmtcp_is_bq_file(device.c_str())) {
 409       if (isRegularFile) {
 410         Connection *c = findDuplication(fd, device.c_str());
 411         if (c != NULL) {
 412           add(fd,c);
 413           continue;
 414         }
 415       }
 416       processFileConnection(fd, device.c_str(), -1, -1);
 417     } else if( fd <= 2 ){
 418       add(fd, new StdioConnection(fd));
 419     } else if (Util::strStartsWith(device, "/")) {
 420       if (isRegularFile) {
 421         Connection *c = findDuplication(fd, device.c_str());
 422         if (c != NULL) {
 423           add(fd,c);
 424           continue;
 425         }
 426       }
 427       processFileConnection(fd, device.c_str(), -1, -1);
 428     }
 429   }
 430 }
 431 
 432 Connection *FileConnList::findDuplication(int fd, const char *path)
 433 {
 434   string npath(path);
 435   for (iterator i = begin(); i != end(); ++i) {
 436     Connection *con = i->second;
 437 
 438     if( con->conType() != Connection::FILE )
 439       continue;
 440 
 441     FileConnection *fcon = (FileConnection*)con;
 442     // check for duplication
 443     if( fcon->filePath() == npath && fcon->checkDup(fd) ){
 444       return con;
 445     }
 446   }
 447   return NULL;
 448 }
 449 
 450 void FileConnList::processFileConnection(int fd, const char *path,
 451                                                 int flags, mode_t mode)
 452 {
 453   Connection *c = NULL;
 454   struct stat statbuf;
 455   JASSERT(fstat(fd, &statbuf) == 0);
 456 
 457   string device;
 458   if (path == NULL) {
 459     device = jalib::Filesystem::GetDeviceName(fd);
 460   } else {
 461     device = jalib::Filesystem::ResolveSymlink(path);
 462     if (device == "") {
 463       device = path;
 464     }
 465   }
 466 
 467   path = device.c_str();
 468   if (strcmp(path, "/dev/tty") == 0) {
 469     // Controlling terminal
 470     c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_DEV_TTY);
 471   } else if (strcmp(path, "/dev/pty") == 0) {
 472     JASSERT(false) .Text("Not Implemented");
 473   } else if (Util::strStartsWith(path, "/dev/pty")) {
 474     // BSD Master
 475     c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_BSD_MASTER);
 476   } else if (Util::strStartsWith(path, "/dev/tty")) {
 477     // BSD Slave
 478     c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_BSD_SLAVE);
 479   } else if (strcmp(path, "/dev/ptmx") == 0 ||
 480              strcmp(path, "/dev/pts/ptmx") == 0) {
 481     // POSIX Master PTY
 482     c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_MASTER);
 483   } else if (Util::strStartsWith(path, "/dev/pts/")) {
 484     // POSIX Slave PTY
 485     c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_SLAVE);
 486   } else if (S_ISREG(statbuf.st_mode) || S_ISCHR(statbuf.st_mode) ||
 487              S_ISDIR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode)) {
 488     int type = FileConnection::FILE_REGULAR;
 489     if (dmtcp_is_bq_file && dmtcp_is_bq_file(path)) {
 490       // Resource manager related
 491       type = FileConnection::FILE_BATCH_QUEUE;
 492     }
 493     c = new FileConnection(path, flags, mode, type);
 494   } else if (S_ISFIFO(statbuf.st_mode)) {
 495     // FIFO
 496     c = new FifoConnection(path, flags, mode);
 497   } else {
 498     JASSERT(false) (path) .Text("Unimplemented file type.");
 499   }
 500 
 501   add(fd, c);
 502 }
 503 
 504 
 505 Connection *FileConnList::createDummyConnection(int type)
 506 {
 507   switch (type) {
 508     case Connection::FILE:
 509       return new FileConnection();
 510       break;
 511     case Connection::FIFO:
 512       return new FifoConnection();
 513       break;
 514     case Connection::PTY:
 515       return new PtyConnection();
 516       break;
 517     case Connection::STDIO:
 518       return new StdioConnection();
 519       break;
 520   }
 521   return NULL;
 522 }

/* [<][>][^][v][top][bottom][index][help] */