/* [<][>][^][v][top][bottom][index][help] */
DEFINITIONS
This source file includes the following definitions.
- dmtcp_FileConnList_EventHook
- dmtcp_FileConn_ProcessFdEvent
- instance
- preLockSaveOptions
- drain
- postRestart
- refill
- resume
- prepareShmList
- recreateShmFileAndMap
- restoreShmArea
- remapShmMaps
- scanForPreExisting
- findDuplication
- processFileConnection
- createDummyConnection
1 /****************************************************************************
2 * Copyright (C) 2006-2013 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3 * jansel@csail.mit.edu, kapil@ccs.neu.edu, and gene@ccs.neu.edu *
4 * *
5 * This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src). *
6 * *
7 * DMTCP:dmtcp/src is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 /*
23 * Ckpt policy for handling files and shared memory segments.
24 *
25 * TODO(kapil): Fill the holes in this policy.
26 *
27 * Regular File:
28 * - Ckpt file descriptor
29 * - Leader election
30 * - Ckpt file based on heuristics
31 * Unlinked File:
32 * - Ckpt file descriptor
33 * - Leader election
34 * - Ckpt file
35 *
36 * Shared-memory area with regular file:
37 * - TODO(kapil): Any file descriptor with the file? If yes, use that to
38 * ckpt-file.
39 * - Open a file descriptor
40 * - Ckpt file based on heuristics
41 * - recreate file on restart
42 * - close fd on restart.
43 *
44 * Shared-memory area with unlinked file:
45 * + Ckpt:
46 * - TODO(kapil): Any file descriptor pointing to the file? If yes, delegate
47 * ckpt to the file descriptor.
48 * - everyone saves the contents of the shared-area.
49 * + Restart
50 * - File already exists: verify that the file is at least as large as
51 * (area.offset+area.size).
52 * - File doesn't exist: try to recreate the file; write content
53 * - on restart, everyone tries to recreate the file and write their data
54 * (offset, length) to the file.
55 * - everyone tries to unlink the file in a subsequent barrier.
56 */
57
58 // THESE INCLUDES ARE IN RANDOM ORDER. LET'S CLEAN IT UP AFTER RELEASE. - Gene
59 #include <sys/types.h>
60 #include <sys/stat.h>
61 #include <sys/types.h>
62 #include <sys/socket.h>
63 #include <sys/syscall.h>
64 #include <sys/mman.h>
65 #include <unistd.h>
66 #include <mqueue.h>
67 #include <stdint.h>
68 #include <signal.h>
69 #include "procselfmaps.h"
70 #include "util.h"
71 #include "shareddata.h"
72 #include "jfilesystem.h"
73 #include "jbuffer.h"
74 #include "jconvert.h"
75 #include "fileconnection.h"
76 #include "fileconnlist.h"
77 #include "filewrappers.h"
78
79 using namespace dmtcp;
80
81 void dmtcp_FileConnList_EventHook(DmtcpEvent_t event, DmtcpEventData_t *data)
82 {
83 FileConnList::instance().eventHook(event, data);
84 }
85
86 static vector<ProcMapsArea> shmAreas;
87 static vector<ProcMapsArea> unlinkedShmAreas;
88 static vector<ProcMapsArea> missingUnlinkedShmFiles;
89 static vector<FileConnection*> shmAreaConn;
90
91 void dmtcp_FileConn_ProcessFdEvent(int event, int arg1, int arg2)
92 {
93 if (event == SYS_close) {
94 FileConnList::instance().processClose(arg1);
95 } else if (event == SYS_dup) {
96 FileConnList::instance().processDup(arg1, arg2);
97 } else {
98 JASSERT(false);
99 }
100 }
101
102 static FileConnList *fileConnList = NULL;
103 FileConnList& FileConnList::instance()
104 {
105 if (fileConnList == NULL) {
106 fileConnList = new FileConnList();
107 }
108 return *fileConnList;
109 }
110
111 void FileConnList::preLockSaveOptions()
112 {
113 // Now create a list of all shared-memory areas.
114 prepareShmList();
115
116 ConnectionList::preLockSaveOptions();
117 }
118
119 void FileConnList::drain()
120 {
121 ConnectionList::drain();
122
123 vector<SharedData::InodeConnIdMap> inodeConnIdMaps;
124 for (iterator i = begin(); i != end(); ++i) {
125 Connection* con = i->second;
126 if (con->hasLock() && con->conType() == Connection::FILE) {
127 FileConnection *fileCon = (FileConnection*) con;
128 if (fileCon->checkpointed() == true) {
129 SharedData::InodeConnIdMap map;
130 map.devnum = fileCon->devnum();
131 map.inode = fileCon->inode();
132 memcpy(map.id, &i->first, sizeof (i->first));
133 inodeConnIdMaps.push_back(map);
134 }
135 }
136 }
137 if (inodeConnIdMaps.size() > 0) {
138 SharedData::insertInodeConnIdMaps(inodeConnIdMaps);
139 }
140 }
141
142 void FileConnList::postRestart()
143 {
144 /* It is possible to have two different connection-ids for a pre-existing
145 * CTTY in two or more different process trees. In this case, only one of the
146 * several process trees would be able to acquire a lock on the underlying
147 * fd. The send-receive fd logic fails in this case due to different
148 * connection-ids. Therefore, we let every process do a postRestart to
149 * reopen the CTTY.
150 *
151 * TODO: A better fix would be to have a unique connection-id for each
152 * pre-existing CTTY that is then used by all process trees. It can be
153 * implemented by using the SharedData area.
154 */
155 for (iterator i = begin(); i != end(); ++i) {
156 Connection* con = i->second;
157 if (!con->hasLock() && con->conType() == Connection::PTY &&
158 con->isPreExistingCTTY()) {
159 PtyConnection *pcon = (PtyConnection*) con;
160 pcon->postRestart();
161 }
162 }
163
164 /* Try to map the file as is, if it already exists on the disk.
165 */
166 for (size_t i = 0; i < unlinkedShmAreas.size(); i++) {
167 if (jalib::Filesystem::FileExists(unlinkedShmAreas[i].name)) {
168 // TODO(kapil): Verify the file contents.
169 JWARNING(false) (unlinkedShmAreas[i].name)
170 .Text("File was unlinked at ckpt but is currently present on disk; "
171 "remove it and try again.");
172 restoreShmArea(unlinkedShmAreas[i]);
173 } else {
174 missingUnlinkedShmFiles.push_back(unlinkedShmAreas[i]);
175 }
176 }
177
178 ConnectionList::postRestart();
179 }
180
181 void FileConnList::refill(bool isRestart)
182 {
183 // Check comments in PtyConnection::preRefill()/refill()
184 for (iterator i = begin(); i != end(); ++i) {
185 Connection* con = i->second;
186 if (con->hasLock() && con->conType() == Connection::PTY) {
187 PtyConnection *pcon = (PtyConnection*) con;
188 pcon->preRefill(isRestart);
189 }
190 }
191
192 if (isRestart) {
193 // The backing file will be created as a result of restoreShmArea. We need
194 // to unlink all such files in the resume() call below.
195 for (size_t i = 0; i < missingUnlinkedShmFiles.size(); i++) {
196 recreateShmFileAndMap(missingUnlinkedShmFiles[i]);
197 }
198 }
199
200 ConnectionList::refill(isRestart);
201 }
202
203 void FileConnList::resume(bool isRestart)
204 {
205 ConnectionList::resume(isRestart);
206 remapShmMaps();
207
208 if (isRestart) {
209 // Now unlink the files that we created as a side-effect of restoreShmArea.
210 for (size_t i = 0; i < missingUnlinkedShmFiles.size(); i++) {
211 JWARNING(unlink(missingUnlinkedShmFiles[i].name) != -1)
212 (missingUnlinkedShmFiles[i].name) (JASSERT_ERRNO)
213 .Text("The file was unlinked at the time of checkpoint. "
214 "Unlinking it after restart failed");
215 }
216 }
217 }
218
219 void FileConnList::prepareShmList()
220 {
221 ProcSelfMaps procSelfMaps;
222 ProcMapsArea area;
223
224 shmAreas.clear();
225 unlinkedShmAreas.clear();
226 missingUnlinkedShmFiles.clear();
227 shmAreaConn.clear();
228 while (procSelfMaps.getNextArea(&area)) {
229 if ((area.flags & MAP_SHARED) && area.prot != 0) {
230 if (strstr(area.name, "ptraceSharedInfo") != NULL ||
231 strstr(area.name, "dmtcpPidMap") != NULL ||
232 strstr(area.name, "dmtcpSharedArea") != NULL ||
233 strstr(area.name, "dmtcpSharedArea") != NULL ||
234 strstr(area.name, "synchronization-log") != NULL ||
235 strstr(area.name, "infiniband") != NULL ||
236 strstr(area.name, "synchronization-read-log") != NULL) {
237 continue;
238 }
239
240 if (Util::isNscdArea(area) ||
241 Util::isIBShmArea(area) ||
242 Util::isSysVShmArea(area)) {
243 continue;
244 }
245
246 /* Invalidate shared memory pages so that the next read to it (when we are
247 * writing them to ckpt file) will cause them to be reloaded from the
248 * disk.
249 */
250 JWARNING(msync(area.addr, area.size, MS_INVALIDATE) == 0)
251 (area.addr) (area.size) (area.name) (area.offset) (JASSERT_ERRNO);
252
253 if (jalib::Filesystem::FileExists(area.name)) {
254 if (_real_access(area.name, W_OK) == 0) {
255 JTRACE("Will checkpoint shared memory area") (area.name);
256 int flags = Util::memProtToOpenFlags(area.prot);
257 int fd = _real_open(area.name, flags, 0);
258 JASSERT(fd != -1) (JASSERT_ERRNO) (area.name);
259 FileConnection *fileConn =
260 new FileConnection(area.name, flags, 0, FileConnection::FILE_SHM);
261 add(fd, fileConn);
262 shmAreas.push_back(area);
263 shmAreaConn.push_back(fileConn);
264 /* Instead of unmapping the shared memory area, we make it
265 * non-readable. This way mtcp will skip the region while at the same
266 * time, we prevent JALLOC arena to grow over it.
267 *
268 * By munmapping the area, a bug was observed on CCIS linux with
269 * 'make check-java'. Once the region was unmapped, the JALLOC arena
270 * grew over it. During restart, the JALLOC'd area was reclaimed for
271 * remapping the shm file without informing JALLOC. Finally, during
272 * the second checkpoint cycle, the area was again unmapped and later
273 * JALLOC tried to access it, causing a SIGSEGV.
274 */
275 JASSERT(_real_mmap(area.addr, area.size, PROT_NONE,
276 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
277 -1, 0) != MAP_FAILED) (JASSERT_ERRNO);
278 } else {
279 JTRACE("Will not checkpoint shared memory area") (area.name);
280 }
281 } else {
282 // TODO: Shared memory areas with unlinked backing files.
283 JASSERT(Util::strEndsWith(area.name, DELETED_FILE_SUFFIX)) (area.name);
284 if (Util::strStartsWith(area.name, DEV_ZERO_DELETED_STR) ||
285 Util::strStartsWith(area.name, DEV_NULL_DELETED_STR)) {
286 JWARNING(false) (area.name)
287 .Text("Ckpt/Restart of anonymous shared memory not supported.");
288 } else {
289 JTRACE("Will recreate shm file on restart.") (area.name);
290
291 // Remove the DELETED suffix.
292 area.name[strlen(area.name) - strlen(DELETED_FILE_SUFFIX)] = '\0';
293 unlinkedShmAreas.push_back(area);
294 }
295 }
296 }
297 }
298 }
299
300 void FileConnList::recreateShmFileAndMap(const ProcMapsArea& area)
301 {
302 // TODO(kapil): Handle /dev/zero, /dev/random, etc.
303 // Recreate file in dmtcp-tmpdir;
304 string filename = Util::removeSuffix(area.name, DELETED_FILE_SUFFIX);
305 JASSERT(Util::createDirectoryTree(area.name)) (area.name)
306 .Text("Unable to create directory in File Path");
307
308 /* Now try to create the file with O_EXCL. If we fail with EEXIST, there
309 * are two possible scenarios:
310 * - The file was created by a different restarting process with data from
311 * checkpointed copy. It is possible that the data is "in flight", so we
312 * should wait until the next barrier to compare the data from our copy.
313 * - The file existed before restart. After the next barrier, abort if the
314 * contents differ from our checkpointed copy.
315 */
316 int fd = _real_open(area.name, O_CREAT | O_EXCL | O_RDWR,
317 S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
318 JASSERT(fd != -1 || errno == EEXIST) (area.name);
319
320 if (fd == -1) {
321 fd = _real_open(area.name, O_RDWR);
322 JASSERT(fd != -1) (JASSERT_ERRNO);
323 }
324
325 // Get to the correct offset.
326 JASSERT(lseek(fd, area.offset, SEEK_SET) == area.offset) (JASSERT_ERRNO);
327 // Now populate file contents from memory.
328 JASSERT(Util::writeAll(fd, area.addr, area.size) == (ssize_t)area.size)
329 (JASSERT_ERRNO);
330 restoreShmArea(area, fd);
331 }
332
333 void FileConnList::restoreShmArea(const ProcMapsArea& area, int fd)
334 {
335 if (fd == -1) {
336 fd = _real_open(area.name, Util::memProtToOpenFlags(area.prot));
337 }
338
339 JASSERT(fd != -1) (area.name) (JASSERT_ERRNO);
340
341 JTRACE("Restoring shared memory area") (area.name) ((void*)area.addr);
342 void *addr = _real_mmap(area.addr, area.size, area.prot,
343 MAP_FIXED | area.flags, fd, area.offset);
344 JASSERT(addr != MAP_FAILED) (area.flags) (area.prot) (JASSERT_ERRNO)
345 .Text("mmap failed");
346 _real_close(fd);
347 }
348
349 void FileConnList::remapShmMaps()
350 {
351 for (size_t i = 0; i < shmAreas.size(); i++) {
352 ProcMapsArea *area = &shmAreas[i];
353 FileConnection *fileCon = shmAreaConn[i];
354 int fd = fileCon->getFds()[0];
355 JTRACE("Restoring shared memory area") (area->name) ((void*)area->addr);
356 void *addr = _real_mmap(area->addr, area->size, area->prot,
357 MAP_FIXED | area->flags,
358 fd, area->offset);
359 JASSERT(addr != MAP_FAILED) (area->flags) (area->prot) (JASSERT_ERRNO) .Text("mmap failed");
360 _real_close(fd);
361 processClose(fd);
362 }
363 shmAreas.clear();
364 shmAreaConn.clear();
365 }
366
367 //examine /proc/self/fd for unknown connections
368 void FileConnList::scanForPreExisting()
369 {
370 // FIXME: Detect stdin/out/err fds to detect duplicates.
371 vector<int> fds = jalib::Filesystem::ListOpenFds();
372 string ctty = jalib::Filesystem::GetControllingTerm();
373 string parentCtty = jalib::Filesystem::GetControllingTerm(getppid());
374 for (size_t i = 0; i < fds.size(); ++i) {
375 int fd = fds[i];
376 if (!Util::isValidFd(fd)) continue;
377 if (dmtcp_is_protected_fd(fd)) continue;
378 struct stat statbuf;
379 JASSERT(fstat(fd, &statbuf) == 0);
380 bool isRegularFile = (S_ISREG(statbuf.st_mode) || S_ISCHR(statbuf.st_mode) ||
381 S_ISDIR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode));
382
383 string device = jalib::Filesystem::GetDeviceName(fd);
384
385 JTRACE("scanning pre-existing device") (fd) (device);
386 if (device == ctty || device == parentCtty) {
387 // Search if this is duplicate connection
388 iterator conit;
389 uint32_t cttyType = (device == ctty) ? PtyConnection::PTY_CTTY
390 : PtyConnection::PTY_PARENT_CTTY;
391 for (conit = begin(); conit != end(); conit++) {
392 Connection *c = conit->second;
393 if (c->subType() == cttyType &&
394 ((PtyConnection*)c)->ptsName() == device) {
395 processDup(c->getFds()[0], fd);
396 break;
397 }
398 }
399 if (conit == end()) {
400 // FIXME: Merge this code with the code in processFileConnection
401 PtyConnection *con = new PtyConnection(fd, (const char*) device.c_str(),
402 -1, -1, cttyType);
403 // Check comments in FileConnList::postRestart() for the explanation
404 // about isPreExistingCTTY.
405 con->markPreExistingCTTY();
406 add(fd, (Connection*)con);
407 }
408 } else if(dmtcp_is_bq_file && dmtcp_is_bq_file(device.c_str())) {
409 if (isRegularFile) {
410 Connection *c = findDuplication(fd, device.c_str());
411 if (c != NULL) {
412 add(fd,c);
413 continue;
414 }
415 }
416 processFileConnection(fd, device.c_str(), -1, -1);
417 } else if( fd <= 2 ){
418 add(fd, new StdioConnection(fd));
419 } else if (Util::strStartsWith(device, "/")) {
420 if (isRegularFile) {
421 Connection *c = findDuplication(fd, device.c_str());
422 if (c != NULL) {
423 add(fd,c);
424 continue;
425 }
426 }
427 processFileConnection(fd, device.c_str(), -1, -1);
428 }
429 }
430 }
431
432 Connection *FileConnList::findDuplication(int fd, const char *path)
433 {
434 string npath(path);
435 for (iterator i = begin(); i != end(); ++i) {
436 Connection *con = i->second;
437
438 if( con->conType() != Connection::FILE )
439 continue;
440
441 FileConnection *fcon = (FileConnection*)con;
442 // check for duplication
443 if( fcon->filePath() == npath && fcon->checkDup(fd) ){
444 return con;
445 }
446 }
447 return NULL;
448 }
449
450 void FileConnList::processFileConnection(int fd, const char *path,
451 int flags, mode_t mode)
452 {
453 Connection *c = NULL;
454 struct stat statbuf;
455 JASSERT(fstat(fd, &statbuf) == 0);
456
457 string device;
458 if (path == NULL) {
459 device = jalib::Filesystem::GetDeviceName(fd);
460 } else {
461 device = jalib::Filesystem::ResolveSymlink(path);
462 if (device == "") {
463 device = path;
464 }
465 }
466
467 path = device.c_str();
468 if (strcmp(path, "/dev/tty") == 0) {
469 // Controlling terminal
470 c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_DEV_TTY);
471 } else if (strcmp(path, "/dev/pty") == 0) {
472 JASSERT(false) .Text("Not Implemented");
473 } else if (Util::strStartsWith(path, "/dev/pty")) {
474 // BSD Master
475 c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_BSD_MASTER);
476 } else if (Util::strStartsWith(path, "/dev/tty")) {
477 // BSD Slave
478 c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_BSD_SLAVE);
479 } else if (strcmp(path, "/dev/ptmx") == 0 ||
480 strcmp(path, "/dev/pts/ptmx") == 0) {
481 // POSIX Master PTY
482 c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_MASTER);
483 } else if (Util::strStartsWith(path, "/dev/pts/")) {
484 // POSIX Slave PTY
485 c = new PtyConnection(fd, path, flags, mode, PtyConnection::PTY_SLAVE);
486 } else if (S_ISREG(statbuf.st_mode) || S_ISCHR(statbuf.st_mode) ||
487 S_ISDIR(statbuf.st_mode) || S_ISBLK(statbuf.st_mode)) {
488 int type = FileConnection::FILE_REGULAR;
489 if (dmtcp_is_bq_file && dmtcp_is_bq_file(path)) {
490 // Resource manager related
491 type = FileConnection::FILE_BATCH_QUEUE;
492 }
493 c = new FileConnection(path, flags, mode, type);
494 } else if (S_ISFIFO(statbuf.st_mode)) {
495 // FIFO
496 c = new FifoConnection(path, flags, mode);
497 } else {
498 JASSERT(false) (path) .Text("Unimplemented file type.");
499 }
500
501 add(fd, c);
502 }
503
504
505 Connection *FileConnList::createDummyConnection(int type)
506 {
507 switch (type) {
508 case Connection::FILE:
509 return new FileConnection();
510 break;
511 case Connection::FIFO:
512 return new FifoConnection();
513 break;
514 case Connection::PTY:
515 return new PtyConnection();
516 break;
517 case Connection::STDIO:
518 return new StdioConnection();
519 break;
520 }
521 return NULL;
522 }