root/fs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. readsb
  2. bzero
  3. balloc
  4. bfree
  5. iinit
  6. ialloc
  7. iupdate
  8. iget
  9. idup
  10. ilock
  11. iunlock
  12. iput
  13. iunlockput
  14. bmap
  15. itrunc
  16. stati
  17. readi
  18. writei
  19. namecmp
  20. dirlookup
  21. dirlink
  22. skipelem
  23. namex
  24. namei
  25. nameiparent

   1 // File system implementation.  Five layers:
   2 //   + Blocks: allocator for raw disk blocks.
   3 //   + Log: crash recovery for multi-step updates.
   4 //   + Files: inode allocator, reading, writing, metadata.
   5 //   + Directories: inode with special contents (list of other inodes!)
   6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
   7 //
   8 // This file contains the low-level file system manipulation 
   9 // routines.  The (higher-level) system call implementations
  10 // are in sysfile.c.
  11 
  12 #include "types.h"
  13 #include "defs.h"
  14 #include "param.h"
  15 #include "stat.h"
  16 #include "mmu.h"
  17 #include "proc.h"
  18 #include "spinlock.h"
  19 #include "fs.h"
  20 #include "buf.h"
  21 #include "file.h"
  22 
  23 #define min(a, b) ((a) < (b) ? (a) : (b))
  24 static void itrunc(struct inode*);
  25 struct superblock sb;   // there should be one per dev, but we run with one dev
  26 
  27 // Read the super block.
  28 void
  29 readsb(int dev, struct superblock *sb)
  30 {
  31   struct buf *bp;
  32   
  33   bp = bread(dev, 1);
  34   memmove(sb, bp->data, sizeof(*sb));
  35   brelse(bp);
  36 }
  37 
  38 // Zero a block.
  39 static void
  40 bzero(int dev, int bno)
  41 {
  42   struct buf *bp;
  43   
  44   bp = bread(dev, bno);
  45   memset(bp->data, 0, BSIZE);
  46   log_write(bp);
  47   brelse(bp);
  48 }
  49 
  50 // Blocks. 
  51 
  52 // Allocate a zeroed disk block.
  53 static uint
  54 balloc(uint dev)
  55 {
  56   int b, bi, m;
  57   struct buf *bp;
  58 
  59   bp = 0;
  60   for(b = 0; b < sb.size; b += BPB){
  61     bp = bread(dev, BBLOCK(b, sb));
  62     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
  63       m = 1 << (bi % 8);
  64       if((bp->data[bi/8] & m) == 0){  // Is block free?
  65         bp->data[bi/8] |= m;  // Mark block in use.
  66         log_write(bp);
  67         brelse(bp);
  68         bzero(dev, b + bi);
  69         return b + bi;
  70       }
  71     }
  72     brelse(bp);
  73   }
  74   panic("balloc: out of blocks");
  75 }
  76 
  77 // Free a disk block.
  78 static void
  79 bfree(int dev, uint b)
  80 {
  81   struct buf *bp;
  82   int bi, m;
  83 
  84   readsb(dev, &sb);
  85   bp = bread(dev, BBLOCK(b, sb));
  86   bi = b % BPB;
  87   m = 1 << (bi % 8);
  88   if((bp->data[bi/8] & m) == 0)
  89     panic("freeing free block");
  90   bp->data[bi/8] &= ~m;
  91   log_write(bp);
  92   brelse(bp);
  93 }
  94 
  95 // Inodes.
  96 //
  97 // An inode describes a single unnamed file.
  98 // The inode disk structure holds metadata: the file's type,
  99 // its size, the number of links referring to it, and the
 100 // list of blocks holding the file's content.
 101 //
 102 // The inodes are laid out sequentially on disk at
 103 // sb.startinode. Each inode has a number, indicating its
 104 // position on the disk.
 105 //
 106 // The kernel keeps a cache of in-use inodes in memory
 107 // to provide a place for synchronizing access
 108 // to inodes used by multiple processes. The cached
 109 // inodes include book-keeping information that is
 110 // not stored on disk: ip->ref and ip->flags.
 111 //
 112 // An inode and its in-memory represtative go through a
 113 // sequence of states before they can be used by the
 114 // rest of the file system code.
 115 //
 116 // * Allocation: an inode is allocated if its type (on disk)
 117 //   is non-zero. ialloc() allocates, iput() frees if
 118 //   the link count has fallen to zero.
 119 //
 120 // * Referencing in cache: an entry in the inode cache
 121 //   is free if ip->ref is zero. Otherwise ip->ref tracks
 122 //   the number of in-memory pointers to the entry (open
 123 //   files and current directories). iget() to find or
 124 //   create a cache entry and increment its ref, iput()
 125 //   to decrement ref.
 126 //
 127 // * Valid: the information (type, size, &c) in an inode
 128 //   cache entry is only correct when the I_VALID bit
 129 //   is set in ip->flags. ilock() reads the inode from
 130 //   the disk and sets I_VALID, while iput() clears
 131 //   I_VALID if ip->ref has fallen to zero.
 132 //
 133 // * Locked: file system code may only examine and modify
 134 //   the information in an inode and its content if it
 135 //   has first locked the inode. The I_BUSY flag indicates
 136 //   that the inode is locked. ilock() sets I_BUSY,
 137 //   while iunlock clears it.
 138 //
 139 // Thus a typical sequence is:
 140 //   ip = iget(dev, inum)
 141 //   ilock(ip)
 142 //   ... examine and modify ip->xxx ...
 143 //   iunlock(ip)
 144 //   iput(ip)
 145 //
 146 // ilock() is separate from iget() so that system calls can
 147 // get a long-term reference to an inode (as for an open file)
 148 // and only lock it for short periods (e.g., in read()).
 149 // The separation also helps avoid deadlock and races during
 150 // pathname lookup. iget() increments ip->ref so that the inode
 151 // stays cached and pointers to it remain valid.
 152 //
 153 // Many internal file system functions expect the caller to
 154 // have locked the inodes involved; this lets callers create
 155 // multi-step atomic operations.
 156 
 157 struct {
 158   struct spinlock lock;
 159   struct inode inode[NINODE];
 160 } icache;
 161 
 162 void
 163 iinit(int dev)
 164 {
 165   initlock(&icache.lock, "icache");
 166   readsb(dev, &sb);
 167   cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d inodestart %d bmap start %d\n", sb.size,
 168           sb.nblocks, sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, sb.bmapstart);
 169 }
 170 
 171 static struct inode* iget(uint dev, uint inum);
 172 
 173 //PAGEBREAK!
 174 // Allocate a new inode with the given type on device dev.
 175 // A free inode has a type of zero.
 176 struct inode*
 177 ialloc(uint dev, short type)
 178 {
 179   int inum;
 180   struct buf *bp;
 181   struct dinode *dip;
 182 
 183   for(inum = 1; inum < sb.ninodes; inum++){
 184     bp = bread(dev, IBLOCK(inum, sb));
 185     dip = (struct dinode*)bp->data + inum%IPB;
 186     if(dip->type == 0){  // a free inode
 187       memset(dip, 0, sizeof(*dip));
 188       dip->type = type;
 189       log_write(bp);   // mark it allocated on the disk
 190       brelse(bp);
 191       return iget(dev, inum);
 192     }
 193     brelse(bp);
 194   }
 195   panic("ialloc: no inodes");
 196 }
 197 
 198 // Copy a modified in-memory inode to disk.
 199 void
 200 iupdate(struct inode *ip)
 201 {
 202   struct buf *bp;
 203   struct dinode *dip;
 204 
 205   bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 206   dip = (struct dinode*)bp->data + ip->inum%IPB;
 207   dip->type = ip->type;
 208   dip->major = ip->major;
 209   dip->minor = ip->minor;
 210   dip->nlink = ip->nlink;
 211   dip->size = ip->size;
 212   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
 213   log_write(bp);
 214   brelse(bp);
 215 }
 216 
 217 // Find the inode with number inum on device dev
 218 // and return the in-memory copy. Does not lock
 219 // the inode and does not read it from disk.
 220 static struct inode*
 221 iget(uint dev, uint inum)
 222 {
 223   struct inode *ip, *empty;
 224 
 225   acquire(&icache.lock);
 226 
 227   // Is the inode already cached?
 228   empty = 0;
 229   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
 230     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
 231       ip->ref++;
 232       release(&icache.lock);
 233       return ip;
 234     }
 235     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
 236       empty = ip;
 237   }
 238 
 239   // Recycle an inode cache entry.
 240   if(empty == 0)
 241     panic("iget: no inodes");
 242 
 243   ip = empty;
 244   ip->dev = dev;
 245   ip->inum = inum;
 246   ip->ref = 1;
 247   ip->flags = 0;
 248   release(&icache.lock);
 249 
 250   return ip;
 251 }
 252 
 253 // Increment reference count for ip.
 254 // Returns ip to enable ip = idup(ip1) idiom.
 255 struct inode*
 256 idup(struct inode *ip)
 257 {
 258   acquire(&icache.lock);
 259   ip->ref++;
 260   release(&icache.lock);
 261   return ip;
 262 }
 263 
 264 // Lock the given inode.
 265 // Reads the inode from disk if necessary.
 266 void
 267 ilock(struct inode *ip)
 268 {
 269   struct buf *bp;
 270   struct dinode *dip;
 271 
 272   if(ip == 0 || ip->ref < 1)
 273     panic("ilock");
 274 
 275   acquire(&icache.lock);
 276   while(ip->flags & I_BUSY)
 277     sleep(ip, &icache.lock);
 278   ip->flags |= I_BUSY;
 279   release(&icache.lock);
 280 
 281   if(!(ip->flags & I_VALID)){
 282     bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 283     dip = (struct dinode*)bp->data + ip->inum%IPB;
 284     ip->type = dip->type;
 285     ip->major = dip->major;
 286     ip->minor = dip->minor;
 287     ip->nlink = dip->nlink;
 288     ip->size = dip->size;
 289     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
 290     brelse(bp);
 291     ip->flags |= I_VALID;
 292     if(ip->type == 0)
 293       panic("ilock: no type");
 294   }
 295 }
 296 
 297 // Unlock the given inode.
 298 void
 299 iunlock(struct inode *ip)
 300 {
 301   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
 302     panic("iunlock");
 303 
 304   acquire(&icache.lock);
 305   ip->flags &= ~I_BUSY;
 306   wakeup(ip);
 307   release(&icache.lock);
 308 }
 309 
 310 // Drop a reference to an in-memory inode.
 311 // If that was the last reference, the inode cache entry can
 312 // be recycled.
 313 // If that was the last reference and the inode has no links
 314 // to it, free the inode (and its content) on disk.
 315 // All calls to iput() must be inside a transaction in
 316 // case it has to free the inode.
 317 void
 318 iput(struct inode *ip)
 319 {
 320   acquire(&icache.lock);
 321   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
 322     // inode has no links and no other references: truncate and free.
 323     if(ip->flags & I_BUSY)
 324       panic("iput busy");
 325     ip->flags |= I_BUSY;
 326     release(&icache.lock);
 327     itrunc(ip);
 328     ip->type = 0;
 329     iupdate(ip);
 330     acquire(&icache.lock);
 331     ip->flags = 0;
 332     wakeup(ip);
 333   }
 334   ip->ref--;
 335   release(&icache.lock);
 336 }
 337 
 338 // Common idiom: unlock, then put.
 339 void
 340 iunlockput(struct inode *ip)
 341 {
 342   iunlock(ip);
 343   iput(ip);
 344 }
 345 
 346 //PAGEBREAK!
 347 // Inode content
 348 //
 349 // The content (data) associated with each inode is stored
 350 // in blocks on the disk. The first NDIRECT block numbers
 351 // are listed in ip->addrs[].  The next NINDIRECT blocks are 
 352 // listed in block ip->addrs[NDIRECT].
 353 
 354 // Return the disk block address of the nth block in inode ip.
 355 // If there is no such block, bmap allocates one.
 356 static uint
 357 bmap(struct inode *ip, uint bn)
 358 {
 359   uint addr, *a;
 360   struct buf *bp;
 361 
 362   if(bn < NDIRECT){
 363     if((addr = ip->addrs[bn]) == 0)
 364       ip->addrs[bn] = addr = balloc(ip->dev);
 365     return addr;
 366   }
 367   bn -= NDIRECT;
 368 
 369   if(bn < NINDIRECT){
 370     // Load indirect block, allocating if necessary.
 371     if((addr = ip->addrs[NDIRECT]) == 0)
 372       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
 373     bp = bread(ip->dev, addr);
 374     a = (uint*)bp->data;
 375     if((addr = a[bn]) == 0){
 376       a[bn] = addr = balloc(ip->dev);
 377       log_write(bp);
 378     }
 379     brelse(bp);
 380     return addr;
 381   }
 382 
 383   panic("bmap: out of range");
 384 }
 385 
 386 // Truncate inode (discard contents).
 387 // Only called when the inode has no links
 388 // to it (no directory entries referring to it)
 389 // and has no in-memory reference to it (is
 390 // not an open file or current directory).
 391 static void
 392 itrunc(struct inode *ip)
 393 {
 394   int i, j;
 395   struct buf *bp;
 396   uint *a;
 397 
 398   for(i = 0; i < NDIRECT; i++){
 399     if(ip->addrs[i]){
 400       bfree(ip->dev, ip->addrs[i]);
 401       ip->addrs[i] = 0;
 402     }
 403   }
 404   
 405   if(ip->addrs[NDIRECT]){
 406     bp = bread(ip->dev, ip->addrs[NDIRECT]);
 407     a = (uint*)bp->data;
 408     for(j = 0; j < NINDIRECT; j++){
 409       if(a[j])
 410         bfree(ip->dev, a[j]);
 411     }
 412     brelse(bp);
 413     bfree(ip->dev, ip->addrs[NDIRECT]);
 414     ip->addrs[NDIRECT] = 0;
 415   }
 416 
 417   ip->size = 0;
 418   iupdate(ip);
 419 }
 420 
 421 // Copy stat information from inode.
 422 void
 423 stati(struct inode *ip, struct stat *st)
 424 {
 425   st->dev = ip->dev;
 426   st->ino = ip->inum;
 427   st->type = ip->type;
 428   st->nlink = ip->nlink;
 429   st->size = ip->size;
 430 }
 431 
 432 //PAGEBREAK!
 433 // Read data from inode.
 434 int
 435 readi(struct inode *ip, char *dst, uint off, uint n)
 436 {
 437   uint tot, m;
 438   struct buf *bp;
 439 
 440   if(ip->type == T_DEV){
 441     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
 442       return -1;
 443     return devsw[ip->major].read(ip, dst, n);
 444   }
 445 
 446   if(off > ip->size || off + n < off)
 447     return -1;
 448   if(off + n > ip->size)
 449     n = ip->size - off;
 450 
 451   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
 452     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 453     m = min(n - tot, BSIZE - off%BSIZE);
 454     memmove(dst, bp->data + off%BSIZE, m);
 455     brelse(bp);
 456   }
 457   return n;
 458 }
 459 
 460 // PAGEBREAK!
 461 // Write data to inode.
 462 int
 463 writei(struct inode *ip, char *src, uint off, uint n)
 464 {
 465   uint tot, m;
 466   struct buf *bp;
 467 
 468   if(ip->type == T_DEV){
 469     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
 470       return -1;
 471     return devsw[ip->major].write(ip, src, n);
 472   }
 473 
 474   if(off > ip->size || off + n < off)
 475     return -1;
 476   if(off + n > MAXFILE*BSIZE)
 477     return -1;
 478 
 479   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
 480     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 481     m = min(n - tot, BSIZE - off%BSIZE);
 482     memmove(bp->data + off%BSIZE, src, m);
 483     log_write(bp);
 484     brelse(bp);
 485   }
 486 
 487   if(n > 0 && off > ip->size){
 488     ip->size = off;
 489     iupdate(ip);
 490   }
 491   return n;
 492 }
 493 
 494 //PAGEBREAK!
 495 // Directories
 496 
 497 int
 498 namecmp(const char *s, const char *t)
 499 {
 500   return strncmp(s, t, DIRSIZ);
 501 }
 502 
 503 // Look for a directory entry in a directory.
 504 // If found, set *poff to byte offset of entry.
 505 struct inode*
 506 dirlookup(struct inode *dp, char *name, uint *poff)
 507 {
 508   uint off, inum;
 509   struct dirent de;
 510 
 511   if(dp->type != T_DIR)
 512     panic("dirlookup not DIR");
 513 
 514   for(off = 0; off < dp->size; off += sizeof(de)){
 515     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 516       panic("dirlink read");
 517     if(de.inum == 0)
 518       continue;
 519     if(namecmp(name, de.name) == 0){
 520       // entry matches path element
 521       if(poff)
 522         *poff = off;
 523       inum = de.inum;
 524       return iget(dp->dev, inum);
 525     }
 526   }
 527 
 528   return 0;
 529 }
 530 
 531 // Write a new directory entry (name, inum) into the directory dp.
 532 int
 533 dirlink(struct inode *dp, char *name, uint inum)
 534 {
 535   int off;
 536   struct dirent de;
 537   struct inode *ip;
 538 
 539   // Check that name is not present.
 540   if((ip = dirlookup(dp, name, 0)) != 0){
 541     iput(ip);
 542     return -1;
 543   }
 544 
 545   // Look for an empty dirent.
 546   for(off = 0; off < dp->size; off += sizeof(de)){
 547     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 548       panic("dirlink read");
 549     if(de.inum == 0)
 550       break;
 551   }
 552 
 553   strncpy(de.name, name, DIRSIZ);
 554   de.inum = inum;
 555   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 556     panic("dirlink");
 557   
 558   return 0;
 559 }
 560 
 561 //PAGEBREAK!
 562 // Paths
 563 
 564 // Copy the next path element from path into name.
 565 // Return a pointer to the element following the copied one.
 566 // The returned path has no leading slashes,
 567 // so the caller can check *path=='\0' to see if the name is the last one.
 568 // If no name to remove, return 0.
 569 //
 570 // Examples:
 571 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
 572 //   skipelem("///a//bb", name) = "bb", setting name = "a"
 573 //   skipelem("a", name) = "", setting name = "a"
 574 //   skipelem("", name) = skipelem("////", name) = 0
 575 //
 576 static char*
 577 skipelem(char *path, char *name)
 578 {
 579   char *s;
 580   int len;
 581 
 582   while(*path == '/')
 583     path++;
 584   if(*path == 0)
 585     return 0;
 586   s = path;
 587   while(*path != '/' && *path != 0)
 588     path++;
 589   len = path - s;
 590   if(len >= DIRSIZ)
 591     memmove(name, s, DIRSIZ);
 592   else {
 593     memmove(name, s, len);
 594     name[len] = 0;
 595   }
 596   while(*path == '/')
 597     path++;
 598   return path;
 599 }
 600 
 601 // Look up and return the inode for a path name.
 602 // If parent != 0, return the inode for the parent and copy the final
 603 // path element into name, which must have room for DIRSIZ bytes.
 604 // Must be called inside a transaction since it calls iput().
 605 static struct inode*
 606 namex(char *path, int nameiparent, char *name)
 607 {
 608   struct inode *ip, *next;
 609 
 610   if(*path == '/')
 611     ip = iget(ROOTDEV, ROOTINO);
 612   else
 613     ip = idup(proc->cwd);
 614 
 615   while((path = skipelem(path, name)) != 0){
 616     ilock(ip);
 617     if(ip->type != T_DIR){
 618       iunlockput(ip);
 619       return 0;
 620     }
 621     if(nameiparent && *path == '\0'){
 622       // Stop one level early.
 623       iunlock(ip);
 624       return ip;
 625     }
 626     if((next = dirlookup(ip, name, 0)) == 0){
 627       iunlockput(ip);
 628       return 0;
 629     }
 630     iunlockput(ip);
 631     ip = next;
 632   }
 633   if(nameiparent){
 634     iput(ip);
 635     return 0;
 636   }
 637   return ip;
 638 }
 639 
 640 struct inode*
 641 namei(char *path)
 642 {
 643   char name[DIRSIZ];
 644   return namex(path, 0, name);
 645 }
 646 
 647 struct inode*
 648 nameiparent(char *path, char *name)
 649 {
 650   return namex(path, 1, name);
 651 }

/* [<][>][^][v][top][bottom][index][help] */