root/fs.c

/* [previous][next][first][last][top][bottom][index][help]  */

DEFINITIONS

This source file includes following definitions.
  1. readsb
  2. bzero
  3. balloc
  4. bfree
  5. iinit
  6. ialloc
  7. iupdate
  8. iget
  9. idup
  10. ilock
  11. iunlock
  12. iput
  13. iunlockput
  14. bmap
  15. itrunc
  16. stati
  17. readi
  18. writei
  19. namecmp
  20. dirlookup
  21. dirlink
  22. skipelem
  23. namex
  24. namei
  25. nameiparent

   1 // File system implementation.  Five layers:
   2 //   + Blocks: allocator for raw disk blocks.
   3 //   + Log: crash recovery for multi-step updates.
   4 //   + Files: inode allocator, reading, writing, metadata.
   5 //   + Directories: inode with special contents (list of other inodes!)
   6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
   7 //
   8 // This file contains the low-level file system manipulation
   9 // routines.  The (higher-level) system call implementations
  10 // are in sysfile.c.
  11 
  12 #include "types.h"
  13 #include "defs.h"
  14 #include "param.h"
  15 #include "stat.h"
  16 #include "mmu.h"
  17 #include "proc.h"
  18 #include "spinlock.h"
  19 #include "sleeplock.h"
  20 #include "fs.h"
  21 #include "buf.h"
  22 #include "file.h"
  23 
  24 #define min(a, b) ((a) < (b) ? (a) : (b))
  25 static void itrunc(struct inode*);
  26 // there should be one superblock per disk device, but we run with
  27 // only one device
  28 struct superblock sb; 
  29 
  30 // Read the super block.
  31 void
  32 readsb(int dev, struct superblock *sb)
     /* [previous][next][first][last][top][bottom][index][help]  */
  33 {
  34   struct buf *bp;
  35 
  36   bp = bread(dev, 1);
  37   memmove(sb, bp->data, sizeof(*sb));
  38   brelse(bp);
  39 }
  40 
  41 // Zero a block.
  42 static void
  43 bzero(int dev, int bno)
     /* [previous][next][first][last][top][bottom][index][help]  */
  44 {
  45   struct buf *bp;
  46 
  47   bp = bread(dev, bno);
  48   memset(bp->data, 0, BSIZE);
  49   log_write(bp);
  50   brelse(bp);
  51 }
  52 
  53 // Blocks.
  54 
  55 // Allocate a zeroed disk block.
  56 static uint
  57 balloc(uint dev)
     /* [previous][next][first][last][top][bottom][index][help]  */
  58 {
  59   int b, bi, m;
  60   struct buf *bp;
  61 
  62   bp = 0;
  63   for(b = 0; b < sb.size; b += BPB){
  64     bp = bread(dev, BBLOCK(b, sb));
  65     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
  66       m = 1 << (bi % 8);
  67       if((bp->data[bi/8] & m) == 0){  // Is block free?
  68         bp->data[bi/8] |= m;  // Mark block in use.
  69         log_write(bp);
  70         brelse(bp);
  71         bzero(dev, b + bi);
  72         return b + bi;
  73       }
  74     }
  75     brelse(bp);
  76   }
  77   panic("balloc: out of blocks");
  78 }
  79 
  80 // Free a disk block.
  81 static void
  82 bfree(int dev, uint b)
     /* [previous][next][first][last][top][bottom][index][help]  */
  83 {
  84   struct buf *bp;
  85   int bi, m;
  86 
  87   bp = bread(dev, BBLOCK(b, sb));
  88   bi = b % BPB;
  89   m = 1 << (bi % 8);
  90   if((bp->data[bi/8] & m) == 0)
  91     panic("freeing free block");
  92   bp->data[bi/8] &= ~m;
  93   log_write(bp);
  94   brelse(bp);
  95 }
  96 
  97 // Inodes.
  98 //
  99 // An inode describes a single unnamed file.
 100 // The inode disk structure holds metadata: the file's type,
 101 // its size, the number of links referring to it, and the
 102 // list of blocks holding the file's content.
 103 //
 104 // The inodes are laid out sequentially on disk at
 105 // sb.startinode. Each inode has a number, indicating its
 106 // position on the disk.
 107 //
 108 // The kernel keeps a cache of in-use inodes in memory
 109 // to provide a place for synchronizing access
 110 // to inodes used by multiple processes. The cached
 111 // inodes include book-keeping information that is
 112 // not stored on disk: ip->ref and ip->valid.
 113 //
 114 // An inode and its in-memory representation go through a
 115 // sequence of states before they can be used by the
 116 // rest of the file system code.
 117 //
 118 // * Allocation: an inode is allocated if its type (on disk)
 119 //   is non-zero. ialloc() allocates, and iput() frees if
 120 //   the reference and link counts have fallen to zero.
 121 //
 122 // * Referencing in cache: an entry in the inode cache
 123 //   is free if ip->ref is zero. Otherwise ip->ref tracks
 124 //   the number of in-memory pointers to the entry (open
 125 //   files and current directories). iget() finds or
 126 //   creates a cache entry and increments its ref; iput()
 127 //   decrements ref.
 128 //
 129 // * Valid: the information (type, size, &c) in an inode
 130 //   cache entry is only correct when ip->valid is 1.
 131 //   ilock() reads the inode from
 132 //   the disk and sets ip->valid, while iput() clears
 133 //   ip->valid if ip->ref has fallen to zero.
 134 //
 135 // * Locked: file system code may only examine and modify
 136 //   the information in an inode and its content if it
 137 //   has first locked the inode.
 138 //
 139 // Thus a typical sequence is:
 140 //   ip = iget(dev, inum)
 141 //   ilock(ip)
 142 //   ... examine and modify ip->xxx ...
 143 //   iunlock(ip)
 144 //   iput(ip)
 145 //
 146 // ilock() is separate from iget() so that system calls can
 147 // get a long-term reference to an inode (as for an open file)
 148 // and only lock it for short periods (e.g., in read()).
 149 // The separation also helps avoid deadlock and races during
 150 // pathname lookup. iget() increments ip->ref so that the inode
 151 // stays cached and pointers to it remain valid.
 152 //
 153 // Many internal file system functions expect the caller to
 154 // have locked the inodes involved; this lets callers create
 155 // multi-step atomic operations.
 156 //
 157 // The icache.lock spin-lock protects the allocation of icache
 158 // entries. Since ip->ref indicates whether an entry is free,
 159 // and ip->dev and ip->inum indicate which i-node an entry
 160 // holds, one must hold icache.lock while using any of those fields.
 161 //
 162 // An ip->lock sleep-lock protects all ip-> fields other than ref,
 163 // dev, and inum.  One must hold ip->lock in order to
 164 // read or write that inode's ip->valid, ip->size, ip->type, &c.
 165 
 166 struct {
 167   struct spinlock lock;
 168   struct inode inode[NINODE];
 169 } icache;
 170 
 171 void
 172 iinit(int dev)
     /* [previous][next][first][last][top][bottom][index][help]  */
 173 {
 174   int i = 0;
 175   
 176   initlock(&icache.lock, "icache");
 177   for(i = 0; i < NINODE; i++) {
 178     initsleeplock(&icache.inode[i].lock, "inode");
 179   }
 180 
 181   readsb(dev, &sb);
 182   cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\
 183  inodestart %d bmap start %d\n", sb.size, sb.nblocks,
 184           sb.ninodes, sb.nlog, sb.logstart, sb.inodestart,
 185           sb.bmapstart);
 186 }
 187 
 188 static struct inode* iget(uint dev, uint inum);
 189 
 190 //PAGEBREAK!
 191 // Allocate an inode on device dev.
 192 // Mark it as allocated by  giving it type type.
 193 // Returns an unlocked but allocated and referenced inode.
 194 struct inode*
 195 ialloc(uint dev, short type)
     /* [previous][next][first][last][top][bottom][index][help]  */
 196 {
 197   int inum;
 198   struct buf *bp;
 199   struct dinode *dip;
 200 
 201   for(inum = 1; inum < sb.ninodes; inum++){
 202     bp = bread(dev, IBLOCK(inum, sb));
 203     dip = (struct dinode*)bp->data + inum%IPB;
 204     if(dip->type == 0){  // a free inode
 205       memset(dip, 0, sizeof(*dip));
 206       dip->type = type;
 207       log_write(bp);   // mark it allocated on the disk
 208       brelse(bp);
 209       return iget(dev, inum);
 210     }
 211     brelse(bp);
 212   }
 213   panic("ialloc: no inodes");
 214 }
 215 
 216 // Copy a modified in-memory inode to disk.
 217 // Must be called after every change to an ip->xxx field
 218 // that lives on disk, since i-node cache is write-through.
 219 // Caller must hold ip->lock.
 220 void
 221 iupdate(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 222 {
 223   struct buf *bp;
 224   struct dinode *dip;
 225 
 226   bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 227   dip = (struct dinode*)bp->data + ip->inum%IPB;
 228   dip->type = ip->type;
 229   dip->major = ip->major;
 230   dip->minor = ip->minor;
 231   dip->nlink = ip->nlink;
 232   dip->size = ip->size;
 233   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
 234   log_write(bp);
 235   brelse(bp);
 236 }
 237 
 238 // Find the inode with number inum on device dev
 239 // and return the in-memory copy. Does not lock
 240 // the inode and does not read it from disk.
 241 static struct inode*
 242 iget(uint dev, uint inum)
     /* [previous][next][first][last][top][bottom][index][help]  */
 243 {
 244   struct inode *ip, *empty;
 245 
 246   acquire(&icache.lock);
 247 
 248   // Is the inode already cached?
 249   empty = 0;
 250   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
 251     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
 252       ip->ref++;
 253       release(&icache.lock);
 254       return ip;
 255     }
 256     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
 257       empty = ip;
 258   }
 259 
 260   // Recycle an inode cache entry.
 261   if(empty == 0)
 262     panic("iget: no inodes");
 263 
 264   ip = empty;
 265   ip->dev = dev;
 266   ip->inum = inum;
 267   ip->ref = 1;
 268   ip->valid = 0;
 269   release(&icache.lock);
 270 
 271   return ip;
 272 }
 273 
 274 // Increment reference count for ip.
 275 // Returns ip to enable ip = idup(ip1) idiom.
 276 struct inode*
 277 idup(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 278 {
 279   acquire(&icache.lock);
 280   ip->ref++;
 281   release(&icache.lock);
 282   return ip;
 283 }
 284 
 285 // Lock the given inode.
 286 // Reads the inode from disk if necessary.
 287 void
 288 ilock(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 289 {
 290   struct buf *bp;
 291   struct dinode *dip;
 292 
 293   if(ip == 0 || ip->ref < 1)
 294     panic("ilock");
 295 
 296   acquiresleep(&ip->lock);
 297 
 298   if(ip->valid == 0){
 299     bp = bread(ip->dev, IBLOCK(ip->inum, sb));
 300     dip = (struct dinode*)bp->data + ip->inum%IPB;
 301     ip->type = dip->type;
 302     ip->major = dip->major;
 303     ip->minor = dip->minor;
 304     ip->nlink = dip->nlink;
 305     ip->size = dip->size;
 306     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
 307     brelse(bp);
 308     ip->valid = 1;
 309     if(ip->type == 0)
 310       panic("ilock: no type");
 311   }
 312 }
 313 
 314 // Unlock the given inode.
 315 void
 316 iunlock(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 317 {
 318   if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)
 319     panic("iunlock");
 320 
 321   releasesleep(&ip->lock);
 322 }
 323 
 324 // Drop a reference to an in-memory inode.
 325 // If that was the last reference, the inode cache entry can
 326 // be recycled.
 327 // If that was the last reference and the inode has no links
 328 // to it, free the inode (and its content) on disk.
 329 // All calls to iput() must be inside a transaction in
 330 // case it has to free the inode.
 331 void
 332 iput(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 333 {
 334   acquiresleep(&ip->lock);
 335   if(ip->valid && ip->nlink == 0){
 336     acquire(&icache.lock);
 337     int r = ip->ref;
 338     release(&icache.lock);
 339     if(r == 1){
 340       // inode has no links and no other references: truncate and free.
 341       itrunc(ip);
 342       ip->type = 0;
 343       iupdate(ip);
 344       ip->valid = 0;
 345     }
 346   }
 347   releasesleep(&ip->lock);
 348 
 349   acquire(&icache.lock);
 350   ip->ref--;
 351   release(&icache.lock);
 352 }
 353 
 354 // Common idiom: unlock, then put.
 355 void
 356 iunlockput(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 357 {
 358   iunlock(ip);
 359   iput(ip);
 360 }
 361 
 362 //PAGEBREAK!
 363 // Inode content
 364 //
 365 // The content (data) associated with each inode is stored
 366 // in blocks on the disk. The first NDIRECT block numbers
 367 // are listed in ip->addrs[].  The next NINDIRECT blocks are
 368 // listed in block ip->addrs[NDIRECT].
 369 
 370 // Return the disk block address of the nth block in inode ip.
 371 // If there is no such block, bmap allocates one.
 372 static uint
 373 bmap(struct inode *ip, uint bn)
     /* [previous][next][first][last][top][bottom][index][help]  */
 374 {
 375   uint addr, *a;
 376   struct buf *bp;
 377 
 378   if(bn < NDIRECT){
 379     if((addr = ip->addrs[bn]) == 0)
 380       ip->addrs[bn] = addr = balloc(ip->dev);
 381     return addr;
 382   }
 383   bn -= NDIRECT;
 384 
 385   if(bn < NINDIRECT){
 386     // Load indirect block, allocating if necessary.
 387     if((addr = ip->addrs[NDIRECT]) == 0)
 388       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
 389     bp = bread(ip->dev, addr);
 390     a = (uint*)bp->data;
 391     if((addr = a[bn]) == 0){
 392       a[bn] = addr = balloc(ip->dev);
 393       log_write(bp);
 394     }
 395     brelse(bp);
 396     return addr;
 397   }
 398 
 399   panic("bmap: out of range");
 400 }
 401 
 402 // Truncate inode (discard contents).
 403 // Only called when the inode has no links
 404 // to it (no directory entries referring to it)
 405 // and has no in-memory reference to it (is
 406 // not an open file or current directory).
 407 static void
 408 itrunc(struct inode *ip)
     /* [previous][next][first][last][top][bottom][index][help]  */
 409 {
 410   int i, j;
 411   struct buf *bp;
 412   uint *a;
 413 
 414   for(i = 0; i < NDIRECT; i++){
 415     if(ip->addrs[i]){
 416       bfree(ip->dev, ip->addrs[i]);
 417       ip->addrs[i] = 0;
 418     }
 419   }
 420 
 421   if(ip->addrs[NDIRECT]){
 422     bp = bread(ip->dev, ip->addrs[NDIRECT]);
 423     a = (uint*)bp->data;
 424     for(j = 0; j < NINDIRECT; j++){
 425       if(a[j])
 426         bfree(ip->dev, a[j]);
 427     }
 428     brelse(bp);
 429     bfree(ip->dev, ip->addrs[NDIRECT]);
 430     ip->addrs[NDIRECT] = 0;
 431   }
 432 
 433   ip->size = 0;
 434   iupdate(ip);
 435 }
 436 
 437 // Copy stat information from inode.
 438 // Caller must hold ip->lock.
 439 void
 440 stati(struct inode *ip, struct stat *st)
     /* [previous][next][first][last][top][bottom][index][help]  */
 441 {
 442   st->dev = ip->dev;
 443   st->ino = ip->inum;
 444   st->type = ip->type;
 445   st->nlink = ip->nlink;
 446   st->size = ip->size;
 447 }
 448 
 449 //PAGEBREAK!
 450 // Read data from inode.
 451 // Caller must hold ip->lock.
 452 int
 453 readi(struct inode *ip, char *dst, uint off, uint n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 454 {
 455   uint tot, m;
 456   struct buf *bp;
 457 
 458   if(ip->type == T_DEV){
 459     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
 460       return -1;
 461     return devsw[ip->major].read(ip, dst, n);
 462   }
 463 
 464   if(off > ip->size || off + n < off)
 465     return -1;
 466   if(off + n > ip->size)
 467     n = ip->size - off;
 468 
 469   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
 470     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 471     m = min(n - tot, BSIZE - off%BSIZE);
 472     memmove(dst, bp->data + off%BSIZE, m);
 473     brelse(bp);
 474   }
 475   return n;
 476 }
 477 
 478 // PAGEBREAK!
 479 // Write data to inode.
 480 // Caller must hold ip->lock.
 481 int
 482 writei(struct inode *ip, char *src, uint off, uint n)
     /* [previous][next][first][last][top][bottom][index][help]  */
 483 {
 484   uint tot, m;
 485   struct buf *bp;
 486 
 487   if(ip->type == T_DEV){
 488     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
 489       return -1;
 490     return devsw[ip->major].write(ip, src, n);
 491   }
 492 
 493   if(off > ip->size || off + n < off)
 494     return -1;
 495   if(off + n > MAXFILE*BSIZE)
 496     return -1;
 497 
 498   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
 499     bp = bread(ip->dev, bmap(ip, off/BSIZE));
 500     m = min(n - tot, BSIZE - off%BSIZE);
 501     memmove(bp->data + off%BSIZE, src, m);
 502     log_write(bp);
 503     brelse(bp);
 504   }
 505 
 506   if(n > 0 && off > ip->size){
 507     ip->size = off;
 508     iupdate(ip);
 509   }
 510   return n;
 511 }
 512 
 513 //PAGEBREAK!
 514 // Directories
 515 
 516 int
 517 namecmp(const char *s, const char *t)
     /* [previous][next][first][last][top][bottom][index][help]  */
 518 {
 519   return strncmp(s, t, DIRSIZ);
 520 }
 521 
 522 // Look for a directory entry in a directory.
 523 // If found, set *poff to byte offset of entry.
 524 struct inode*
 525 dirlookup(struct inode *dp, char *name, uint *poff)
     /* [previous][next][first][last][top][bottom][index][help]  */
 526 {
 527   uint off, inum;
 528   struct dirent de;
 529 
 530   if(dp->type != T_DIR)
 531     panic("dirlookup not DIR");
 532 
 533   for(off = 0; off < dp->size; off += sizeof(de)){
 534     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 535       panic("dirlookup read");
 536     if(de.inum == 0)
 537       continue;
 538     if(namecmp(name, de.name) == 0){
 539       // entry matches path element
 540       if(poff)
 541         *poff = off;
 542       inum = de.inum;
 543       return iget(dp->dev, inum);
 544     }
 545   }
 546 
 547   return 0;
 548 }
 549 
 550 // Write a new directory entry (name, inum) into the directory dp.
 551 int
 552 dirlink(struct inode *dp, char *name, uint inum)
     /* [previous][next][first][last][top][bottom][index][help]  */
 553 {
 554   int off;
 555   struct dirent de;
 556   struct inode *ip;
 557 
 558   // Check that name is not present.
 559   if((ip = dirlookup(dp, name, 0)) != 0){
 560     iput(ip);
 561     return -1;
 562   }
 563 
 564   // Look for an empty dirent.
 565   for(off = 0; off < dp->size; off += sizeof(de)){
 566     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 567       panic("dirlink read");
 568     if(de.inum == 0)
 569       break;
 570   }
 571 
 572   strncpy(de.name, name, DIRSIZ);
 573   de.inum = inum;
 574   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
 575     panic("dirlink");
 576 
 577   return 0;
 578 }
 579 
 580 //PAGEBREAK!
 581 // Paths
 582 
 583 // Copy the next path element from path into name.
 584 // Return a pointer to the element following the copied one.
 585 // The returned path has no leading slashes,
 586 // so the caller can check *path=='\0' to see if the name is the last one.
 587 // If no name to remove, return 0.
 588 //
 589 // Examples:
 590 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
 591 //   skipelem("///a//bb", name) = "bb", setting name = "a"
 592 //   skipelem("a", name) = "", setting name = "a"
 593 //   skipelem("", name) = skipelem("////", name) = 0
 594 //
 595 static char*
 596 skipelem(char *path, char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
 597 {
 598   char *s;
 599   int len;
 600 
 601   while(*path == '/')
 602     path++;
 603   if(*path == 0)
 604     return 0;
 605   s = path;
 606   while(*path != '/' && *path != 0)
 607     path++;
 608   len = path - s;
 609   if(len >= DIRSIZ)
 610     memmove(name, s, DIRSIZ);
 611   else {
 612     memmove(name, s, len);
 613     name[len] = 0;
 614   }
 615   while(*path == '/')
 616     path++;
 617   return path;
 618 }
 619 
 620 // Look up and return the inode for a path name.
 621 // If parent != 0, return the inode for the parent and copy the final
 622 // path element into name, which must have room for DIRSIZ bytes.
 623 // Must be called inside a transaction since it calls iput().
 624 static struct inode*
 625 namex(char *path, int nameiparent, char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
 626 {
 627   struct inode *ip, *next;
 628 
 629   if(*path == '/')
 630     ip = iget(ROOTDEV, ROOTINO);
 631   else
 632     ip = idup(myproc()->cwd);
 633 
 634   while((path = skipelem(path, name)) != 0){
 635     ilock(ip);
 636     if(ip->type != T_DIR){
 637       iunlockput(ip);
 638       return 0;
 639     }
 640     if(nameiparent && *path == '\0'){
 641       // Stop one level early.
 642       iunlock(ip);
 643       return ip;
 644     }
 645     if((next = dirlookup(ip, name, 0)) == 0){
 646       iunlockput(ip);
 647       return 0;
 648     }
 649     iunlockput(ip);
 650     ip = next;
 651   }
 652   if(nameiparent){
 653     iput(ip);
 654     return 0;
 655   }
 656   return ip;
 657 }
 658 
 659 struct inode*
 660 namei(char *path)
     /* [previous][next][first][last][top][bottom][index][help]  */
 661 {
 662   char name[DIRSIZ];
 663   return namex(path, 0, name);
 664 }
 665 
 666 struct inode*
 667 nameiparent(char *path, char *name)
     /* [previous][next][first][last][top][bottom][index][help]  */
 668 {
 669   return namex(path, 1, name);
 670 }

/* [previous][next][first][last][top][bottom][index][help]  */