Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Apr 2018 21:19:24 +0000 (14:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 4 Apr 2018 21:19:24 +0000 (14:19 -0700)
Pull ext4 updates from Ted Ts'o:
 "Cleanups and bugfixes for ext4, including some fixes to make ext4 more
  robust against maliciously crafted file system images.

  (I still don't recommend that container folks hold any delusions that
  mounting arbitrary images that can be crafted by malicious attackers
  should be considered a sane thing to do, though!)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (29 commits)
  ext4: force revalidation of directory pointer after seekdir(2)
  ext4: add extra checks to ext4_xattr_block_get()
  ext4: add bounds checking to ext4_xattr_find_entry()
  ext4: move call to ext4_error() into ext4_xattr_check_block()
  ext4: don't show data=<mode> option if defaulted
  ext4: omit init_itable=n in procfs when disabled
  ext4: show more binary mount options in procfs
  ext4: simplify kobject usage
  ext4: remove unused parameters in sysfs code
  ext4: null out kobject* during sysfs cleanup
  ext4: don't allow r/w mounts if metadata blocks overlap the superblock
  ext4: always initialize the crc32c checksum driver
  ext4: fail ext4_iget for root directory if unallocated
  ext4: limit xattr size to INT_MAX
  ext4: add validity checks for bitmap block numbers
  ext4: fix comments in ext4_swap_extents()
  ext4: use generic_writepages instead of __writepage/write_cache_pages
  ext4: don't complain about incorrect features when probing
  ext4: remove EXT4_STATE_DIOREAD_LOCK flag
  ext4: fix offset overflow on 32-bit archs in ext4_iomap_begin()
  ...

16 files changed:
fs/ext4/balloc.c
fs/ext4/dir.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/move_extent.c
fs/ext4/super.c
fs/ext4/sysfs.c
fs/ext4/xattr.c
fs/ext4/xattr.h
fs/jbd2/journal.c
fs/jbd2/recovery.c
include/trace/events/ext4.h

index f9b3e0a83526a333241e8b5410103b999aae1e23..a33d8fb1bf2a7fc3ac13459e526fb9ec88373db3 100644 (file)
@@ -243,8 +243,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
         */
        ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
                             sb->s_blocksize * 8, bh->b_data);
-       ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
-       ext4_group_desc_csum_set(sb, block_group, gdp);
        return 0;
 }
 
@@ -340,20 +338,25 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
        /* check whether block bitmap block number is set */
        blk = ext4_block_bitmap(sb, desc);
        offset = blk - group_first_block;
-       if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+           !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
                /* bad block bitmap */
                return blk;
 
        /* check whether the inode bitmap block number is set */
        blk = ext4_inode_bitmap(sb, desc);
        offset = blk - group_first_block;
-       if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+           !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
                /* bad block bitmap */
                return blk;
 
        /* check whether the inode table block number is set */
        blk = ext4_inode_table(sb, desc);
        offset = blk - group_first_block;
+       if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+           EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize)
+               return blk;
        next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
                        EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
                        EXT4_B2C(sbi, offset));
@@ -419,6 +422,7 @@ struct buffer_head *
 ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc *desc;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct buffer_head *bh;
        ext4_fsblk_t bitmap_blk;
        int err;
@@ -427,6 +431,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        if (!desc)
                return ERR_PTR(-EFSCORRUPTED);
        bitmap_blk = ext4_block_bitmap(sb, desc);
+       if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+           (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
+               ext4_error(sb, "Invalid block bitmap block %llu in "
+                          "block_group %u", bitmap_blk, block_group);
+               return ERR_PTR(-EFSCORRUPTED);
+       }
        bh = sb_getblk(sb, bitmap_blk);
        if (unlikely(!bh)) {
                ext4_error(sb, "Cannot get buffer for block bitmap - "
@@ -448,6 +458,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
                err = ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
+               set_buffer_verified(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
                if (err) {
index da87cf757f7defc1050299f67257517c7b9e22a5..e2902d394f1badb78bd4c96cbb45d3814fb74265 100644 (file)
@@ -365,13 +365,15 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
        struct inode *inode = file->f_mapping->host;
        int dx_dir = is_dx_dir(inode);
-       loff_t htree_max = ext4_get_htree_eof(file);
+       loff_t ret, htree_max = ext4_get_htree_eof(file);
 
        if (likely(dx_dir))
-               return generic_file_llseek_size(file, offset, whence,
+               ret = generic_file_llseek_size(file, offset, whence,
                                                    htree_max, htree_max);
        else
-               return ext4_llseek(file, offset, whence);
+               ret = ext4_llseek(file, offset, whence);
+       file->f_version = inode_peek_iversion(inode) - 1;
+       return ret;
 }
 
 /*
index 3241475a1733492e8c88a58dfd9668ad330bbcb9..a42e71203e53c0893c0d710e242b6b810b03381a 100644 (file)
@@ -1522,8 +1522,6 @@ enum {
        EXT4_STATE_EXT_MIGRATE,         /* Inode is migrating */
        EXT4_STATE_DIO_UNWRITTEN,       /* need convert on dio done*/
        EXT4_STATE_NEWENTRY,            /* File just added to dir */
-       EXT4_STATE_DIOREAD_LOCK,        /* Disable support for dio read
-                                          nolocking */
        EXT4_STATE_MAY_INLINE_DATA,     /* may have in-inode data */
        EXT4_STATE_EXT_PRECACHED,       /* extents have been precached */
        EXT4_STATE_LUSTRE_EA_INODE,     /* Lustre-style ea_inode */
@@ -3181,21 +3179,6 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
        set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }
 
-/*
- * Disable DIO read nolock optimization, so new dioreaders will be forced
- * to grab i_mutex
- */
-static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
-{
-       ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
-       smp_mb();
-}
-static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
-{
-       smp_mb();
-       ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
-}
-
 #define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
 
 /* For ioend & aio unwritten conversion wait queues */
index 2d593201cf7a0b012d7a537ac74186cc3c7b0eff..7c70b08d104c002e129415d197728d2e6f014e35 100644 (file)
@@ -166,13 +166,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
        might_sleep();
 
        if (ext4_handle_valid(handle)) {
-               struct super_block *sb;
-
-               sb = handle->h_transaction->t_journal->j_private;
-               if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) {
-                       jbd2_journal_abort_handle(handle);
-                       return -EIO;
-               }
                err = jbd2_journal_get_write_access(handle, bh);
                if (err)
                        ext4_journal_abort_handle(where, line, __func__, bh,
index 054416e9d827129d335561999c6c1988b1993df2..0a7315961bac6ebbca0c2bfe83cc3aba8fc5c807 100644 (file)
@@ -4796,7 +4796,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
        /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        /* Preallocate the range including the unaligned edges */
@@ -4807,7 +4806,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                 round_down(offset, 1 << blkbits)) >> blkbits,
                                new_size, flags);
                if (ret)
-                       goto out_dio;
+                       goto out_mutex;
 
        }
 
@@ -4824,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                ret = ext4_update_disksize_before_punch(inode, offset, len);
                if (ret) {
                        up_write(&EXT4_I(inode)->i_mmap_sem);
-                       goto out_dio;
+                       goto out_mutex;
                }
                /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
@@ -4834,10 +4833,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                                             flags);
                up_write(&EXT4_I(inode)->i_mmap_sem);
                if (ret)
-                       goto out_dio;
+                       goto out_mutex;
        }
        if (!partial_begin && !partial_end)
-               goto out_dio;
+               goto out_mutex;
 
        /*
         * In worst case we have to writeout two nonadjacent unwritten
@@ -4850,7 +4849,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                ext4_std_error(inode->i_sb, ret);
-               goto out_dio;
+               goto out_mutex;
        }
 
        inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -4875,8 +4874,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                ext4_handle_sync(handle);
 
        ext4_journal_stop(handle);
-out_dio:
-       ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        inode_unlock(inode);
        return ret;
@@ -4964,11 +4961,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        }
 
        /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
-       ext4_inode_resume_unlocked_dio(inode);
        if (ret)
                goto out;
 
@@ -5485,7 +5480,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        }
 
        /* Wait for existing dio to complete */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        /*
@@ -5562,7 +5556,6 @@ out_stop:
        ext4_journal_stop(handle);
 out_mmap:
        up_write(&EXT4_I(inode)->i_mmap_sem);
-       ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        inode_unlock(inode);
        return ret;
@@ -5635,7 +5628,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
        }
 
        /* Wait for existing dio to complete */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        /*
@@ -5737,7 +5729,6 @@ out_stop:
        ext4_journal_stop(handle);
 out_mmap:
        up_write(&EXT4_I(inode)->i_mmap_sem);
-       ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        inode_unlock(inode);
        return ret;
@@ -5751,7 +5742,7 @@ out_mutex:
  * @lblk1:     Start block for first inode
  * @lblk2:     Start block for second inode
  * @count:     Number of blocks to swap
- * @mark_unwritten: Mark second inode's extents as unwritten after swap
+ * @unwritten: Mark second inode's extents as unwritten after swap
  * @erp:       Pointer to save error value
  *
  * This helper routine does exactly what is promise "swap extents". All other
@@ -5765,7 +5756,7 @@ out_mutex:
  */
 int
 ext4_swap_extents(handle_t *handle, struct inode *inode1,
-                    struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+                 struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
                  ext4_lblk_t count, int unwritten, int *erp)
 {
        struct ext4_ext_path *path1 = NULL;
index 7830d28df331d8eb69374851ba59163c36f0eaa7..df92e3ec9913d8ab19f9e763e9bfbcf88d824f27 100644 (file)
@@ -66,44 +66,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
                memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 }
 
-/* Initializes an uninitialized inode bitmap */
-static int ext4_init_inode_bitmap(struct super_block *sb,
-                                      struct buffer_head *bh,
-                                      ext4_group_t block_group,
-                                      struct ext4_group_desc *gdp)
-{
-       struct ext4_group_info *grp;
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       J_ASSERT_BH(bh, buffer_locked(bh));
-
-       /* If checksum is bad mark all blocks and inodes use to prevent
-        * allocation, essentially implementing a per-group read-only flag. */
-       if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-               grp = ext4_get_group_info(sb, block_group);
-               if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-                       percpu_counter_sub(&sbi->s_freeclusters_counter,
-                                          grp->bb_free);
-               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-               if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-                       int count;
-                       count = ext4_free_inodes_count(sb, gdp);
-                       percpu_counter_sub(&sbi->s_freeinodes_counter,
-                                          count);
-               }
-               set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-               return -EFSBADCRC;
-       }
-
-       memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
-       ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
-                       bh->b_data);
-       ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
-                                  EXT4_INODES_PER_GROUP(sb) / 8);
-       ext4_group_desc_csum_set(sb, block_group, gdp);
-
-       return 0;
-}
-
 void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
 {
        if (uptodate) {
@@ -160,6 +122,7 @@ static struct buffer_head *
 ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
        struct ext4_group_desc *desc;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct buffer_head *bh = NULL;
        ext4_fsblk_t bitmap_blk;
        int err;
@@ -169,6 +132,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                return ERR_PTR(-EFSCORRUPTED);
 
        bitmap_blk = ext4_inode_bitmap(sb, desc);
+       if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+           (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
+               ext4_error(sb, "Invalid inode bitmap blk %llu in "
+                          "block_group %u", bitmap_blk, block_group);
+               return ERR_PTR(-EFSCORRUPTED);
+       }
        bh = sb_getblk(sb, bitmap_blk);
        if (unlikely(!bh)) {
                ext4_error(sb, "Cannot read inode bitmap - "
@@ -187,17 +156,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 
        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-               err = ext4_init_inode_bitmap(sb, bh, block_group, desc);
+               memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+               ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
+                                    sb->s_blocksize * 8, bh->b_data);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
                set_buffer_verified(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
-               if (err) {
-                       ext4_error(sb, "Failed to init inode bitmap for group "
-                                  "%u: %d", block_group, err);
-                       goto out;
-               }
                return bh;
        }
        ext4_unlock_group(sb, block_group);
index c94780075b04f752b1cfdec9ccf6f631bd874116..18aa2ef963ad94de80b4df92f219abc630ff4eee 100644 (file)
@@ -2694,15 +2694,6 @@ out:
        return err;
 }
 
-static int __writepage(struct page *page, struct writeback_control *wbc,
-                      void *data)
-{
-       struct address_space *mapping = data;
-       int ret = ext4_writepage(page, wbc);
-       mapping_set_error(mapping, ret);
-       return ret;
-}
-
 static int ext4_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
 {
@@ -2740,11 +2731,7 @@ static int ext4_writepages(struct address_space *mapping,
                goto out_writepages;
 
        if (ext4_should_journal_data(inode)) {
-               struct blk_plug plug;
-
-               blk_start_plug(&plug);
-               ret = write_cache_pages(mapping, wbc, __writepage, mapping);
-               blk_finish_plug(&plug);
+               ret = generic_writepages(mapping, wbc);
                goto out_writepages;
        }
 
@@ -3524,7 +3511,7 @@ retry:
                iomap->flags |= IOMAP_F_DIRTY;
        iomap->bdev = inode->i_sb->s_bdev;
        iomap->dax_dev = sbi->s_daxdev;
-       iomap->offset = first_block << blkbits;
+       iomap->offset = (u64)first_block << blkbits;
        iomap->length = (u64)map.m_len << blkbits;
 
        if (ret == 0) {
@@ -3669,7 +3656,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
        int orphan = 0;
        handle_t *handle;
 
-       if (final_size > inode->i_size) {
+       if (final_size > inode->i_size || final_size > ei->i_disksize) {
                /* Credits for sb + inode write */
                handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
                if (IS_ERR(handle)) {
@@ -3682,7 +3669,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
                        goto out;
                }
                orphan = 1;
-               ei->i_disksize = inode->i_size;
+               ext4_update_i_disksize(inode, inode->i_size);
                ext4_journal_stop(handle);
        }
 
@@ -3789,9 +3776,10 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
                        ext4_orphan_del(handle, inode);
                if (ret > 0) {
                        loff_t end = offset + ret;
-                       if (end > inode->i_size) {
-                               ei->i_disksize = end;
-                               i_size_write(inode, end);
+                       if (end > inode->i_size || end > ei->i_disksize) {
+                               ext4_update_i_disksize(inode, end);
+                               if (end > inode->i_size)
+                                       i_size_write(inode, end);
                                /*
                                 * We're going to return a positive `ret'
                                 * here due to non-zero-length I/O, so there's
@@ -4251,7 +4239,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        }
 
        /* Wait all existing dio workers, newcomers will block on i_mutex */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        /*
@@ -4324,7 +4311,6 @@ out_stop:
        ext4_journal_stop(handle);
 out_dio:
        up_write(&EXT4_I(inode)->i_mmap_sem);
-       ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
        inode_unlock(inode);
        return ret;
@@ -4746,6 +4732,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                goto bad_inode;
        raw_inode = ext4_raw_inode(&iloc);
 
+       if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
+               EXT4_ERROR_INODE(inode, "root inode unallocated");
+               ret = -EFSCORRUPTED;
+               goto bad_inode;
+       }
+
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
                ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
                if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
@@ -5506,9 +5498,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 */
                if (orphan) {
                        if (!ext4_should_journal_data(inode)) {
-                               ext4_inode_block_unlocked_dio(inode);
                                inode_dio_wait(inode);
-                               ext4_inode_resume_unlocked_dio(inode);
                        } else
                                ext4_wait_for_tail_page_commit(inode);
                }
@@ -5999,7 +5989,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                return -EROFS;
 
        /* Wait for all existing dio workers */
-       ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);
 
        /*
@@ -6015,7 +6004,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                err = filemap_write_and_wait(inode->i_mapping);
                if (err < 0) {
                        up_write(&EXT4_I(inode)->i_mmap_sem);
-                       ext4_inode_resume_unlocked_dio(inode);
                        return err;
                }
        }
@@ -6038,7 +6026,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                if (err < 0) {
                        jbd2_journal_unlock_updates(journal);
                        percpu_up_write(&sbi->s_journal_flag_rwsem);
-                       ext4_inode_resume_unlocked_dio(inode);
                        return err;
                }
                ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
@@ -6050,7 +6037,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
        if (val)
                up_write(&EXT4_I(inode)->i_mmap_sem);
-       ext4_inode_resume_unlocked_dio(inode);
 
        /* Finally we can mark the inode as dirty. */
 
index 7e99ad02f1baaca241a2d04c5b01d6d9eba7a12f..a7074115d6f68a6d552660c82af91f6009ee510f 100644 (file)
@@ -124,8 +124,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
        truncate_inode_pages(&inode_bl->i_data, 0);
 
        /* Wait for all existing dio workers */
-       ext4_inode_block_unlocked_dio(inode);
-       ext4_inode_block_unlocked_dio(inode_bl);
        inode_dio_wait(inode);
        inode_dio_wait(inode_bl);
 
@@ -186,8 +184,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
        ext4_double_up_write_data_sem(inode, inode_bl);
 
 journal_err_out:
-       ext4_inode_resume_unlocked_dio(inode);
-       ext4_inode_resume_unlocked_dio(inode_bl);
        unlock_two_nondirectories(inode, inode_bl);
        iput(inode_bl);
        return err;
@@ -481,6 +477,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
                return 0;
 
        ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
+       trace_ext4_shutdown(sb, flags);
 
        switch (flags) {
        case EXT4_GOING_FLAGS_DEFAULT:
@@ -492,15 +489,13 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
                set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
                if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
                        (void) ext4_force_commit(sb);
-                       jbd2_journal_abort(sbi->s_journal, 0);
+                       jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
                }
                break;
        case EXT4_GOING_FLAGS_NOLOGFLUSH:
                set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
-               if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
-                       msleep(100);
-                       jbd2_journal_abort(sbi->s_journal, 0);
-               }
+               if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
+                       jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
                break;
        default:
                return -EINVAL;
index b96e4bd3b3ec42b750fbd23f942c6ca8072c9285..8e17efdcbf118d7218f0f9e68633e5f6639aa582 100644 (file)
@@ -601,8 +601,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
        lock_two_nondirectories(orig_inode, donor_inode);
 
        /* Wait for all existing dio workers */
-       ext4_inode_block_unlocked_dio(orig_inode);
-       ext4_inode_block_unlocked_dio(donor_inode);
        inode_dio_wait(orig_inode);
        inode_dio_wait(donor_inode);
 
@@ -693,8 +691,6 @@ out:
        ext4_ext_drop_refs(path);
        kfree(path);
        ext4_double_up_write_data_sem(orig_inode, donor_inode);
-       ext4_inode_resume_unlocked_dio(orig_inode);
-       ext4_inode_resume_unlocked_dio(donor_inode);
        unlock_two_nondirectories(orig_inode, donor_inode);
 
        return ret;
index 39bf464c35f170c2e77171fc0422997921949883..185f7e61f4cfe00531e71e772a519af005f6bd69 100644 (file)
@@ -101,15 +101,13 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
  *   i_data_sem (rw)
  *
  * truncate:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- *   i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- *   transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ *   i_data_sem (rw)
  *
  * direct IO:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
- *   transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
  *
  * writepages:
  * transaction start -> page lock(s) -> i_data_sem (rw)
@@ -448,6 +446,7 @@ void __ext4_error(struct super_block *sb, const char *function,
        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
                return;
 
+       trace_ext4_error(sb, function, line);
        if (ext4_error_ratelimit(sb)) {
                va_start(args, fmt);
                vaf.fmt = fmt;
@@ -472,6 +471,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return;
 
+       trace_ext4_error(inode->i_sb, function, line);
        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
        es->s_last_error_block = cpu_to_le64(block);
        if (ext4_error_ratelimit(inode->i_sb)) {
@@ -507,6 +507,7 @@ void __ext4_error_file(struct file *file, const char *function,
        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                return;
 
+       trace_ext4_error(inode->i_sb, function, line);
        es = EXT4_SB(inode->i_sb)->s_es;
        es->s_last_error_ino = cpu_to_le32(inode->i_ino);
        if (ext4_error_ratelimit(inode->i_sb)) {
@@ -719,6 +720,7 @@ __acquires(bitlock)
        if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
                return;
 
+       trace_ext4_error(sb, function, line);
        es->s_last_error_ino = cpu_to_le32(ino);
        es->s_last_error_block = cpu_to_le64(block);
        __save_error_info(sb, function, line);
@@ -2019,7 +2021,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
-       int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+       int def_errors, def_mount_opt = sbi->s_def_mount_opt;
        const struct mount_opts *m;
        char sep = nodefs ? '\n' : ',';
 
@@ -2034,7 +2036,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
                if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
                    (m->flags & MOPT_CLEAR_ERR))
                        continue;
-               if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
+               if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
                        continue; /* skip if same as the default */
                if ((want_set &&
                     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
@@ -2068,7 +2070,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
                SEQ_OPTS_PUTS("i_version");
        if (nodefs || sbi->s_stripe)
                SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
-       if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+       if (nodefs || EXT4_MOUNT_DATA_FLAGS &
+                       (sbi->s_mount_opt ^ def_mount_opt)) {
                if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
                        SEQ_OPTS_PUTS("data=journal");
                else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -2081,7 +2084,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
                SEQ_OPTS_PRINT("inode_readahead_blks=%u",
                               sbi->s_inode_readahead_blks);
 
-       if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+       if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
                       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
                SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
        if (nodefs || sbi->s_max_dir_size_kb)
@@ -2333,6 +2336,8 @@ static int ext4_check_descriptors(struct super_block *sb,
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                                 "Block bitmap for group %u overlaps "
                                 "superblock", i);
+                       if (!sb_rdonly(sb))
+                               return 0;
                }
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2345,6 +2350,8 @@ static int ext4_check_descriptors(struct super_block *sb,
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                                 "Inode bitmap for group %u overlaps "
                                 "superblock", i);
+                       if (!sb_rdonly(sb))
+                               return 0;
                }
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2357,6 +2364,8 @@ static int ext4_check_descriptors(struct super_block *sb,
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                                 "Inode table for group %u overlaps "
                                 "superblock", i);
+                       if (!sb_rdonly(sb))
+                               return 0;
                }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
@@ -3490,15 +3499,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
 
        /* Load the checksum driver */
-       if (ext4_has_feature_metadata_csum(sb) ||
-           ext4_has_feature_ea_inode(sb)) {
-               sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
-               if (IS_ERR(sbi->s_chksum_driver)) {
-                       ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
-                       ret = PTR_ERR(sbi->s_chksum_driver);
-                       sbi->s_chksum_driver = NULL;
-                       goto failed_mount;
-               }
+       sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+       if (IS_ERR(sbi->s_chksum_driver)) {
+               ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
+               ret = PTR_ERR(sbi->s_chksum_driver);
+               sbi->s_chksum_driver = NULL;
+               goto failed_mount;
        }
 
        /* Check superblock checksum */
@@ -3660,6 +3666,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
                                 "using the ext4 subsystem");
                else {
+                       /*
+                        * If we're probing, be silent if this looks like
+                        * it's actually an ext[34] filesystem.
+                        */
+                       if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+                               goto failed_mount;
                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
                                 "to feature incompatibilities");
                        goto failed_mount;
@@ -3671,6 +3683,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
                                 "using the ext4 subsystem");
                else {
+                       /*
+                        * If we're probing, be silent if this looks like
+                        * it's actually an ext4 filesystem.
+                        */
+                       if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+                               goto failed_mount;
                        ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
                                 "to feature incompatibilities");
                        goto failed_mount;
@@ -4094,10 +4112,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 * cope, else JOURNAL_DATA
                 */
                if (jbd2_journal_check_available_features
-                   (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
+                   (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
                        set_opt(sb, ORDERED_DATA);
-               else
+                       sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+               } else {
                        set_opt(sb, JOURNAL_DATA);
+                       sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+               }
                break;
 
        case EXT4_MOUNT_ORDERED_DATA:
index 1205261f130cbfa8ae82632399aa7beb73e4cc6a..9ebd26c957c2a85c1963bb22861574b6f247e46c 100644 (file)
@@ -49,8 +49,7 @@ struct ext4_attr {
        } u;
 };
 
-static ssize_t session_write_kbytes_show(struct ext4_attr *a,
-                                        struct ext4_sb_info *sbi, char *buf)
+static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 {
        struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
@@ -61,8 +60,7 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
                         sbi->s_sectors_written_start) >> 1);
 }
 
-static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
-                                         struct ext4_sb_info *sbi, char *buf)
+static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 {
        struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
@@ -74,8 +72,7 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
                          EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
-static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
-                                         struct ext4_sb_info *sbi,
+static ssize_t inode_readahead_blks_store(struct ext4_sb_info *sbi,
                                          const char *buf, size_t count)
 {
        unsigned long t;
@@ -92,8 +89,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
        return count;
 }
 
-static ssize_t reserved_clusters_store(struct ext4_attr *a,
-                                  struct ext4_sb_info *sbi,
+static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
                                   const char *buf, size_t count)
 {
        unsigned long long val;
@@ -109,8 +105,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
        return count;
 }
 
-static ssize_t trigger_test_error(struct ext4_attr *a,
-                                 struct ext4_sb_info *sbi,
+static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
                                  const char *buf, size_t count)
 {
        int len = count;
@@ -268,9 +263,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
                                (s64) EXT4_C2B(sbi,
                       percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
        case attr_session_write_kbytes:
-               return session_write_kbytes_show(a, sbi, buf);
+               return session_write_kbytes_show(sbi, buf);
        case attr_lifetime_write_kbytes:
-               return lifetime_write_kbytes_show(a, sbi, buf);
+               return lifetime_write_kbytes_show(sbi, buf);
        case attr_reserved_clusters:
                return snprintf(buf, PAGE_SIZE, "%llu\n",
                                (unsigned long long)
@@ -306,7 +301,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 
        switch (a->attr_id) {
        case attr_reserved_clusters:
-               return reserved_clusters_store(a, sbi, buf, len);
+               return reserved_clusters_store(sbi, buf, len);
        case attr_pointer_ui:
                if (!ptr)
                        return 0;
@@ -316,9 +311,9 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
                *((unsigned int *) ptr) = t;
                return len;
        case attr_inode_readahead:
-               return inode_readahead_blks_store(a, sbi, buf, len);
+               return inode_readahead_blks_store(sbi, buf, len);
        case attr_trigger_test_error:
-               return trigger_test_error(a, sbi, buf, len);
+               return trigger_test_error(sbi, buf, len);
        }
        return 0;
 }
@@ -330,13 +325,6 @@ static void ext4_sb_release(struct kobject *kobj)
        complete(&sbi->s_kobj_unregister);
 }
 
-static void ext4_kset_release(struct kobject *kobj)
-{
-       struct kset *kset = container_of(kobj, struct kset, kobj);
-
-       kfree(kset);
-}
-
 static const struct sysfs_ops ext4_attr_ops = {
        .show   = ext4_attr_show,
        .store  = ext4_attr_store,
@@ -348,19 +336,14 @@ static struct kobj_type ext4_sb_ktype = {
        .release        = ext4_sb_release,
 };
 
-static struct kobj_type ext4_ktype = {
-       .sysfs_ops      = &ext4_attr_ops,
-       .release        = ext4_kset_release,
-};
-
-static struct kset *ext4_kset;
-
 static struct kobj_type ext4_feat_ktype = {
        .default_attrs  = ext4_feat_attrs,
        .sysfs_ops      = &ext4_attr_ops,
        .release        = (void (*)(struct kobject *))kfree,
 };
 
+static struct kobject *ext4_root;
+
 static struct kobject *ext4_feat;
 
 #define PROC_FILE_SHOW_DEFN(name) \
@@ -398,9 +381,8 @@ int ext4_register_sysfs(struct super_block *sb)
        const struct ext4_proc_files *p;
        int err;
 
-       sbi->s_kobj.kset = ext4_kset;
        init_completion(&sbi->s_kobj_unregister);
-       err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL,
+       err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root,
                                   "%s", sb->s_id);
        if (err) {
                kobject_put(&sbi->s_kobj);
@@ -436,26 +418,18 @@ int __init ext4_init_sysfs(void)
 {
        int ret;
 
-       ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL);
-       if (!ext4_kset)
+       ext4_root = kobject_create_and_add("ext4", fs_kobj);
+       if (!ext4_root)
                return -ENOMEM;
 
-       kobject_set_name(&ext4_kset->kobj, "ext4");
-       ext4_kset->kobj.parent = fs_kobj;
-       ext4_kset->kobj.ktype = &ext4_ktype;
-       ret = kset_register(ext4_kset);
-       if (ret)
-               goto kset_err;
-
        ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL);
        if (!ext4_feat) {
                ret = -ENOMEM;
-               goto kset_err;
+               goto root_err;
        }
 
-       ext4_feat->kset = ext4_kset;
        ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype,
-                                  NULL, "features");
+                                  ext4_root, "features");
        if (ret)
                goto feat_err;
 
@@ -464,17 +438,19 @@ int __init ext4_init_sysfs(void)
 
 feat_err:
        kobject_put(ext4_feat);
-kset_err:
-       kset_unregister(ext4_kset);
-       ext4_kset = NULL;
+       ext4_feat = NULL;
+root_err:
+       kobject_put(ext4_root);
+       ext4_root = NULL;
        return ret;
 }
 
 void ext4_exit_sysfs(void)
 {
        kobject_put(ext4_feat);
-       kset_unregister(ext4_kset);
-       ext4_kset = NULL;
+       ext4_feat = NULL;
+       kobject_put(ext4_root);
+       ext4_root = NULL;
        remove_proc_entry(proc_dirname, NULL);
        ext4_proc_root = NULL;
 }
index 63656dbafdc4519f40bfb885aec78b65d66cc967..499cb4b1fbd22b98a8b8d923910ed0cce72d6994 100644 (file)
@@ -195,10 +195,13 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
 
        /* Check the values */
        while (!IS_LAST_ENTRY(entry)) {
-               if (entry->e_value_size != 0 &&
-                   entry->e_value_inum == 0) {
+               u32 size = le32_to_cpu(entry->e_value_size);
+
+               if (size > EXT4_XATTR_SIZE_MAX)
+                       return -EFSCORRUPTED;
+
+               if (size != 0 && entry->e_value_inum == 0) {
                        u16 offs = le16_to_cpu(entry->e_value_offs);
-                       u32 size = le32_to_cpu(entry->e_value_size);
                        void *value;
 
                        /*
@@ -222,25 +225,36 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
 }
 
 static inline int
-ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
+__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
+                        const char *function, unsigned int line)
 {
-       int error;
+       int error = -EFSCORRUPTED;
 
        if (buffer_verified(bh))
                return 0;
 
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1))
-               return -EFSCORRUPTED;
+               goto errout;
+       error = -EFSBADCRC;
        if (!ext4_xattr_block_csum_verify(inode, bh))
-               return -EFSBADCRC;
+               goto errout;
        error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
                                         bh->b_data);
-       if (!error)
+errout:
+       if (error)
+               __ext4_error_inode(inode, function, line, 0,
+                                  "corrupted xattr block %llu",
+                                  (unsigned long long) bh->b_blocknr);
+       else
                set_buffer_verified(bh);
        return error;
 }
 
+#define ext4_xattr_check_block(inode, bh) \
+       __ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)
+
+
 static int
 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
                         void *end, const char *function, unsigned int line)
@@ -262,18 +276,22 @@ errout:
        __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
 
 static int
-ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
-                     const char *name, int sorted)
+xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry,
+                void *end, int name_index, const char *name, int sorted)
 {
-       struct ext4_xattr_entry *entry;
+       struct ext4_xattr_entry *entry, *next;
        size_t name_len;
        int cmp = 1;
 
        if (name == NULL)
                return -EINVAL;
        name_len = strlen(name);
-       entry = *pentry;
-       for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+       for (entry = *pentry; !IS_LAST_ENTRY(entry); entry = next) {
+               next = EXT4_XATTR_NEXT(entry);
+               if ((void *) next >= end) {
+                       EXT4_ERROR_INODE(inode, "corrupted xattr entries");
+                       return -EFSCORRUPTED;
+               }
                cmp = name_index - entry->e_name_index;
                if (!cmp)
                        cmp = name_len - entry->e_name_len;
@@ -495,6 +513,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
        struct buffer_head *bh = NULL;
        struct ext4_xattr_entry *entry;
        size_t size;
+       void *end;
        int error;
        struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
@@ -511,20 +530,20 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
                goto cleanup;
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-       if (ext4_xattr_check_block(inode, bh)) {
-               EXT4_ERROR_INODE(inode, "bad block %llu",
-                                EXT4_I(inode)->i_file_acl);
-               error = -EFSCORRUPTED;
+       error = ext4_xattr_check_block(inode, bh);
+       if (error)
                goto cleanup;
-       }
        ext4_xattr_block_cache_insert(ea_block_cache, bh);
        entry = BFIRST(bh);
-       error = ext4_xattr_find_entry(&entry, name_index, name, 1);
+       end = bh->b_data + bh->b_size;
+       error = xattr_find_entry(inode, &entry, end, name_index, name, 1);
        if (error)
                goto cleanup;
        size = le32_to_cpu(entry->e_value_size);
+       error = -ERANGE;
+       if (unlikely(size > EXT4_XATTR_SIZE_MAX))
+               goto cleanup;
        if (buffer) {
-               error = -ERANGE;
                if (size > buffer_size)
                        goto cleanup;
                if (entry->e_value_inum) {
@@ -533,8 +552,12 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
                        if (error)
                                goto cleanup;
                } else {
-                       memcpy(buffer, bh->b_data +
-                              le16_to_cpu(entry->e_value_offs), size);
+                       u16 offset = le16_to_cpu(entry->e_value_offs);
+                       void *p = bh->b_data + offset;
+
+                       if (unlikely(p + size > end))
+                               goto cleanup;
+                       memcpy(buffer, p, size);
                }
        }
        error = size;
@@ -568,12 +591,14 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
        if (error)
                goto cleanup;
        entry = IFIRST(header);
-       error = ext4_xattr_find_entry(&entry, name_index, name, 0);
+       error = xattr_find_entry(inode, &entry, end, name_index, name, 0);
        if (error)
                goto cleanup;
        size = le32_to_cpu(entry->e_value_size);
+       error = -ERANGE;
+       if (unlikely(size > EXT4_XATTR_SIZE_MAX))
+               goto cleanup;
        if (buffer) {
-               error = -ERANGE;
                if (size > buffer_size)
                        goto cleanup;
                if (entry->e_value_inum) {
@@ -582,8 +607,12 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
                        if (error)
                                goto cleanup;
                } else {
-                       memcpy(buffer, (void *)IFIRST(header) +
-                              le16_to_cpu(entry->e_value_offs), size);
+                       u16 offset = le16_to_cpu(entry->e_value_offs);
+                       void *p = (void *)IFIRST(header) + offset;
+
+                       if (unlikely(p + size > end))
+                               goto cleanup;
+                       memcpy(buffer, p, size);
                }
        }
        error = size;
@@ -676,12 +705,9 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                goto cleanup;
        ea_bdebug(bh, "b_count=%d, refcount=%d",
                atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-       if (ext4_xattr_check_block(inode, bh)) {
-               EXT4_ERROR_INODE(inode, "bad block %llu",
-                                EXT4_I(inode)->i_file_acl);
-               error = -EFSCORRUPTED;
+       error = ext4_xattr_check_block(inode, bh);
+       if (error)
                goto cleanup;
-       }
        ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
        error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
@@ -808,10 +834,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
                        goto out;
                }
 
-               if (ext4_xattr_check_block(inode, bh)) {
-                       ret = -EFSCORRUPTED;
+               ret = ext4_xattr_check_block(inode, bh);
+               if (ret)
                        goto out;
-               }
 
                for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
                     entry = EXT4_XATTR_NEXT(entry))
@@ -1793,19 +1818,16 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
                ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
                        atomic_read(&(bs->bh->b_count)),
                        le32_to_cpu(BHDR(bs->bh)->h_refcount));
-               if (ext4_xattr_check_block(inode, bs->bh)) {
-                       EXT4_ERROR_INODE(inode, "bad block %llu",
-                                        EXT4_I(inode)->i_file_acl);
-                       error = -EFSCORRUPTED;
+               error = ext4_xattr_check_block(inode, bs->bh);
+               if (error)
                        goto cleanup;
-               }
                /* Find the named attribute. */
                bs->s.base = BHDR(bs->bh);
                bs->s.first = BFIRST(bs->bh);
                bs->s.end = bs->bh->b_data + bs->bh->b_size;
                bs->s.here = bs->s.first;
-               error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
-                                             i->name, 1);
+               error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
+                                        i->name_index, i->name, 1);
                if (error && error != -ENODATA)
                        goto cleanup;
                bs->s.not_found = error;
@@ -2164,8 +2186,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
                if (error)
                        return error;
                /* Find the named attribute. */
-               error = ext4_xattr_find_entry(&is->s.here, i->name_index,
-                                             i->name, 0);
+               error = xattr_find_entry(inode, &is->s.here, is->s.end,
+                                        i->name_index, i->name, 0);
                if (error && error != -ENODATA)
                        return error;
                is->s.not_found = error;
@@ -2721,13 +2743,9 @@ retry:
                error = -EIO;
                if (!bh)
                        goto cleanup;
-               if (ext4_xattr_check_block(inode, bh)) {
-                       EXT4_ERROR_INODE(inode, "bad block %llu",
-                                        EXT4_I(inode)->i_file_acl);
-                       error = -EFSCORRUPTED;
-                       brelse(bh);
+               error = ext4_xattr_check_block(inode, bh);
+               if (error)
                        goto cleanup;
-               }
                base = BHDR(bh);
                end = bh->b_data + bh->b_size;
                min_offs = end - base;
@@ -2884,11 +2902,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
                        goto cleanup;
                }
                error = ext4_xattr_check_block(inode, bh);
-               if (error) {
-                       EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
-                                        EXT4_I(inode)->i_file_acl, error);
+               if (error)
                        goto cleanup;
-               }
 
                if (ext4_has_feature_ea_inode(inode->i_sb)) {
                        for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
index dd54c4f995c8d0439c7647963cd8394b3f5e94e0..f39cad2abe2a8855211c07b22938499ba1d3a40e 100644 (file)
@@ -70,6 +70,17 @@ struct ext4_xattr_entry {
                EXT4_I(inode)->i_extra_isize))
 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
 
+/*
+ * XATTR_SIZE_MAX is currently 64k, but for the purposes of checking
+ * for file system consistency errors, we use a somewhat bigger value.
+ * This allows XATTR_SIZE_MAX to grow in the future, but by using this
+ * instead of INT_MAX for certain consistency checks, we don't need to
+ * worry about arithmetic overflows.  (Actually XATTR_SIZE_MAX is
+ * defined in include/uapi/linux/limits.h, so changing it is
+ * not going to be trivial....)
+ */
+#define EXT4_XATTR_SIZE_MAX (1 << 24)
+
 /*
  * The minimum size of EA value when you start storing it in an external inode
  * size of block - size of header - size of 1 entry - 4 null bytes
index 3fbf48ec21881698325f3b84c4b2654485d7711b..dfb057900e791a0bf3b692ba1ca0fc57a9377b03 100644 (file)
@@ -974,7 +974,7 @@ out:
 }
 
 /*
- * This is a variaon of __jbd2_update_log_tail which checks for validity of
+ * This is a variation of __jbd2_update_log_tail which checks for validity of
  * provided log tail and locks j_checkpoint_mutex. So it is safe against races
  * with other threads updating log tail.
  */
@@ -1417,6 +1417,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
        journal_superblock_t *sb = journal->j_superblock;
        int ret;
 
+       if (is_journal_aborted(journal))
+               return -EIO;
+
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
        jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
                  tail_block, tail_tid);
@@ -1483,12 +1486,15 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 void jbd2_journal_update_sb_errno(journal_t *journal)
 {
        journal_superblock_t *sb = journal->j_superblock;
+       int errcode;
 
        read_lock(&journal->j_state_lock);
-       jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
-                 journal->j_errno);
-       sb->s_errno    = cpu_to_be32(journal->j_errno);
+       errcode = journal->j_errno;
        read_unlock(&journal->j_state_lock);
+       if (errcode == -ESHUTDOWN)
+               errcode = 0;
+       jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
+       sb->s_errno    = cpu_to_be32(errcode);
 
        jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
 }
@@ -2105,12 +2111,22 @@ void __jbd2_journal_abort_hard(journal_t *journal)
  * but don't do any other IO. */
 static void __journal_abort_soft (journal_t *journal, int errno)
 {
-       if (journal->j_flags & JBD2_ABORT)
-               return;
+       int old_errno;
 
-       if (!journal->j_errno)
+       write_lock(&journal->j_state_lock);
+       old_errno = journal->j_errno;
+       if (!journal->j_errno || errno == -ESHUTDOWN)
                journal->j_errno = errno;
 
+       if (journal->j_flags & JBD2_ABORT) {
+               write_unlock(&journal->j_state_lock);
+               if (!old_errno && old_errno != -ESHUTDOWN &&
+                   errno == -ESHUTDOWN)
+                       jbd2_journal_update_sb_errno(journal);
+               return;
+       }
+       write_unlock(&journal->j_state_lock);
+
        __jbd2_journal_abort_hard(journal);
 
        if (errno) {
index f99910b69c78d6e37854a82689a4cadc9b4d3e1d..a4967b27ffb636212ddd21234b1b2fc211a6038b 100644 (file)
@@ -600,8 +600,8 @@ static int do_one_pass(journal_t *journal,
                                                success = -EFSBADCRC;
                                                printk(KERN_ERR "JBD2: Invalid "
                                                       "checksum recovering "
-                                                      "block %llu in log\n",
-                                                      blocknr);
+                                                      "data block %llu in "
+                                                      "log\n", blocknr);
                                                block_error = 1;
                                                goto skip_write;
                                        }
index 4d0e3af4e56174192c3d70a8e698e1b11b89c3b4..0e31eb136c5730d264a3fd7e7b99dcddd793ec14 100644 (file)
@@ -2585,6 +2585,49 @@ DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key);
 DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key);
 DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping);
 
+TRACE_EVENT(ext4_shutdown,
+       TP_PROTO(struct super_block *sb, unsigned long flags),
+
+       TP_ARGS(sb, flags),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field(     unsigned,  flags                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->flags  = flags;
+       ),
+
+       TP_printk("dev %d,%d flags %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->flags)
+);
+
+TRACE_EVENT(ext4_error,
+       TP_PROTO(struct super_block *sb, const char *function,
+                unsigned int line),
+
+       TP_ARGS(sb, function, line),
+
+       TP_STRUCT__entry(
+               __field(        dev_t,  dev                     )
+               __field( const char *,  function                )
+               __field(     unsigned,  line                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev    = sb->s_dev;
+               __entry->function = function;
+               __entry->line   = line;
+       ),
+
+       TP_printk("dev %d,%d function %s line %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->function, __entry->line)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */