Merge tag 'f2fs-for-4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeu...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Jan 2018 03:07:32 +0000 (19:07 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 31 Jan 2018 03:07:32 +0000 (19:07 -0800)
Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've followed up to support some generic features such
  as cgroup, block reservation, linking fscrypt_ops, delivering
  write_hints, and some ioctls. We've also fixed some corner cases in
  power-cut recovery and some subtle deadlocks.

  Enhancements:
   - bitmap operations to handle NAT blocks
   - readahead to improve readdir speed
   - switch to use fscrypt_*
   - apply write hints for direct IO
   - add reserve_root=%u,resuid=%u,resgid=%u to reserve blocks for root/uid/gid
   - modify f_bavail and f_bfree to consider root reserved blocks
   - support cgroup writeback
   - support FIEMAP_FLAG_XATTR for fiemap
   - add F2FS_IOC_PRECACHE_EXTENTS to pre-cache extents
   - add F2FS_IOC_{GET/SET}_PIN_FILE to pin LBAs for data blocks
     (userspace sketches for these new interfaces appear below)
   - support inode creation time

  Bug fixes:
   - sysfile-based quota operations
   - memory footprint accounting
   - allow writing data in the partial preallocation case
   - fix deadlock case on fallocate
   - fix to handle fill_super errors
   - fix missing inode updates of fsync'ed file
   - recover renamed file which was fsync'ed before
   - drop inmemory pages in corner error case
   - keep last_disk_size correctly
   - recover missing i_inline flags during roll-forward

  Various clean-up patches were added as well"
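
For reference, here is a minimal userspace sketch of the pin-file and
extent pre-cache ioctls listed above. The ioctl definitions are not part
of an exported uapi header in this release, so the numbers below are an
assumption mirroring fs/f2fs/f2fs.h; check them against the kernel tree
you actually build against.

/*
 * pin_precache.c - sketch of the new f2fs pin/precache ioctls (userspace).
 *
 * Assumption: the ioctl numbers below mirror the in-kernel definitions in
 * fs/f2fs/f2fs.h; they are not exported via a uapi header here.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>

#define F2FS_IOCTL_MAGIC                0xf5
#define F2FS_IOC_SET_PIN_FILE           _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE           _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS       _IO(F2FS_IOCTL_MAGIC, 15)

int main(int argc, char **argv)
{
        __u32 pin = 1, pinned = 0;
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <file-on-f2fs>\n", argv[0]);
                return 1;
        }

        fd = open(argv[1], O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* pin the file so its data blocks keep stable on-disk addresses */
        if (ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin) < 0)
                perror("F2FS_IOC_SET_PIN_FILE");

        /* read back the pin state */
        if (ioctl(fd, F2FS_IOC_GET_PIN_FILE, &pinned) < 0)
                perror("F2FS_IOC_GET_PIN_FILE");
        else
                printf("pin state: %u\n", pinned);

        /* populate the extent cache for the whole file ahead of time */
        if (ioctl(fd, F2FS_IOC_PRECACHE_EXTENTS) < 0)
                perror("F2FS_IOC_PRECACHE_EXTENTS");

        close(fd);
        return 0;
}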

* tag 'f2fs-for-4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (72 commits)
  f2fs: support inode creation time
  f2fs: rebuild sit page from sit info in mem
  f2fs: stop issuing discard if fs is readonly
  f2fs: clean up duplicated assignment in init_discard_policy
  f2fs: use GFP_F2FS_ZERO for cleanup
  f2fs: allow to recover node blocks given updated checkpoint
  f2fs: recover some i_inline flags
  f2fs: correct removexattr behavior for null valued extended attribute
  f2fs: drop page cache after fs shutdown
  f2fs: stop gc/discard thread after fs shutdown
  f2fs: handle error case in f2fs_ioc_shutdown
  f2fs: split need_inplace_update
  f2fs: fix to update last_disk_size correctly
  f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup
  f2fs: clean up error path of fill_super
  f2fs: avoid hungtask when GC encrypted block if io_bits is set
  f2fs: allow quota to use reserved blocks
  f2fs: fix to drop all inmem pages correctly
  f2fs: speed up defragment on sparse file
  f2fs: support F2FS_IOC_PRECACHE_EXTENTS
  ...
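
The combined diff below adds f2fs_xattr_fiemap() and teaches f2fs_fiemap()
to accept FIEMAP_FLAG_XATTR: the inline xattr area is reported with
FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_NOT_ALIGNED and a separate xattr
node block as a final extent. A rough sketch of driving that flag from
userspace with the generic fiemap ioctl (only standard linux/fs.h and
linux/fiemap.h definitions, nothing f2fs-specific assumed):

/* fiemap_xattr.c - dump the xattr extent mapping of a file (sketch) */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FS_IOC_FIEMAP */
#include <linux/fiemap.h>       /* struct fiemap, FIEMAP_FLAG_XATTR */

#define NR_EXTENTS 8            /* xattr mappings are tiny; a few slots suffice */

int main(int argc, char **argv)
{
        struct fiemap *fm;
        unsigned int i;
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <file-on-f2fs>\n", argv[0]);
                return 1;
        }

        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        fm = calloc(1, sizeof(*fm) + NR_EXTENTS * sizeof(struct fiemap_extent));
        if (!fm) {
                close(fd);
                return 1;
        }

        fm->fm_start = 0;
        fm->fm_length = ~0ULL;
        fm->fm_flags = FIEMAP_FLAG_XATTR;       /* map xattr blocks, not file data */
        fm->fm_extent_count = NR_EXTENTS;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                perror("FS_IOC_FIEMAP");
        } else {
                for (i = 0; i < fm->fm_mapped_extents; i++)
                        printf("extent %u: phys=%llu len=%llu flags=0x%x\n", i,
                               (unsigned long long)fm->fm_extents[i].fe_physical,
                               (unsigned long long)fm->fm_extents[i].fe_length,
                               fm->fm_extents[i].fe_flags);
        }

        free(fm);
        close(fd);
        return 0;
}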

fs/f2fs/data.c

diff --combined fs/f2fs/data.c
index 455f086cce3dba0db35ff24860996d7cb44ff8cb,6cba74eb09a773d480b157569182ffa7d9fb6028..7578ed1a85e05840f44e3a031831a4ba74fe43a7
@@@ -56,7 -56,7 +56,7 @@@ static void f2fs_read_end_io(struct bi
        int i;
  
  #ifdef CONFIG_F2FS_FAULT_INJECTION
 -      if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
 +      if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
                f2fs_show_injection_info(FAULT_IO);
                bio->bi_status = BLK_STS_IOERR;
        }
@@@ -111,8 -111,13 +111,13 @@@ static void f2fs_write_end_io(struct bi
  
                if (unlikely(bio->bi_status)) {
                        mapping_set_error(page->mapping, -EIO);
-                       f2fs_stop_checkpoint(sbi, true);
+                       if (type == F2FS_WB_CP_DATA)
+                               f2fs_stop_checkpoint(sbi, true);
                }
+               f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
+                                       page->index != nid_of_node(page));
                dec_page_count(sbi, type);
                clear_cold_data(page);
                end_page_writeback(page);
@@@ -169,6 -174,7 +174,7 @@@ static bool __same_bdev(struct f2fs_sb_
   * Low-level block read/write IO operations.
   */
  static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
+                               struct writeback_control *wbc,
                                int npages, bool is_read)
  {
        struct bio *bio;
        f2fs_target_device(sbi, blk_addr, bio);
        bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
        bio->bi_private = is_read ? NULL : sbi;
+       if (wbc)
+               wbc_init_bio(wbc, bio);
  
        return bio;
  }
@@@ -373,7 -381,8 +381,8 @@@ int f2fs_submit_page_bio(struct f2fs_io
        f2fs_trace_ios(fio, 0);
  
        /* Allocate a new bio */
-       bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op));
+       bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
+                               1, is_read_io(fio->op));
  
        if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
                bio_put(bio);
@@@ -435,7 -444,7 +444,7 @@@ alloc_new
                        dec_page_count(sbi, WB_DATA_TYPE(bio_page));
                        goto out_fail;
                }
-               io->bio = __bio_alloc(sbi, fio->new_blkaddr,
+               io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
                                                BIO_MAX_PAGES, false);
                io->fio = *fio;
        }
                goto alloc_new;
        }
  
+       if (fio->io_wbc)
+               wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
        io->last_block_in_bio = fio->new_blkaddr;
        f2fs_trace_ios(fio, 0);
  
@@@ -783,7 -795,7 +795,7 @@@ got_it
        return page;
  }
  
- static int __allocate_data_block(struct dnode_of_data *dn)
+ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
  {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
        struct f2fs_summary sum;
@@@ -808,7 -820,7 +820,7 @@@ alloc
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  
        allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-                                       &sum, CURSEG_WARM_DATA, NULL, false);
+                                       &sum, seg_type, NULL, false);
        set_data_blkaddr(dn);
  
        /* update i_size */
@@@ -831,10 -843,12 +843,12 @@@ int f2fs_preallocate_blocks(struct kioc
  {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct f2fs_map_blocks map;
+       int flag;
        int err = 0;
+       bool direct_io = iocb->ki_flags & IOCB_DIRECT;
  
        /* convert inline data for Direct I/O*/
-       if (iocb->ki_flags & IOCB_DIRECT) {
+       if (direct_io) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        return err;
                map.m_len = 0;
  
        map.m_next_pgofs = NULL;
+       map.m_next_extent = NULL;
+       map.m_seg_type = NO_CHECK_TYPE;
  
-       if (iocb->ki_flags & IOCB_DIRECT)
-               return f2fs_map_blocks(inode, &map, 1,
-                       __force_buffered_io(inode, WRITE) ?
-                               F2FS_GET_BLOCK_PRE_AIO :
-                               F2FS_GET_BLOCK_PRE_DIO);
+       if (direct_io) {
+               map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
+               flag = __force_buffered_io(inode, WRITE) ?
+                                       F2FS_GET_BLOCK_PRE_AIO :
+                                       F2FS_GET_BLOCK_PRE_DIO;
+               goto map_blocks;
+       }
        if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
                err = f2fs_convert_inline_inode(inode);
                if (err)
                        return err;
        }
-       if (!f2fs_has_inline_data(inode))
-               return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+       if (f2fs_has_inline_data(inode))
+               return err;
+       flag = F2FS_GET_BLOCK_PRE_AIO;
+ map_blocks:
+       err = f2fs_map_blocks(inode, &map, 1, flag);
+       if (map.m_len > 0 && err == -ENOSPC) {
+               if (!direct_io)
+                       set_inode_flag(inode, FI_NO_PREALLOC);
+               err = 0;
+       }
        return err;
  }
  
@@@ -904,6 -932,7 +932,7 @@@ int f2fs_map_blocks(struct inode *inode
        blkcnt_t prealloc;
        struct extent_info ei = {0,0,0};
        block_t blkaddr;
+       unsigned int start_pgofs;
  
        if (!maxblocks)
                return 0;
                map->m_pblk = ei.blk + pgofs - ei.fofs;
                map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
                map->m_flags = F2FS_MAP_MAPPED;
+               if (map->m_next_extent)
+                       *map->m_next_extent = pgofs + map->m_len;
                goto out;
        }
  
@@@ -937,10 -968,14 +968,14 @@@ next_dnode
                        if (map->m_next_pgofs)
                                *map->m_next_pgofs =
                                        get_next_page_offset(&dn, pgofs);
+                       if (map->m_next_extent)
+                               *map->m_next_extent =
+                                       get_next_page_offset(&dn, pgofs);
                }
                goto unlock_out;
        }
  
+       start_pgofs = pgofs;
        prealloc = 0;
        last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
        end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
@@@ -960,7 -995,8 +995,8 @@@ next_block
                                        last_ofs_in_node = dn.ofs_in_node;
                                }
                        } else {
-                               err = __allocate_data_block(&dn);
+                               err = __allocate_data_block(&dn,
+                                                       map->m_seg_type);
                                if (!err)
                                        set_inode_flag(inode, FI_APPEND_WRITE);
                        }
                                map->m_pblk = 0;
                                goto sync_out;
                        }
+                       if (flag == F2FS_GET_BLOCK_PRECACHE)
+                               goto sync_out;
                        if (flag == F2FS_GET_BLOCK_FIEMAP &&
                                                blkaddr == NULL_ADDR) {
                                if (map->m_next_pgofs)
                                        *map->m_next_pgofs = pgofs + 1;
+                               goto sync_out;
                        }
-                       if (flag != F2FS_GET_BLOCK_FIEMAP ||
-                                               blkaddr != NEW_ADDR)
+                       if (flag != F2FS_GET_BLOCK_FIEMAP) {
+                               /* for defragment case */
+                               if (map->m_next_pgofs)
+                                       *map->m_next_pgofs = pgofs + 1;
                                goto sync_out;
+                       }
                }
        }
  
@@@ -1031,6 -1073,16 +1073,16 @@@ skip
        else if (dn.ofs_in_node < end_offset)
                goto next_block;
  
+       if (flag == F2FS_GET_BLOCK_PRECACHE) {
+               if (map->m_flags & F2FS_MAP_MAPPED) {
+                       unsigned int ofs = start_pgofs - map->m_lblk;
+                       f2fs_update_extent_cache_range(&dn,
+                               start_pgofs, map->m_pblk + ofs,
+                               map->m_len - ofs);
+               }
+       }
        f2fs_put_dnode(&dn);
  
        if (create) {
        goto next_dnode;
  
  sync_out:
+       if (flag == F2FS_GET_BLOCK_PRECACHE) {
+               if (map->m_flags & F2FS_MAP_MAPPED) {
+                       unsigned int ofs = start_pgofs - map->m_lblk;
+                       f2fs_update_extent_cache_range(&dn,
+                               start_pgofs, map->m_pblk + ofs,
+                               map->m_len - ofs);
+               }
+               if (map->m_next_extent)
+                       *map->m_next_extent = pgofs + 1;
+       }
        f2fs_put_dnode(&dn);
  unlock_out:
        if (create) {
@@@ -1053,7 -1116,7 +1116,7 @@@ out
  
  static int __get_data_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh, int create, int flag,
-                       pgoff_t *next_pgofs)
+                       pgoff_t *next_pgofs, int seg_type)
  {
        struct f2fs_map_blocks map;
        int err;
        map.m_lblk = iblock;
        map.m_len = bh->b_size >> inode->i_blkbits;
        map.m_next_pgofs = next_pgofs;
+       map.m_next_extent = NULL;
+       map.m_seg_type = seg_type;
  
        err = f2fs_map_blocks(inode, &map, create, flag);
        if (!err) {
@@@ -1076,14 -1141,17 +1141,17 @@@ static int get_data_block(struct inode 
                        pgoff_t *next_pgofs)
  {
        return __get_data_block(inode, iblock, bh_result, create,
-                                                       flag, next_pgofs);
+                                                       flag, next_pgofs,
+                                                       NO_CHECK_TYPE);
  }
  
  static int get_data_block_dio(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
  {
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_DEFAULT, NULL);
+                                               F2FS_GET_BLOCK_DEFAULT, NULL,
+                                               rw_hint_to_seg_type(
+                                                       inode->i_write_hint));
  }
  
  static int get_data_block_bmap(struct inode *inode, sector_t iblock,
                return -EFBIG;
  
        return __get_data_block(inode, iblock, bh_result, create,
-                                               F2FS_GET_BLOCK_BMAP, NULL);
+                                               F2FS_GET_BLOCK_BMAP, NULL,
+                                               NO_CHECK_TYPE);
  }
  
  static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@@ -1107,6 -1176,68 +1176,68 @@@ static inline loff_t blk_to_logical(str
        return (blk << inode->i_blkbits);
  }
  
+ static int f2fs_xattr_fiemap(struct inode *inode,
+                               struct fiemap_extent_info *fieinfo)
+ {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       struct page *page;
+       struct node_info ni;
+       __u64 phys = 0, len;
+       __u32 flags;
+       nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+       int err = 0;
+       if (f2fs_has_inline_xattr(inode)) {
+               int offset;
+               page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
+                                               inode->i_ino, false);
+               if (!page)
+                       return -ENOMEM;
+               get_node_info(sbi, inode->i_ino, &ni);
+               phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+               offset = offsetof(struct f2fs_inode, i_addr) +
+                                       sizeof(__le32) * (DEF_ADDRS_PER_INODE -
+                                       get_inline_xattr_addrs(inode));
+               phys += offset;
+               len = inline_xattr_size(inode);
+               f2fs_put_page(page, 1);
+               flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
+               if (!xnid)
+                       flags |= FIEMAP_EXTENT_LAST;
+               err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+               if (err || err == 1)
+                       return err;
+       }
+       if (xnid) {
+               page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
+               if (!page)
+                       return -ENOMEM;
+               get_node_info(sbi, xnid, &ni);
+               phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+               len = inode->i_sb->s_blocksize;
+               f2fs_put_page(page, 1);
+               flags = FIEMAP_EXTENT_LAST;
+       }
+       if (phys)
+               err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
+       return (err < 0 ? err : 0);
+ }
  int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                u64 start, u64 len)
  {
        u32 flags = 0;
        int ret = 0;
  
-       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
+               ret = f2fs_precache_extents(inode);
+               if (ret)
+                       return ret;
+       }
+       ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
        if (ret)
                return ret;
  
+       inode_lock(inode);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               ret = f2fs_xattr_fiemap(inode, fieinfo);
+               goto out;
+       }
        if (f2fs_has_inline_data(inode)) {
                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                if (ret != -EAGAIN)
-                       return ret;
+                       goto out;
        }
  
-       inode_lock(inode);
        if (logical_to_blk(inode, len) == 0)
                len = blk_to_logical(inode, 1);
  
@@@ -1198,7 -1340,6 +1340,6 @@@ static int f2fs_mpage_readpages(struct 
                        unsigned nr_pages)
  {
        struct bio *bio = NULL;
-       unsigned page_idx;
        sector_t last_block_in_bio = 0;
        struct inode *inode = mapping->host;
        const unsigned blkbits = inode->i_blkbits;
        map.m_len = 0;
        map.m_flags = 0;
        map.m_next_pgofs = NULL;
+       map.m_next_extent = NULL;
+       map.m_seg_type = NO_CHECK_TYPE;
  
-       for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
+       for (; nr_pages; nr_pages--) {
                if (pages) {
                        page = list_last_entry(pages, struct page, lru);
  
@@@ -1376,18 -1518,79 +1518,79 @@@ retry_encrypt
        return PTR_ERR(fio->encrypted_page);
  }
  
+ static inline bool check_inplace_update_policy(struct inode *inode,
+                               struct f2fs_io_info *fio)
+ {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       unsigned int policy = SM_I(sbi)->ipu_policy;
+       if (policy & (0x1 << F2FS_IPU_FORCE))
+               return true;
+       if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
+               return true;
+       if (policy & (0x1 << F2FS_IPU_UTIL) &&
+                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+               return true;
+       if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
+                       utilization(sbi) > SM_I(sbi)->min_ipu_util)
+               return true;
+       /*
+        * IPU for rewrite async pages
+        */
+       if (policy & (0x1 << F2FS_IPU_ASYNC) &&
+                       fio && fio->op == REQ_OP_WRITE &&
+                       !(fio->op_flags & REQ_SYNC) &&
+                       !f2fs_encrypted_inode(inode))
+               return true;
+       /* this is only set during fdatasync */
+       if (policy & (0x1 << F2FS_IPU_FSYNC) &&
+                       is_inode_flag_set(inode, FI_NEED_IPU))
+               return true;
+       return false;
+ }
+ bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
+ {
+       if (f2fs_is_pinned_file(inode))
+               return true;
+       /* if this is cold file, we should overwrite to avoid fragmentation */
+       if (file_is_cold(inode))
+               return true;
+       return check_inplace_update_policy(inode, fio);
+ }
+ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
+ {
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+       if (test_opt(sbi, LFS))
+               return true;
+       if (S_ISDIR(inode->i_mode))
+               return true;
+       if (f2fs_is_atomic_file(inode))
+               return true;
+       if (fio) {
+               if (is_cold_data(fio->page))
+                       return true;
+               if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+                       return true;
+       }
+       return false;
+ }
  static inline bool need_inplace_update(struct f2fs_io_info *fio)
  {
        struct inode *inode = fio->page->mapping->host;
  
-       if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
-               return false;
-       if (is_cold_data(fio->page))
-               return false;
-       if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
+       if (should_update_outplace(inode, fio))
                return false;
  
-       return need_inplace_update_policy(inode, fio);
+       return should_update_inplace(inode, fio);
  }
  
  static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
@@@ -1508,10 -1711,17 +1711,17 @@@ static int __write_data_page(struct pag
                .submitted = false,
                .need_lock = LOCK_RETRY,
                .io_type = io_type,
+               .io_wbc = wbc,
        };
  
        trace_f2fs_writepage(page, DATA);
  
+       /* we should bypass data pages to proceed the kworkder jobs */
+       if (unlikely(f2fs_cp_error(sbi))) {
+               mapping_set_error(page->mapping, -EIO);
+               goto out;
+       }
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                goto redirty_out;
  
@@@ -1536,12 -1746,6 +1746,6 @@@ write
                        available_free_memory(sbi, BASE_CHECK))))
                goto redirty_out;
  
-       /* we should bypass data pages to proceed the kworkder jobs */
-       if (unlikely(f2fs_cp_error(sbi))) {
-               mapping_set_error(page->mapping, -EIO);
-               goto out;
-       }
        /* Dentry blocks are controlled by checkpoint */
        if (S_ISDIR(inode->i_mode)) {
                fio.need_lock = LOCK_DONE;
                }
        }
  
-       down_write(&F2FS_I(inode)->i_sem);
-       if (F2FS_I(inode)->last_disk_size < psize)
-               F2FS_I(inode)->last_disk_size = psize;
-       up_write(&F2FS_I(inode)->i_sem);
+       if (err) {
+               file_set_keep_isize(inode);
+       } else {
+               down_write(&F2FS_I(inode)->i_sem);
+               if (F2FS_I(inode)->last_disk_size < psize)
+                       F2FS_I(inode)->last_disk_size = psize;
+               up_write(&F2FS_I(inode)->i_sem);
+       }
  
  done:
        if (err && err != -ENOENT)
@@@ -1933,7 -2141,7 +2141,7 @@@ static int f2fs_write_begin(struct fil
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct page *page = NULL;
        pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
-       bool need_balance = false;
+       bool need_balance = false, drop_atomic = false;
        block_t blkaddr = NULL_ADDR;
        int err = 0;
  
        if (f2fs_is_atomic_file(inode) &&
                        !available_free_memory(sbi, INMEM_PAGES)) {
                err = -ENOMEM;
+               drop_atomic = true;
                goto fail;
        }
  
@@@ -2022,7 -2231,7 +2231,7 @@@ repeat
  fail:
        f2fs_put_page(page, 1);
        f2fs_write_failed(mapping, pos + len);
-       if (f2fs_is_atomic_file(inode))
+       if (drop_atomic)
                drop_inmem_pages_all(sbi);
        return err;
  }