new primitive: discard_new_inode()
author: Al Viro <viro@zeniv.linux.org.uk>
Thu, 28 Jun 2018 19:53:17 +0000 (15:53 -0400)
committer: Al Viro <viro@zeniv.linux.org.uk>
Fri, 3 Aug 2018 19:55:30 +0000 (15:55 -0400)
We don't want open-by-handle to pick up a half-set-up in-core
struct inode left behind by e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
while they are on their way to extinction.  However, we can't just have
them unhashed - otherwise an open-by-handle immediately *after* that would
end up creating a new in-core inode over the on-disk one that
is in the process of being freed right under us.

Solution: a new flag (I_CREATING), set by insert_inode_locked() and
removed by unlock_new_inode(), and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations.  That primitive unlocks the new
inode, but leaves I_CREATING in place.

iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
insert_inode_locked() treats the same as instant -EBUSY.
ilookup() treats those as icache miss.

[Fix by Dan Carpenter <dan.carpenter@oracle.com> folded in]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/dcache.c
fs/inode.c
include/linux/fs.h

diff --git a/fs/dcache.c b/fs/dcache.c
index a7d9e7a..11b753d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
        spin_lock(&inode->i_lock);
        __d_instantiate(entry, inode);
        WARN_ON(!(inode->i_state & I_NEW));
-       inode->i_state &= ~I_NEW;
+       inode->i_state &= ~I_NEW & ~I_CREATING;
        smp_mb();
        wake_up_bit(&inode->i_state, __I_NEW);
        spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e9..6cd2e7b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
                        __wait_on_freeing_inode(inode);
                        goto repeat;
                }
+               if (unlikely(inode->i_state & I_CREATING)) {
+                       spin_unlock(&inode->i_lock);
+                       return ERR_PTR(-ESTALE);
+               }
                __iget(inode);
                spin_unlock(&inode->i_lock);
                return inode;
@@ -831,6 +835,10 @@ repeat:
                        __wait_on_freeing_inode(inode);
                        goto repeat;
                }
+               if (unlikely(inode->i_state & I_CREATING)) {
+                       spin_unlock(&inode->i_lock);
+                       return ERR_PTR(-ESTALE);
+               }
                __iget(inode);
                spin_unlock(&inode->i_lock);
                return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
        lockdep_annotate_inode_mutex_key(inode);
        spin_lock(&inode->i_lock);
        WARN_ON(!(inode->i_state & I_NEW));
-       inode->i_state &= ~I_NEW;
+       inode->i_state &= ~I_NEW & ~I_CREATING;
        smp_mb();
        wake_up_bit(&inode->i_state, __I_NEW);
        spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+void discard_new_inode(struct inode *inode) /* failure-exit counterpart of unlock_new_inode(): unlock the new inode and drop it */
+{
+       lockdep_annotate_inode_mutex_key(inode);
+       spin_lock(&inode->i_lock);
+       WARN_ON(!(inode->i_state & I_NEW)); /* warn if the inode is not marked I_NEW */
+       inode->i_state &= ~I_NEW; /* only I_NEW is cleared; unlike unlock_new_inode(), I_CREATING stays set */
+       smp_mb(); /* full barrier between the i_state update and the wakeup below */
+       wake_up_bit(&inode->i_state, __I_NEW); /* wake anyone waiting on the __I_NEW bit */
+       spin_unlock(&inode->i_lock);
+       iput(inode); /* replaces the separate iput() callers used to pair with unlock_new_inode() */
+}
+EXPORT_SYMBOL(discard_new_inode);
+
 /**
  * lock_two_nondirectories - take two i_mutexes on non-directory objects
  *
@@ -1039,6 +1060,8 @@ again:
                 * Use the old inode instead of the preallocated one.
                 */
                spin_unlock(&inode_hash_lock);
+               if (IS_ERR(old))
+                       return NULL;
                wait_on_inode(old);
                if (unlikely(inode_unhashed(old))) {
                        iput(old);
@@ -1128,6 +1151,8 @@ again:
        inode = find_inode_fast(sb, head, ino);
        spin_unlock(&inode_hash_lock);
        if (inode) {
+               if (IS_ERR(inode))
+                       return NULL;
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
                        iput(inode);
@@ -1165,6 +1190,8 @@ again:
                 */
                spin_unlock(&inode_hash_lock);
                destroy_inode(inode);
+               if (IS_ERR(old))
+                       return NULL;
                inode = old;
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
        inode = find_inode(sb, head, test, data);
        spin_unlock(&inode_hash_lock);
 
-       return inode;
+       return IS_ERR(inode) ? NULL : inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1338,6 +1365,8 @@ again:
        spin_unlock(&inode_hash_lock);
 
        if (inode) {
+               if (IS_ERR(inode))
+                       return NULL;
                wait_on_inode(inode);
                if (unlikely(inode_unhashed(inode))) {
                        iput(inode);
@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode)
                }
                if (likely(!old)) {
                        spin_lock(&inode->i_lock);
-                       inode->i_state |= I_NEW;
+                       inode->i_state |= I_NEW | I_CREATING;
                        hlist_add_head(&inode->i_hash, head);
                        spin_unlock(&inode->i_lock);
                        spin_unlock(&inode_hash_lock);
                        return 0;
                }
+               if (unlikely(old->i_state & I_CREATING)) {
+                       spin_unlock(&old->i_lock);
+                       spin_unlock(&inode_hash_lock);
+                       return -EBUSY;
+               }
                __iget(old);
                spin_unlock(&old->i_lock);
                spin_unlock(&inode_hash_lock);
@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked);
 int insert_inode_locked4(struct inode *inode, unsigned long hashval,
                int (*test)(struct inode *, void *), void *data)
 {
-       struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+       struct inode *old;
+
+       inode->i_state |= I_CREATING;
+       old = inode_insert5(inode, hashval, test, NULL, data);
 
        if (old != inode) {
                iput(old);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108..a426005 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  * I_OVL_INUSE         Used by overlayfs to get exclusive ownership on upper
  *                     and work dirs among overlayfs mounts.
  *
+ * I_CREATING          New object's inode in the middle of setting up.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC           (1 << 0)
@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 #define __I_DIRTY_TIME_EXPIRED 12
 #define I_DIRTY_TIME_EXPIRED   (1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH            (1 << 13)
-#define I_OVL_INUSE                    (1 << 14)
+#define I_OVL_INUSE            (1 << 14)
+#define I_CREATING             (1 << 15)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
 extern void unlock_new_inode(struct inode *);
+extern void discard_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 extern void evict_inodes(struct super_block *sb);