Merge tag 'riscv-for-linus-4.20-mw3' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 16:17:39 +0000 (09:17 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 16:17:39 +0000 (09:17 -0700)
Pull RISC-V defconfig update from Palmer Dabbelt:
 "Sorry for the last minute patches, but it was suggested we try to push
  this in before rc1 to make it easier for people to keep their branch
  rebases sane"

* tag 'riscv-for-linus-4.20-mw3' of git://git.kernel.org/pub/scm/linux/kernel/git/palmer/riscv-linux:
  RISC-V: refresh defconfig

102 files changed:
Documentation/process/index.rst
Documentation/process/programming-language.rst [new file with mode: 0644]
MAINTAINERS
block/bio.c
drivers/auxdisplay/panel.c
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/fsi/fsi-sbefifo.c
drivers/gpu/drm/amd/display/dc/os_types.h
drivers/isdn/mISDN/l1oip_core.c
drivers/misc/vmw_vmci/vmci_queue_pair.c
drivers/nvme/target/io-cmd-file.c
drivers/target/iscsi/iscsi_target_util.c
drivers/target/target_core_file.c
drivers/usb/usbip/usbip_common.c
drivers/xen/pvcalls-back.c
fs/9p/vfs_addr.c
fs/9p/vfs_dir.c
fs/9p/xattr.c
fs/afs/Kconfig
fs/afs/Makefile
fs/afs/addr_list.c
fs/afs/afs.h
fs/afs/cache.c
fs/afs/callback.c
fs/afs/cell.c
fs/afs/cmservice.c
fs/afs/dir.c
fs/afs/dynroot.c
fs/afs/file.c
fs/afs/flock.c
fs/afs/fs_probe.c [new file with mode: 0644]
fs/afs/fsclient.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/mntpt.c
fs/afs/proc.c
fs/afs/protocol_yfs.h [new file with mode: 0644]
fs/afs/rotate.c
fs/afs/rxrpc.c
fs/afs/security.c
fs/afs/server.c
fs/afs/server_list.c
fs/afs/super.c
fs/afs/vl_list.c [new file with mode: 0644]
fs/afs/vl_probe.c [new file with mode: 0644]
fs/afs/vl_rotate.c [new file with mode: 0644]
fs/afs/vlclient.c
fs/afs/volume.c
fs/afs/write.c
fs/afs/xattr.c
fs/afs/yfsclient.c [new file with mode: 0644]
fs/block_dev.c
fs/ceph/file.c
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/misc.c
fs/cifs/smb2ops.c
fs/cifs/smbdirect.c
fs/cifs/transport.c
fs/direct-io.c
fs/dlm/lowcomms.c
fs/exofs/super.c
fs/ext4/ext4.h
fs/ext4/ialloc.c
fs/ext4/namei.c
fs/fuse/file.c
fs/iomap.c
fs/nfsd/vfs.c
fs/ntfs/namei.c
fs/ocfs2/cluster/tcp.c
fs/orangefs/inode.c
fs/read_write.c
fs/splice.c
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/compiler-intel.h
include/linux/compiler.h
include/linux/compiler_attributes.h [new file with mode: 0644]
include/linux/compiler_types.h
include/linux/uio.h
include/trace/events/afs.h
lib/iov_iter.c
mm/filemap.c
mm/page_io.c
net/9p/client.c
net/9p/trans_virtio.c
net/bluetooth/6lowpan.c
net/bluetooth/a2mp.c
net/bluetooth/smp.c
net/ceph/messenger.c
net/netfilter/ipvs/ip_vs_sync.c
net/smc/smc_clc.c
net/socket.c
net/sunrpc/svcsock.c
net/sunrpc/xprtsock.c
net/tipc/topsrv.c
net/tls/tls_device.c
net/tls/tls_sw.c
scripts/Makefile.extrawarn

index 757808526d9a8bbb2197dc54a532ea766e5c4be3..878ebfda7eeff378a2fee48e3b361aa6b3587896 100644 (file)
@@ -25,6 +25,7 @@ Below are the essential guides that every developer should read.
    code-of-conduct-interpretation
    development-process
    submitting-patches
+   programming-language
    coding-style
    maintainer-pgp-guide
    email-clients
diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
new file mode 100644 (file)
index 0000000..e5f5f06
--- /dev/null
@@ -0,0 +1,45 @@
+.. _programming_language:
+
+Programming Language
+====================
+
+The kernel is written in the C programming language [c-language]_.
+More precisely, the kernel is typically compiled with ``gcc`` [gcc]_
+under ``-std=gnu89`` [gcc-c-dialect-options]_: the GNU dialect of ISO C90
+(including some C99 features).
+
+This dialect contains many extensions to the language [gnu-extensions]_,
+and many of them are used within the kernel as a matter of course.
+
+There is some support for compiling the kernel with ``clang`` [clang]_
+and ``icc`` [icc]_ for several of the architectures, although at the time
+of writing it is not complete and requires third-party patches.
+
+Attributes
+----------
+
+Attributes [gcc-attribute-syntax]_ are one of the common extensions used
+throughout the kernel. They make it possible to introduce
+implementation-defined semantics to language entities (like variables,
+functions or types) without having to make significant syntactic changes
+to the language (e.g. adding a new keyword) [n2049]_.
+
+In some cases, attributes are optional (i.e. a compiler not supporting them
+should still produce proper code, even if it is slower or does not perform
+as many compile-time checks/diagnostics).
+
+The kernel defines pseudo-keywords (e.g. ``__pure``) instead of using
+the GNU attribute syntax directly (e.g. ``__attribute__((__pure__))``)
+in order to feature-detect which ones can be used and/or to shorten the code.
+
+Please refer to ``include/linux/compiler_attributes.h`` for more information.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+
index bb97067d05680285393832a6bc72113978105133..f4855974f325063c1e5fea88d9d755ce75ee3b66 100644 (file)
@@ -3737,6 +3737,11 @@ L:       platform-driver-x86@vger.kernel.org
 S:     Maintained
 F:     drivers/platform/x86/compal-laptop.c
 
+COMPILER ATTRIBUTES
+M:     Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+S:     Maintained
+F:     include/linux/compiler_attributes.h
+
 CONEXANT ACCESSRUNNER USB DRIVER
 L:     accessrunner-general@lists.sourceforge.net
 W:     http://accessrunner.sourceforge.net/
index bbfeb4ee2892fcbd9d51de450c41fab7dc466ce5..c27f77befbacc70a36e194f5f2427e20cde123af 100644 (file)
@@ -1256,7 +1256,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
        /*
         * success
         */
-       if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
+       if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
            (map_data && map_data->from_user)) {
                ret = bio_copy_from_iter(bio, iter);
                if (ret)
index 3b25a643058c9dde38511d646da8700e53c46837..21b9b2f2470a26d1f2d1c2d5eb4237fe3902af82 100644 (file)
@@ -155,10 +155,9 @@ struct logical_input {
                        int release_data;
                } std;
                struct {        /* valid when type == INPUT_TYPE_KBD */
-                       /* strings can be non null-terminated */
-                       char press_str[sizeof(void *) + sizeof(int)];
-                       char repeat_str[sizeof(void *) + sizeof(int)];
-                       char release_str[sizeof(void *) + sizeof(int)];
+                       char press_str[sizeof(void *) + sizeof(int)] __nonstring;
+                       char repeat_str[sizeof(void *) + sizeof(int)] __nonstring;
+                       char release_str[sizeof(void *) + sizeof(int)] __nonstring;
                } kbd;
        } u;
 };
index 55fd104f1ed4b91cf36b0d6cb1c8b9270443507a..fa8204214ac027adf660db960d4297d3f0cca7bb 100644 (file)
@@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
 
        /* THINK  if (signal_pending) return ... ? */
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
 
        if (sock == connection->data.socket) {
                rcu_read_lock();
index fc67fd853375c033a253753c02b8a3a8c23df4b2..61c392752fe4bbfeba5b1b404bf64cff7e9d8b00 100644 (file)
@@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
        struct msghdr msg = {
                .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
        };
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
        return sock_recvmsg(sock, &msg, msg.msg_flags);
 }
 
index abad6d15f956343ff86ad45d0f40ff4c7faae50b..e6273ae85246029358272406bfb00cca07226455 100644 (file)
@@ -269,7 +269,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
        struct iov_iter i;
        ssize_t bw;
 
-       iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+       iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
        file_start_write(file);
        bw = vfs_iter_write(file, &i, ppos, 0);
@@ -347,7 +347,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
        ssize_t len;
 
        rq_for_each_segment(bvec, rq, iter) {
-               iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+               iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
                len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
                if (len < 0)
                        return len;
@@ -388,7 +388,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
                b.bv_offset = 0;
                b.bv_len = bvec.bv_len;
 
-               iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+               iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
                len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
                if (len < 0) {
                        ret = len;
@@ -555,8 +555,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
        }
        atomic_set(&cmd->ref, 2);
 
-       iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-                     segments, blk_rq_bytes(rq));
+       iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq));
        iter.iov_offset = offset;
 
        cmd->iocb.ki_pos = pos;
index 14a51254c3db7f19c94cdab62e1d9e192c7ae02f..4d4d6129ff6627f1249cade3101d2927d7db5a25 100644 (file)
@@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
        u32 nbd_cmd_flags = 0;
        int sent = nsock->sent, skip = 0;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+       iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 
        switch (req_op(req)) {
        case REQ_OP_DISCARD:
@@ -564,8 +564,7 @@ send_pages:
 
                        dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
                                req, bvec.bv_len);
-                       iov_iter_bvec(&from, ITER_BVEC | WRITE,
-                                     &bvec, 1, bvec.bv_len);
+                       iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
                        if (skip) {
                                if (skip >= iov_iter_count(&from)) {
                                        skip -= iov_iter_count(&from);
@@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
        int ret = 0;
 
        reply.magic = 0;
-       iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+       iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
        if (result <= 0) {
                if (!nbd_disconnected(config))
@@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
                struct bio_vec bvec;
 
                rq_for_each_segment(bvec, req, iter) {
-                       iov_iter_bvec(&to, ITER_BVEC | READ,
-                                     &bvec, 1, bvec.bv_len);
+                       iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
                        result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
                        if (result <= 0) {
                                dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd)
        for (i = 0; i < config->num_connections; i++) {
                struct nbd_sock *nsock = config->socks[i];
 
-               iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+               iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
                mutex_lock(&nsock->tx_lock);
                ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
                if (ret <= 0)
index ae861342626e3516527167b18d96a7f5ac522169..d92f5b87c251e1248e20fd44708a4d80f759bc00 100644 (file)
@@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
        }
         ffdc_iov.iov_base = ffdc;
        ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
-        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+        iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
        cmd[0] = cpu_to_be32(2);
        cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
        rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
@@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
        rbytes = (*resp_len) * sizeof(__be32);
        resp_iov.iov_base = response;
        resp_iov.iov_len = rbytes;
-        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+        iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
 
        /* Perform the command */
        mutex_lock(&sbefifo->lock);
index a407892905af29661a70ad75a6c76c5d502163c3..c0d9f332baedc10d71e701b5f19e254d8b7af40d 100644 (file)
@@ -40,8 +40,6 @@
 #define LITTLEENDIAN_CPU
 #endif
 
-#undef READ
-#undef WRITE
 #undef FRAME_SIZE
 
 #define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
index b05022f94f18c22ff2c72fa71cf11984077152b7..072bb5e36c184e6945624ec07dc27595fe8e7a7d 100644 (file)
@@ -718,8 +718,7 @@ l1oip_socket_thread(void *data)
                printk(KERN_DEBUG "%s: socket created and open\n",
                       __func__);
        while (!signal_pending(current)) {
-               iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1,
-                               recvbuf_size);
+               iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size);
                recvlen = sock_recvmsg(socket, &msg, 0);
                if (recvlen > 0) {
                        l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
index bd52f29b4a4e273eb6f69a56c34a0789279d2226..264f4ed8eef26e8f839d121d905497c3c75de67c 100644 (file)
@@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
 
        qp_lock(qpair);
 
@@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
        qp_lock(qpair);
 
@@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
        if (!qpair || !buf)
                return VMCI_ERROR_INVALID_ARGS;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+       iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
        qp_lock(qpair);
 
index 39d972e2595f0dc764f2a5ac37d589422068c139..01feebec29ea2d671d2d110a8a4df7148cc2f616 100644 (file)
@@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
                rw = READ;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+       iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
 
        iocb->ki_pos = pos;
        iocb->ki_filp = req->ns->file;
index 1227872227dc446b70b492600f52214c256098ad..36b742932c724c8e6f80ca29cb36d0d20208a9a7 100644 (file)
@@ -1245,8 +1245,7 @@ static int iscsit_do_rx_data(
                return -1;
 
        memset(&msg, 0, sizeof(struct msghdr));
-       iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
-                     count->iov, count->iov_count, data);
+       iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data);
 
        while (msg_data_left(&msg)) {
                rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL);
@@ -1302,8 +1301,7 @@ int tx_data(
 
        memset(&msg, 0, sizeof(struct msghdr));
 
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-                     iov, iov_count, data);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data);
 
        while (msg_data_left(&msg)) {
                int tx_loop = sock_sendmsg(conn->sock, &msg);
index 16751ae55d7b6f64b2bc9551f588f8b0e5d06cb8..49b110d1b972b671b17f0e1e1dab588628155bc8 100644 (file)
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                len += sg->length;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+       iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
 
        aio_cmd->cmd = cmd;
        aio_cmd->len = len;
@@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
                len += sg->length;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
+       iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
        if (is_write)
                ret = vfs_iter_write(fd, &iter, &pos, 0);
        else
@@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd)
                len += se_dev->dev_attrib.block_size;
        }
 
-       iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+       iov_iter_bvec(&iter, READ, bvec, nolb, len);
        ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
 
        kfree(bvec);
index 9756752c0681f99c2acb1aaf1213ea23e03055f9..45da3e01c7b03ae04b3c889e73d140baa2c9dc7f 100644 (file)
@@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
        if (!sock || !buf || !size)
                return -EINVAL;
 
-       iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
+       iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 
        usbip_dbg_xmit("enter\n");
 
index b1092fbefa6309d2535b17b78979b6f3fa9b2b42..2e5d845b5091478252dfb1ed17395cf9c5ce870a 100644 (file)
@@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque)
        if (masked_prod < masked_cons) {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = wanted;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
+               iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
        } else {
                vec[0].iov_base = data->in + masked_prod;
                vec[0].iov_len = array_size - masked_prod;
                vec[1].iov_base = data->in;
                vec[1].iov_len = wanted - vec[0].iov_len;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
+               iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
        }
 
        atomic_set(&map->read, 0);
@@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
        if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = size;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
+               iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
        } else {
                vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
                vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
                vec[1].iov_base = data->out;
                vec[1].iov_len = size - vec[0].iov_len;
-               iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
+               iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
        }
 
        atomic_set(&map->write, 0);
index e1cbdfdb7c684fd24fdb6f25ee03f4e253e9ef58..0bcbcc20f76954e4e8e3d959628e87f3915b3ce3 100644 (file)
@@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
        if (retval == 0)
                return retval;
 
-       iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
+       iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE);
 
        retval = p9_client_read(fid, page_offset(page), &to, &err);
        if (err) {
@@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page)
        bvec.bv_page = page;
        bvec.bv_offset = 0;
        bvec.bv_len = len;
-       iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
+       iov_iter_bvec(&from, WRITE, &bvec, 1, len);
 
        /* We should have writeback_fid always set */
        BUG_ON(!v9inode->writeback_fid);
index cb6c4031af552b010c2e8ff5469b9088c769b3b7..00745147329dc9b6876404011378a8a29ef1791c 100644 (file)
@@ -123,7 +123,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
                if (rdir->tail == rdir->head) {
                        struct iov_iter to;
                        int n;
-                       iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+                       iov_iter_kvec(&to, READ, &kvec, 1, buflen);
                        n = p9_client_read(file->private_data, ctx->pos, &to,
                                           &err);
                        if (err)
index 352abc39e891a1468d3576cc199fa13b089f32f7..ac8ff8ca4c115fa0ae52ec4c8fcbe47499780d94 100644 (file)
@@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
        struct iov_iter to;
        int err;
 
-       iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
+       iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
 
        attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
        if (IS_ERR(attr_fid)) {
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
        struct iov_iter from;
        int retval, err;
 
-       iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
+       iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
 
        p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
                 name, value_len, flags);
index ebba3b18e5da6dd5ff7d27cd248218937ab19a0f..701aaa9b18994a1e789adad6e7544817c320bc2b 100644 (file)
@@ -27,3 +27,15 @@ config AFS_FSCACHE
        help
          Say Y here if you want AFS data to be cached locally on disk through
          the generic filesystem cache manager
+
+config AFS_DEBUG_CURSOR
+       bool "AFS server cursor debugging"
+       depends on AFS_FS
+       help
+         Say Y here to cause the contents of a server cursor to be dumped to
+         the dmesg log if the server rotation algorithm fails to successfully
+         contact a server.
+
+         See <file:Documentation/filesystems/afs.txt> for more information.
+
+         If unsure, say N.
index 546874057bd3594bd0997d37b8801fd866461077..0738e2bf51936ed1e45ee84a0ed32a61e96c35f6 100644 (file)
@@ -17,6 +17,7 @@ kafs-y := \
        file.o \
        flock.o \
        fsclient.o \
+       fs_probe.o \
        inode.o \
        main.o \
        misc.o \
@@ -29,9 +30,13 @@ kafs-y := \
        super.o \
        netdevices.o \
        vlclient.o \
+       vl_list.o \
+       vl_probe.o \
+       vl_rotate.o \
        volume.o \
        write.o \
-       xattr.o
+       xattr.o \
+       yfsclient.o
 
 kafs-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_AFS_FS)  := kafs.o
index 55a756c60746ca7924625b99b0c2a42d429119fa..967db336d11ae016324f4f15d7cbd33b809045c2 100644 (file)
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
 /*
  * Parse a text string consisting of delimited addresses.
  */
-struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
-                                          char delim,
-                                          unsigned short service,
-                                          unsigned short port)
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+                                              const char *text, size_t len,
+                                              char delim,
+                                              unsigned short service,
+                                              unsigned short port)
 {
+       struct afs_vlserver_list *vllist;
        struct afs_addr_list *alist;
        const char *p, *end = text + len;
+       const char *problem;
        unsigned int nr = 0;
+       int ret = -ENOMEM;
 
        _enter("%*.*s,%c", (int)len, (int)len, text, delim);
 
-       if (!len)
+       if (!len) {
+               _leave(" = -EDESTADDRREQ [empty]");
                return ERR_PTR(-EDESTADDRREQ);
+       }
 
        if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
                delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
        /* Count the addresses */
        p = text;
        do {
-               if (!*p)
-                       return ERR_PTR(-EINVAL);
+               if (!*p) {
+                       problem = "nul";
+                       goto inval;
+               }
                if (*p == delim)
                        continue;
                nr++;
                if (*p == '[') {
                        p++;
-                       if (p == end)
-                               return ERR_PTR(-EINVAL);
+                       if (p == end) {
+                               problem = "brace1";
+                               goto inval;
+                       }
                        p = memchr(p, ']', end - p);
-                       if (!p)
-                               return ERR_PTR(-EINVAL);
+                       if (!p) {
+                               problem = "brace2";
+                               goto inval;
+                       }
                        p++;
                        if (p >= end)
                                break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
        _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
 
-       alist = afs_alloc_addrlist(nr, service, port);
-       if (!alist)
+       vllist = afs_alloc_vlserver_list(1);
+       if (!vllist)
                return ERR_PTR(-ENOMEM);
 
+       vllist->nr_servers = 1;
+       vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+       if (!vllist->servers[0].server)
+               goto error_vl;
+
+       alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+       if (!alist)
+               goto error;
+
        /* Extract the addresses */
        p = text;
        do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
                                        break;
                }
 
-               if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
+               if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
                        family = AF_INET;
-               else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
+               } else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
                        family = AF_INET6;
-               else
+               } else {
+                       problem = "family";
                        goto bad_address;
+               }
 
-               if (stop != q)
+               p = q;
+               if (stop != p) {
+                       problem = "nostop";
                        goto bad_address;
+               }
 
-               p = q;
                if (q < end && *q == ']')
                        p++;
 
@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
                                /* Port number specification "+1234" */
                                xport = 0;
                                p++;
-                               if (p >= end || !isdigit(*p))
+                               if (p >= end || !isdigit(*p)) {
+                                       problem = "port";
                                        goto bad_address;
+                               }
                                do {
                                        xport *= 10;
                                        xport += *p - '0';
-                                       if (xport > 65535)
+                                       if (xport > 65535) {
+                                               problem = "pval";
                                                goto bad_address;
+                                       }
                                        p++;
                                } while (p < end && isdigit(*p));
                        } else if (*p == delim) {
                                p++;
                        } else {
+                               problem = "weird";
                                goto bad_address;
                        }
                }
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
        } while (p < end);
 
+       rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
        _leave(" = [nr %u]", alist->nr_addrs);
-       return alist;
+       return vllist;
 
-bad_address:
-       kfree(alist);
+inval:
+       _leave(" = -EINVAL [%s %zu %*.*s]",
+              problem, p - text, (int)len, (int)len, text);
        return ERR_PTR(-EINVAL);
+bad_address:
+       _leave(" = -EINVAL [%s %zu %*.*s]",
+              problem, p - text, (int)len, (int)len, text);
+       ret = -EINVAL;
+error:
+       afs_put_addrlist(alist);
+error_vl:
+       afs_put_vlserverlist(net, vllist);
+       return ERR_PTR(ret);
 }
 
 /*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
 /*
  * Perform a DNS query for VL servers and build up an address list.
  */
-struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 {
-       struct afs_addr_list *alist;
-       char *vllist = NULL;
+       struct afs_vlserver_list *vllist;
+       char *result = NULL;
        int ret;
 
        _enter("%s", cell->name);
 
-       ret = dns_query("afsdb", cell->name, cell->name_len,
-                       "", &vllist, _expiry);
-       if (ret < 0)
+       ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
+                       &result, _expiry);
+       if (ret < 0) {
+               _leave(" = %d [dns]", ret);
                return ERR_PTR(ret);
-
-       alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
-                                    VL_SERVICE, AFS_VL_PORT);
-       if (IS_ERR(alist)) {
-               kfree(vllist);
-               if (alist != ERR_PTR(-ENOMEM))
-                       pr_err("Failed to parse DNS data\n");
-               return alist;
        }
 
-       kfree(vllist);
-       return alist;
+       if (*_expiry == 0)
+               *_expiry = ktime_get_real_seconds() + 60;
+
+       if (ret > 1 && result[0] == 0)
+               vllist = afs_extract_vlserver_list(cell, result, ret);
+       else
+               vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+                                             VL_SERVICE, AFS_VL_PORT);
+       kfree(result);
+       if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+               pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+       return vllist;
 }
 
 /*
@@ -258,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
                        sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
        srx = &alist->addrs[i];
+       srx->srx_family = AF_RXRPC;
+       srx->transport_type = SOCK_DGRAM;
        srx->transport_len = sizeof(srx->transport.sin);
        srx->transport.sin.sin_family = AF_INET;
        srx->transport.sin.sin_port = htons(port);
@@ -296,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
                        sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
        srx = &alist->addrs[i];
+       srx->srx_family = AF_RXRPC;
+       srx->transport_type = SOCK_DGRAM;
        srx->transport_len = sizeof(srx->transport.sin6);
        srx->transport.sin6.sin6_family = AF_INET6;
        srx->transport.sin6.sin6_port = htons(port);
@@ -308,25 +357,33 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
  */
 bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 {
-       _enter("%hu+%hd", ac->start, (short)ac->index);
+       unsigned long set, failed;
+       int index;
 
        if (!ac->alist)
                return false;
 
-       if (ac->begun) {
-               ac->index++;
-               if (ac->index == ac->alist->nr_addrs)
-                       ac->index = 0;
+       set = ac->alist->responded;
+       failed = ac->alist->failed;
+       _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
 
-               if (ac->index == ac->start) {
-                       ac->error = -EDESTADDRREQ;
-                       return false;
-               }
-       }
+       ac->nr_iterations++;
+
+       set &= ~(failed | ac->tried);
+
+       if (!set)
+               return false;
 
-       ac->begun = true;
+       index = READ_ONCE(ac->alist->preferred);
+       if (test_bit(index, &set))
+               goto selected;
+
+       index = __ffs(set);
+
+selected:
+       ac->index = index;
+       set_bit(index, &ac->tried);
        ac->responded = false;
-       ac->addr = &ac->alist->addrs[ac->index];
        return true;
 }
 
@@ -339,53 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
 
        alist = ac->alist;
        if (alist) {
-               if (ac->responded && ac->index != ac->start)
-                       WRITE_ONCE(alist->index, ac->index);
+               if (ac->responded &&
+                   ac->index != alist->preferred &&
+                   test_bit(ac->alist->preferred, &ac->tried))
+                       WRITE_ONCE(alist->preferred, ac->index);
                afs_put_addrlist(alist);
+               ac->alist = NULL;
        }
 
-       ac->addr = NULL;
-       ac->alist = NULL;
-       ac->begun = false;
        return ac->error;
 }
-
-/*
- * Set the address cursor for iterating over VL servers.
- */
-int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
-{
-       struct afs_addr_list *alist;
-       int ret;
-
-       if (!rcu_access_pointer(cell->vl_addrs)) {
-               ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
-                                 TASK_INTERRUPTIBLE);
-               if (ret < 0)
-                       return ret;
-
-               if (!rcu_access_pointer(cell->vl_addrs) &&
-                   ktime_get_real_seconds() < cell->dns_expiry)
-                       return cell->error;
-       }
-
-       read_lock(&cell->vl_addrs_lock);
-       alist = rcu_dereference_protected(cell->vl_addrs,
-                                         lockdep_is_held(&cell->vl_addrs_lock));
-       if (alist->nr_addrs > 0)
-               afs_get_addrlist(alist);
-       else
-               alist = NULL;
-       read_unlock(&cell->vl_addrs_lock);
-
-       if (!alist)
-               return -EDESTADDRREQ;
-
-       ac->alist = alist;
-       ac->addr = NULL;
-       ac->start = READ_ONCE(alist->index);
-       ac->index = ac->start;
-       ac->error = 0;
-       ac->begun = false;
-       return 0;
-}
index b4ff1f7ae4ab048a345bdbfae6ea895e31299abb..d12ffb457e4745809460707c02176d2e4a657e4b 100644 (file)
@@ -23,9 +23,9 @@
 #define AFSPATHMAX             1024    /* Maximum length of a pathname plus NUL */
 #define AFSOPAQUEMAX           1024    /* Maximum length of an opaque field */
 
-typedef unsigned                       afs_volid_t;
-typedef unsigned                       afs_vnodeid_t;
-typedef unsigned long long             afs_dataversion_t;
+typedef u64                    afs_volid_t;
+typedef u64                    afs_vnodeid_t;
+typedef u64                    afs_dataversion_t;
 
 typedef enum {
        AFSVL_RWVOL,                    /* read/write volume */
@@ -52,8 +52,9 @@ typedef enum {
  */
 struct afs_fid {
        afs_volid_t     vid;            /* volume ID */
-       afs_vnodeid_t   vnode;          /* file index within volume */
-       unsigned        unique;         /* unique ID number (file index version) */
+       afs_vnodeid_t   vnode;          /* Lower 64-bits of file index within volume */
+       u32             vnode_hi;       /* Upper 32-bits of file index */
+       u32             unique;         /* unique ID number (file index version) */
 };
 
 /*
@@ -67,14 +68,14 @@ typedef enum {
 } afs_callback_type_t;
 
 struct afs_callback {
+       time64_t                expires_at;     /* Time at which expires */
        unsigned                version;        /* Callback version */
-       unsigned                expiry;         /* Time at which expires */
        afs_callback_type_t     type;           /* Type of callback */
 };
 
 struct afs_callback_break {
        struct afs_fid          fid;            /* File identifier */
-       struct afs_callback     cb;             /* Callback details */
+       //struct afs_callback   cb;             /* Callback details */
 };
 
 #define AFSCBMAX 50    /* maximum callbacks transferred per bulk op */
@@ -129,19 +130,18 @@ typedef u32 afs_access_t;
 struct afs_file_status {
        u64                     size;           /* file size */
        afs_dataversion_t       data_version;   /* current data version */
-       time_t                  mtime_client;   /* last time client changed data */
-       time_t                  mtime_server;   /* last time server changed data */
-       unsigned                abort_code;     /* Abort if bulk-fetching this failed */
-
-       afs_file_type_t         type;           /* file type */
-       unsigned                nlink;          /* link count */
-       u32                     author;         /* author ID */
-       u32                     owner;          /* owner ID */
-       u32                     group;          /* group ID */
+       struct timespec64       mtime_client;   /* Last time client changed data */
+       struct timespec64       mtime_server;   /* Last time server changed data */
+       s64                     author;         /* author ID */
+       s64                     owner;          /* owner ID */
+       s64                     group;          /* group ID */
        afs_access_t            caller_access;  /* access rights for authenticated caller */
        afs_access_t            anon_access;    /* access rights for unauthenticated caller */
        umode_t                 mode;           /* UNIX mode */
+       afs_file_type_t         type;           /* file type */
+       u32                     nlink;          /* link count */
        s32                     lock_count;     /* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
+       u32                     abort_code;     /* Abort if bulk-fetching this failed */
 };
 
 /*
@@ -158,25 +158,27 @@ struct afs_file_status {
  * AFS volume synchronisation information
  */
 struct afs_volsync {
-       time_t                  creation;       /* volume creation time */
+       time64_t                creation;       /* volume creation time */
 };
 
 /*
  * AFS volume status record
  */
 struct afs_volume_status {
-       u32                     vid;            /* volume ID */
-       u32                     parent_id;      /* parent volume ID */
+       afs_volid_t             vid;            /* volume ID */
+       afs_volid_t             parent_id;      /* parent volume ID */
        u8                      online;         /* true if volume currently online and available */
        u8                      in_service;     /* true if volume currently in service */
        u8                      blessed;        /* same as in_service */
        u8                      needs_salvage;  /* true if consistency checking required */
        u32                     type;           /* volume type (afs_voltype_t) */
-       u32                     min_quota;      /* minimum space set aside (blocks) */
-       u32                     max_quota;      /* maximum space this volume may occupy (blocks) */
-       u32                     blocks_in_use;  /* space this volume currently occupies (blocks) */
-       u32                     part_blocks_avail; /* space available in volume's partition */
-       u32                     part_max_blocks; /* size of volume's partition */
+       u64                     min_quota;      /* minimum space set aside (blocks) */
+       u64                     max_quota;      /* maximum space this volume may occupy (blocks) */
+       u64                     blocks_in_use;  /* space this volume currently occupies (blocks) */
+       u64                     part_blocks_avail; /* space available in volume's partition */
+       u64                     part_max_blocks; /* size of volume's partition */
+       s64                     vol_copy_date;
+       s64                     vol_backup_date;
 };
 
 #define AFS_BLOCK_SIZE 1024
index b1c31ec4523a897b0142ba8699ff48ac10f2d801..f6d0a21e8052f066482def18a8740c32032b731d 100644 (file)
@@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
        struct afs_vnode *vnode = cookie_netfs_data;
        struct afs_vnode_cache_aux aux;
 
-       _enter("{%x,%x,%llx},%p,%u",
+       _enter("{%llx,%x,%llx},%p,%u",
               vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
               buffer, buflen);
 
index 5f261fbf2182b22a47fc93b7c6fee35f113e0097..1c7955f5cdaf2e776026390f615806f3e6ce535c 100644 (file)
@@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server)
 /*
  * actually break a callback
  */
-void afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode)
 {
        _enter("");
 
-       write_seqlock(&vnode->cb_lock);
-
        clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
        if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
                vnode->cb_break++;
@@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode)
                        afs_lock_may_be_available(vnode);
                spin_unlock(&vnode->lock);
        }
+}
 
+void afs_break_callback(struct afs_vnode *vnode)
+{
+       write_seqlock(&vnode->cb_lock);
+       __afs_break_callback(vnode);
        write_sequnlock(&vnode->cb_lock);
 }
 
@@ -310,14 +313,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
        /* TODO: Sort the callback break list by volume ID */
 
        for (; count > 0; callbacks++, count--) {
-               _debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
+               _debug("- Fid { vl=%08llx n=%llu u=%u }",
                       callbacks->fid.vid,
                       callbacks->fid.vnode,
-                      callbacks->fid.unique,
-                      callbacks->cb.version,
-                      callbacks->cb.expiry,
-                      callbacks->cb.type
-                      );
+                      callbacks->fid.unique);
                afs_break_one_callback(server, &callbacks->fid);
        }
 
index 6127f0fcd62c4e376bd2554c1003aedb40aab471..cf445dbd5f2e05d4c716dadb3123fb397537d4e6 100644 (file)
@@ -20,6 +20,8 @@
 #include "internal.h"
 
 static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
 
 static void afs_manage_cell(struct work_struct *);
 
@@ -119,7 +121,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
  */
 static struct afs_cell *afs_alloc_cell(struct afs_net *net,
                                       const char *name, unsigned int namelen,
-                                      const char *vllist)
+                                      const char *addresses)
 {
        struct afs_cell *cell;
        int i, ret;
@@ -134,7 +136,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
        if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
                return ERR_PTR(-EINVAL);
 
-       _enter("%*.*s,%s", namelen, namelen, name, vllist);
+       _enter("%*.*s,%s", namelen, namelen, name, addresses);
 
        cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
        if (!cell) {
@@ -153,23 +155,26 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
                       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
        INIT_LIST_HEAD(&cell->proc_volumes);
        rwlock_init(&cell->proc_lock);
-       rwlock_init(&cell->vl_addrs_lock);
+       rwlock_init(&cell->vl_servers_lock);
 
        /* Fill in the VL server list if we were given a list of addresses to
         * use.
         */
-       if (vllist) {
-               struct afs_addr_list *alist;
-
-               alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
-                                            VL_SERVICE, AFS_VL_PORT);
-               if (IS_ERR(alist)) {
-                       ret = PTR_ERR(alist);
+       if (addresses) {
+               struct afs_vlserver_list *vllist;
+
+               vllist = afs_parse_text_addrs(net,
+                                             addresses, strlen(addresses), ':',
+                                             VL_SERVICE, AFS_VL_PORT);
+               if (IS_ERR(vllist)) {
+                       ret = PTR_ERR(vllist);
                        goto parse_failed;
                }
 
-               rcu_assign_pointer(cell->vl_addrs, alist);
+               rcu_assign_pointer(cell->vl_servers, vllist);
                cell->dns_expiry = TIME64_MAX;
+       } else {
+               cell->dns_expiry = ktime_get_real_seconds();
        }
 
        _leave(" = %p", cell);
@@ -356,26 +361,40 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
  */
 static void afs_update_cell(struct afs_cell *cell)
 {
-       struct afs_addr_list *alist, *old;
-       time64_t now, expiry;
+       struct afs_vlserver_list *vllist, *old;
+       unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+       unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+       time64_t now, expiry = 0;
 
        _enter("%s", cell->name);
 
-       alist = afs_dns_query(cell, &expiry);
-       if (IS_ERR(alist)) {
-               switch (PTR_ERR(alist)) {
+       vllist = afs_dns_query(cell, &expiry);
+
+       now = ktime_get_real_seconds();
+       if (min_ttl > max_ttl)
+               max_ttl = min_ttl;
+       if (expiry < now + min_ttl)
+               expiry = now + min_ttl;
+       else if (expiry > now + max_ttl)
+               expiry = now + max_ttl;
+
+       if (IS_ERR(vllist)) {
+               switch (PTR_ERR(vllist)) {
                case -ENODATA:
-                       /* The DNS said that the cell does not exist */
+               case -EDESTADDRREQ:
+                       /* The DNS said that the cell does not exist or there
+                        * weren't any addresses to be had.
+                        */
                        set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
                        clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-                       cell->dns_expiry = ktime_get_real_seconds() + 61;
+                       cell->dns_expiry = expiry;
                        break;
 
                case -EAGAIN:
                case -ECONNREFUSED:
                default:
                        set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-                       cell->dns_expiry = ktime_get_real_seconds() + 10;
+                       cell->dns_expiry = now + 10;
                        break;
                }
 
@@ -387,12 +406,12 @@ static void afs_update_cell(struct afs_cell *cell)
                /* Exclusion on changing vl_addrs is achieved by a
                 * non-reentrant work item.
                 */
-               old = rcu_dereference_protected(cell->vl_addrs, true);
-               rcu_assign_pointer(cell->vl_addrs, alist);
+               old = rcu_dereference_protected(cell->vl_servers, true);
+               rcu_assign_pointer(cell->vl_servers, vllist);
                cell->dns_expiry = expiry;
 
                if (old)
-                       afs_put_addrlist(old);
+                       afs_put_vlserverlist(cell->net, old);
        }
 
        if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +433,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
        ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
-       afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
+       afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
        key_put(cell->anonymous_key);
        kfree(cell);
 
index 9e51d6fe7e8f975f34f877217a28a8e99bcfa5e4..8ee5972893ed5a75583bfb2821a42636403ee086 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ip.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 static int afs_deliver_cb_init_call_back_state(struct afs_call *);
 static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *);
 static void SRXAFSCB_ProbeUuid(struct work_struct *);
 static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
 
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
+
 #define CM_NAME(name) \
        const char afs_SRXCB##name##_name[] __tracepoint_string =       \
                "CB." #name
@@ -100,13 +103,26 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
        .work           = SRXAFSCB_TellMeAboutYourself,
 };
 
+/*
+ * YFS CB.CallBack operation type
+ */
+static CM_NAME(YFS_CallBack);
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+       .name           = afs_SRXCBYFS_CallBack_name,
+       .deliver        = afs_deliver_yfs_cb_callback,
+       .destructor     = afs_cm_destructor,
+       .work           = SRXAFSCB_CallBack,
+};
+
 /*
  * route an incoming cache manager call
  * - return T if supported, F if not
  */
 bool afs_cm_incoming_call(struct afs_call *call)
 {
-       _enter("{CB.OP %u}", call->operation_ID);
+       _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
+
+       call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
 
        switch (call->operation_ID) {
        case CBCallBack:
@@ -127,11 +143,101 @@ bool afs_cm_incoming_call(struct afs_call *call)
        case CBTellMeAboutYourself:
                call->type = &afs_SRXCBTellMeAboutYourself;
                return true;
+       case YFSCBCallBack:
+               if (call->service_id != YFS_CM_SERVICE)
+                       return false;
+               call->type = &afs_SRXYFSCB_CallBack;
+               return true;
        default:
                return false;
        }
 }
 
+/*
+ * Record a probe to the cache manager from a server.
+ */
+static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
+{
+       _enter("");
+
+       if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
+           !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+               if (server->cm_epoch == call->epoch)
+                       return 0;
+
+               if (!server->probe.said_rebooted) {
+                       pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
+                       server->probe.said_rebooted = true;
+               }
+       }
+
+       spin_lock(&server->probe_lock);
+
+       if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+               server->cm_epoch = call->epoch;
+               server->probe.cm_epoch = call->epoch;
+               goto out;
+       }
+
+       if (server->probe.cm_probed &&
+           call->epoch != server->probe.cm_epoch &&
+           !server->probe.said_inconsistent) {
+               pr_notice("kAFS: FS endpoints inconsistent %pU\n",
+                         &server->uuid);
+               server->probe.said_inconsistent = true;
+       }
+
+       if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
+               server->probe.cm_epoch = server->cm_epoch;
+
+out:
+       server->probe.cm_probed = true;
+       spin_unlock(&server->probe_lock);
+       return 0;
+}
+
+/*
+ * Find the server record by peer address and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+       struct sockaddr_rxrpc srx;
+       struct afs_server *server;
+
+       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+
+       server = afs_find_server(call->net, &srx);
+       if (!server) {
+               trace_afs_cm_no_server(call, &srx);
+               return 0;
+       }
+
+       call->cm_server = server;
+       return afs_record_cm_probe(call, server);
+}
+
+/*
+ * Find the server record by server UUID and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+                                     struct afs_uuid *uuid)
+{
+       struct afs_server *server;
+
+       rcu_read_lock();
+       server = afs_find_server_by_uuid(call->net, call->request);
+       rcu_read_unlock();
+       if (!server) {
+               trace_afs_cm_no_server_u(call, call->request);
+               return 0;
+       }
+
+       call->cm_server = server;
+       return afs_record_cm_probe(call, server);
+}
+
 /*
  * Clean up a cache manager call.
  */
@@ -168,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 static int afs_deliver_cb_callback(struct afs_call *call)
 {
        struct afs_callback_break *cb;
-       struct sockaddr_rxrpc srx;
        __be32 *bp;
        int ret, loop;
 
@@ -176,32 +281,32 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the FID array and its count in two steps */
        case 1:
                _debug("extract FID count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("FID count: %u", call->count);
                if (call->count > AFSCBMAX)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_fid_count);
 
                call->buffer = kmalloc(array3_size(call->count, 3, 4),
                                       GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
-               call->offset = 0;
+               afs_extract_to_buf(call, call->count * 3 * 4);
                call->unmarshall++;
 
        case 2:
                _debug("extract FID array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count * 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -218,59 +323,46 @@ static int afs_deliver_cb_callback(struct afs_call *call)
                        cb->fid.vid     = ntohl(*bp++);
                        cb->fid.vnode   = ntohl(*bp++);
                        cb->fid.unique  = ntohl(*bp++);
-                       cb->cb.type     = AFSCM_CB_UNTYPED;
                }
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* extract the callback array and its count in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count2 = ntohl(call->tmp);
                _debug("CB count: %u", call->count2);
                if (call->count2 != call->count && call->count2 != 0)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_count);
+               call->_iter = &call->iter;
+               iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4);
                call->unmarshall++;
 
        case 4:
-               _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count2 * 3 * 4, false);
+               _debug("extract discard %zu/%u",
+                      iov_iter_count(&call->iter), call->count2 * 3 * 4);
+
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
-               _debug("unmarshall CB array");
-               cb = call->request;
-               bp = call->buffer;
-               for (loop = call->count2; loop > 0; loop--, cb++) {
-                       cb->cb.version  = ntohl(*bp++);
-                       cb->cb.expiry   = ntohl(*bp++);
-                       cb->cb.type     = ntohl(*bp++);
-               }
-
-               call->offset = 0;
                call->unmarshall++;
        case 5:
                break;
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-       call->cm_server = afs_find_server(call->net, &srx);
-       if (!call->cm_server)
-               trace_afs_cm_no_server(call, &srx);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -294,24 +386,18 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
  */
 static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
-       struct sockaddr_rxrpc srx;
        int ret;
 
        _enter("");
 
-       rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       call->cm_server = afs_find_server(call->net, &srx);
-       if (!call->cm_server)
-               trace_afs_cm_no_server(call, &srx);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -330,16 +416,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -362,7 +447,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -370,17 +454,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
 
        /* we'll need the file server record as that tells us which set of
         * vnodes to operate upon */
-       rcu_read_lock();
-       call->cm_server = afs_find_server_by_uuid(call->net, call->request);
-       rcu_read_unlock();
-       if (!call->cm_server)
-               trace_afs_cm_no_server_u(call, call->request);
-
-       return afs_queue_call_work(call);
+       return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -405,14 +483,14 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
-
-       return afs_queue_call_work(call);
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -453,16 +531,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
                if (!call->buffer)
                        return -ENOMEM;
+               afs_extract_to_buf(call, 11 * sizeof(__be32));
                call->unmarshall++;
 
        case 1:
                _debug("extract UUID");
-               ret = afs_extract_data(call, call->buffer,
-                                      11 * sizeof(__be32), false);
+               ret = afs_extract_data(call, false);
                switch (ret) {
                case 0:         break;
                case -EAGAIN:   return 0;
@@ -485,7 +562,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
                for (loop = 0; loop < 6; loop++)
                        r->node[loop] = ntohl(b[loop + 5]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 2:
@@ -493,9 +569,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
        }
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
-
-       return afs_queue_call_work(call);
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -570,12 +645,88 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
        _enter("");
 
-       ret = afs_extract_data(call, NULL, 0, false);
+       afs_extract_discard(call, 0);
+       ret = afs_extract_data(call, false);
        if (ret < 0)
                return ret;
 
        if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-               return -EIO;
+               return afs_io_error(call, afs_io_error_cm_reply);
+       return afs_find_cm_server_by_peer(call);
+}
+
+/*
+ * deliver request data to a YFS CB.CallBack call
+ *
+ * Delivery is resumable: call->unmarshall records the stage that has been
+ * reached and each case falls through into the next one once its data has
+ * been extracted.
+ *
+ *  0: Set up extraction with afs_extract_to_tmp(); the FID count is then read
+ *     back from call->tmp.
+ *  1: Read the FID count, reject anything above YFSCBMAX with -EBADMSG and
+ *     allocate call->buffer to hold that many struct yfs_xdr_YFSFid.
+ *  2: Read the FID array and convert it into a newly allocated array of
+ *     struct afs_callback_break that is left in call->request.
+ *  3: Nothing further to extract.
+ *
+ * A negative return from afs_extract_data() is passed straight back to the
+ * caller and a failed allocation gives -ENOMEM.  Otherwise the result is that
+ * of afs_io_error() if afs_check_call_state() returns false, or else that of
+ * afs_find_cm_server_by_peer().
+ *
+ * NOTE(review): call->buffer and call->request are not freed in this
+ * function; presumably the call's cleanup path releases them - confirm.
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+       struct afs_callback_break *cb;
+       struct yfs_xdr_YFSFid *bp;
+       size_t size;
+       int ret, loop;
+
+       _enter("{%u}", call->unmarshall);
+
+       switch (call->unmarshall) {
+       case 0:
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+               /* extract the FID array and its count in two steps */
+       case 1:
+               _debug("extract FID count");
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
+
+               call->count = ntohl(call->tmp); /* Convert from network byte order. */
+               _debug("FID count: %u", call->count);
+               if (call->count > YFSCBMAX)
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_cb_fid_count);
+
+               size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+               call->buffer = kmalloc(size, GFP_KERNEL);
+               if (!call->buffer)
+                       return -ENOMEM;
+               afs_extract_to_buf(call, size);
+               call->unmarshall++;
+
+       case 2:
+               _debug("extract FID array");
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
+
+               _debug("unmarshall FID array");
+               call->request = kcalloc(call->count,
+                                       sizeof(struct afs_callback_break),
+                                       GFP_KERNEL);
+               if (!call->request)
+                       return -ENOMEM;
+
+               cb = call->request; /* Destination: one entry per FID. */
+               bp = call->buffer; /* Source: the FID array as received. */
+               for (loop = call->count; loop > 0; loop--, cb++) {
+                       cb->fid.vid     = xdr_to_u64(bp->volume);
+                       cb->fid.vnode   = xdr_to_u64(bp->vnode.lo);
+                       cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+                       cb->fid.unique  = ntohl(bp->vnode.unique);
+                       bp++;
+               }
+
+               afs_extract_to_tmp(call);
+               call->unmarshall++;
+
+       case 3:
+               break;
+       }
+
+       if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+               return afs_io_error(call, afs_io_error_cm_reply);
 
-       return afs_queue_call_work(call);
+       /* We'll need the file server record as that tells us which set of
+        * vnodes to operate upon.
+        */
+       return afs_find_cm_server_by_peer(call);
 }
index 855bf2b79fed4117559f6f011cacd3b43f74b927..43dea3b00c29b9dc93b6416e8e869b4f8d726c28 100644 (file)
@@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
                               ntohs(dbuf->blocks[tmp].hdr.magic));
                        trace_afs_dir_check_failed(dvnode, off, i_size);
                        kunmap(page);
+                       trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
                        goto error;
                }
 
@@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 retry:
        i_size = i_size_read(&dvnode->vfs_inode);
        if (i_size < 2048)
-               return ERR_PTR(-EIO);
-       if (i_size > 2048 * 1024)
+               return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+       if (i_size > 2048 * 1024) {
+               trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
                return ERR_PTR(-EFBIG);
+       }
 
        _enter("%llu", i_size);
 
@@ -315,7 +318,8 @@ content_has_grown:
 /*
  * deal with one block in an AFS directory
  */
-static int afs_dir_iterate_block(struct dir_context *ctx,
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+                                struct dir_context *ctx,
                                 union afs_xdr_dir_block *block,
                                 unsigned blkoff)
 {
@@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                                       " (len %u/%zu)",
                                       blkoff / sizeof(union afs_xdr_dir_block),
                                       offset, next, tmp, nlen);
-                               return -EIO;
+                               return afs_bad(dvnode, afs_file_error_dir_over_end);
                        }
                        if (!(block->hdr.bitmap[next / 8] &
                              (1 << (next % 8)))) {
@@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
                                       " %u unmarked extension (len %u/%zu)",
                                       blkoff / sizeof(union afs_xdr_dir_block),
                                       offset, next, tmp, nlen);
-                               return -EIO;
+                               return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
                        }
 
                        _debug("ENT[%zu.%u]: ext %u/%zu",
@@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                 */
                page = req->pages[blkoff / PAGE_SIZE];
                if (!page) {
-                       ret = -EIO;
+                       ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
                        break;
                }
                mark_page_accessed(page);
@@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                do {
                        dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
                                               sizeof(union afs_xdr_dir_block)];
-                       ret = afs_dir_iterate_block(ctx, dblock, blkoff);
+                       ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
                        if (ret != 1) {
                                kunmap(page);
                                goto out;
@@ -548,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
        }
 
        *fid = cookie.fid;
-       _leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+       _leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
        return 0;
 }
 
@@ -826,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},%p{%pd},",
+       _enter("{%llx:%llu},%p{%pd},",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
 
        ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -896,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
        if (d_really_is_positive(dentry)) {
                vnode = AFS_FS_I(d_inode(dentry));
-               _enter("{v={%x:%u} n=%pd fl=%lx},",
+               _enter("{v={%llx:%llu} n=%pd fl=%lx},",
                       vnode->fid.vid, vnode->fid.vnode, dentry,
                       vnode->flags);
        } else {
@@ -965,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
                /* if the vnode ID has changed, then the dirent points to a
                 * different file */
                if (fid.vnode != vnode->fid.vnode) {
-                       _debug("%pd: dirent changed [%u != %u]",
+                       _debug("%pd: dirent changed [%llu != %llu]",
                               dentry, fid.vnode,
                               vnode->fid.vnode);
                        goto not_found;
@@ -1085,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 
        vnode = AFS_FS_I(inode);
        set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+       afs_vnode_commit_status(fc, vnode, 0);
        d_add(new_dentry, inode);
 }
 
@@ -1104,7 +1109,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        mode |= S_IFDIR;
 
-       _enter("{%x:%u},{%pd},%ho",
+       _enter("{%llx:%llu},{%pd},%ho",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
        key = afs_request_key(dvnode->volume->cell);
@@ -1169,12 +1174,12 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct afs_fs_cursor fc;
-       struct afs_vnode *dvnode = AFS_FS_I(dir);
+       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
        struct key *key;
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%pd}",
               dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
        key = afs_request_key(dvnode->volume->cell);
@@ -1183,11 +1188,19 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
                goto error;
        }
 
+       /* Try to make sure we have a callback promise on the victim. */
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
+               ret = afs_validate(vnode, key);
+               if (ret < 0)
+                       goto error_key;
+       }
+
        ret = -ERESTARTSYS;
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-                       afs_fs_remove(&fc, dentry->d_name.name, true,
+                       afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
                                      data_version);
                }
 
@@ -1201,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
                }
        }
 
+error_key:
        key_put(key);
 error:
        return ret;
@@ -1231,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
        if (d_really_is_positive(dentry)) {
                struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-               if (dir_valid) {
+               if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+                       /* Already done */
+               } else if (dir_valid) {
                        drop_nlink(&vnode->vfs_inode);
                        if (vnode->vfs_inode.i_nlink == 0) {
                                set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -1260,13 +1276,13 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
 static int afs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct afs_fs_cursor fc;
-       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+       struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
        struct key *key;
        unsigned long d_version = (unsigned long)dentry->d_fsdata;
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%pd}",
               dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
        if (dentry->d_name.len >= AFSNAMEMAX)
@@ -1290,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        if (afs_begin_vnode_operation(&fc, dvnode, key)) {
                while (afs_select_fileserver(&fc)) {
                        fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-                       afs_fs_remove(&fc, dentry->d_name.name, false,
+
+                       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+                           !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+                               yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+                                                   data_version);
+                               if (fc.ac.error != -ECONNABORTED ||
+                                   fc.ac.abort_code != RXGEN_OPCODE)
+                                       continue;
+                               set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+                       }
+
+                       afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
                                      data_version);
                }
 
@@ -1330,7 +1357,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
        mode |= S_IFREG;
 
-       _enter("{%x:%u},{%pd},%ho,",
+       _enter("{%llx:%llu},{%pd},%ho,",
               dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
        ret = -ENAMETOOLONG;
@@ -1393,7 +1420,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
        dvnode = AFS_FS_I(dir);
        data_version = dvnode->status.data_version;
 
-       _enter("{%x:%u},{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%llx:%llu},{%pd}",
               vnode->fid.vid, vnode->fid.vnode,
               dvnode->fid.vid, dvnode->fid.vnode,
               dentry);
@@ -1464,7 +1491,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
        u64 data_version = dvnode->status.data_version;
        int ret;
 
-       _enter("{%x:%u},{%pd},%s",
+       _enter("{%llx:%llu},{%pd},%s",
               dvnode->fid.vid, dvnode->fid.vnode, dentry,
               content);
 
@@ -1540,7 +1567,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
        orig_data_version = orig_dvnode->status.data_version;
        new_data_version = new_dvnode->status.data_version;
 
-       _enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
+       _enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
               orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
               vnode->fid.vid, vnode->fid.vnode,
               new_dvnode->fid.vid, new_dvnode->fid.vnode,
@@ -1607,7 +1634,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
 {
        struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
 
-       _enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+       _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
 
        set_page_private(page, 0);
        ClearPagePrivate(page);
index f29c6dade7f6250348b886b44b8be150199f78f7..a9ba81ddf1546272d4a5cbb7e0885326c250c6ff 100644 (file)
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
                return 0;
        }
 
-       ret = dns_query("afsdb", name, len, "", NULL, NULL);
+       ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
        if (ret == -ENODATA)
                ret = -EDESTADDRREQ;
        return ret;
@@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
        struct inode *inode;
        int ret = -ENOENT;
 
-       _enter("%p{%pd}, {%x:%u}",
+       _enter("%p{%pd}, {%llx:%llu}",
               dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
 
        if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
index 7d4f26198573d7f6a4dffb7ff4a82ee0f8fbb573..d6bc3f5d784b5676185070ae208345dd2eef3325 100644 (file)
@@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
        key = afs_request_key(vnode->volume->cell);
        if (IS_ERR(key)) {
@@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file)
        struct afs_vnode *vnode = AFS_FS_I(inode);
        struct afs_file *af = file->private_data;
 
-       _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
        if ((file->f_mode & FMODE_WRITE))
                return vfs_fsync(file, 0);
@@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x,,,",
+       _enter("%s{%llx:%llu.%u},%x,,,",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
        struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
        unsigned long priv;
 
-       _enter("{{%x:%u}[%lu],%lx},%x",
+       _enter("{{%llx:%llu}[%lu],%lx},%x",
               vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
               gfp_flags);
 
index dc62d15a964b8809d7028d33a393c41b6963242b..0568fd98682109e0dd686d5097bf9fe2e40ecfe1 100644 (file)
@@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = {
  */
 void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0); /* Zero delay. */
 }
@@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x,%u",
+       _enter("%s{%llx:%llu.%u},%x,%u",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x",
+       _enter("%s{%llx:%llu.%u},%x",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s{%x:%u.%u},%x",
+       _enter("%s{%llx:%llu.%u},%x",
               vnode->volume->name,
               vnode->fid.vid,
               vnode->fid.vnode,
@@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        spin_lock(&vnode->lock);
 
@@ -192,7 +192,7 @@ again:
                ret = afs_release_lock(vnode, vnode->lock_key);
                if (ret < 0)
                        printk(KERN_WARNING "AFS:"
-                              " Failed to release lock on {%x:%x} error %d\n",
+                              " Failed to release lock on {%llx:%llx} error %d\n",
                               vnode->fid.vid, vnode->fid.vnode, ret);
 
                spin_lock(&vnode->lock);
@@ -229,7 +229,7 @@ again:
                key_put(key);
 
                if (ret < 0)
-                       pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+                       pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
                                   vnode->fid.vid, vnode->fid.vnode, ret);
 
                spin_lock(&vnode->lock);
@@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
        struct key *key = afs_file_key(file);
        int ret;
 
-       _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+       _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
        /* only whole-file locks are supported */
        if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
@@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
        int ret;
 
-       _enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+       _enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
        /* Flush all pending writes before doing anything with locks. */
        vfs_fsync(file, 0);
@@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-       _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+       _enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
               vnode->fid.vid, vnode->fid.vnode, cmd,
               fl->fl_type, fl->fl_flags,
               (long long) fl->fl_start, (long long) fl->fl_end);
@@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
        struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-       _enter("{%x:%u},%d,{t=%x,fl=%x}",
+       _enter("{%llx:%llu},%d,{t=%x,fl=%x}",
               vnode->fid.vid, vnode->fid.vnode, cmd,
               fl->fl_type, fl->fl_flags);
 
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644 (file)
index 0000000..d049cb4
--- /dev/null
@@ -0,0 +1,270 @@
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_fs_probe_done(struct afs_server *server)
+{      /* Retire one outstanding probe; true only if it was the last one. */
+       if (!atomic_dec_and_test(&server->probe_outstanding))
+               return false; /* Other probes are still outstanding. */
+
+       wake_up_var(&server->probe_outstanding); /* Wake waiters on the counter. */
+       clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
+       wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING); /* Wake waiters on the flag. */
+       return true;
+}
+
+/*
+ * Process the result of probing a fileserver.  This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ *
+ * The server is taken from call->reply[0], its index in the server list from
+ * call->reply[1] and the index of the probed address from call->addr_ix.  The
+ * probe state is updated under server->probe_lock according to call->error:
+ *
+ *  - 0 and -ECONNABORTED both count as a response.  For an abort, the abort
+ *    code and error are only recorded if no response has been noted yet.
+ *  - -ENOMEM and -ENONET set probe.local_failure and leave the address's
+ *    responded/failed bits alone.
+ *  - Anything else marks the address as failed.  probe.error is only
+ *    replaced if nothing has responded yet and the stored error is 0,
+ *    -ETIMEDOUT or -ETIME.  The errors listed ahead of "default" share its
+ *    handling.
+ *
+ * For a response, the address is marked as responded, the service ID that
+ * answered is noted and the RTT is scaled down and clamped to UINT_MAX; a new
+ * lowest RTT makes this address alist->preferred.
+ *
+ * After the lock is dropped, the probe is retired with afs_fs_probe_done().
+ * Waiters are woken if this probe produced a new lowest RTT or if it was the
+ * last one outstanding.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+       struct afs_addr_list *alist = call->alist;
+       struct afs_server *server = call->reply[0];
+       unsigned int server_index = (long)call->reply[1];
+       unsigned int index = call->addr_ix;
+       unsigned int rtt = UINT_MAX;
+       bool have_result = false;
+       u64 _rtt;
+       int ret = call->error;
+
+       _enter("%pU,%u", &server->uuid, index);
+
+       spin_lock(&server->probe_lock);
+
+       switch (ret) {
+       case 0:
+               server->probe.error = 0;
+               goto responded;
+       case -ECONNABORTED:
+               if (!server->probe.responded) {
+                       server->probe.abort_code = call->abort_code;
+                       server->probe.error = ret;
+               }
+               goto responded;
+       case -ENOMEM:
+       case -ENONET:
+               server->probe.local_failure = true;
+               afs_io_error(call, afs_io_error_fs_probe_fail);
+               goto out;
+       case -ECONNRESET: /* Responded, but call expired. */
+       case -ENETUNREACH:
+       case -EHOSTUNREACH:
+       case -ECONNREFUSED:
+       case -ETIMEDOUT:
+       case -ETIME:
+       default:
+               clear_bit(index, &alist->responded);
+               set_bit(index, &alist->failed);
+               if (!server->probe.responded &&
+                   (server->probe.error == 0 ||
+                    server->probe.error == -ETIMEDOUT ||
+                    server->probe.error == -ETIME))
+                       server->probe.error = ret;
+               afs_io_error(call, afs_io_error_fs_probe_fail);
+               goto out;
+       }
+
+responded:
+       set_bit(index, &alist->responded);
+       clear_bit(index, &alist->failed);
+
+       if (call->service_id == YFS_FS_SERVICE) {
+               server->probe.is_yfs = true;
+               set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+               alist->addrs[index].srx_service = call->service_id;
+       } else {
+               server->probe.not_yfs = true;
+               if (!server->probe.is_yfs) { /* Don't override an earlier YFS answer. */
+                       clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+                       alist->addrs[index].srx_service = call->service_id;
+               }
+       }
+
+       /* Get the RTT and scale it to fit into a 32-bit value that represents
+        * over a minute of time so that we can access it with one instruction
+        * on a 32-bit system.
+        */
+       _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+       _rtt /= 64;
+       rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; /* Clamp to 32 bits. */
+       if (rtt < server->probe.rtt) {
+               server->probe.rtt = rtt;
+               alist->preferred = index;
+               have_result = true;
+       }
+
+       smp_wmb(); /* Set rtt before responded. */
+       server->probe.responded = true;
+       set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+out:
+       spin_unlock(&server->probe_lock);
+
+       _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+              server_index, index, &alist->addrs[index].transport,
+              (unsigned int)rtt, ret);
+
+       have_result |= afs_fs_probe_done(server); /* True if this was the last probe. */
+       if (have_result) {
+               server->probe.have_result = true;
+               wake_up_var(&server->probe.have_result);
+               wake_up_all(&server->probe_wq);
+       }
+}
+
+/*
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
+ *
+ * The server's address list pointer is read under server->fs_lock.  The
+ * outstanding-probe count is then set to the number of addresses and
+ * server->probe is reset with its rtt field at UINT_MAX, after which
+ * afs_fs_get_capabilities() is called once per address.
+ *
+ * Returns 0 if every call returned -EINPROGRESS.  Otherwise the loop stops at
+ * the first call that returned something else, retires one probe with
+ * afs_fs_probe_done() and returns that call's result.
+ *
+ * NOTE(review): on that early return only one count is dropped, although the
+ * addresses after the failing one have not been probed at all - confirm that
+ * the outstanding count can still reach zero in that case.
+ *
+ * NOTE(review): ac.alist is used after fs_lock has been released; confirm
+ * what keeps the address list alive for the duration of the loop.
+ */
+static int afs_do_probe_fileserver(struct afs_net *net,
+                                  struct afs_server *server,
+                                  struct key *key,
+                                  unsigned int server_index)
+{
+       struct afs_addr_cursor ac = {
+               .index = 0,
+       };
+       int ret;
+
+       _enter("%pU", &server->uuid);
+
+       read_lock(&server->fs_lock);
+       ac.alist = rcu_dereference_protected(server->addresses,
+                                            lockdep_is_held(&server->fs_lock));
+       read_unlock(&server->fs_lock);
+
+       atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); /* One per address. */
+       memset(&server->probe, 0, sizeof(server->probe));
+       server->probe.rtt = UINT_MAX; /* So that any measured RTT compares lower. */
+
+       for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+               ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+                                             true);
+               if (ret != -EINPROGRESS) {
+                       afs_fs_probe_done(server);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ *
+ * Servers in @list that already have AFS_SERVER_FL_PROBED set are skipped.
+ * For the rest, AFS_SERVER_FL_PROBING is taken with test_and_set_bit_lock()
+ * and afs_do_probe_fileserver() is only called if the bit wasn't already
+ * set, so a server whose probing flag is already held is left alone.
+ *
+ * Returns 0 if no call to afs_do_probe_fileserver() returned non-zero;
+ * otherwise the walk stops and the first non-zero value is returned.
+ */
+int afs_probe_fileservers(struct afs_net *net, struct key *key,
+                         struct afs_server_list *list)
+{
+       struct afs_server *server;
+       int i, ret;
+
+       for (i = 0; i < list->nr_servers; i++) {
+               server = list->servers[i].server;
+               if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
+                       continue;
+
+               if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
+                       ret = afs_do_probe_fileserver(net, server, key, i); /* i: list index. */
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond.
+ *
+ * @untried is a bitmask of indices into slist->servers[].  Bits for servers
+ * that don't have AFS_SERVER_FL_PROBING set are dropped up front, and if any
+ * server named in @untried has already responded, or if no bits remain, 0 is
+ * returned without sleeping.
+ *
+ * Otherwise the task adds itself to the probe_wq of each remaining server and
+ * sleeps interruptibly until one of them has responded, none of them is still
+ * being probed or a signal is pending.  On the way out, the responding server
+ * with the lowest probe RTT, if there is one, is stored in slist->preferred.
+ *
+ * Returns 0 normally, -ENOMEM if the wait entries can't be allocated or
+ * -ERESTARTSYS if a signal is pending and no preferred server was found.
+ *
+ * NOTE(review): @untried is a single unsigned long, so this presumably relies
+ * on slist->nr_servers not exceeding BITS_PER_LONG - confirm.
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+       struct wait_queue_entry *waits;
+       struct afs_server *server;
+       unsigned int rtt = UINT_MAX;
+       bool have_responders = false;
+       int pref = -1, i;
+
+       _enter("%u,%lx", slist->nr_servers, untried);
+
+       /* Only wait for servers that have a probe outstanding. */
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+                               __clear_bit(i, &untried);
+                       if (server->probe.responded)
+                               have_responders = true;
+               }
+       }
+       if (have_responders || !untried)
+               return 0;
+
+       waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+       if (!waits)
+               return -ENOMEM;
+
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       init_waitqueue_entry(&waits[i], current); /* Slot i serves server i. */
+                       add_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       for (;;) {
+               bool still_probing = false;
+
+               set_current_state(TASK_INTERRUPTIBLE); /* Set before the checks below. */
+               for (i = 0; i < slist->nr_servers; i++) {
+                       if (test_bit(i, &untried)) {
+                               server = slist->servers[i].server;
+                               if (server->probe.responded)
+                                       goto stop;
+                               if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+                                       still_probing = true;
+                       }
+               }
+
+               if (!still_probing || unlikely(signal_pending(current)))
+                       goto stop;
+               schedule();
+       }
+
+stop:
+       set_current_state(TASK_RUNNING);
+
+       for (i = 0; i < slist->nr_servers; i++) {
+               if (test_bit(i, &untried)) {
+                       server = slist->servers[i].server;
+                       if (server->probe.responded &&
+                           server->probe.rtt < rtt) {
+                               pref = i; /* Lowest RTT seen so far. */
+                               rtt = server->probe.rtt;
+                       }
+
+                       remove_wait_queue(&server->probe_wq, &waits[i]);
+               }
+       }
+
+       kfree(waits);
+
+       if (pref == -1 && signal_pending(current))
+               return -ERESTARTSYS;
+
+       if (pref >= 0)
+               slist->preferred = pref;
+       return 0;
+}
index 50929cb91732f5adec19706788e6a31aeb8beb03..ca08c83168f5fbf1f7f6b52c8c3ff769bf70cf04 100644 (file)
 #include "internal.h"
 #include "afs_fs.h"
 #include "xdr_fs.h"
+#include "protocol_yfs.h"
 
 static const struct afs_fid afs_zero_fid;
 
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
 static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
        call->cbi = afs_get_cb_interest(cbi);
@@ -75,8 +70,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
        struct timespec64 t;
        umode_t mode;
 
-       t.tv_sec = status->mtime_client;
-       t.tv_nsec = 0;
+       t = status->mtime_client;
        vnode->vfs_inode.i_ctime = t;
        vnode->vfs_inode.i_mtime = t;
        vnode->vfs_inode.i_atime = t;
@@ -96,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
        if (!(flags & AFS_VNODE_NOT_YET_SET)) {
                if (expected_version &&
                    *expected_version != status->data_version) {
-                       _debug("vnode modified %llx on {%x:%u} [exp %llx]",
+                       _debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
                               (unsigned long long) status->data_version,
                               vnode->fid.vid, vnode->fid.vnode,
                               (unsigned long long) *expected_version);
@@ -170,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
                if (type != status->type &&
                    vnode &&
                    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-                       pr_warning("Vnode %x:%x:%x changed type %u to %u\n",
+                       pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
                                   vnode->fid.vid,
                                   vnode->fid.vnode,
                                   vnode->fid.unique,
@@ -200,8 +194,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
        EXTRACT_M(mode);
        EXTRACT_M(group);
 
-       status->mtime_client = ntohl(xdr->mtime_client);
-       status->mtime_server = ntohl(xdr->mtime_server);
+       status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
+       status->mtime_client.tv_nsec = 0;
+       status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
+       status->mtime_server.tv_nsec = 0;
        status->lock_count   = ntohl(xdr->lock_count);
 
        size  = (u64)ntohl(xdr->size_lo);
@@ -233,7 +229,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
 
 bad:
        xdr_dump_bad(*_bp);
-       return afs_protocol_error(call, -EBADMSG);
+       return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
 }
 
 /*
@@ -273,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 
        write_seqlock(&vnode->cb_lock);
 
-       if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+       if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
                vnode->cb_version       = ntohl(*bp++);
                cb_expiry               = ntohl(*bp++);
                vnode->cb_type          = ntohl(*bp++);
@@ -293,13 +289,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
        *_bp = bp;
 }
 
-static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
+/*
+ * Turn a callback expiry into an absolute time: the expiry is treated as a
+ * count of seconds and added, as nanoseconds, to call->reply_time.
+ *
+ * The multiplication is forced to 64 bits.  NSEC_PER_SEC is a long constant,
+ * so on a 32-bit build "u32 * long" is evaluated in 32 bits and wraps for any
+ * expiry greater than about 4 seconds, yielding a bogus expiry time.
+ */
+static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+{
+       return ktime_add_ns(call->reply_time, (u64)expiry * NSEC_PER_SEC);
+}
+
+static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
+                                      const __be32 **_bp,
                                       struct afs_callback *cb)
 {
        const __be32 *bp = *_bp;
 
        cb->version     = ntohl(*bp++);
-       cb->expiry      = ntohl(*bp++);
+       cb->expires_at  = xdr_decode_expiry(call, ntohl(*bp++));
        cb->type        = ntohl(*bp++);
        *_bp = bp;
 }
@@ -311,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
                                  struct afs_volsync *volsync)
 {
        const __be32 *bp = *_bp;
+       u32 creation;
 
-       volsync->creation = ntohl(*bp++);
+       creation = ntohl(*bp++);
        bp++; /* spare2 */
        bp++; /* spare3 */
        bp++; /* spare4 */
        bp++; /* spare5 */
        bp++; /* spare6 */
        *_bp = bp;
+
+       if (volsync)
+               volsync->creation = creation;
 }
 
 /*
@@ -379,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
        vs->blocks_in_use       = ntohl(*bp++);
        vs->part_blocks_avail   = ntohl(*bp++);
        vs->part_max_blocks     = ntohl(*bp++);
+       vs->vol_copy_date       = 0;
+       vs->vol_backup_date     = 0;
        *_bp = bp;
 }
 
@@ -395,16 +403,16 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
        if (ret < 0)
                return ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        xdr_decode_AFSCallBack(call, vnode, &bp);
-       if (call->reply[1])
-               xdr_decode_AFSVolSync(&bp, call->reply[1]);
+       xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
        _leave(" = 0 [done]");
        return 0;
@@ -431,7 +439,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode,
@@ -445,6 +456,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
        call->reply[0] = vnode;
        call->reply[1] = volsync;
        call->expected_version = new_inode ? 1 : vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -468,139 +480,117 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
        struct afs_read *req = call->reply[2];
        const __be32 *bp;
        unsigned int size;
-       void *buffer;
        int ret;
 
-       _enter("{%u,%zu/%u;%llu/%llu}",
-              call->unmarshall, call->offset, call->count,
-              req->remain, req->actual_len);
+       _enter("{%u,%zu/%llu}",
+              call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
 
        switch (call->unmarshall) {
        case 0:
                req->actual_len = 0;
-               call->offset = 0;
+               req->index = 0;
+               req->offset = req->pos & (PAGE_SIZE - 1);
                call->unmarshall++;
-               if (call->operation_ID != FSFETCHDATA64) {
-                       call->unmarshall++;
-                       goto no_msw;
+               if (call->operation_ID == FSFETCHDATA64) {
+                       afs_extract_to_tmp64(call);
+               } else {
+                       call->tmp_u = htonl(0);
+                       afs_extract_to_tmp(call);
                }
 
-               /* extract the upper part of the returned data length of an
-                * FSFETCHDATA64 op (which should always be 0 using this
-                * client) */
-       case 1:
-               _debug("extract data length (MSW)");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
-               if (ret < 0)
-                       return ret;
-
-               req->actual_len = ntohl(call->tmp);
-               req->actual_len <<= 32;
-               call->offset = 0;
-               call->unmarshall++;
-
-       no_msw:
                /* extract the returned data length */
-       case 2:
+       case 1:
                _debug("extract data length");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
-               req->actual_len |= ntohl(call->tmp);
+               req->actual_len = be64_to_cpu(call->tmp64);
                _debug("DATA length: %llu", req->actual_len);
-
-               req->remain = req->actual_len;
-               call->offset = req->pos & (PAGE_SIZE - 1);
-               req->index = 0;
-               if (req->actual_len == 0)
+               req->remain = min(req->len, req->actual_len);
+               if (req->remain == 0)
                        goto no_more_data;
+
                call->unmarshall++;
 
        begin_page:
                ASSERTCMP(req->index, <, req->nr_pages);
-               if (req->remain > PAGE_SIZE - call->offset)
-                       size = PAGE_SIZE - call->offset;
+               if (req->remain > PAGE_SIZE - req->offset)
+                       size = PAGE_SIZE - req->offset;
                else
                        size = req->remain;
-               call->count = call->offset + size;
-               ASSERTCMP(call->count, <=, PAGE_SIZE);
-               req->remain -= size;
+               call->bvec[0].bv_len = size;
+               call->bvec[0].bv_offset = req->offset;
+               call->bvec[0].bv_page = req->pages[req->index];
+               iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+               ASSERTCMP(size, <=, PAGE_SIZE);
 
                /* extract the returned data */
-       case 3:
-               _debug("extract data %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
+       case 2:
+               _debug("extract data %zu/%llu",
+                      iov_iter_count(&call->iter), req->remain);
 
-               buffer = kmap(req->pages[req->index]);
-               ret = afs_extract_data(call, buffer, call->count, true);
-               kunmap(req->pages[req->index]);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (call->offset == PAGE_SIZE) {
+               req->remain -= call->bvec[0].bv_len;
+               req->offset += call->bvec[0].bv_len;
+               ASSERTCMP(req->offset, <=, PAGE_SIZE);
+               if (req->offset == PAGE_SIZE) {
+                       req->offset = 0;
                        if (req->page_done)
                                req->page_done(call, req);
                        req->index++;
-                       if (req->remain > 0) {
-                               call->offset = 0;
-                               if (req->index >= req->nr_pages) {
-                                       call->unmarshall = 4;
-                                       goto begin_discard;
-                               }
+                       if (req->remain > 0)
                                goto begin_page;
-                       }
                }
-               goto no_more_data;
+
+               ASSERTCMP(req->remain, ==, 0);
+               if (req->actual_len <= req->len)
+                       goto no_more_data;
 
                /* Discard any excess data the server gave us */
-       begin_discard:
-       case 4:
-               size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
-               call->count = size;
-               _debug("extract discard %llu/%llu %zu/%u",
-                      req->remain, req->actual_len, call->offset, call->count);
-
-               call->offset = 0;
-               ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
-               req->remain -= call->offset;
+               iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+               call->unmarshall = 3;
+       case 3:
+               _debug("extract discard %zu/%llu",
+                      iov_iter_count(&call->iter), req->actual_len - req->len);
+
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
-               if (req->remain > 0)
-                       goto begin_discard;
 
        no_more_data:
-               call->offset = 0;
-               call->unmarshall = 5;
+               call->unmarshall = 4;
+               afs_extract_to_buf(call, (21 + 3 + 6) * 4);
 
                /* extract the metadata */
-       case 5:
-               ret = afs_extract_data(call, call->buffer,
-                                      (21 + 3 + 6) * 4, false);
+       case 4:
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                                     &vnode->status.data_version, req) < 0)
-                       return afs_protocol_error(call, -EBADMSG);
+               ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                                       &vnode->status.data_version, req);
+               if (ret < 0)
+                       return ret;
                xdr_decode_AFSCallBack(call, vnode, &bp);
-               if (call->reply[1])
-                       xdr_decode_AFSVolSync(&bp, call->reply[1]);
+               xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
-               call->offset = 0;
                call->unmarshall++;
 
-       case 6:
+       case 5:
                break;
        }
 
        for (; req->index < req->nr_pages; req->index++) {
-               if (call->count < PAGE_SIZE)
+               if (req->offset < PAGE_SIZE)
                        zero_user_segment(req->pages[req->index],
-                                         call->count, PAGE_SIZE);
+                                         req->offset, PAGE_SIZE);
                if (req->page_done)
                        req->page_done(call, req);
-               call->count = 0;
+               req->offset = 0;
        }
 
        _leave(" = 0 [done]");
@@ -653,6 +643,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
        call->reply[1] = NULL; /* volsync */
        call->reply[2] = req;
        call->expected_version = vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -682,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_data(fc, req);
+
        if (upper_32_bits(req->pos) ||
            upper_32_bits(req->len) ||
            upper_32_bits(req->pos + req->len))
@@ -698,6 +692,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
        call->reply[1] = NULL; /* volsync */
        call->reply[2] = req;
        call->expected_version = vnode->status.data_version;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -733,11 +728,14 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
        xdr_decode_AFSFid(&bp, call->reply[1]);
-       if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
-           afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
-       xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+       ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -778,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc,
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
+               if (S_ISDIR(mode))
+                       return yfs_fs_make_dir(fc, name, mode, current_data_version,
+                                              newfid, newstatus, newcb);
+               else
+                       return yfs_fs_create_file(fc, name, mode, current_data_version,
+                                                 newfid, newstatus, newcb);
+       }
+
        _enter("");
 
        namesz = strlen(name);
@@ -796,6 +803,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
        call->reply[2] = newstatus;
        call->reply[3] = newcb;
        call->expected_version = current_data_version + 1;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -839,9 +847,10 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -868,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
-                 u64 current_data_version)
+int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+                 const char *name, bool isdir, u64 current_data_version)
 {
-       struct afs_vnode *vnode = fc->vnode;
+       struct afs_vnode *dvnode = fc->vnode;
        struct afs_call *call;
-       struct afs_net *net = afs_v2net(vnode);
+       struct afs_net *net = afs_v2net(dvnode);
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+
        _enter("");
 
        namesz = strlen(name);
@@ -890,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
                return -ENOMEM;
 
        call->key = fc->key;
-       call->reply[0] = vnode;
+       call->reply[0] = dvnode;
+       call->reply[1] = vnode;
        call->expected_version = current_data_version + 1;
 
        /* marshall the parameters */
        bp = call->request;
        *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
-       *bp++ = htonl(vnode->fid.vid);
-       *bp++ = htonl(vnode->fid.vnode);
-       *bp++ = htonl(vnode->fid.unique);
+       *bp++ = htonl(dvnode->fid.vid);
+       *bp++ = htonl(dvnode->fid.vnode);
+       *bp++ = htonl(dvnode->fid.unique);
        *bp++ = htonl(namesz);
        memcpy(bp, name, namesz);
        bp = (void *) bp + namesz;
@@ -908,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
        }
 
        afs_use_fs_server(call, fc->cbi);
-       trace_afs_make_fs_call(call, &vnode->fid);
+       trace_afs_make_fs_call(call, &dvnode->fid);
        return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -929,10 +942,13 @@ static int afs_deliver_fs_link(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 ||
-           afs_decode_status(call, &bp, &dvnode->status, dvnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -961,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
        size_t namesz, reqsz, padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_link(fc, vnode, name, current_data_version);
+
        _enter("");
 
        namesz = strlen(name);
@@ -1016,10 +1035,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
        xdr_decode_AFSFid(&bp, call->reply[1]);
-       if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) ||
-           afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+       if (ret < 0)
+               return ret;
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1052,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
        size_t namesz, reqsz, padsz, c_namesz, c_padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_symlink(fc, name, contents, current_data_version,
+                                     newfid, newstatus);
+
        _enter("");
 
        namesz = strlen(name);
@@ -1122,13 +1148,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
-       if (new_dvnode != orig_dvnode &&
-           afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
-                             &call->expected_version_2, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       if (new_dvnode != orig_dvnode) {
+               ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+                                       &call->expected_version_2, NULL);
+               if (ret < 0)
+                       return ret;
+       }
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1161,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
        size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_rename(fc, orig_name,
+                                    new_dvnode, new_name,
+                                    current_orig_data_version,
+                                    current_new_data_version);
+
        _enter("");
 
        o_namesz = strlen(orig_name);
@@ -1231,9 +1266,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        afs_pages_written_back(vnode, call);
@@ -1273,7 +1309,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
@@ -1330,7 +1366,10 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
        loff_t size, pos, i_size;
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        size = (loff_t)to - (loff_t)offset;
@@ -1407,9 +1446,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       if (afs_decode_status(call, &bp, &vnode->status, vnode,
-                             &call->expected_version, NULL) < 0)
-               return afs_protocol_error(call, -EBADMSG);
+       ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
        /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
        _leave(" = 0 [done]");
@@ -1451,7 +1491,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1498,7 +1538,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1544,10 +1584,13 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_setattr(fc, attr);
+
        if (attr->ia_valid & ATTR_SIZE)
                return afs_fs_setattr_size(fc, attr);
 
-       _enter(",%x,{%x:%u},,",
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
@@ -1581,164 +1624,114 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
        const __be32 *bp;
        char *p;
+       u32 size;
        int ret;
 
        _enter("{%u}", call->unmarshall);
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_buf(call, 12 * 4);
 
                /* extract the returned status record */
        case 1:
                _debug("extract status");
-               ret = afs_extract_data(call, call->buffer,
-                                      12 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
                xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
-               call->offset = 0;
                call->unmarshall++;
+               afs_extract_to_tmp(call);
 
                /* extract the volume name length */
        case 2:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("volname length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_volname_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the volume name */
        case 3:
                _debug("extract volname");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("volname '%s'", p);
-
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
-               /* extract the volume name padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_volname_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 4:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       no_volname_padding:
-
                /* extract the offline message length */
-       case 5:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 4:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("offline msg length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_offline_msg_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the offline message */
-       case 6:
+       case 5:
                _debug("extract offline");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, true);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("offline '%s'", p);
 
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
-               /* extract the offline message padding */
-               if ((call->count & 3) == 0) {
-                       call->unmarshall++;
-                       goto no_offline_padding;
-               }
-               call->count = 4 - (call->count & 3);
-
-       case 7:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, true);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       no_offline_padding:
-
                /* extract the message of the day length */
-       case 8:
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+       case 6:
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                call->count = ntohl(call->tmp);
                _debug("motd length: %u", call->count);
                if (call->count >= AFSNAMEMAX)
-                       return afs_protocol_error(call, -EBADMSG);
-               call->offset = 0;
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_motd_len);
+               size = (call->count + 3) & ~3; /* It's padded */
+               afs_extract_begin(call, call->reply[2], size);
                call->unmarshall++;
 
                /* extract the message of the day */
-       case 9:
+       case 7:
                _debug("extract motd");
-               if (call->count > 0) {
-                       ret = afs_extract_data(call, call->reply[2],
-                                              call->count, true);
-                       if (ret < 0)
-                               return ret;
-               }
+               ret = afs_extract_data(call, false);
+               if (ret < 0)
+                       return ret;
 
                p = call->reply[2];
                p[call->count] = 0;
                _debug("motd '%s'", p);
 
-               call->offset = 0;
                call->unmarshall++;
 
-               /* extract the message of the day padding */
-               call->count = (4 - (call->count & 3)) & 3;
-
-       case 10:
-               ret = afs_extract_data(call, call->buffer,
-                                      call->count, false);
-               if (ret < 0)
-                       return ret;
-
-               call->offset = 0;
-               call->unmarshall++;
-       case 11:
+       case 8:
                break;
        }
 
@@ -1778,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
        __be32 *bp;
        void *tmpbuf;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_get_volume_status(fc, vs);
+
        _enter("");
 
        tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
@@ -1867,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_set_lock(fc, type);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
@@ -1899,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_extend_lock(fc);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
@@ -1930,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
        struct afs_net *net = afs_v2net(vnode);
        __be32 *bp;
 
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_release_lock(fc);
+
        _enter("");
 
        call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
@@ -2004,19 +2009,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
        u32 count;
        int ret;
 
-       _enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+       _enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
 
-again:
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the capabilities word count */
        case 1:
-               ret = afs_extract_data(call, &call->tmp,
-                                      1 * sizeof(__be32),
-                                      true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2024,24 +2026,17 @@ again:
 
                call->count = count;
                call->count2 = count;
-               call->offset = 0;
+               iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
                call->unmarshall++;
 
                /* Extract capabilities words */
        case 2:
-               count = min(call->count, 16U);
-               ret = afs_extract_data(call, call->buffer,
-                                      count * sizeof(__be32),
-                                      call->count > 16);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                /* TODO: Examine capabilities */
 
-               call->count -= count;
-               if (call->count > 0)
-                       goto again;
-               call->offset = 0;
                call->unmarshall++;
                break;
        }
@@ -2050,6 +2045,14 @@ again:
        return 0;
 }
 
+static void afs_destroy_fs_get_capabilities(struct afs_call *call)
+{
+       struct afs_server *server = call->reply[0]; /* from afs_get_server() */
+
+       afs_put_server(call->net, server); /* pairs with the get at call setup */
+       afs_flat_call_destructor(call); /* this call type's previous destructor */
+}
+
 /*
  * FS.GetCapabilities operation type
  */
@@ -2057,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
        .name           = "FS.GetCapabilities",
        .op             = afs_FS_GetCapabilities,
        .deliver        = afs_deliver_fs_get_capabilities,
-       .destructor     = afs_flat_call_destructor,
+       .done           = afs_fileserver_probe_result,
+       .destructor     = afs_destroy_fs_get_capabilities,
 };
 
 /*
@@ -2067,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
 int afs_fs_get_capabilities(struct afs_net *net,
                            struct afs_server *server,
                            struct afs_addr_cursor *ac,
-                           struct key *key)
+                           struct key *key,
+                           unsigned int server_index,
+                           bool async)
 {
        struct afs_call *call;
        __be32 *bp;
@@ -2079,6 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
                return -ENOMEM;
 
        call->key = key;
+       call->reply[0] = afs_get_server(server);
+       call->reply[1] = (void *)(long)server_index;
+       call->upgrade = true;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -2086,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
 
        /* Can't take a ref on server */
        trace_afs_make_fs_call(call, NULL);
-       return afs_make_call(ac, call, GFP_NOFS, false);
+       return afs_make_call(ac, call, GFP_NOFS, async);
 }
 
 /*
@@ -2097,7 +2107,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
        struct afs_file_status *status = call->reply[1];
        struct afs_callback *callback = call->reply[2];
        struct afs_volsync *volsync = call->reply[3];
-       struct afs_vnode *vnode = call->reply[0];
+       struct afs_fid *fid = call->reply[0];
        const __be32 *bp;
        int ret;
 
@@ -2105,21 +2115,16 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
        if (ret < 0)
                return ret;
 
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", fid->vid, fid->vnode);
 
        /* unmarshall the reply once we've received all of it */
        bp = call->buffer;
-       afs_decode_status(call, &bp, status, vnode,
-                         &call->expected_version, NULL);
-       callback[call->count].version   = ntohl(bp[0]);
-       callback[call->count].expiry    = ntohl(bp[1]);
-       callback[call->count].type      = ntohl(bp[2]);
-       if (vnode)
-               xdr_decode_AFSCallBack(call, vnode, &bp);
-       else
-               bp += 3;
-       if (volsync)
-               xdr_decode_AFSVolSync(&bp, volsync);
+       ret = afs_decode_status(call, &bp, status, NULL,
+                               &call->expected_version, NULL);
+       if (ret < 0)
+               return ret;
+       xdr_decode_AFSCallBack_raw(call, &bp, callback);
+       xdr_decode_AFSVolSync(&bp, volsync);
 
        _leave(" = 0 [done]");
        return 0;
@@ -2148,7 +2153,10 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
        struct afs_call *call;
        __be32 *bp;
 
-       _enter(",%x,{%x:%u},,",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+
+       _enter(",%x,{%llx:%llu},,",
               key_serial(fc->key), fid->vid, fid->vnode);
 
        call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -2158,11 +2166,12 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
        }
 
        call->key = fc->key;
-       call->reply[0] = NULL; /* vnode for fid[0] */
+       call->reply[0] = fid;
        call->reply[1] = status;
        call->reply[2] = callback;
        call->reply[3] = volsync;
        call->expected_version = 1; /* vnode->status.data_version */
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
@@ -2193,38 +2202,40 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
        switch (call->unmarshall) {
        case 0:
-               call->offset = 0;
+               afs_extract_to_tmp(call);
                call->unmarshall++;
 
                /* Extract the file status count and array in two steps */
        case 1:
                _debug("extract status count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                tmp = ntohl(call->tmp);
                _debug("status count: %u/%u", tmp, call->count2);
                if (tmp != call->count2)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_ibulkst_count);
 
                call->count = 0;
                call->unmarshall++;
        more_counts:
-               call->offset = 0;
+               afs_extract_to_buf(call, 21 * sizeof(__be32));
 
        case 2:
                _debug("extract status array %u", call->count);
-               ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
                statuses = call->reply[1];
-               if (afs_decode_status(call, &bp, &statuses[call->count],
-                                     call->count == 0 ? vnode : NULL,
-                                     NULL, NULL) < 0)
-                       return afs_protocol_error(call, -EBADMSG);
+               ret = afs_decode_status(call, &bp, &statuses[call->count],
+                                       call->count == 0 ? vnode : NULL,
+                                       NULL, NULL);
+               if (ret < 0)
+                       return ret;
 
                call->count++;
                if (call->count < call->count2)
@@ -2232,27 +2243,28 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
                call->count = 0;
                call->unmarshall++;
-               call->offset = 0;
+               afs_extract_to_tmp(call);
 
                /* Extract the callback count and array in two steps */
        case 3:
                _debug("extract CB count");
-               ret = afs_extract_data(call, &call->tmp, 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
                tmp = ntohl(call->tmp);
                _debug("CB count: %u", tmp);
                if (tmp != call->count2)
-                       return afs_protocol_error(call, -EBADMSG);
+                       return afs_protocol_error(call, -EBADMSG,
+                                                 afs_eproto_ibulkst_cb_count);
                call->count = 0;
                call->unmarshall++;
        more_cbs:
-               call->offset = 0;
+               afs_extract_to_buf(call, 3 * sizeof(__be32));
 
        case 4:
                _debug("extract CB array");
-               ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+               ret = afs_extract_data(call, true);
                if (ret < 0)
                        return ret;
 
@@ -2260,7 +2272,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                bp = call->buffer;
                callbacks = call->reply[2];
                callbacks[call->count].version  = ntohl(bp[0]);
-               callbacks[call->count].expiry   = ntohl(bp[1]);
+               callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
                callbacks[call->count].type     = ntohl(bp[2]);
                statuses = call->reply[1];
                if (call->count == 0 && vnode && statuses[0].abort_code == 0)
@@ -2269,19 +2281,17 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
                if (call->count < call->count2)
                        goto more_cbs;
 
-               call->offset = 0;
+               afs_extract_to_buf(call, 6 * sizeof(__be32));
                call->unmarshall++;
 
        case 5:
-               ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+               ret = afs_extract_data(call, false);
                if (ret < 0)
                        return ret;
 
                bp = call->buffer;
-               if (call->reply[3])
-                       xdr_decode_AFSVolSync(&bp, call->reply[3]);
+               xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
-               call->offset = 0;
                call->unmarshall++;
 
        case 6:
@@ -2317,7 +2327,11 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
        __be32 *bp;
        int i;
 
-       _enter(",%x,{%x:%u},%u",
+       if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+               return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+                                                nr_fids, volsync);
+
+       _enter(",%x,{%llx:%llu},%u",
               key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
 
        call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus,
@@ -2334,6 +2348,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
        call->reply[2] = callbacks;
        call->reply[3] = volsync;
        call->count2 = nr_fids;
+       call->want_reply_time = true;
 
        /* marshall the parameters */
        bp = call->request;
index 479b7fdda1244f5bf210694e275826cba99b5553..4c6d8e1112c2b716ef788afbb31635150af3f87b 100644 (file)
@@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
        default:
                printk("kAFS: AFS vnode with undefined type\n");
                read_sequnlock_excl(&vnode->cb_lock);
-               return afs_protocol_error(NULL, -EBADMSG);
+               return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
        }
 
        inode->i_blocks         = 0;
@@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
        struct afs_fs_cursor fc;
        int ret;
 
-       _enter("%s,{%x:%u.%u,S=%lx}",
+       _enter("%s,{%llx:%llu.%u,S=%lx}",
               vnode->volume->name,
               vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
               vnode->flags);
@@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
 int afs_iget5_test(struct inode *inode, void *opaque)
 {
        struct afs_iget_data *data = opaque;
+       struct afs_vnode *vnode = AFS_FS_I(inode); /* for whole-fid compare */
 
-       return inode->i_ino == data->fid.vnode &&
-               inode->i_generation == data->fid.unique;
+       return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
 }
 
 /*
@@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
        struct afs_iget_data *data = opaque;
        struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       inode->i_ino = data->fid.vnode;
-       inode->i_generation = data->fid.unique;
        vnode->fid = data->fid;
        vnode->volume = data->volume;
 
+       /* YFS supports 96-bit vnode IDs, but Linux only supports
+        * 64-bit inode numbers, so i_ino takes fid.vnode without vnode_hi.
+        */
+       inode->i_ino = data->fid.vnode;
+       inode->i_generation = data->fid.unique;
        return 0;
 }
 
@@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
                return ERR_PTR(-ENOMEM);
        }
 
-       _debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
+       _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
               inode, inode->i_ino, data.fid.vid, data.fid.vnode,
               data.fid.unique);
 
@@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
 
        key.vnode_id            = vnode->fid.vnode;
        key.unique              = vnode->fid.unique;
-       key.vnode_id_ext[0]     = 0;
-       key.vnode_id_ext[1]     = 0;
+       key.vnode_id_ext[0]     = vnode->fid.vnode >> 32;
+       key.vnode_id_ext[1]     = vnode->fid.vnode_hi;
        aux.data_version        = vnode->status.data_version;
 
        vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
@@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
        struct inode *inode;
        int ret;
 
-       _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
+       _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
 
        as = sb->s_fs_info;
        data.volume = as->volume;
@@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                return ERR_PTR(-ENOMEM);
        }
 
-       _debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+       _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
               inode, fid->vid, fid->vnode, fid->unique);
 
        vnode = AFS_FS_I(inode);
@@ -314,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
                         * didn't give us a callback) */
                        vnode->cb_version = 0;
                        vnode->cb_type = 0;
-                       vnode->cb_expires_at = 0;
+                       vnode->cb_expires_at = ktime_get();
                } else {
                        vnode->cb_version = cb->version;
                        vnode->cb_type = cb->type;
-                       vnode->cb_expires_at = cb->expiry;
+                       vnode->cb_expires_at = cb->expires_at;
                        vnode->cb_interest = afs_get_cb_interest(cbi);
                        set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
                }
@@ -352,7 +355,7 @@ bad_inode:
  */
 void afs_zap_data(struct afs_vnode *vnode)
 {
-       _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+       _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 #ifdef CONFIG_AFS_FSCACHE
        fscache_invalidate(vnode->cache);
@@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        bool valid = false;
        int ret;
 
-       _enter("{v={%x:%u} fl=%lx},%x",
+       _enter("{v={%llx:%llu} fl=%lx},%x",
               vnode->fid.vid, vnode->fid.vnode, vnode->flags,
               key_serial(key));
 
@@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode)
 
        vnode = AFS_FS_I(inode);
 
-       _enter("{%x:%u.%d}",
+       _enter("{%llx:%llu.%d}",
               vnode->fid.vid,
               vnode->fid.vnode,
               vnode->fid.unique);
@@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
        struct key *key;
        int ret;
 
-       _enter("{%x:%u},{n=%pd},%x",
+       _enter("{%llx:%llu},{n=%pd},%x",
               vnode->fid.vid, vnode->fid.vnode, dentry,
               attr->ia_valid);
 
index 72de1f157d20235b4c2a103d5f098b5b9cf7dd1f..5da3b09b751867bc9c0bbb8c23b362fbab942a93 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uuid.h>
 #include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
@@ -75,10 +76,13 @@ struct afs_addr_list {
        u32                     version;        /* Version */
        unsigned char           max_addrs;
        unsigned char           nr_addrs;
-       unsigned char           index;          /* Address currently in use */
+       unsigned char           preferred;      /* Preferred address */
        unsigned char           nr_ipv4;        /* Number of IPv4 addresses */
+       enum dns_record_source  source:8;
+       enum dns_lookup_status  status:8;
        unsigned long           probed;         /* Mask of servers that have been probed */
-       unsigned long           yfs;            /* Mask of servers that are YFS */
+       unsigned long           failed;         /* Mask of addrs that failed locally/ICMP */
+       unsigned long           responded;      /* Mask of addrs that responded */
        struct sockaddr_rxrpc   addrs[];
 #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
 };
@@ -88,6 +92,7 @@ struct afs_addr_list {
  */
 struct afs_call {
        const struct afs_call_type *type;       /* type of call */
+       struct afs_addr_list    *alist;         /* Address is alist->addrs[addr_ix] */
        wait_queue_head_t       waitq;          /* processes awaiting completion */
        struct work_struct      async_work;     /* async I/O processor */
        struct work_struct      work;           /* actual work processor */
@@ -98,16 +103,22 @@ struct afs_call {
        struct afs_cb_interest  *cbi;           /* Callback interest for server used */
        void                    *request;       /* request data (first part) */
        struct address_space    *mapping;       /* Pages being written from */
+       struct iov_iter         iter;           /* Buffer iterator */
+       struct iov_iter         *_iter;         /* Iterator currently in use */
+       union { /* Convenience for ->iter */
+               struct kvec     kvec[1];
+               struct bio_vec  bvec[1];
+       };
        void                    *buffer;        /* reply receive buffer */
        void                    *reply[4];      /* Where to put the reply */
        pgoff_t                 first;          /* first page in mapping to deal with */
        pgoff_t                 last;           /* last page in mapping to deal with */
-       size_t                  offset;         /* offset into received data store */
        atomic_t                usage;
        enum afs_call_state     state;
        spinlock_t              state_lock;
        int                     error;          /* error code */
        u32                     abort_code;     /* Remote abort ID or 0 */
+       u32                     epoch;
        unsigned                request_size;   /* size of request data */
        unsigned                reply_max;      /* maximum size of reply */
        unsigned                first_offset;   /* offset into mapping[first] */
@@ -117,19 +128,28 @@ struct afs_call {
                unsigned        count2;         /* count used in unmarshalling */
        };
        unsigned char           unmarshall;     /* unmarshalling phase */
+       unsigned char           addr_ix;        /* Address in ->alist */
        bool                    incoming;       /* T if incoming call */
        bool                    send_pages;     /* T if data from mapping should be sent */
        bool                    need_attention; /* T if RxRPC poked us */
        bool                    async;          /* T if asynchronous */
        bool                    ret_reply0;     /* T if should return reply[0] on success */
        bool                    upgrade;        /* T to request service upgrade */
+       bool                    want_reply_time; /* T if want reply_time */
        u16                     service_id;     /* Actual service ID (after upgrade) */
        unsigned int            debug_id;       /* Trace ID */
        u32                     operation_ID;   /* operation ID for an incoming call */
        u32                     count;          /* count for use in unmarshalling */
-       __be32                  tmp;            /* place to extract temporary data */
+       union {                                 /* place to extract temporary data */
+               struct {
+                       __be32  tmp_u;
+                       __be32  tmp;
+               } __attribute__((packed));
+               __be64          tmp64;
+       };
        afs_dataversion_t       expected_version; /* Updated version expected from store */
        afs_dataversion_t       expected_version_2; /* 2nd updated version expected from store */
+       ktime_t                 reply_time;     /* Time of first reply packet */
 };
 
 struct afs_call_type {
@@ -146,6 +166,9 @@ struct afs_call_type {
 
        /* Work function */
        void (*work)(struct work_struct *work);
+
+       /* Call done function (gets called immediately on success or failure) */
+       void (*done)(struct afs_call *call);
 };
 
 /*
@@ -185,6 +208,7 @@ struct afs_read {
        refcount_t              usage;
        unsigned int            index;          /* Which page we're reading into */
        unsigned int            nr_pages;
+       unsigned int            offset;         /* offset into current page */
        void (*page_done)(struct afs_call *, struct afs_read *);
        struct page             **pages;
        struct page             *array[];
@@ -343,12 +367,69 @@ struct afs_cell {
        rwlock_t                proc_lock;
 
        /* VL server list. */
-       rwlock_t                vl_addrs_lock;  /* Lock on vl_addrs */
-       struct afs_addr_list    __rcu *vl_addrs; /* List of VL servers */
+       rwlock_t                vl_servers_lock; /* Lock on vl_servers */
+       struct afs_vlserver_list __rcu *vl_servers;
+
        u8                      name_len;       /* Length of name */
        char                    name[64 + 1];   /* Cell name, case-flattened and NUL-padded */
 };
 
+/*
+ * Volume Location server record: address list, flag bits and probe state.
+ */
+struct afs_vlserver {
+       struct rcu_head         rcu;
+       struct afs_addr_list    __rcu *addresses; /* List of addresses for this VL server */
+       unsigned long           flags;          /* AFS_VLSERVER_FL_* values below */
+#define AFS_VLSERVER_FL_PROBED 0               /* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING        1               /* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS 2               /* Server is YFS not AFS */
+       rwlock_t                lock;           /* Lock on addresses */
+       atomic_t                usage;          /* presumably a refcount -- confirm */
+
+       /* Probe state */
+       wait_queue_head_t       probe_wq;
+       atomic_t                probe_outstanding;
+       spinlock_t              probe_lock;
+       struct {
+               unsigned int    rtt;            /* RTT as ktime/64 */
+               u32             abort_code;
+               short           error;
+               bool            have_result;
+               bool            responded:1;
+               bool            is_yfs:1;
+               bool            not_yfs:1;
+               bool            local_failure:1;
+       } probe;
+
+       u16                     port;
+       u16                     name_len;       /* Length of name */
+       char                    name[];         /* Server name, case-flattened */
+};
+
+/*
+ * Weighted list of Volume Location servers: one entry per server.
+ */
+struct afs_vlserver_entry {
+       u16                     priority;       /* Preference (as SRV) */
+       u16                     weight;         /* Weight (as SRV) */
+       enum dns_record_source  source:8;       /* Stored in an 8-bit field */
+       enum dns_lookup_status  status:8;       /* Stored in an 8-bit field */
+       struct afs_vlserver     *server;        /* VL server record for this entry */
+};
+
+struct afs_vlserver_list {
+       struct rcu_head         rcu;
+       atomic_t                usage;          /* presumably a refcount -- confirm */
+       u8                      nr_servers;     /* presumably entries in servers[] -- confirm */
+       u8                      index;          /* Server currently in use */
+       u8                      preferred;      /* Preferred server */
+       enum dns_record_source  source:8;       /* Stored in an 8-bit field */
+       enum dns_lookup_status  status:8;       /* Stored in an 8-bit field */
+       rwlock_t                lock;
+       struct afs_vlserver_entry servers[];    /* Flexible array of entries */
+};
+
 /*
  * Cached VLDB entry.
  *
@@ -403,8 +484,12 @@ struct afs_server {
 #define AFS_SERVER_FL_PROBING  6               /* Fileserver is being probed */
 #define AFS_SERVER_FL_NO_IBULK 7               /* Fileserver doesn't support FS.InlineBulkStatus */
 #define AFS_SERVER_FL_MAY_HAVE_CB 8            /* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS   9               /* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_RM2   10              /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAVE_EPOCH 11            /* ->cm_epoch is valid */
        atomic_t                usage;
        u32                     addr_version;   /* Address list version */
+       u32                     cm_epoch;       /* Server RxRPC epoch */
 
        /* file service access */
        rwlock_t                fs_lock;        /* access lock */
@@ -413,6 +498,26 @@ struct afs_server {
        struct hlist_head       cb_volumes;     /* List of volume interests on this server */
        unsigned                cb_s_break;     /* Break-everything counter. */
        rwlock_t                cb_break_lock;  /* Volume finding lock */
+
+       /* Probe state */
+       wait_queue_head_t       probe_wq;
+       atomic_t                probe_outstanding;
+       spinlock_t              probe_lock;
+       struct {
+               unsigned int    rtt;            /* RTT as ktime/64 */
+               u32             abort_code;
+               u32             cm_epoch;
+               short           error;
+               bool            have_result;
+               bool            responded:1;
+               bool            is_yfs:1;
+               bool            not_yfs:1;
+               bool            local_failure:1;
+               bool            no_epoch:1;
+               bool            cm_probed:1;
+               bool            said_rebooted:1;
+               bool            said_inconsistent:1;
+       } probe;
 };
 
 /*
@@ -447,8 +552,8 @@ struct afs_server_entry {
 
 struct afs_server_list {
        refcount_t              usage;
-       unsigned short          nr_servers;
-       unsigned short          index;          /* Server currently in use */
+       unsigned char           nr_servers;
+       unsigned char           preferred;      /* Preferred server */
        unsigned short          vnovol_mask;    /* Servers to be skipped due to VNOVOL */
        unsigned int            seq;            /* Set to ->servers_seq when installed */
        rwlock_t                lock;
@@ -550,6 +655,15 @@ struct afs_vnode {
        afs_callback_type_t     cb_type;        /* type of callback */
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+       return vnode->cache; /* the vnode's fscache cookie */
+#else /* !CONFIG_AFS_FSCACHE */
+       return NULL; /* built without CONFIG_AFS_FSCACHE */
+#endif /* CONFIG_AFS_FSCACHE */
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
@@ -586,13 +700,31 @@ struct afs_interface {
  */
 struct afs_addr_cursor {
        struct afs_addr_list    *alist;         /* Current address list (pins ref) */
-       struct sockaddr_rxrpc   *addr;
+       unsigned long           tried;          /* Tried addresses */
+       signed char             index;          /* Current address */
+       bool                    responded;      /* T if the current address responded */
+       unsigned short          nr_iterations;  /* Number of address iterations */
+       short                   error;
        u32                     abort_code;
-       unsigned short          start;          /* Starting point in alist->addrs[] */
-       unsigned short          index;          /* Wrapping offset from start to current addr */
+};
+
+/*
+ * Cursor for iterating over a set of volume location servers.
+ */
+struct afs_vl_cursor {
+       struct afs_addr_cursor  ac;             /* Embedded address cursor */
+       struct afs_cell         *cell;          /* The cell we're querying */
+       struct afs_vlserver_list *server_list;  /* Current server list (pins ref) */
+       struct afs_vlserver     *server;        /* Server on which this resides */
+       struct key              *key;           /* Key for the server */
+       unsigned long           untried;        /* Bitmask of untried servers */
+       short                   index;          /* Current server */
        short                   error;
-       bool                    begun;          /* T if we've begun iteration */
-       bool                    responded;      /* T if the current address responded */
+       unsigned short          flags;          /* AFS_VL_CURSOR_* bits below */
+#define AFS_VL_CURSOR_STOP     0x0001          /* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY    0x0002          /* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED  0x0004          /* Set if started a retry */
+       unsigned short          nr_iterations;  /* Number of server iterations */
 };
 
 /*
@@ -604,10 +736,11 @@ struct afs_fs_cursor {
        struct afs_server_list  *server_list;   /* Current server list (pins ref) */
        struct afs_cb_interest  *cbi;           /* Server on which this resides (pins ref) */
        struct key              *key;           /* Key for the server */
+       unsigned long           untried;        /* Bitmask of untried servers */
        unsigned int            cb_break;       /* cb_break + cb_s_break before the call */
        unsigned int            cb_break_2;     /* cb_break + cb_s_break (2nd vnode) */
-       unsigned char           start;          /* Initial index in server list */
-       unsigned char           index;          /* Number of servers tried beyond start */
+       short                   index;          /* Current server */
+       short                   error;
        unsigned short          flags;
 #define AFS_FS_CURSOR_STOP     0x0001          /* Set to cease iteration */
 #define AFS_FS_CURSOR_VBUSY    0x0002          /* Set if seen VBUSY */
@@ -615,6 +748,7 @@ struct afs_fs_cursor {
 #define AFS_FS_CURSOR_VNOVOL   0x0008          /* Set if seen VNOVOL */
 #define AFS_FS_CURSOR_CUR_ONLY 0x0010          /* Set if current server only (file lock held) */
 #define AFS_FS_CURSOR_NO_VSLEEP        0x0020          /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+       unsigned short          nr_iterations;  /* Number of server iterations */
 };
 
 /*
@@ -640,12 +774,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
                                                unsigned short,
                                                unsigned short);
 extern void afs_put_addrlist(struct afs_addr_list *);
-extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
-                                                 unsigned short, unsigned short);
-extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+                                                     const char *, size_t, char,
+                                                     unsigned short, unsigned short);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
 extern bool afs_iterate_addresses(struct afs_addr_cursor *);
 extern int afs_end_cursor(struct afs_addr_cursor *);
-extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
 
 extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
 extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -668,6 +802,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
  * callback.c
  */
 extern void afs_init_callback_state(struct afs_server *);
+extern void __afs_break_callback(struct afs_vnode *);
 extern void afs_break_callback(struct afs_vnode *);
 extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
 
@@ -688,10 +823,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
        return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
 }
 
-static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
-                                           struct afs_cb_interest *cbi)
+/*
+ * Report whether a previously sampled callback-break value is out of date.
+ * Returns true if there is no callback interest (cbi is NULL) or if cb_break
+ * no longer equals the sum of the vnode, server and volume break counters.
+ */
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+                                   const struct afs_vnode *vnode,
+                                   const struct afs_cb_interest *cbi)
 {
-       return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+       return !cbi || cb_break != (vnode->cb_break +
+                                   cbi->server->cb_s_break +
+                                   vnode->volume->cb_v_break);
 }
 
 /*
@@ -781,7 +919,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
 extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
 extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
                         struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
 extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
 extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
                          struct afs_fid *, struct afs_file_status *);
@@ -797,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
 extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
                                        struct afs_addr_cursor *, struct key *);
 extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
-                                  struct afs_addr_cursor *, struct key *);
+                                  struct afs_addr_cursor *, struct key *, unsigned int, bool);
 extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
                                     struct afs_fid *, struct afs_file_status *,
                                     struct afs_callback *, unsigned int,
@@ -806,6 +944,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
                               struct afs_fid *, struct afs_file_status *,
                               struct afs_callback *, struct afs_volsync *);
 
+/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+
 /*
  * inode.c
  */
@@ -922,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
 extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
-extern int afs_queue_call_work(struct afs_call *);
 extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
                                            const struct afs_call_type *,
@@ -930,12 +1074,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-extern int afs_protocol_error(struct afs_call *, int);
+extern int afs_extract_data(struct afs_call *, bool);
+extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
+
+/*
+ * Aim the call's data extraction at a caller-supplied buffer: kvec[0] is set
+ * to describe buf/size and call->iter is initialised over that single kvec
+ * for reading.
+ */
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+       call->kvec[0].iov_base = buf;
+       call->kvec[0].iov_len = size;
+       iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+/*
+ * Set up extraction of sizeof(call->tmp) bytes into call->tmp.
+ */
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+       afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+/*
+ * Set up extraction of sizeof(call->tmp64) bytes into call->tmp64.
+ */
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+       afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
+}
+
+/*
+ * Set call->iter up with iov_iter_discard() for the next size bytes; no
+ * destination buffer is attached to the iterator.
+ */
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+       iov_iter_discard(&call->iter, READ, size);
+}
+
+/*
+ * Set up extraction of size bytes into call->buffer.  The caller supplies the
+ * size; nothing here checks it against the buffer's capacity.
+ */
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+       afs_extract_begin(call, call->buffer, size);
+}
 
 static inline int afs_transfer_reply(struct afs_call *call)
 {
-       return afs_extract_data(call, call->buffer, call->reply_max, false);
+       return afs_extract_data(call, false);
 }
 
 static inline bool afs_check_call_state(struct afs_call *call,
@@ -1012,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
 extern void afs_manage_servers(struct work_struct *);
 extern void afs_servers_timer(struct timer_list *);
 extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_probe_fileserver(struct afs_fs_cursor *);
 extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
 
 /*
@@ -1039,14 +1209,51 @@ extern void afs_fs_exit(void);
 /*
  * vlclient.c
  */
-extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
-                                                        struct afs_addr_cursor *,
-                                                        struct key *, const char *, int);
-extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
-                                               struct key *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
-extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
-                                                    struct key *, const uuid_t *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+                                                        const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
+                                  struct afs_vlserver *, unsigned int, bool);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
+/*
+ * vl_rotate.c
+ */
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+                                        struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vl_list.c
+ */
+/*
+ * Take a reference on a VL server record by bumping its usage count and
+ * return the same pointer.  The pointer is dereferenced unconditionally, so
+ * it must not be NULL.
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
+{
+       atomic_inc(&vlserver->usage);
+       return vlserver;
+}
+
+/*
+ * Take a reference on a VL server list by bumping its usage count and return
+ * the same pointer.  A NULL list is passed straight through untouched.
+ */
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
+{
+       if (vllist)
+               atomic_inc(&vllist->usage);
+       return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+                                                          const void *, size_t);
 
 /*
  * volume.c
@@ -1089,6 +1296,36 @@ extern int afs_launder_page(struct page *);
 extern const struct xattr_handler *afs_xattr_handlers[];
 extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
 
+/*
+ * yfsclient.c
+ */
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
+                             struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
+                        struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
+                         struct afs_fid *, struct afs_file_status *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
+                        struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
+                            pgoff_t, pgoff_t, unsigned, unsigned);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
+                              struct afs_fid *, struct afs_file_status *,
+                              struct afs_callback *, struct afs_volsync *);
+extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
+                                    struct afs_fid *, struct afs_file_status *,
+                                    struct afs_callback *, unsigned int,
+                                    struct afs_volsync *);
 
 /*
  * Miscellaneous inline functions.
@@ -1120,6 +1357,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
        }
 }
 
+/*
+ * Record an I/O error against a call: pass the call's debug ID, -EIO and the
+ * location code to trace_afs_io_error(), then return -EIO.
+ */
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+       trace_afs_io_error(call->debug_id, -EIO, where);
+       return -EIO;
+}
+
+/*
+ * Record a file error against a vnode: pass the vnode, -EIO and the location
+ * code to trace_afs_file_error(), then return -EIO so that the caller can
+ * assign or return the result directly.
+ */
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+       trace_afs_file_error(vnode, -EIO, where);
+       return -EIO;
+}
 
 /*****************************************************************************/
 /*
index 99fd13500a97f9e77e2cbf603ae012bc460d1a2f..2e51c6994148f30f4ec8d858b1e318b1d58980c0 100644 (file)
@@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
                        goto error_no_page;
                }
 
-               ret = -EIO;
-               if (PageError(page))
+               if (PageError(page)) {
+                       ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
                        goto error;
+               }
 
                buf = kmap_atomic(page);
                memcpy(devname, buf, size);
index 9101f62707af2da3dbff5e33c6067d0cafbb9013..be2ee3bbd0a953349ccba4a30eecbd2366b840c1 100644 (file)
 #include <linux/uaccess.h>
 #include "internal.h"
 
+/*
+ * Private seq_file data for a cell's "vlservers" proc file.  vllist is filled
+ * in by afs_proc_cell_vlservers_start() and read back by the ->next() and
+ * ->show() handlers.
+ */
+struct afs_vl_seq_net_private {
+       struct seq_net_private          seq;    /* Must be first */
+       struct afs_vlserver_list        *vllist;
+};
+
 static inline struct afs_net *afs_seq2net(struct seq_file *m)
 {
        return afs_net(seq_file_net(m));
@@ -32,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
  */
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
-       struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+       struct afs_vlserver_list *vllist;
+       struct afs_cell *cell;
 
        if (v == SEQ_START_TOKEN) {
                /* display header on line 1 */
-               seq_puts(m, "USE NAME\n");
+               seq_puts(m, "USE    TTL SV NAME\n");
                return 0;
        }
 
+       /* Only convert v to a cell once we know it isn't the header token */
+       cell = list_entry(v, struct afs_cell, proc_link);
+       vllist = rcu_dereference(cell->vl_servers);
+
        /* display one cell per line on subsequent lines */
-       seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
+       /*
+        * Columns: usage count, dns_expiry minus the current real time in
+        * seconds (negative once past expiry), number of VL servers (0 if the
+        * cell has no list) and the cell name.
+        */
+       seq_printf(m, "%3u %6lld %2u %s\n",
+                  atomic_read(&cell->usage),
+                  cell->dns_expiry - ktime_get_real_seconds(),
+                  vllist ? vllist->nr_servers : 0,
+                  cell->name);
        return 0;
 }
 
@@ -208,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
                return 0;
        }
 
-       seq_printf(m, "%3d %08x %s\n",
+       seq_printf(m, "%3d %08llx %s\n",
                   atomic_read(&vol->usage), vol->vid,
                   afs_vol_types[vol->type]);
 
@@ -247,61 +260,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
        .show   = afs_proc_cell_volumes_show,
 };
 
+/*
+ * Printable names for DNS record source values, indexed by the enum value.
+ * The extra slot at NR__dns_record_source holds a "[weird]" placeholder.
+ */
+static const char *const dns_record_sources[NR__dns_record_source + 1] = {
+       [DNS_RECORD_UNAVAILABLE]        = "unav",
+       [DNS_RECORD_FROM_CONFIG]        = "cfg",
+       [DNS_RECORD_FROM_DNS_A]         = "A",
+       [DNS_RECORD_FROM_DNS_AFSDB]     = "AFSDB",
+       [DNS_RECORD_FROM_DNS_SRV]       = "SRV",
+       [DNS_RECORD_FROM_NSS]           = "nss",
+       [NR__dns_record_source]         = "[weird]"
+};
+
+/*
+ * Printable names for DNS lookup status values, indexed by the enum value.
+ * The extra slot at NR__dns_lookup_status holds a "[weird]" placeholder.
+ */
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
+       [DNS_LOOKUP_NOT_DONE]           = "no-lookup",
+       [DNS_LOOKUP_GOOD]               = "good",
+       [DNS_LOOKUP_GOOD_WITH_BAD]      = "good/bad",
+       [DNS_LOOKUP_BAD]                = "bad",
+       [DNS_LOOKUP_GOT_NOT_FOUND]      = "not-found",
+       [DNS_LOOKUP_GOT_LOCAL_FAILURE]  = "local-failure",
+       [DNS_LOOKUP_GOT_TEMP_FAILURE]   = "temp-failure",
+       [DNS_LOOKUP_GOT_NS_FAILURE]     = "ns-failure",
+       [NR__dns_lookup_status]         = "[weird]"
+};
+
 /*
  * Display the list of Volume Location servers we're using for a cell.
  */
 static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 {
-       struct sockaddr_rxrpc *addr = v;
+       const struct afs_vl_seq_net_private *priv = m->private;
+       const struct afs_vlserver_list *vllist = priv->vllist;
+       const struct afs_vlserver_entry *entry;
+       const struct afs_vlserver *vlserver;
+       const struct afs_addr_list *alist;
+       int i;
 
-       /* display header on line 1 */
-       if (v == (void *)1) {
-               seq_puts(m, "ADDRESS\n");
+       if (v == SEQ_START_TOKEN) {
+               seq_printf(m, "# source %s, status %s\n",
+                          dns_record_sources[vllist->source],
+                          dns_lookup_statuses[vllist->status]);
                return 0;
        }
 
-       /* display one cell per line on subsequent lines */
-       seq_printf(m, "%pISp\n", &addr->transport);
+       entry = v;
+       vlserver = entry->server;
+       alist = rcu_dereference(vlserver->addresses);
+
+       seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+                  vlserver->name, entry->priority, entry->weight,
+                  dns_record_sources[alist ? alist->source : entry->source],
+                  dns_lookup_statuses[alist ? alist->status : entry->status]);
+       if (alist) {
+               for (i = 0; i < alist->nr_addrs; i++)
+                       seq_printf(m, " %c %pISpc\n",
+                                  alist->preferred == i ? '>' : '-',
+                                  &alist->addrs[i].transport);
+       }
        return 0;
 }
 
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
        __acquires(rcu)
 {
-       struct afs_addr_list *alist;
+       struct afs_vl_seq_net_private *priv = m->private;
+       struct afs_vlserver_list *vllist;
        struct afs_cell *cell = PDE_DATA(file_inode(m->file));
        loff_t pos = *_pos;
 
        rcu_read_lock();
 
-       alist = rcu_dereference(cell->vl_addrs);
+       /* Stash the list so that ->next() and ->show() use this same pointer */
+       vllist = rcu_dereference(cell->vl_servers);
+       priv->vllist = vllist;
 
-       /* allow for the header line */
-       if (!pos)
-               return (void *) 1;
-       pos--;
+       /* Position 0 is the header token; servers[n] is at position n + 1 */
+       if (pos < 0)
+               *_pos = pos = 0;
+       if (pos == 0)
+               return SEQ_START_TOKEN;
 
-       if (!alist || pos >= alist->nr_addrs)
+       if (!vllist || pos - 1 >= vllist->nr_servers)
                return NULL;
 
-       return alist->addrs + pos;
+       return &vllist->servers[pos - 1];
 }
 
 static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
                                          loff_t *_pos)
 {
-       struct afs_addr_list *alist;
-       struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+       /* Reuse the list pointer that ->start() stored in the private data */
+       struct afs_vl_seq_net_private *priv = m->private;
+       struct afs_vlserver_list *vllist = priv->vllist;
        loff_t pos;
 
-       alist = rcu_dereference(cell->vl_addrs);
-
        pos = *_pos;
-       (*_pos)++;
-       if (!alist || pos >= alist->nr_addrs)
+       /* Advance first; position n + 1 corresponds to servers[n] */
+       pos++;
+       *_pos = pos;
+       if (!vllist || pos - 1 >= vllist->nr_servers)
                return NULL;
 
-       return alist->addrs + pos;
+       return &vllist->servers[pos - 1];
 }
 
 static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
@@ -337,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
                   &server->uuid,
                   atomic_read(&server->usage),
                   &alist->addrs[0].transport,
-                  alist->index == 0 ? "*" : "");
+                  alist->preferred == 0 ? "*" : "");
        for (i = 1; i < alist->nr_addrs; i++)
                seq_printf(m, "                                         %pISpc%s\n",
                           &alist->addrs[i].transport,
-                          alist->index == i ? "*" : "");
+                          alist->preferred == i ? "*" : "");
        return 0;
 }
 
@@ -562,7 +616,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 
        if (!proc_create_net_data("vlservers", 0444, dir,
                                  &afs_proc_cell_vlservers_ops,
-                                 sizeof(struct seq_net_private),
+                                 sizeof(struct afs_vl_seq_net_private),
                                  cell) ||
            !proc_create_net_data("volumes", 0444, dir,
                                  &afs_proc_cell_volumes_ops,
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644 (file)
index 0000000..07bc10f
--- /dev/null
@@ -0,0 +1,163 @@
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define YFS_FS_SERVICE 2500
+#define YFS_CM_SERVICE 2501
+
+#define YFSCBMAX 1024
+
+enum YFS_CM_Operations {
+       YFSCBProbe              = 206,  /* probe client */
+       YFSCBGetLock            = 207,  /* get contents of CM lock table */
+       YFSCBXStatsVersion      = 209,  /* get version of extended statistics */
+       YFSCBGetXStats          = 210,  /* get contents of extended statistics data */
+       YFSCBInitCallBackState3 = 213,  /* initialise callback state, version 3 */
+       YFSCBProbeUuid          = 214,  /* check the client hasn't rebooted */
+       YFSCBGetServerPrefs     = 215,
+       YFSCBGetCellServDV      = 216,
+       YFSCBGetLocalCell       = 217,
+       YFSCBGetCacheConfig     = 218,
+       YFSCBGetCellByNum       = 65537,
+       YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+       YFSCBCallBack           = 64204,
+};
+
+enum YFS_FS_Operations {
+       YFSFETCHACL             = 64131, /* YFS Fetch file ACL */
+       YFSFETCHSTATUS          = 64132, /* YFS Fetch file status */
+       YFSSTOREACL             = 64134, /* YFS Store file ACL */
+       YFSSTORESTATUS          = 64135, /* YFS Store file status */
+       YFSREMOVEFILE           = 64136, /* YFS Remove a file */
+       YFSCREATEFILE           = 64137, /* YFS Create a file */
+       YFSRENAME               = 64138, /* YFS Rename or move a file or directory */
+       YFSSYMLINK              = 64139, /* YFS Create a symbolic link */
+       YFSLINK                 = 64140, /* YFS Create a hard link */
+       YFSMAKEDIR              = 64141, /* YFS Create a directory */
+       YFSREMOVEDIR            = 64142, /* YFS Remove a directory */
+       YFSGETVOLUMESTATUS      = 64149, /* YFS Get volume status information */
+       YFSSETVOLUMESTATUS      = 64150, /* YFS Set volume status information */
+       YFSSETLOCK              = 64156, /* YFS Request a file lock */
+       YFSEXTENDLOCK           = 64157, /* YFS Extend a file lock */
+       YFSRELEASELOCK          = 64158, /* YFS Release a file lock */
+       YFSLOOKUP               = 64161, /* YFS lookup file in directory */
+       YFSFLUSHCPS             = 64165,
+       YFSFETCHOPAQUEACL       = 64168,
+       YFSWHOAMI               = 64170,
+       YFSREMOVEACL            = 64171,
+       YFSREMOVEFILE2          = 64173,
+       YFSSTOREOPAQUEACL2      = 64174,
+       YFSINLINEBULKSTATUS     = 64536, /* YFS Fetch multiple file statuses with errors */
+       YFSFETCHDATA64          = 64537, /* YFS Fetch file data */
+       YFSSTOREDATA64          = 64538, /* YFS Store file data */
+       YFSUPDATESYMLINK        = 64540,
+};
+
+/*
+ * A 64-bit quantity carried as two big-endian 32-bit words, most significant
+ * word first.
+ */
+struct yfs_xdr_u64 {
+       __be32                  msw;
+       __be32                  lsw;
+} __packed;
+
+/*
+ * Reassemble a host-order 64-bit value from its two big-endian halves.
+ */
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+       u64 upper = ntohl(x.msw);
+       u64 lower = ntohl(x.lsw);
+
+       return (upper << 32) | lower;
+}
+
+/*
+ * Split a host-order 64-bit value into its two big-endian halves.
+ */
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+       struct yfs_xdr_u64 split;
+
+       split.msw = htonl(x >> 32);
+       split.lsw = htonl(x);
+       return split;
+}
+
+struct yfs_xdr_vnode {
+       struct yfs_xdr_u64      lo;
+       __be32                  hi;
+       __be32                  unique;
+} __packed;
+
+struct yfs_xdr_YFSFid {
+       struct yfs_xdr_u64      volume;
+       struct yfs_xdr_vnode    vnode;
+} __packed;
+
+
+struct yfs_xdr_YFSFetchStatus {
+       __be32                  type;
+       __be32                  nlink;
+       struct yfs_xdr_u64      size;
+       struct yfs_xdr_u64      data_version;
+       struct yfs_xdr_u64      author;
+       struct yfs_xdr_u64      owner;
+       struct yfs_xdr_u64      group;
+       __be32                  mode;
+       __be32                  caller_access;
+       __be32                  anon_access;
+       struct yfs_xdr_vnode    parent;
+       __be32                  data_access_protocol;
+       struct yfs_xdr_u64      mtime_client;
+       struct yfs_xdr_u64      mtime_server;
+       __be32                  lock_count;
+       __be32                  abort_code;
+} __packed;
+
+struct yfs_xdr_YFSCallBack {
+       __be32                  version;
+       struct yfs_xdr_u64      expiration_time;
+       __be32                  type;
+} __packed;
+
+struct yfs_xdr_YFSStoreStatus {
+       __be32                  mask;
+       __be32                  mode;
+       struct yfs_xdr_u64      mtime_client;
+       struct yfs_xdr_u64      owner;
+       struct yfs_xdr_u64      group;
+} __packed;
+
+struct yfs_xdr_RPCFlags {
+       __be32                  rpc_flags;
+} __packed;
+
+struct yfs_xdr_YFSVolSync {
+       struct yfs_xdr_u64      vol_creation_date;
+       struct yfs_xdr_u64      vol_update_date;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      blocks_in_use;
+       struct yfs_xdr_u64      blocks_avail;
+} __packed;
+
+enum yfs_volume_type {
+       yfs_volume_type_ro = 0,
+       yfs_volume_type_rw = 1,
+};
+
+#define yfs_FVSOnline          0x1
+#define yfs_FVSInservice       0x2
+#define yfs_FVSBlessed         0x4
+#define yfs_FVSNeedsSalvage    0x8
+
+struct yfs_xdr_YFSFetchVolumeStatus {
+       struct yfs_xdr_u64      vid;
+       struct yfs_xdr_u64      parent_id;
+       __be32                  flags;
+       __be32                  type;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      blocks_in_use;
+       struct yfs_xdr_u64      part_blocks_avail;
+       struct yfs_xdr_u64      part_max_blocks;
+       struct yfs_xdr_u64      vol_copy_date;
+       struct yfs_xdr_u64      vol_backup_date;
+} __packed;
+
+struct yfs_xdr_YFSStoreVolumeStatus {
+       __be32                  mask;
+       struct yfs_xdr_u64      min_quota;
+       struct yfs_xdr_u64      max_quota;
+       struct yfs_xdr_u64      file_quota;
+} __packed;
index 1faef56b12bd3f9591b2acc29ce89c6689e224e6..00504254c1c24b6186ec676edcfd1fdcdfffbc63 100644 (file)
 #include "internal.h"
 #include "afs_fs.h"
 
-/*
- * Initialise a filesystem server cursor for iterating over FS servers.
- */
-static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-       memset(fc, 0, sizeof(*fc));
-}
-
 /*
  * Begin an operation on the fileserver.
  *
@@ -35,13 +27,14 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
 bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
                               struct key *key)
 {
-       afs_init_fs_cursor(fc, vnode);
+       memset(fc, 0, sizeof(*fc));
        fc->vnode = vnode;
        fc->key = key;
        fc->ac.error = SHRT_MAX;
+       fc->error = -EDESTADDRREQ;
 
        if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
-               fc->ac.error = -EINTR;
+               fc->error = -EINTR;
                fc->flags |= AFS_FS_CURSOR_STOP;
                return false;
        }
@@ -65,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
        fc->server_list = afs_get_serverlist(vnode->volume->servers);
        read_unlock(&vnode->volume->servers_lock);
 
+       fc->untried = (1UL << fc->server_list->nr_servers) - 1;
+       fc->index = READ_ONCE(fc->server_list->preferred);
+
        cbi = vnode->cb_interest;
        if (cbi) {
                /* See if the vnode's preferred record is still available */
                for (i = 0; i < fc->server_list->nr_servers; i++) {
                        if (fc->server_list->servers[i].cb_interest == cbi) {
-                               fc->start = i;
+                               fc->index = i;
                                goto found_interest;
                        }
                }
@@ -80,7 +76,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
                 * and have to return an error.
                 */
                if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        return false;
                }
 
@@ -94,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 
                afs_put_cb_interest(afs_v2net(vnode), cbi);
                cbi = NULL;
-       } else {
-               fc->start = READ_ONCE(fc->server_list->index);
        }
 
 found_interest:
-       fc->index = fc->start;
        return true;
 }
 
@@ -117,7 +110,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
        default:                m = "busy";             break;
        }
 
-       pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+       pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 }
 
 /*
@@ -127,7 +120,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
 {
        msleep_interruptible(1000);
        if (signal_pending(current)) {
-               fc->ac.error = -ERESTARTSYS;
+               fc->error = -ERESTARTSYS;
                return false;
        }
 
@@ -143,27 +136,32 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
        struct afs_addr_list *alist;
        struct afs_server *server;
        struct afs_vnode *vnode = fc->vnode;
+       u32 rtt, abort_code;
+       int error = fc->ac.error, i;
 
-       _enter("%u/%u,%u/%u,%d,%d",
-              fc->index, fc->start,
-              fc->ac.index, fc->ac.start,
-              fc->ac.error, fc->ac.abort_code);
+       _enter("%lx[%d],%lx[%d],%d,%d",
+              fc->untried, fc->index,
+              fc->ac.tried, fc->ac.index,
+              error, fc->ac.abort_code);
 
        if (fc->flags & AFS_FS_CURSOR_STOP) {
                _leave(" = f [stopped]");
                return false;
        }
 
+       fc->nr_iterations++;
+
        /* Evaluate the result of the previous operation, if there was one. */
-       switch (fc->ac.error) {
+       switch (error) {
        case SHRT_MAX:
                goto start;
 
        case 0:
        default:
                /* Success or local failure.  Stop. */
+               fc->error = error;
                fc->flags |= AFS_FS_CURSOR_STOP;
-               _leave(" = f [okay/local %d]", fc->ac.error);
+               _leave(" = f [okay/local %d]", error);
                return false;
 
        case -ECONNABORTED:
@@ -178,7 +176,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * - May indicate that the fileserver couldn't attach to the vol.
                         */
                        if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto next_server;
                        }
 
@@ -187,12 +185,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                        write_unlock(&vnode->volume->servers_lock);
 
                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-                       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-                       if (fc->ac.error < 0)
-                               goto failed;
+                       error = afs_check_volume_status(vnode->volume, fc->key);
+                       if (error < 0)
+                               goto failed_set_error;
 
                        if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
-                               fc->ac.error = -ENOMEDIUM;
+                               fc->error = -ENOMEDIUM;
                                goto failed;
                        }
 
@@ -200,7 +198,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * it's the fileserver having trouble.
                         */
                        if (vnode->volume->servers == fc->server_list) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto next_server;
                        }
 
@@ -215,7 +213,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                case VONLINE:
                case VDISKFULL:
                case VOVERQUOTA:
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+                       fc->error = afs_abort_to_error(fc->ac.abort_code);
                        goto next_server;
 
                case VOFFLINE:
@@ -224,11 +222,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                                clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
                        }
                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-                               fc->ac.error = -EADV;
+                               fc->error = -EADV;
                                goto failed;
                        }
                        if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-                               fc->ac.error = -ESTALE;
+                               fc->error = -ESTALE;
                                goto failed;
                        }
                        goto busy;
@@ -240,7 +238,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * have a file lock we need to maintain.
                         */
                        if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-                               fc->ac.error = -EBUSY;
+                               fc->error = -EBUSY;
                                goto failed;
                        }
                        if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
@@ -269,16 +267,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * honour, just in case someone sets up a loop.
                         */
                        if (fc->flags & AFS_FS_CURSOR_VMOVED) {
-                               fc->ac.error = -EREMOTEIO;
+                               fc->error = -EREMOTEIO;
                                goto failed;
                        }
                        fc->flags |= AFS_FS_CURSOR_VMOVED;
 
                        set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
                        set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-                       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-                       if (fc->ac.error < 0)
-                               goto failed;
+                       error = afs_check_volume_status(vnode->volume, fc->key);
+                       if (error < 0)
+                               goto failed_set_error;
 
                        /* If the server list didn't change, then the VLDB is
                         * out of sync with the fileservers.  This is hopefully
@@ -290,7 +288,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                         * TODO: Retry a few times with sleeps.
                         */
                        if (vnode->volume->servers == fc->server_list) {
-                               fc->ac.error = -ENOMEDIUM;
+                               fc->error = -ENOMEDIUM;
                                goto failed;
                        }
 
@@ -299,20 +297,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                default:
                        clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
                        clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+                       fc->error = afs_abort_to_error(fc->ac.abort_code);
                        goto failed;
                }
 
+       case -ETIMEDOUT:
+       case -ETIME:
+               if (fc->error != -EDESTADDRREQ)
+                       goto iterate_address;
+               /* Fall through */
        case -ENETUNREACH:
        case -EHOSTUNREACH:
        case -ECONNREFUSED:
-       case -ETIMEDOUT:
-       case -ETIME:
                _debug("no conn");
+               fc->error = error;
                goto iterate_address;
 
        case -ECONNRESET:
                _debug("call reset");
+               fc->error = error;
                goto failed;
        }
 
@@ -328,15 +331,57 @@ start:
        /* See if we need to do an update of the volume record.  Note that the
         * volume may have moved or even have been deleted.
         */
-       fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-       if (fc->ac.error < 0)
-               goto failed;
+       error = afs_check_volume_status(vnode->volume, fc->key);
+       if (error < 0)
+               goto failed_set_error;
 
        if (!afs_start_fs_iteration(fc, vnode))
                goto failed;
 
-use_server:
-       _debug("use");
+       _debug("__ VOL %llx __", vnode->volume->vid);
+       error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
+       if (error < 0)
+               goto failed_set_error;
+
+pick_server:
+       _debug("pick [%lx]", fc->untried);
+
+       error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
+       if (error < 0)
+               goto failed_set_error;
+
+       /* Pick the untried server with the lowest RTT.  If we have outstanding
+        * callbacks, we stick with the server we're already using if we can.
+        */
+       if (fc->cbi) {
+               _debug("cbi %u", fc->index);
+               if (test_bit(fc->index, &fc->untried))
+                       goto selected_server;
+               afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+               fc->cbi = NULL;
+               _debug("nocbi");
+       }
+
+       fc->index = -1;
+       rtt = U32_MAX;
+       for (i = 0; i < fc->server_list->nr_servers; i++) {
+               struct afs_server *s = fc->server_list->servers[i].server;
+
+               if (!test_bit(i, &fc->untried) || !s->probe.responded)
+                       continue;
+               if (s->probe.rtt < rtt) {
+                       fc->index = i;
+                       rtt = s->probe.rtt;
+               }
+       }
+
+       if (fc->index == -1)
+               goto no_more_servers;
+
+selected_server:
+       _debug("use %d", fc->index);
+       __clear_bit(fc->index, &fc->untried);
+
        /* We're starting on a different fileserver from the list.  We need to
         * check it, create a callback intercept, find its address list and
         * probe its capabilities before we use it.
@@ -354,10 +399,10 @@ use_server:
         * break request before we've finished decoding the reply and
         * installing the vnode.
         */
-       fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
-                                                      fc->index);
-       if (fc->ac.error < 0)
-               goto failed;
+       error = afs_register_server_cb_interest(vnode, fc->server_list,
+                                               fc->index);
+       if (error < 0)
+               goto failed_set_error;
 
        fc->cbi = afs_get_cb_interest(vnode->cb_interest);
 
@@ -369,66 +414,88 @@ use_server:
 
        memset(&fc->ac, 0, sizeof(fc->ac));
 
-       /* Probe the current fileserver if we haven't done so yet. */
-       if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
-               fc->ac.alist = afs_get_addrlist(alist);
-
-               if (!afs_probe_fileserver(fc)) {
-                       switch (fc->ac.error) {
-                       case -ENOMEM:
-                       case -ERESTARTSYS:
-                       case -EINTR:
-                               goto failed;
-                       default:
-                               goto next_server;
-                       }
-               }
-       }
-
        if (!fc->ac.alist)
                fc->ac.alist = alist;
        else
                afs_put_addrlist(alist);
 
-       fc->ac.start = READ_ONCE(alist->index);
-       fc->ac.index = fc->ac.start;
+       fc->ac.index = -1;
 
 iterate_address:
        ASSERT(fc->ac.alist);
-       _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
        /* Iterate over the current server's address list to try and find an
         * address on which it will respond to us.
         */
        if (!afs_iterate_addresses(&fc->ac))
                goto next_server;
 
+       _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
+
        _leave(" = t");
        return true;
 
 next_server:
        _debug("next");
        afs_end_cursor(&fc->ac);
-       afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
-       fc->cbi = NULL;
-       fc->index++;
-       if (fc->index >= fc->server_list->nr_servers)
-               fc->index = 0;
-       if (fc->index != fc->start)
-               goto use_server;
+       goto pick_server;
 
+no_more_servers:
        /* That's all the servers poked to no good effect.  Try again if some
         * of them were busy.
         */
        if (fc->flags & AFS_FS_CURSOR_VBUSY)
                goto restart_from_beginning;
 
-       fc->ac.error = -EDESTADDRREQ;
-       goto failed;
+       abort_code = 0;
+       error = -EDESTADDRREQ;
+       for (i = 0; i < fc->server_list->nr_servers; i++) {
+               struct afs_server *s = fc->server_list->servers[i].server;
+               int probe_error = READ_ONCE(s->probe.error);
+
+               switch (probe_error) {
+               case 0:
+                       continue;
+               default:
+                       if (error == -ETIMEDOUT ||
+                           error == -ETIME)
+                               continue;
+               case -ETIMEDOUT:
+               case -ETIME:
+                       if (error == -ENOMEM ||
+                           error == -ENONET)
+                               continue;
+               case -ENOMEM:
+               case -ENONET:
+                       if (error == -ENETUNREACH)
+                               continue;
+               case -ENETUNREACH:
+                       if (error == -EHOSTUNREACH)
+                               continue;
+               case -EHOSTUNREACH:
+                       if (error == -ECONNREFUSED)
+                               continue;
+               case -ECONNREFUSED:
+                       if (error == -ECONNRESET)
+                               continue;
+               case -ECONNRESET: /* Responded, but call expired. */
+                       if (error == -ECONNABORTED)
+                               continue;
+               case -ECONNABORTED:
+                       abort_code = s->probe.abort_code;
+                       error = probe_error;
+                       continue;
+               }
+       }
+
+       if (error == -ECONNABORTED)
+               error = afs_abort_to_error(abort_code);
 
+failed_set_error:
+       fc->error = error;
 failed:
        fc->flags |= AFS_FS_CURSOR_STOP;
        afs_end_cursor(&fc->ac);
-       _leave(" = f [failed %d]", fc->ac.error);
+       _leave(" = f [failed %d]", fc->error);
        return false;
 }
 
@@ -442,13 +509,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
        struct afs_vnode *vnode = fc->vnode;
        struct afs_cb_interest *cbi = vnode->cb_interest;
        struct afs_addr_list *alist;
+       int error = fc->ac.error;
 
        _enter("");
 
-       switch (fc->ac.error) {
+       switch (error) {
        case SHRT_MAX:
                if (!cbi) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        fc->flags |= AFS_FS_CURSOR_STOP;
                        return false;
                }
@@ -461,25 +529,26 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
                afs_get_addrlist(alist);
                read_unlock(&cbi->server->fs_lock);
                if (!alist) {
-                       fc->ac.error = -ESTALE;
+                       fc->error = -ESTALE;
                        fc->flags |= AFS_FS_CURSOR_STOP;
                        return false;
                }
 
                memset(&fc->ac, 0, sizeof(fc->ac));
                fc->ac.alist = alist;
-               fc->ac.start = READ_ONCE(alist->index);
-               fc->ac.index = fc->ac.start;
+               fc->ac.index = -1;
                goto iterate_address;
 
        case 0:
        default:
                /* Success or local failure.  Stop. */
+               fc->error = error;
                fc->flags |= AFS_FS_CURSOR_STOP;
-               _leave(" = f [okay/local %d]", fc->ac.error);
+               _leave(" = f [okay/local %d]", error);
                return false;
 
        case -ECONNABORTED:
+               fc->error = afs_abort_to_error(fc->ac.abort_code);
                fc->flags |= AFS_FS_CURSOR_STOP;
                _leave(" = f [abort]");
                return false;
@@ -490,6 +559,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
        case -ETIMEDOUT:
        case -ETIME:
                _debug("no conn");
+               fc->error = error;
                goto iterate_address;
        }
 
@@ -506,13 +576,66 @@ iterate_address:
        return false;
 }
 
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
+{
+       static int count;
+       int i;
+
+       if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+               return;
+       count++;
+
+       rcu_read_lock();
+
+       pr_notice("EDESTADDR occurred\n");
+       pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
+                 fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
+       pr_notice("FC: ut=%lx ix=%d ni=%u\n",
+                 fc->untried, fc->index, fc->nr_iterations);
+
+       if (fc->server_list) {
+               const struct afs_server_list *sl = fc->server_list;
+               pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
+                         sl->nr_servers, sl->preferred, sl->vnovol_mask);
+               for (i = 0; i < sl->nr_servers; i++) {
+                       const struct afs_server *s = sl->servers[i].server;
+                       pr_notice("FC: server fl=%lx av=%u %pU\n",
+                                 s->flags, s->addr_version, &s->uuid);
+                       if (s->addresses) {
+                               const struct afs_addr_list *a =
+                                       rcu_dereference(s->addresses);
+                               pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
+                                         a->version,
+                                         a->nr_ipv4, a->nr_addrs, a->max_addrs,
+                                         a->preferred);
+                               pr_notice("FC:  - pr=%lx R=%lx F=%lx\n",
+                                         a->probed, a->responded, a->failed);
+                               if (a == fc->ac.alist)
+                                       pr_notice("FC:  - current\n");
+                       }
+               }
+       }
+
+       pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+                 fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
+                 fc->ac.responded, fc->ac.nr_iterations);
+       rcu_read_unlock();
+}
+
 /*
  * Tidy up a filesystem cursor and unlock the vnode.
  */
 int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 {
        struct afs_net *net = afs_v2net(fc->vnode);
-       int ret;
+
+       if (fc->error == -EDESTADDRREQ ||
+           fc->error == -ENETUNREACH ||
+           fc->error == -EHOSTUNREACH)
+               afs_dump_edestaddrreq(fc);
 
        mutex_unlock(&fc->vnode->io_lock);
 
@@ -520,9 +643,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
        afs_put_cb_interest(net, fc->cbi);
        afs_put_serverlist(net, fc->server_list);
 
-       ret = fc->ac.error;
-       if (ret == -ECONNABORTED)
-               afs_abort_to_error(fc->ac.abort_code);
+       if (fc->error == -ECONNABORTED)
+               fc->error = afs_abort_to_error(fc->ac.abort_code);
 
-       return fc->ac.error;
+       return fc->error;
 }
index 77a83790a31f38c9e25ffeaa1c190eb8958e7fa9..59970886690f1b6a3767b72ec33c3c2fe81fd61f 100644 (file)
@@ -16,6 +16,7 @@
 #include <net/af_rxrpc.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 struct workqueue_struct *afs_async_calls;
 
@@ -75,6 +76,18 @@ int afs_open_socket(struct afs_net *net)
        if (ret < 0)
                goto error_2;
 
+       srx.srx_service = YFS_CM_SERVICE;
+       ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+       if (ret < 0)
+               goto error_2;
+
+       /* Ideally, we'd turn on service upgrade here, but we can't because
+        * OpenAFS is buggy and leaks the userStatus field from packet to
+        * packet and between FS packets and CB packets - so if we try to do an
+        * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+        * it sends back to us.
+        */
+
        rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
                                           afs_rx_discard_new_call);
 
@@ -143,6 +156,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
        INIT_WORK(&call->async_work, afs_process_async_call);
        init_waitqueue_head(&call->waitq);
        spin_lock_init(&call->state_lock);
+       call->_iter = &call->iter;
 
        o = atomic_inc_return(&net->nr_outstanding_calls);
        trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -176,6 +190,7 @@ void afs_put_call(struct afs_call *call)
 
                afs_put_server(call->net, call->cm_server);
                afs_put_cb_interest(call->net, call->cbi);
+               afs_put_addrlist(call->alist);
                kfree(call->request);
 
                trace_afs_call(call, afs_call_trace_free, 0, o,
@@ -189,21 +204,22 @@ void afs_put_call(struct afs_call *call)
 }
 
 /*
- * Queue the call for actual work.  Returns 0 unconditionally for convenience.
+ * Queue the call for actual work.
  */
-int afs_queue_call_work(struct afs_call *call)
+static void afs_queue_call_work(struct afs_call *call)
 {
-       int u = atomic_inc_return(&call->usage);
+       if (call->type->work) {
+               int u = atomic_inc_return(&call->usage);
 
-       trace_afs_call(call, afs_call_trace_work, u,
-                      atomic_read(&call->net->nr_outstanding_calls),
-                      __builtin_return_address(0));
+               trace_afs_call(call, afs_call_trace_work, u,
+                              atomic_read(&call->net->nr_outstanding_calls),
+                              __builtin_return_address(0));
 
-       INIT_WORK(&call->work, call->type->work);
+               INIT_WORK(&call->work, call->type->work);
 
-       if (!queue_work(afs_wq, &call->work))
-               afs_put_call(call);
-       return 0;
+               if (!queue_work(afs_wq, &call->work))
+                       afs_put_call(call);
+       }
 }
 
 /*
@@ -233,6 +249,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
                        goto nomem_free;
        }
 
+       afs_extract_to_buf(call, call->reply_max);
        call->operation_ID = type->op;
        init_waitqueue_head(&call->waitq);
        return call;
@@ -286,7 +303,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
                offset = 0;
        }
 
-       iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+       iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
 }
 
 /*
@@ -342,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
                   gfp_t gfp, bool async)
 {
-       struct sockaddr_rxrpc *srx = ac->addr;
+       struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
        struct rxrpc_call *rxcall;
        struct msghdr msg;
        struct kvec iov[1];
@@ -359,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
               atomic_read(&call->net->nr_outstanding_calls));
 
        call->async = async;
+       call->addr_ix = ac->index;
+       call->alist = afs_get_addrlist(ac->alist);
 
        /* Work out the length we're going to transmit.  This is awkward for
         * calls such as FS.StoreData where there's an extra injection of data
@@ -390,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
                                         call->debug_id);
        if (IS_ERR(rxcall)) {
                ret = PTR_ERR(rxcall);
+               call->error = ret;
                goto error_kill_call;
        }
 
@@ -401,8 +421,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
-                     call->request_size);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
@@ -432,7 +451,7 @@ error_do_abort:
                rxrpc_kernel_abort_call(call->net->socket, rxcall,
                                        RX_USER_ABORT, ret, "KSD");
        } else {
-               iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+               iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
                rxrpc_kernel_recv_data(call->net->socket, rxcall,
                                       &msg.msg_iter, false,
                                       &call->abort_code, &call->service_id);
@@ -442,6 +461,8 @@ error_do_abort:
        call->error = ret;
        trace_afs_call_done(call);
 error_kill_call:
+       if (call->type->done)
+               call->type->done(call);
        afs_put_call(call);
        ac->error = ret;
        _leave(" = %d", ret);
@@ -466,14 +487,12 @@ static void afs_deliver_to_call(struct afs_call *call)
               state == AFS_CALL_SV_AWAIT_ACK
               ) {
                if (state == AFS_CALL_SV_AWAIT_ACK) {
-                       struct iov_iter iter;
-
-                       iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
+                       iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
                        ret = rxrpc_kernel_recv_data(call->net->socket,
-                                                    call->rxcall, &iter, false,
-                                                    &remote_abort,
+                                                    call->rxcall, &call->iter,
+                                                    false, &remote_abort,
                                                     &call->service_id);
-                       trace_afs_recv_data(call, 0, 0, false, ret);
+                       trace_afs_receive_data(call, &call->iter, false, ret);
 
                        if (ret == -EINPROGRESS || ret == -EAGAIN)
                                return;
@@ -485,10 +504,17 @@ static void afs_deliver_to_call(struct afs_call *call)
                        return;
                }
 
+               if (call->want_reply_time &&
+                   rxrpc_kernel_get_reply_time(call->net->socket,
+                                               call->rxcall,
+                                               &call->reply_time))
+                       call->want_reply_time = false;
+
                ret = call->type->deliver(call);
                state = READ_ONCE(call->state);
                switch (ret) {
                case 0:
+                       afs_queue_call_work(call);
                        if (state == AFS_CALL_CL_PROC_REPLY) {
                                if (call->cbi)
                                        set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
@@ -500,7 +526,6 @@ static void afs_deliver_to_call(struct afs_call *call)
                case -EINPROGRESS:
                case -EAGAIN:
                        goto out;
-               case -EIO:
                case -ECONNABORTED:
                        ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
                        goto done;
@@ -509,6 +534,10 @@ static void afs_deliver_to_call(struct afs_call *call)
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
                                                abort_code, ret, "KIV");
                        goto local_abort;
+               case -EIO:
+                       pr_err("kAFS: Call %u in bad state %u\n",
+                              call->debug_id, state);
+                       /* Fall through */
                case -ENODATA:
                case -EBADMSG:
                case -EMSGSIZE:
@@ -517,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
                        if (state != AFS_CALL_CL_AWAIT_REPLY)
                                abort_code = RXGEN_SS_UNMARSHAL;
                        rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-                                               abort_code, -EBADMSG, "KUM");
+                                               abort_code, ret, "KUM");
                        goto local_abort;
                }
        }
 
 done:
+       if (call->type->done)
+               call->type->done(call);
        if (state == AFS_CALL_COMPLETE && call->incoming)
                afs_put_call(call);
 out:
@@ -728,6 +759,7 @@ void afs_charge_preallocation(struct work_struct *work)
                        call->async = true;
                        call->state = AFS_CALL_SV_AWAIT_OP_ID;
                        init_waitqueue_head(&call->waitq);
+                       afs_extract_to_tmp(call);
                }
 
                if (rxrpc_kernel_charge_accept(net->socket,
@@ -773,18 +805,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 {
        int ret;
 
-       _enter("{%zu}", call->offset);
-
-       ASSERTCMP(call->offset, <, 4);
+       _enter("{%zu}", iov_iter_count(call->_iter));
 
        /* the operation ID forms the first four bytes of the request data */
-       ret = afs_extract_data(call, &call->tmp, 4, true);
+       ret = afs_extract_data(call, true);
        if (ret < 0)
                return ret;
 
        call->operation_ID = ntohl(call->tmp);
        afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
-       call->offset = 0;
 
        /* ask the cache manager to route the call (it'll change the call type
         * if successful) */
@@ -825,7 +854,7 @@ void afs_send_empty_reply(struct afs_call *call)
 
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+       iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;
@@ -864,7 +893,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
        iov[0].iov_len          = len;
        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
-       iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
+       iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;
@@ -888,30 +917,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
-                    bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
        struct afs_net *net = call->net;
-       struct iov_iter iter;
-       struct kvec iov;
+       struct iov_iter *iter = call->_iter;
        enum afs_call_state state;
        u32 remote_abort = 0;
        int ret;
 
-       _enter("{%s,%zu},,%zu,%d",
-              call->type->name, call->offset, count, want_more);
-
-       ASSERTCMP(call->offset, <=, count);
-
-       iov.iov_base = buf + call->offset;
-       iov.iov_len = count - call->offset;
-       iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+       _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
 
-       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+       ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
                                     want_more, &remote_abort,
                                     &call->service_id);
-       call->offset += (count - call->offset) - iov_iter_count(&iter);
-       trace_afs_recv_data(call, count, call->offset, want_more, ret);
        if (ret == 0 || ret == -EAGAIN)
                return ret;
 
@@ -926,7 +944,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
                        break;
                case AFS_CALL_COMPLETE:
                        kdebug("prem complete %d", call->error);
-                       return -EIO;
+                       return afs_io_error(call, afs_io_error_extract);
                default:
                        break;
                }
@@ -940,8 +958,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 /*
  * Log protocol error production.
  */
-noinline int afs_protocol_error(struct afs_call *call, int error)
+noinline int afs_protocol_error(struct afs_call *call, int error,
+                               enum afs_eproto_cause cause)
 {
-       trace_afs_protocol_error(call, error, __builtin_return_address(0));
+       /* Emit the tracepoint with the cause supplied by the caller (the old
+        * code logged the return address instead), then hand the error code
+        * back unchanged.
+        */
+       trace_afs_protocol_error(call, error, cause);
        return error;
 }
index 81dfedb7879ff9bf56ab4fcca26ef6c90d835de2..5f58a9a17e694a09dbe0d0b70d9dbc0cc9833aa4 100644 (file)
@@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
        bool changed = false;
        int i, j;
 
-       _enter("{%x:%u},%x,%x",
+       _enter("{%llx:%llu},%x,%x",
               vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
 
        rcu_read_lock();
@@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                                        break;
                                }
 
-                               if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
+                               if (afs_cb_is_broken(cb_break, vnode,
+                                                    vnode->cb_interest)) {
                                        changed = true;
                                        break;
                                }
@@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
                }
        }
 
-       if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
+       if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
                goto someone_else_changed_it;
 
        /* We need a ref on any permits list we want to copy as we'll have to
@@ -256,7 +257,7 @@ found:
 
        spin_lock(&vnode->lock);
        zap = rcu_access_pointer(vnode->permit_cache);
-       if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
+       if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
            zap == permits)
                rcu_assign_pointer(vnode->permit_cache, replacement);
        else
@@ -289,7 +290,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
        bool valid = false;
        int i, ret;
 
-       _enter("{%x:%u},%x",
+       _enter("{%llx:%llu},%x",
               vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
        /* check the permits to see if we've got one yet */
@@ -349,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
        if (mask & MAY_NOT_BLOCK)
                return -ECHILD;
 
-       _enter("{{%x:%u},%lx},%x,",
+       _enter("{{%llx:%llu},%lx},%x,",
               vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
        key = afs_request_key(vnode->volume->cell);
index 1d329e6981d515c06bb5b711a1e3880226c2cce8..642afa2e9783c4f95284980dd8054610fa4d49cf 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include "afs_fs.h"
 #include "internal.h"
+#include "protocol_yfs.h"
 
 static unsigned afs_server_gc_delay = 10;      /* Server record timeout in seconds */
 static unsigned afs_server_update_delay = 30;  /* Time till VLDB recheck in secs */
@@ -230,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
        rwlock_init(&server->fs_lock);
        INIT_HLIST_HEAD(&server->cb_volumes);
        rwlock_init(&server->cb_break_lock);
+       init_waitqueue_head(&server->probe_wq);
+       spin_lock_init(&server->probe_lock);
 
        afs_inc_servers_outstanding(net);
        _leave(" = %p", server);
@@ -246,41 +249,23 @@ enomem:
 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                 struct key *key, const uuid_t *uuid)
 {
-       struct afs_addr_cursor ac;
-       struct afs_addr_list *alist;
+       struct afs_vl_cursor vc;
+       struct afs_addr_list *alist = NULL;
        int ret;
 
-       ret = afs_set_vl_cursor(&ac, cell);
-       if (ret < 0)
-               return ERR_PTR(ret);
-
-       while (afs_iterate_addresses(&ac)) {
-               if (test_bit(ac.index, &ac.alist->yfs))
-                       alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
-               else
-                       alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
-               switch (ac.error) {
-               case 0:
-                       afs_end_cursor(&ac);
-                       return alist;
-               case -ECONNABORTED:
-                       ac.error = afs_abort_to_error(ac.abort_code);
-                       goto error;
-               case -ENOMEM:
-               case -ENONET:
-                       goto error;
-               case -ENETUNREACH:
-               case -EHOSTUNREACH:
-               case -ECONNREFUSED:
-                       break;
-               default:
-                       ac.error = -EIO;
-                       goto error;
+       /* -ERESTARTSYS is what the caller gets back if the operation cannot
+        * be begun.
+        */
+       ret = -ERESTARTSYS;
+       if (afs_begin_vlserver_operation(&vc, cell, key)) {
+               /* Query each server the cursor selects, using the YFS call
+                * for servers flagged AFS_VLSERVER_FL_IS_YFS and
+                * afs_vl_get_addrs_u() otherwise.
+                * NOTE(review): alist is overwritten on every pass;
+                * presumably afs_select_vlserver() stops iterating once a
+                * call has succeeded - confirm nothing from an earlier pass
+                * can be leaked.
+                */
+               while (afs_select_vlserver(&vc)) {
+                       if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
+                               alist = afs_yfsvl_get_endpoints(&vc, uuid);
+                       else
+                               alist = afs_vl_get_addrs_u(&vc, uuid);
                }
+
+               ret = afs_end_vlserver_operation(&vc);
        }
 
-error:
-       return ERR_PTR(afs_end_cursor(&ac));
+       /* A negative overall result takes precedence over whatever alist
+        * holds.
+        */
+       return ret < 0 ? ERR_PTR(ret) : alist;
 }
 
 /*
@@ -382,9 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
-               .start  = alist->index,
-               .index  = 0,
-               .addr   = &alist->addrs[alist->index],
+               .index  = alist->preferred,
                .error  = 0,
        };
        _enter("%p", server);
@@ -392,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
 
+       wait_var_event(&server->probe_outstanding,
+                      atomic_read(&server->probe_outstanding) == 0);
+
        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
 }
@@ -524,99 +510,6 @@ void afs_purge_servers(struct afs_net *net)
        _leave("");
 }
 
-/*
- * Probe a fileserver to find its capabilities.
- *
- * TODO: Try service upgrade.
- */
-static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
-{
-       _enter("");
-
-       fc->ac.addr = NULL;
-       fc->ac.start = READ_ONCE(fc->ac.alist->index);
-       fc->ac.index = fc->ac.start;
-       fc->ac.error = 0;
-       fc->ac.begun = false;
-
-       while (afs_iterate_addresses(&fc->ac)) {
-               afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
-                                       &fc->ac, fc->key);
-               switch (fc->ac.error) {
-               case 0:
-                       afs_end_cursor(&fc->ac);
-                       set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
-                       return true;
-               case -ECONNABORTED:
-                       fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
-                       goto error;
-               case -ENOMEM:
-               case -ENONET:
-                       goto error;
-               case -ENETUNREACH:
-               case -EHOSTUNREACH:
-               case -ECONNREFUSED:
-               case -ETIMEDOUT:
-               case -ETIME:
-                       break;
-               default:
-                       fc->ac.error = -EIO;
-                       goto error;
-               }
-       }
-
-error:
-       afs_end_cursor(&fc->ac);
-       return false;
-}
-
-/*
- * If we haven't already, try probing the fileserver to get its capabilities.
- * We try not to instigate parallel probes, but it's possible that the parallel
- * probes will fail due to authentication failure when ours would succeed.
- *
- * TODO: Try sending an anonymous probe if an authenticated probe fails.
- */
-bool afs_probe_fileserver(struct afs_fs_cursor *fc)
-{
-       bool success;
-       int ret, retries = 0;
-
-       _enter("");
-
-retry:
-       if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
-               _leave(" = t");
-               return true;
-       }
-
-       if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
-               success = afs_do_probe_fileserver(fc);
-               clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
-               wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
-               _leave(" = t");
-               return success;
-       }
-
-       _debug("wait");
-       ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
-                         TASK_INTERRUPTIBLE);
-       if (ret == -ERESTARTSYS) {
-               fc->ac.error = ret;
-               _leave(" = f [%d]", ret);
-               return false;
-       }
-
-       retries++;
-       if (retries == 4) {
-               fc->ac.error = -ESTALE;
-               _leave(" = f [stale]");
-               return false;
-       }
-       _debug("retry");
-       goto retry;
-}
-
 /*
  * Get an update for a server's address list.
  */
index 8a5760aa583213a608d686b60f0782ecbed648e1..95d0761cdb34ef3c0a214693651292ba08b2def1 100644 (file)
@@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
        return false;
 
 changed:
-       /* Maintain the same current server as before if possible. */
-       cur = old->servers[old->index].server;
+       /* Maintain the same preferred server as before if possible. */
+       cur = old->servers[old->preferred].server;
        for (j = 0; j < new->nr_servers; j++) {
                if (new->servers[j].server == cur) {
-                       new->index = j;
+                       new->preferred = j;
                        break;
                }
        }
index 4d3e274207fb7aa05aa320b957a03911984cf67d..dcd07fe99871b9b38541293b164ac1cf8328a005 100644 (file)
@@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb,
                inode = afs_iget_pseudo_dir(sb, true);
                sb->s_flags     |= SB_RDONLY;
        } else {
-               sprintf(sb->s_id, "%u", as->volume->vid);
+               sprintf(sb->s_id, "%llu", as->volume->vid);
                afs_activate_volume(as->volume);
                fid.vid         = as->volume->vid;
                fid.vnode       = 1;
+               fid.vnode_hi    = 0;
                fid.unique      = 1;
                inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
        }
@@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode)
 {
        struct afs_vnode *vnode = AFS_FS_I(inode);
 
-       _enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
+       _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
 
        _debug("DESTROY INODE %p", inode);
 
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644 (file)
index 0000000..b4f1a84
--- /dev/null
@@ -0,0 +1,340 @@
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Allocate and initialise a VL server record for the given name and port.
+ *
+ * The record is zero-allocated with room for name_len + 1 bytes of name, so
+ * the byte following the copied name stays zero.  The usage count starts at 1
+ * and the lock, probe wait queue and probe lock are initialised.
+ *
+ * Returns the new record, or NULL if the allocation failed.
+ */
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+                                       unsigned short port)
+{
+       struct afs_vlserver *vlserver;
+
+       vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
+                          GFP_KERNEL);
+       if (vlserver) {
+               atomic_set(&vlserver->usage, 1);
+               rwlock_init(&vlserver->lock);
+               init_waitqueue_head(&vlserver->probe_wq);
+               spin_lock_init(&vlserver->probe_lock);
+               vlserver->name_len = name_len;
+               vlserver->port = port;
+               memcpy(vlserver->name, name, name_len);
+       }
+       return vlserver;
+}
+
+/*
+ * RCU callback queued by afs_put_vlserver(): drop the record's reference on
+ * its address list and then free the record itself.
+ */
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+       struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
+
+       afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+       /* NOTE(review): this already runs as an RCU callback, so kfree_rcu()
+        * presumably defers the free by a further grace period - confirm
+        * whether plain kfree() was intended.
+        */
+       kfree_rcu(vlserver, rcu);
+}
+
+/*
+ * Drop a reference on a VL server record; a NULL pointer is ignored.
+ *
+ * When the usage count reaches zero the record is handed to call_rcu() for
+ * destruction by afs_vlserver_rcu().  The net argument is not used in the
+ * body.
+ */
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+       if (vlserver) {
+               unsigned int u = atomic_dec_return(&vlserver->usage);
+               //_debug("VL PUT %p{%u}", vlserver, u);
+
+               if (u == 0)
+                       call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+       }
+}
+
+/*
+ * Allocate a zeroed VL server list with space for nr_servers entries in its
+ * servers[] array.  The usage count starts at 1 and the list lock is
+ * initialised.
+ *
+ * Returns the new list, or NULL if the allocation failed.
+ */
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+       struct afs_vlserver_list *vllist;
+
+       vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
+       if (vllist) {
+               atomic_set(&vllist->usage, 1);
+               rwlock_init(&vllist->lock);
+       }
+
+       return vllist;
+}
+
+/*
+ * Drop a reference on a VL server list; a NULL pointer is ignored.
+ *
+ * When the usage count reaches zero, the reference held on each of the
+ * nr_servers entries is put and the list is freed with kfree_rcu().
+ */
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+       if (vllist) {
+               unsigned int u = atomic_dec_return(&vllist->usage);
+
+               //_debug("VLLS PUT %p{%u}", vllist, u);
+               if (u == 0) {
+                       int i;
+
+                       for (i = 0; i < vllist->nr_servers; i++) {
+                               afs_put_vlserver(net, vllist->servers[i].server);
+                       }
+                       kfree_rcu(vllist, rcu);
+               }
+       }
+}
+
+/*
+ * Read a 16-bit value from the two bytes at *_b, least significant byte
+ * first, and advance *_b past them.  No bounds check is made here, so the
+ * caller has to make sure two bytes are available.
+ */
+static u16 afs_extract_le16(const u8 **_b)
+{
+       u16 val;
+
+       val  = (u16)*(*_b)++ << 0;
+       val |= (u16)*(*_b)++ << 8;
+       return val;
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ *
+ * Parses up to nr_addrs address records starting at *_b and not running past
+ * end.  Each record is one address-type byte followed by a 4-byte (IPv4) or
+ * 16-byte (IPv6) address; every address is merged into the list with the
+ * given port.
+ *
+ * Returns the new list, or an ERR_PTR: -ENOMEM if the list cannot be
+ * allocated, -EINVAL if an address is cut short by end, -EADDRNOTAVAIL for
+ * an unrecognised address type.  When nr_addrs is 0 the empty list is
+ * returned and *_b is left alone; otherwise *_b is advanced to where parsing
+ * stopped, on success and on error alike.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+                                                 u8 nr_addrs, u16 port)
+{
+       struct afs_addr_list *alist;
+       const u8 *b = *_b;
+       int ret = -EINVAL;
+
+       alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+       if (!alist)
+               return ERR_PTR(-ENOMEM);
+       if (nr_addrs == 0)
+               return alist;
+
+       /* Only carry on while at least one byte per outstanding record is
+        * left in the buffer.
+        * NOTE(review): if the data runs out here the loop just ends and the
+        * addresses gathered so far are returned without an error - confirm
+        * that is intended.
+        */
+       for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+               struct dns_server_list_v1_address hdr;
+               __be32 x[4];
+
+               hdr.address_type = *b++;
+
+               switch (hdr.address_type) {
+               case DNS_ADDRESS_IS_IPV4:
+                       if (end - b < 4) {
+                               _leave(" = -EINVAL [short inet]");
+                               goto error;
+                       }
+                       memcpy(x, b, 4);
+                       afs_merge_fs_addr4(alist, x[0], port);
+                       b += 4;
+                       break;
+
+               case DNS_ADDRESS_IS_IPV6:
+                       if (end - b < 16) {
+                               _leave(" = -EINVAL [short inet6]");
+                               goto error;
+                       }
+                       memcpy(x, b, 16);
+                       afs_merge_fs_addr6(alist, x, port);
+                       b += 16;
+                       break;
+
+               default:
+                       _leave(" = -EADDRNOTAVAIL [unknown af %u]",
+                              hdr.address_type);
+                       ret = -EADDRNOTAVAIL;
+                       goto error;
+               }
+       }
+
+       /* Start with IPv6 if available. */
+       if (alist->nr_ipv4 < alist->nr_addrs)
+               alist->preferred = alist->nr_ipv4;
+
+       *_b = b;
+       return alist;
+
+error:
+       /* Report how far parsing got and drop the partly built list. */
+       *_b = b;
+       afs_put_addrlist(alist);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+                                                   const void *buffer,
+                                                   size_t buffer_size)
+{
+       const struct dns_server_list_v1_header *hdr = buffer;
+       struct dns_server_list_v1_server bs;
+       struct afs_vlserver_list *vllist, *previous;
+       struct afs_addr_list *addrs;
+       struct afs_vlserver