| From 9883035ae7edef3ec62ad215611cb8e17d6a1a5d Mon Sep 17 00:00:00 2001 |
| From: Linus Torvalds <torvalds@linux-foundation.org> |
| Date: Sun, 29 Apr 2012 13:12:42 -0700 |
| Subject: pipes: add a "packetized pipe" mode for writing |
| |
| From: Linus Torvalds <torvalds@linux-foundation.org> |
| |
| commit 9883035ae7edef3ec62ad215611cb8e17d6a1a5d upstream. |
| |
| The actual internal pipe implementation is already really about |
| individual packets (called "pipe buffers"), and this simply exposes that |
| as a special packetized mode. |
| |
| When we are in the packetized mode (marked by O_DIRECT as suggested by |
| Alan Cox), a write() on a pipe will not merge the new data with previous |
| writes, so each write will get a pipe buffer of its own. The pipe |
| buffer is then marked with the PIPE_BUF_FLAG_PACKET flag, which in turn |
| will tell the reader side to break the read at that boundary (and throw |
| away any partial packet contents that do not fit in the read buffer). |
| |
| End result: as long as you do writes less than PIPE_BUF in size (so that |
| the pipe doesn't have to split them up), you can now treat the pipe as a |
| packet interface, where each read() system call will read one packet at |
| a time. You can just use a sufficiently big read buffer (PIPE_BUF is |
| sufficient, since bigger than that doesn't guarantee atomicity anyway), |
| and the return value of the read() will naturally give you the size of |
| the packet. |
| |
| NOTE! We do not support zero-sized packets, and zero-sized reads and |
| writes to a pipe continue to be no-ops. Also note that big packets will |
| currently be split at write time, but that the size at which that |
| happens is not really specified (except that it's bigger than PIPE_BUF). |
| Currently that limit is the system page size, but we might want to |
| explicitly support bigger packets some day. |
| |
| The main user for this is going to be the autofs packet interface, |
| allowing us to stop having to care so deeply about exact packet sizes |
| (which have had bugs with 32/64-bit compatibility modes). But user |
| space can create packetized pipes with "pipe2(fd, O_DIRECT)", which will |
| fail with an EINVAL on kernels that do not support this interface. |
| |
| Tested-by: Michael Tokarev <mjt@tls.msk.ru> |
| Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> |
| Cc: David Miller <davem@davemloft.net> |
| Cc: Ian Kent <raven@themaw.net> |
| Cc: Thomas Meyer <thomas@m3y3r.de> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| fs/pipe.c | 31 +++++++++++++++++++++++++++++-- |
| include/linux/pipe_fs_i.h | 1 + |
| 2 files changed, 30 insertions(+), 2 deletions(-) |
| |
| --- a/fs/pipe.c |
| +++ b/fs/pipe.c |
| @@ -345,6 +345,16 @@ static const struct pipe_buf_operations |
| .get = generic_pipe_buf_get, |
| }; |
| |
| +static const struct pipe_buf_operations packet_pipe_buf_ops = { |
| + .can_merge = 0, |
| + .map = generic_pipe_buf_map, |
| + .unmap = generic_pipe_buf_unmap, |
| + .confirm = generic_pipe_buf_confirm, |
| + .release = anon_pipe_buf_release, |
| + .steal = generic_pipe_buf_steal, |
| + .get = generic_pipe_buf_get, |
| +}; |
| + |
| static ssize_t |
| pipe_read(struct kiocb *iocb, const struct iovec *_iov, |
| unsigned long nr_segs, loff_t pos) |
| @@ -406,6 +416,13 @@ redo: |
| ret += chars; |
| buf->offset += chars; |
| buf->len -= chars; |
| + |
| + /* Was it a packet buffer? Clean up and exit */ |
| + if (buf->flags & PIPE_BUF_FLAG_PACKET) { |
| + total_len = chars; |
| + buf->len = 0; |
| + } |
| + |
| if (!buf->len) { |
| buf->ops = NULL; |
| ops->release(pipe, buf); |
| @@ -458,6 +475,11 @@ redo: |
| return ret; |
| } |
| |
| +static inline int is_packetized(struct file *file) |
| +{ |
| + return (file->f_flags & O_DIRECT) != 0; |
| +} |
| + |
| static ssize_t |
| pipe_write(struct kiocb *iocb, const struct iovec *_iov, |
| unsigned long nr_segs, loff_t ppos) |
| @@ -592,6 +614,11 @@ redo2: |
| buf->ops = &anon_pipe_buf_ops; |
| buf->offset = 0; |
| buf->len = chars; |
| + buf->flags = 0; |
| + if (is_packetized(filp)) { |
| + buf->ops = &packet_pipe_buf_ops; |
| + buf->flags = PIPE_BUF_FLAG_PACKET; |
| + } |
| pipe->nrbufs = ++bufs; |
| pipe->tmp_page = NULL; |
| |
| @@ -1012,7 +1039,7 @@ struct file *create_write_pipe(int flags |
| goto err_dentry; |
| f->f_mapping = inode->i_mapping; |
| |
| - f->f_flags = O_WRONLY | (flags & O_NONBLOCK); |
| + f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); |
| f->f_version = 0; |
| |
| return f; |
| @@ -1056,7 +1083,7 @@ int do_pipe_flags(int *fd, int flags) |
| int error; |
| int fdw, fdr; |
| |
| - if (flags & ~(O_CLOEXEC | O_NONBLOCK)) |
| + if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) |
| return -EINVAL; |
| |
| fw = create_write_pipe(flags); |
| --- a/include/linux/pipe_fs_i.h |
| +++ b/include/linux/pipe_fs_i.h |
| @@ -8,6 +8,7 @@ |
| #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ |
| #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ |
| #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ |
| +#define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ |
| |
| /** |
| * struct pipe_buffer - a linux kernel pipe buffer |