Index: sys/kern/sys_pipe.c =================================================================== --- sys/kern/sys_pipe.c (revision 228609) +++ sys/kern/sys_pipe.c (working copy) @@ -2,6 +2,9 @@ * Copyright (c) 1996 John S. Dyson * All rights reserved. * + * Copyright (c) 2011 Giovanni Trematerra + * All rights reserved. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -128,6 +131,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + /* * Use this define if you want to disable *fancy* VM things. Expect an * approx 30% decrease in transfer rate. This could be useful for @@ -147,7 +152,7 @@ static fo_kqfilter_t pipe_kqfilter; static fo_stat_t pipe_stat; static fo_close_t pipe_close; -static struct fileops pipeops = { +struct fileops pipeops = { .fo_read = pipe_read, .fo_write = pipe_write, .fo_truncate = pipe_truncate, @@ -164,6 +169,8 @@ static fo_close_t pipe_close; static void filt_pipedetach(struct knote *kn); static int filt_piperead(struct knote *kn, long hint); static int filt_pipewrite(struct knote *kn, long hint); +static void filt_pipedetach_notsup(struct knote *kn); +static int filt_pipenotsup(struct knote *kn, long hint); static struct filterops pipe_rfiltops = { .f_isfd = 1, @@ -175,6 +182,11 @@ static struct filterops pipe_wfiltops = { .f_detach = filt_pipedetach, .f_event = filt_pipewrite }; +static struct filterops pipe_nfiltops = { + .f_isfd = 1, + .f_detach = filt_pipedetach_notsup, + .f_event = filt_pipenotsup +}; /* * Default pipe buffer size(s), this can be kind-of large now because pipe @@ -224,7 +236,13 @@ static int pipe_zone_ctor(void *mem, int size, voi static int pipe_zone_init(void *mem, int size, int flags); static void pipe_zone_fini(void *mem, int size); -static uma_zone_t pipe_zone; +static int pipe_paircreate(struct thread *td, struct pipe **p_rpipe, + struct pipe **p_wpipe); +static void pipe_makeinfo(struct pipeinfo **ppip, 
struct pipe *rpipe, + struct pipe *wpipe); +static void pipe_destroyinfo(struct pipeinfo *pip); + +static uma_zone_t pipe_zone, pipeinfo_zone; static struct unrhdr *pipeino_unr; static dev_t pipedev_ino; @@ -238,6 +256,9 @@ pipeinit(void *dummy __unused) pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, UMA_ALIGN_PTR, 0); KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); + pipeinfo_zone = uma_zcreate("pipeinfo", sizeof(struct pipeinfo), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + KASSERT(pipeinfo_zone != NULL, ("pipeinfo_zone not initialized")); pipeino_unr = new_unrhdr(1, INT32_MAX, NULL); KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized")); pipedev_ino = devfs_alloc_cdp_inode(); @@ -261,13 +282,9 @@ pipe_zone_ctor(void *mem, int size, void *arg, int */ rpipe = &pp->pp_rpipe; bzero(rpipe, sizeof(*rpipe)); - vfs_timestamp(&rpipe->pipe_ctime); - rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; wpipe = &pp->pp_wpipe; bzero(wpipe, sizeof(*wpipe)); - wpipe->pipe_ctime = rpipe->pipe_ctime; - wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; rpipe->pipe_peer = wpipe; rpipe->pipe_pair = pp; @@ -317,18 +334,36 @@ pipe_zone_fini(void *mem, int size) mtx_destroy(&pp->pp_mtx); } -/* - * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let - * the zone pick up the pieces via pipeclose(). 
- */ -int -kern_pipe(struct thread *td, int fildes[2]) +static void +pipe_makeinfo(struct pipeinfo **ppip, struct pipe *rpipe, struct pipe *wpipe) { - struct filedesc *fdp = td->td_proc->p_fd; - struct file *rf, *wf; + struct pipeinfo *pip; + + *ppip = pip = uma_zalloc(pipeinfo_zone, M_WAITOK); + pip->pi_rpipe = rpipe; + pip->pi_wpipe = wpipe; + vfs_timestamp(&pip->pi_ctime); + pip->pi_atime = pip->pi_mtime = pip->pi_ctime; + pip->pi_ino = -1; +} + +static void +pipe_destroyinfo(struct pipeinfo *pip) +{ + ino_t ino; + + ino = pip->pi_ino; + uma_zfree(pipeinfo_zone, pip); + if (ino != 0 && ino != (ino_t)-1) + free_unr(pipeino_unr, ino); +} + +static int +pipe_paircreate(struct thread *td, struct pipe **p_rpipe, struct pipe **p_wpipe) +{ struct pipepair *pp; struct pipe *rpipe, *wpipe; - int fd, error; + int error; pp = uma_zalloc(pipe_zone, M_WAITOK); #ifdef MAC @@ -340,8 +375,8 @@ pipe_zone_fini(void *mem, int size) mac_pipe_init(pp); mac_pipe_create(td->td_ucred, pp); #endif - rpipe = &pp->pp_rpipe; - wpipe = &pp->pp_wpipe; + *p_rpipe = rpipe = &pp->pp_rpipe; + *p_wpipe = wpipe = &pp->pp_wpipe; knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); @@ -356,11 +391,64 @@ pipe_zone_fini(void *mem, int size) rpipe->pipe_state |= PIPE_DIRECTOK; wpipe->pipe_state |= PIPE_DIRECTOK; + return (0); +} +int +pipe_ctor(struct pipeinfo **ppip, struct thread *td) +{ + struct pipe *rpipe, *wpipe; + int error; + + error = pipe_paircreate(td, &rpipe, &wpipe); + if (error != 0) + return (error); + pipe_makeinfo(ppip, rpipe, wpipe); + return (0); +} + +void +pipe_destroy(struct pipeinfo *pip) +{ + struct pipe *rpipe; + struct pipe *wpipe; + + rpipe = pip->pi_rpipe; + wpipe = pip->pi_wpipe; + funsetown(&rpipe->pipe_sigio); + pipeclose(rpipe); + if (rpipe != wpipe) { + funsetown(&wpipe->pipe_sigio); + pipeclose(wpipe); + } + pipe_destroyinfo(pip); +} + +/* + * The pipe system call for the DTYPE_PIPE type of pipes. 
If we fail, let + * the zone pick up the pieces via pipeclose(). + */ +int +kern_pipe(struct thread *td, int fildes[2]) +{ + struct filedesc *fdp = td->td_proc->p_fd; + struct file *rf, *wf; + struct pipeinfo *pip_fd0, *pip_fd1; + struct pipe *rpipe, *wpipe; + int fd, error; + + error = pipe_paircreate(td, &rpipe, &wpipe); + if (error != 0) + return (error); + + pipe_makeinfo(&pip_fd0, rpipe, rpipe); + pipe_makeinfo(&pip_fd1, wpipe, wpipe); error = falloc(td, &rf, &fd, 0); if (error) { pipeclose(rpipe); pipeclose(wpipe); + pipe_destroyinfo(pip_fd0); + pipe_destroyinfo(pip_fd1); return (error); } /* An extra reference on `rf' has been held for us by falloc(). */ @@ -372,17 +460,18 @@ pipe_zone_fini(void *mem, int size) * to avoid races against processes which manage to dup() the read * side while we are blocked trying to allocate the write side. */ - finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); + finit(rf, FREAD | FWRITE, DTYPE_PIPE, pip_fd0, &pipeops); error = falloc(td, &wf, &fd, 0); if (error) { fdclose(fdp, rf, fildes[0], td); fdrop(rf, td); /* rpipe has been closed by fdrop(). */ pipeclose(wpipe); + pipe_destroyinfo(pip_fd1); return (error); } /* An extra reference on `wf' has been held for us by falloc(). */ - finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); + finit(wf, FREAD | FWRITE, DTYPE_PIPE, pip_fd1, &pipeops); fdrop(wf, td); fildes[1] = fd; fdrop(rf, td); @@ -569,7 +658,6 @@ pipe_create(pipe, backing) /* If we're not backing this pipe, no need to do anything. */ error = 0; } - pipe->pipe_ino = -1; return (error); } @@ -582,7 +670,8 @@ pipe_read(fp, uio, active_cred, flags, td) struct thread *td; int flags; { - struct pipe *rpipe = fp->f_data; + struct pipeinfo *pip = fp->f_data; + struct pipe *rpipe = pip->pi_rpipe; int error; int nread = 0; u_int size; @@ -720,7 +809,7 @@ locked_error: /* XXX: should probably do this before getting any locks. 
*/ if (error == 0) - vfs_timestamp(&rpipe->pipe_atime); + vfs_timestamp(&pip->pi_atime); unlocked_error: --rpipe->pipe_busy; @@ -961,9 +1050,11 @@ pipe_write(fp, uio, active_cred, flags, td) { int error = 0; int desiredsize, orig_resid; + struct pipeinfo *pip; struct pipe *wpipe, *rpipe; - rpipe = fp->f_data; + pip = fp->f_data; + rpipe = pip->pi_wpipe; wpipe = rpipe->pipe_peer; PIPE_LOCK(rpipe); @@ -1219,7 +1310,7 @@ pipe_write(fp, uio, active_cred, flags, td) } if (error == 0) - vfs_timestamp(&wpipe->pipe_mtime); + vfs_timestamp(&pip->pi_mtime); /* * We have something to offer, @@ -1256,9 +1347,13 @@ pipe_ioctl(fp, cmd, data, active_cred, td) struct ucred *active_cred; struct thread *td; { - struct pipe *mpipe = fp->f_data; + struct pipeinfo *pip = fp->f_data; + struct pipe *mpipe; + struct pipe *opipe; int error; + mpipe = pip->pi_rpipe; + opipe = pip->pi_wpipe; PIPE_LOCK(mpipe); #ifdef MAC @@ -1273,41 +1368,80 @@ pipe_ioctl(fp, cmd, data, active_cred, td) switch (cmd) { case FIONBIO: + /* + * Nothing to forward: the socket-based fifo code that used + * MSG_NBIO is gone; presumably f_flag is checked at I/O time. + */ break; case FIOASYNC: - if (*(int *)data) { - mpipe->pipe_state |= PIPE_ASYNC; - } else { - mpipe->pipe_state &= ~PIPE_ASYNC; + if (fp->f_flag & FREAD) { + if (*(int *)data) { + mpipe->pipe_state |= PIPE_ASYNC; + } else { + mpipe->pipe_state &= ~PIPE_ASYNC; + } } + if (mpipe != opipe && (fp->f_flag & FWRITE)) { + if (*(int *)data) { + opipe->pipe_state |= PIPE_ASYNC; + } else { + opipe->pipe_state &= ~PIPE_ASYNC; + } + } break; case FIONREAD: - if (mpipe->pipe_state & PIPE_DIRECTW) - *(int *)data = mpipe->pipe_map.cnt; - else - *(int *)data = mpipe->pipe_buffer.cnt; + /* + * FIONREAD will return 0 for non-readable descriptors, and + * the byte count buffered in the read-side pipe for readable + * descriptors. 
+ */ + if (!(fp->f_flag & FREAD)) { + *(int *)data = 0; + PIPE_UNLOCK(mpipe); + return (0); + } + *(int *)data = PIPE_CNT(mpipe); break; case FIOSETOWN: + /* fsetown() may sleep, so the pipe mutex is dropped first. */ PIPE_UNLOCK(mpipe); - error = fsetown(*(int *)data, &mpipe->pipe_sigio); - goto out_unlocked; + if (fp->f_flag & FREAD) { + error = fsetown(*(int *)data, &mpipe->pipe_sigio); + if (error) + return (error); + } + if (mpipe != opipe && (fp->f_flag & FWRITE)) + error = fsetown(*(int *)data, &opipe->pipe_sigio); + return (error); case FIOGETOWN: - *(int *)data = fgetown(&mpipe->pipe_sigio); + if (fp->f_flag & FREAD) + *(int *)data = fgetown(&mpipe->pipe_sigio); + if (mpipe != opipe && fp->f_flag & FWRITE) + *(int *)data = fgetown(&opipe->pipe_sigio); break; /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: PIPE_UNLOCK(mpipe); - error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); - goto out_unlocked; + if (fp->f_flag & FREAD) { + error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); + if (error) + return (error); + } + if (mpipe != opipe && (fp->f_flag & FWRITE)) + error = fsetown(-(*(int *)data), &opipe->pipe_sigio); + return (error); /* This is deprecated, FIOGETOWN should be used instead. 
*/ case TIOCGPGRP: - *(int *)data = -fgetown(&mpipe->pipe_sigio); + if (fp->f_flag & FREAD) + *(int *)data = -fgetown(&mpipe->pipe_sigio); + if (mpipe != opipe && fp->f_flag & FWRITE) + *(int *)data = -fgetown(&opipe->pipe_sigio); break; default: @@ -1315,7 +1445,7 @@ pipe_ioctl(fp, cmd, data, active_cred, td) break; } PIPE_UNLOCK(mpipe); -out_unlocked: + return (error); } @@ -1326,58 +1456,68 @@ pipe_poll(fp, events, active_cred, td) struct ucred *active_cred; struct thread *td; { - struct pipe *rpipe = fp->f_data; + struct pipeinfo *pip = fp->f_data; + struct pipe *rpipe; struct pipe *wpipe; int revents = 0; #ifdef MAC int error; #endif - wpipe = rpipe->pipe_peer; + rpipe = pip->pi_rpipe; + wpipe = pip->pi_wpipe->pipe_peer; PIPE_LOCK(rpipe); #ifdef MAC error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); - if (error) - goto locked_error; + if (error) { + PIPE_UNLOCK(rpipe); + return (0); + } #endif - if (events & (POLLIN | POLLRDNORM)) - if ((rpipe->pipe_state & PIPE_DIRECTW) || - (rpipe->pipe_buffer.cnt > 0)) - revents |= events & (POLLIN | POLLRDNORM); + if (fp->f_flag & FREAD) { + if (events & (POLLIN | POLLRDNORM)) + if ((rpipe->pipe_state & PIPE_DIRECTW) || + (rpipe->pipe_buffer.cnt > 0)) + revents |= events & (POLLIN | POLLRDNORM); - if (events & (POLLOUT | POLLWRNORM)) - if (wpipe->pipe_present != PIPE_ACTIVE || - (wpipe->pipe_state & PIPE_EOF) || - (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && - ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF || - wpipe->pipe_buffer.size == 0))) - revents |= events & (POLLOUT | POLLWRNORM); + PIPE_UNLOCK(rpipe); + if (fifo_iseof(fp)) + events |= POLLINIGNEOF; + PIPE_LOCK(rpipe); - if ((events & POLLINIGNEOF) == 0) { - if (rpipe->pipe_state & PIPE_EOF) { - revents |= (events & (POLLIN | POLLRDNORM)); - if (wpipe->pipe_present != PIPE_ACTIVE || - (wpipe->pipe_state & PIPE_EOF)) - revents |= POLLHUP; + if ((events & POLLINIGNEOF) == 0) { + if (rpipe->pipe_state & PIPE_EOF) { + revents |= (events & 
(POLLIN | POLLRDNORM)); + if 
(wpipe->pipe_present != PIPE_ACTIVE || + (wpipe->pipe_state & PIPE_EOF)) + revents |= POLLHUP; + } } } + if (fp->f_flag & FWRITE) + if (events & (POLLOUT | POLLWRNORM)) + if (wpipe->pipe_present != PIPE_ACTIVE || + (wpipe->pipe_state & PIPE_EOF) || + (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && + ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= + PIPE_BUF || wpipe->pipe_buffer.size == 0))) + revents |= events & (POLLOUT | POLLWRNORM); if (revents == 0) { - if (events & (POLLIN | POLLRDNORM)) { - selrecord(td, &rpipe->pipe_sel); - if (SEL_WAITING(&rpipe->pipe_sel)) - rpipe->pipe_state |= PIPE_SEL; - } + if (fp->f_flag & FREAD) + if (events & (POLLIN | POLLRDNORM)) { + selrecord(td, &rpipe->pipe_sel); + if (SEL_WAITING(&rpipe->pipe_sel)) + rpipe->pipe_state |= PIPE_SEL; + } - if (events & (POLLOUT | POLLWRNORM)) { - selrecord(td, &wpipe->pipe_sel); - if (SEL_WAITING(&wpipe->pipe_sel)) - wpipe->pipe_state |= PIPE_SEL; - } + if (fp->f_flag & FWRITE) + if (events & (POLLOUT | POLLWRNORM)) { + selrecord(td, &wpipe->pipe_sel); + if (SEL_WAITING(&wpipe->pipe_sel)) + wpipe->pipe_state |= PIPE_SEL; + } } -#ifdef MAC -locked_error: -#endif PIPE_UNLOCK(rpipe); return (revents); @@ -1394,18 +1532,19 @@ pipe_stat(fp, ub, active_cred, td) struct ucred *active_cred; struct thread *td; { - struct pipe *pipe; + struct pipeinfo *pip = fp->f_data; + struct pipe *spipe; int new_unr; #ifdef MAC int error; #endif - pipe = fp->f_data; - PIPE_LOCK(pipe); + spipe = (fp->f_flag & FWRITE) ? pip->pi_rpipe : pip->pi_wpipe; + PIPE_LOCK(spipe); #ifdef MAC - error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); + error = mac_pipe_check_stat(active_cred, spipe->pipe_pair); if (error) { - PIPE_UNLOCK(pipe); + PIPE_UNLOCK(spipe); return (error); } #endif @@ -1419,30 +1558,28 @@ pipe_stat(fp, ub, active_cred, td) * -1 - not yet initialized; * 0 - alloc_unr failed, return 0 as st_ino forever. 
*/ - if (pipe->pipe_ino == (ino_t)-1) { + if (pip->pi_ino == (ino_t)-1) { new_unr = alloc_unr(pipeino_unr); if (new_unr != -1) - pipe->pipe_ino = new_unr; + pip->pi_ino = new_unr; else - pipe->pipe_ino = 0; + pip->pi_ino = 0; } - PIPE_UNLOCK(pipe); + PIPE_UNLOCK(spipe); bzero(ub, sizeof(*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = PAGE_SIZE; - if (pipe->pipe_state & PIPE_DIRECTW) - ub->st_size = pipe->pipe_map.cnt; - else - ub->st_size = pipe->pipe_buffer.cnt; + ub->st_size = PIPE_CNT(spipe); ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; - ub->st_atim = pipe->pipe_atime; - ub->st_mtim = pipe->pipe_mtime; - ub->st_ctim = pipe->pipe_ctime; + ub->st_atim = pip->pi_atime; + ub->st_mtim = pip->pi_mtime; + ub->st_ctim = pip->pi_ctime; ub->st_uid = fp->f_cred->cr_uid; ub->st_gid = fp->f_cred->cr_gid; ub->st_dev = pipedev_ino; - ub->st_ino = pipe->pipe_ino; + ub->st_ino = pip->pi_ino; + /* * Left as 0: st_nlink, st_rdev, st_flags, st_gen. */ @@ -1455,13 +1592,17 @@ pipe_close(fp, td) struct file *fp; struct thread *td; { - struct pipe *cpipe = fp->f_data; + int error; - fp->f_ops = &badfileops; - fp->f_data = NULL; - funsetown(&cpipe->pipe_sigio); - pipeclose(cpipe); - return (0); + if (fp->f_vnode == NULL) { + fp->f_ops = &badfileops; + pipe_destroy(fp->f_data); + fp->f_data = NULL; + error = 0; + } else + error = vnops.fo_close(fp, td); + + return (error); } static void @@ -1497,7 +1638,6 @@ pipeclose(cpipe) { struct pipepair *pp; struct pipe *ppipe; - ino_t ino; KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); @@ -1556,12 +1696,6 @@ pipeclose(cpipe) knlist_destroy(&cpipe->pipe_sel.si_note); /* - * Postpone the destroy of the fake inode number allocated for - * our end, until pipe mtx is unlocked. - */ - ino = cpipe->pipe_ino; - - /* * If both endpoints are now closed, release the memory for the * pipe pair. If not, unlock. 
*/ @@ -1573,18 +1707,31 @@ pipeclose(cpipe) uma_zfree(pipe_zone, cpipe->pipe_pair); } else PIPE_UNLOCK(cpipe); - - if (ino != 0 && ino != (ino_t)-1) - free_unr(pipeino_unr, ino); } /*ARGSUSED*/ static int pipe_kqfilter(struct file *fp, struct knote *kn) { - struct pipe *cpipe; + struct pipeinfo *pip = fp->f_data; + struct pipe *cpipe, *rpipe; - cpipe = kn->kn_fp->f_data; + rpipe = cpipe = pip->pi_rpipe; + + /* + * If a filter is requested that is not supported by this file + * descriptor, don't return an error, but also don't ever generate an + * event. + */ + if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { + kn->kn_fop = &pipe_nfiltops; + return (0); + } + if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { + kn->kn_fop = &pipe_nfiltops; + return (0); + } + PIPE_LOCK(cpipe); switch (kn->kn_filter) { case EVFILT_READ: @@ -1598,13 +1745,20 @@ pipe_kqfilter(struct file *fp, struct knote *kn) return (EPIPE); } cpipe = cpipe->pipe_peer; + break; default: PIPE_UNLOCK(cpipe); return (EINVAL); } - knlist_add(&cpipe->pipe_sel.si_note, kn, 1); + if (pip->pi_rpipe == pip->pi_wpipe) { + kn->kn_hook = (void *)rpipe; + knlist_add(&cpipe->pipe_sel.si_note, kn, 1); + } else { + kn->kn_hook = (void *)cpipe; + knlist_add(&rpipe->pipe_sel.si_note, kn, 1); + } PIPE_UNLOCK(cpipe); return (0); } @@ -1612,7 +1766,7 @@ pipe_kqfilter(struct file *fp, struct knote *kn) static void filt_pipedetach(struct knote *kn) { - struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; + struct pipe *cpipe = kn->kn_hook; PIPE_LOCK(cpipe); if (kn->kn_filter == EVFILT_WRITE) @@ -1625,7 +1779,7 @@ filt_pipedetach(struct knote *kn) static int filt_piperead(struct knote *kn, long hint) { - struct pipe *rpipe = kn->kn_fp->f_data; + struct pipe *rpipe = (struct pipe *)kn->kn_hook; struct pipe *wpipe = rpipe->pipe_peer; int ret; @@ -1650,7 +1804,7 @@ filt_piperead(struct knote *kn, long hint) static int filt_pipewrite(struct knote *kn, long hint) { - struct pipe *rpipe = 
kn->kn_fp->f_data; + struct pipe *rpipe = (struct pipe *)kn->kn_hook; struct pipe *wpipe = rpipe->pipe_peer; PIPE_LOCK(rpipe); @@ -1669,3 +1823,16 @@ filt_pipewrite(struct knote *kn, long hint) PIPE_UNLOCK(rpipe); return (kn->kn_data >= PIPE_BUF); } + +static void +filt_pipedetach_notsup(struct knote *kn) +{ + +} + +static int +filt_pipenotsup(struct knote *kn, long hint) +{ + + return (0); +} Index: sys/fs/fifofs/fifo.h =================================================================== --- sys/fs/fifofs/fifo.h (revision 228609) +++ sys/fs/fifofs/fifo.h (working copy) @@ -35,4 +35,5 @@ */ int fifo_vnoperate(struct vop_generic_args *); int fifo_printinfo(struct vnode *); +int fifo_iseof(struct file *); Index: sys/fs/fifofs/fifo_vnops.c =================================================================== --- sys/fs/fifofs/fifo_vnops.c (revision 228609) +++ sys/fs/fifofs/fifo_vnops.c (working copy) @@ -3,6 +3,8 @@ * The Regents of the University of California. * Copyright (c) 2005 Robert N. M. Watson * All rights reserved. + * Copyright (c) 2011 Giovanni Trematerra + * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -42,11 +44,9 @@ #include #include #include -#include +#include #include #include -#include -#include #include #include #include @@ -54,74 +54,28 @@ #include #include -static fo_rdwr_t fifo_read_f; -static fo_rdwr_t fifo_write_f; -static fo_ioctl_t fifo_ioctl_f; -static fo_poll_t fifo_poll_f; -static fo_kqfilter_t fifo_kqfilter_f; -static fo_stat_t fifo_stat_f; -static fo_close_t fifo_close_f; -static fo_truncate_t fifo_truncate_f; - -struct fileops fifo_ops_f = { - .fo_read = fifo_read_f, - .fo_write = fifo_write_f, - .fo_truncate = fifo_truncate_f, - .fo_ioctl = fifo_ioctl_f, - .fo_poll = fifo_poll_f, - .fo_kqfilter = fifo_kqfilter_f, - .fo_stat = fifo_stat_f, - .fo_close = fifo_close_f, - .fo_chmod = vn_chmod, - .fo_chown = vn_chown, - .fo_flags = DFLAG_PASSABLE -}; - /* * This structure is associated with the FIFO vnode and stores * the state associated with the FIFO. * Notes about locking: - * - fi_readsock and fi_writesock are invariant since init time. + * - fi_pipeinfo is invariant since init time. * - fi_readers and fi_writers are vnode lock protected. * - fi_wgen is fif_mtx lock protected. 
*/ struct fifoinfo { - struct socket *fi_readsock; - struct socket *fi_writesock; - long fi_readers; - long fi_writers; + struct pipeinfo *fi_pipeinfo; + long fi_readers; + long fi_writers; int fi_wgen; }; + static vop_print_t fifo_print; static vop_open_t fifo_open; static vop_close_t fifo_close; static vop_pathconf_t fifo_pathconf; static vop_advlock_t fifo_advlock; -static void filt_fifordetach(struct knote *kn); -static int filt_fiforead(struct knote *kn, long hint); -static void filt_fifowdetach(struct knote *kn); -static int filt_fifowrite(struct knote *kn, long hint); -static void filt_fifodetach_notsup(struct knote *kn); -static int filt_fifo_notsup(struct knote *kn, long hint); - -static struct filterops fiforead_filtops = { - .f_isfd = 1, - .f_detach = filt_fifordetach, - .f_event = filt_fiforead, -}; -static struct filterops fifowrite_filtops = { - .f_isfd = 1, - .f_detach = filt_fifowdetach, - .f_event = filt_fifowrite, -}; -static struct filterops fifo_notsup_filtops = { - .f_isfd = 1, - .f_detach = filt_fifodetach_notsup, - .f_event = filt_fifo_notsup, -}; - struct vop_vector fifo_specops = { .vop_default = &default_vnodeops, @@ -164,8 +118,7 @@ fifo_cleanup(struct vnode *vp) ASSERT_VOP_ELOCKED(vp, "fifo_cleanup"); if (fip->fi_readers == 0 && fip->fi_writers == 0) { vp->v_fifoinfo = NULL; - (void)soclose(fip->fi_readsock); - (void)soclose(fip->fi_writesock); + pipe_destroy(fip->fi_pipeinfo); free(fip, M_VNODE); } } @@ -186,48 +139,35 @@ fifo_open(ap) } */ *ap; { struct vnode *vp = ap->a_vp; - struct fifoinfo *fip; - struct thread *td = ap->a_td; - struct ucred *cred = ap->a_cred; struct file *fp = ap->a_fp; - struct socket *rso, *wso; + struct thread *td = ap->a_td; + struct pipeinfo *pip; + struct fifoinfo *fip; + struct pipe *rpipe, *wpipe; int error; ASSERT_VOP_ELOCKED(vp, "fifo_open"); if (fp == NULL) return (EINVAL); if ((fip = vp->v_fifoinfo) == NULL) { - fip = malloc(sizeof(*fip), M_VNODE, M_WAITOK); - error = socreate(AF_LOCAL, &rso, 
SOCK_STREAM, 0, cred, td); + error = pipe_ctor(&pip, td); if (error) - goto fail1; - fip->fi_readsock = rso; - error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, cred, td); - if (error) - goto fail2; - fip->fi_writesock = wso; - error = soconnect2(wso, rso); - /* Close the direction we do not use, so we can get POLLHUP. */ - if (error == 0) - error = soshutdown(rso, SHUT_WR); - if (error) { - (void)soclose(wso); -fail2: - (void)soclose(rso); -fail1: - free(fip, M_VNODE); return (error); - } + fip = malloc(sizeof(*fip), M_VNODE, M_WAITOK); + fip->fi_pipeinfo = pip; fip->fi_wgen = fip->fi_readers = fip->fi_writers = 0; - wso->so_snd.sb_lowat = PIPE_BUF; - SOCKBUF_LOCK(&rso->so_rcv); - rso->so_rcv.sb_state |= SBS_CANTRCVMORE; - SOCKBUF_UNLOCK(&rso->so_rcv); - KASSERT(vp->v_fifoinfo == NULL, - ("fifo_open: v_fifoinfo race")); + + KASSERT(vp->v_fifoinfo == NULL, ("fifo_open: v_fifoinfo race")); + vp->v_fifoinfo = fip; } + pip = fip->fi_pipeinfo; + + KASSERT(pip != NULL, ("fifo_open: pipeinfo is NULL")); + rpipe = pip->pi_rpipe; + wpipe = pip->pi_wpipe; + /* * Use the fifo_mtx lock here, in addition to the vnode lock, * in order to allow vnode lock dropping before msleep() calls @@ -237,13 +177,11 @@ fifo_open(ap) if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { - SOCKBUF_LOCK(&fip->fi_writesock->so_snd); - fip->fi_writesock->so_snd.sb_state &= ~SBS_CANTSENDMORE; - SOCKBUF_UNLOCK(&fip->fi_writesock->so_snd); - if (fip->fi_writers > 0) { + PIPE_LOCK(rpipe); + wpipe->pipe_state &= ~PIPE_EOF; + PIPE_UNLOCK(rpipe); + if (fip->fi_writers > 0) wakeup(&fip->fi_writers); - sowwakeup(fip->fi_writesock); - } } fp->f_seqcount = fip->fi_wgen - fip->fi_writers; } @@ -256,13 +194,11 @@ fifo_open(ap) } fip->fi_writers++; if (fip->fi_writers == 1) { - SOCKBUF_LOCK(&fip->fi_readsock->so_rcv); - fip->fi_readsock->so_rcv.sb_state &= ~SBS_CANTRCVMORE; - SOCKBUF_UNLOCK(&fip->fi_readsock->so_rcv); - if (fip->fi_readers > 0) { + PIPE_LOCK(wpipe); + rpipe->pipe_state &= 
~PIPE_EOF; + PIPE_UNLOCK(wpipe); + if (fip->fi_readers > 0) wakeup(&fip->fi_readers); - sorwakeup(fip->fi_readsock); - } } } if ((ap->a_mode & O_NONBLOCK) == 0) { @@ -274,7 +210,9 @@ fifo_open(ap) if (error) { fip->fi_readers--; if (fip->fi_readers == 0) { - socantsendmore(fip->fi_writesock); + PIPE_LOCK(wpipe); + wpipe->pipe_state |= PIPE_EOF; + PIPE_UNLOCK(wpipe); fifo_cleanup(vp); } return (error); @@ -294,7 +230,9 @@ fifo_open(ap) if (error) { fip->fi_writers--; if (fip->fi_writers == 0) { - socantrcvmore(fip->fi_readsock); + PIPE_LOCK(rpipe); + rpipe->pipe_state |= PIPE_EOF; + PIPE_UNLOCK(rpipe); mtx_lock(&fifo_mtx); fip->fi_wgen++; mtx_unlock(&fifo_mtx); @@ -313,79 +249,10 @@ fifo_open(ap) mtx_unlock(&fifo_mtx); KASSERT(fp != NULL, ("can't fifo/vnode bypass")); KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open")); - finit(fp, fp->f_flag, DTYPE_FIFO, fip, &fifo_ops_f); + finit(fp, fp->f_flag, DTYPE_FIFO, pip, &pipeops); return (0); } -static void -filt_fifordetach(struct knote *kn) -{ - struct socket *so = (struct socket *)kn->kn_hook; - - SOCKBUF_LOCK(&so->so_rcv); - knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1); - if (knlist_empty(&so->so_rcv.sb_sel.si_note)) - so->so_rcv.sb_flags &= ~SB_KNOTE; - SOCKBUF_UNLOCK(&so->so_rcv); -} - -static int -filt_fiforead(struct knote *kn, long hint) -{ - struct socket *so = (struct socket *)kn->kn_hook; - - SOCKBUF_LOCK_ASSERT(&so->so_rcv); - kn->kn_data = so->so_rcv.sb_cc; - if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { - kn->kn_flags |= EV_EOF; - return (1); - } else { - kn->kn_flags &= ~EV_EOF; - return (kn->kn_data > 0); - } -} - -static void -filt_fifowdetach(struct knote *kn) -{ - struct socket *so = (struct socket *)kn->kn_hook; - - SOCKBUF_LOCK(&so->so_snd); - knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1); - if (knlist_empty(&so->so_snd.sb_sel.si_note)) - so->so_snd.sb_flags &= ~SB_KNOTE; - SOCKBUF_UNLOCK(&so->so_snd); -} - -static int -filt_fifowrite(struct knote *kn, long hint) -{ - struct socket *so 
= (struct socket *)kn->kn_hook; - - SOCKBUF_LOCK_ASSERT(&so->so_snd); - 
kn->kn_data = sbspace(&so->so_snd); - if (so->so_snd.sb_state & SBS_CANTSENDMORE) { - kn->kn_flags |= EV_EOF; - return (1); - } else { - kn->kn_flags &= ~EV_EOF; - return (kn->kn_data >= so->so_snd.sb_lowat); - } -} - -static void -filt_fifodetach_notsup(struct knote *kn) -{ - -} - -static int -filt_fifo_notsup(struct knote *kn, long hint) -{ - - return (0); -} - /* * Device close routine */ @@ -401,21 +268,23 @@ fifo_close(ap) { struct vnode *vp = ap->a_vp; struct fifoinfo *fip = vp->v_fifoinfo; + struct pipeinfo *pip = fip->fi_pipeinfo; ASSERT_VOP_ELOCKED(vp, "fifo_close"); - if (fip == NULL) { - printf("fifo_close: no v_fifoinfo %p\n", vp); - return (0); - } if (ap->a_fflag & FREAD) { fip->fi_readers--; - if (fip->fi_readers == 0) - socantsendmore(fip->fi_writesock); + if (fip->fi_readers == 0) { + PIPE_LOCK(pip->pi_wpipe); + pip->pi_wpipe->pipe_state |= PIPE_EOF; + PIPE_UNLOCK(pip->pi_wpipe); + } } if (ap->a_fflag & FWRITE) { fip->fi_writers--; if (fip->fi_writers == 0) { - socantrcvmore(fip->fi_readsock); + PIPE_LOCK(pip->pi_rpipe); + pip->pi_rpipe->pipe_state |= PIPE_EOF; + PIPE_UNLOCK(pip->pi_rpipe); mtx_lock(&fifo_mtx); fip->fi_wgen++; mtx_unlock(&fifo_mtx); @@ -504,212 +373,20 @@ fifo_advlock(ap) return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } -static int -fifo_close_f(struct file *fp, struct thread *td) +int +fifo_iseof(struct file *fp) { - - return (vnops.fo_close(fp, td)); -} - -/* - * The implementation of ioctl() for named fifos is complicated by the fact - * that we permit O_RDWR fifo file descriptors, meaning that the actions of - * ioctls may have to be applied to both the underlying sockets rather than - * just one. The original implementation simply forward the ioctl to one - * or both sockets based on fp->f_flag. We now consider each ioctl - * separately, as the composition effect requires careful ordering. 
- * - * We do not blindly pass all ioctls through to the socket in order to avoid - * providing unnecessary ioctls that might be improperly depended on by - * applications (such as socket-specific, routing, and interface ioctls). - * - * Unlike sys_pipe.c, fifos do not implement the deprecated TIOCSPGRP and - * TIOCGPGRP ioctls. Earlier implementations of fifos did forward SIOCSPGRP - * and SIOCGPGRP ioctls, so we might need to re-add those here. - */ -static int -fifo_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, - struct thread *td) -{ - struct fifoinfo *fi; - struct file filetmp; /* Local, so need not be locked. */ - int error; - - error = ENOTTY; - fi = fp->f_data; - - switch (com) { - case FIONBIO: - /* - * Non-blocking I/O is implemented at the fifo layer using - * MSG_NBIO, so does not need to be forwarded down the stack. - */ - return (0); - - case FIOASYNC: - case FIOSETOWN: - case FIOGETOWN: - /* - * These socket ioctls don't have any ordering requirements, - * so are called in an arbitrary order, and only on the - * sockets indicated by the file descriptor rights. - * - * XXXRW: If O_RDWR and the read socket accepts an ioctl but - * the write socket doesn't, the socketpair is left in an - * inconsistent state. - */ - if (fp->f_flag & FREAD) { - filetmp.f_data = fi->fi_readsock; - filetmp.f_cred = cred; - error = soo_ioctl(&filetmp, com, data, cred, td); - if (error) - return (error); - } - if (fp->f_flag & FWRITE) { - filetmp.f_data = fi->fi_writesock; - filetmp.f_cred = cred; - error = soo_ioctl(&filetmp, com, data, cred, td); - } - return (error); - - case FIONREAD: - /* - * FIONREAD will return 0 for non-readable descriptors, and - * the results of FIONREAD on the read socket for readable - * descriptors. 
- */ - if (!(fp->f_flag & FREAD)) { - *(int *)data = 0; - return (0); - } - filetmp.f_data = fi->fi_readsock; - filetmp.f_cred = cred; - return (soo_ioctl(&filetmp, com, data, cred, td)); - - default: - return (ENOTTY); - } -} - -/* - * Because fifos are now a file descriptor layer object, EVFILT_VNODE is not - * implemented. Likely, fifo_kqfilter() should be removed, and - * fifo_kqfilter_f() should know how to forward the request to the underling - * vnode using f_vnode in the file descriptor here. - */ -static int -fifo_kqfilter_f(struct file *fp, struct knote *kn) -{ - struct fifoinfo *fi; - struct socket *so; - struct sockbuf *sb; - - fi = fp->f_data; - - /* - * If a filter is requested that is not supported by this file - * descriptor, don't return an error, but also don't ever generate an - * event. - */ - if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { - kn->kn_fop = &fifo_notsup_filtops; - return (0); - } - - if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { - kn->kn_fop = &fifo_notsup_filtops; - return (0); - } - - switch (kn->kn_filter) { - case EVFILT_READ: - kn->kn_fop = &fiforead_filtops; - so = fi->fi_readsock; - sb = &so->so_rcv; - break; - case EVFILT_WRITE: - kn->kn_fop = &fifowrite_filtops; - so = fi->fi_writesock; - sb = &so->so_snd; - break; - default: - return (EINVAL); - } - - kn->kn_hook = (caddr_t)so; - - SOCKBUF_LOCK(sb); - knlist_add(&sb->sb_sel.si_note, kn, 1); - sb->sb_flags |= SB_KNOTE; - SOCKBUF_UNLOCK(sb); - - return (0); -} - -static int -fifo_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) -{ struct fifoinfo *fip; - struct file filetmp; - int levents, revents = 0; + int iseof; - fip = fp->f_data; - levents = events & - (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND); - if ((fp->f_flag & FREAD) && levents) { - filetmp.f_data = fip->fi_readsock; - filetmp.f_cred = cred; - mtx_lock(&fifo_mtx); - if (fp->f_seqcount == fip->fi_wgen) - levents |= POLLINIGNEOF; - 
mtx_unlock(&fifo_mtx); - revents |= soo_poll(&filetmp, levents, cred, td); - } - levents = events & (POLLOUT | POLLWRNORM | POLLWRBAND); - if ((fp->f_flag & FWRITE) && levents) { - filetmp.f_data = fip->fi_writesock; - filetmp.f_cred = cred; - revents |= soo_poll(&filetmp, levents, cred, td); - } - return (revents); -} - -static int -fifo_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) -{ - struct fifoinfo *fip; - int sflags; - - fip = fp->f_data; - KASSERT(uio->uio_rw == UIO_READ,("fifo_read mode")); - if (uio->uio_resid == 0) + if (fp->f_vnode == NULL || fp->f_vnode->v_fifoinfo == NULL) return (0); - sflags = (fp->f_flag & FNONBLOCK) ? MSG_NBIO : 0; - return (soreceive(fip->fi_readsock, NULL, uio, NULL, NULL, &sflags)); -} -static int -fifo_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) -{ + fip = fp->f_vnode->v_fifoinfo; + mtx_lock(&fifo_mtx); + iseof = (fp->f_seqcount == fip->fi_wgen); + mtx_unlock(&fifo_mtx); - return (vnops.fo_stat(fp, sb, cred, td)); + return (iseof); } -static int -fifo_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) -{ - - return (vnops.fo_truncate(fp, length, cred, td)); -} - -static int -fifo_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) -{ - struct fifoinfo *fip; - int sflags; - - fip = fp->f_data; - KASSERT(uio->uio_rw == UIO_WRITE,("fifo_write mode")); - sflags = (fp->f_flag & FNONBLOCK) ? MSG_NBIO : 0; - return (sosend(fip->fi_writesock, NULL, uio, 0, NULL, sflags, td)); -} Index: sys/sys/pipe.h =================================================================== --- sys/sys/pipe.h (revision 228609) +++ sys/sys/pipe.h (working copy) @@ -28,6 +28,8 @@ #error "no user-servicable parts inside" #endif +#include + /* * Pipe buffer size, keep moderate in value, pipes take kva space. 
*/ @@ -103,16 +105,12 @@ struct pipe { struct pipebuf pipe_buffer; /* data storage */ struct pipemapping pipe_map; /* pipe mapping for direct I/O */ struct selinfo pipe_sel; /* for compat with select */ - struct timespec pipe_atime; /* time of last access */ - struct timespec pipe_mtime; /* time of last modify */ - struct timespec pipe_ctime; /* time of status change */ struct sigio *pipe_sigio; /* information for async I/O */ struct pipe *pipe_peer; /* link with other direction */ struct pipepair *pipe_pair; /* container structure pointer */ u_int pipe_state; /* pipe status info */ int pipe_busy; /* busy flag, mostly to handle rundown sanely */ int pipe_present; /* still present? */ - ino_t pipe_ino; /* fake inode for stat(2) */ }; /* @@ -138,5 +136,24 @@ struct pipepair { #define PIPE_UNLOCK(pipe) mtx_unlock(PIPE_MTX(pipe)) #define PIPE_LOCK_ASSERT(pipe, type) mtx_assert(PIPE_MTX(pipe), (type)) +#define PIPE_CNT(pipe) (((pipe)->pipe_state & PIPE_DIRECTW) ? \ + (pipe)->pipe_map.cnt : (pipe)->pipe_buffer.cnt) +/* + * Per-file descriptor structure. + */ +struct pipeinfo { + struct pipe *pi_rpipe; /* pipe we read from */ + struct pipe *pi_wpipe; /* pipe we write to */ + struct timespec pi_atime; /* time of last access */ + struct timespec pi_mtime; /* time of last modify */ + struct timespec pi_ctime; /* time of status change */ + ino_t pi_ino; /* fake pipe inode for stat(2) */ +}; + +extern struct fileops pipeops; + +int pipe_ctor(struct pipeinfo **ppip, struct thread *td); +void pipe_destroy(struct pipeinfo *pip); + #endif /* !_SYS_PIPE_H_ */