/*	$NetBSD: genfs_vnops.c,v 1.219.4.1 2023/03/05 14:34:59 martin Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.219.4.1 2023/03/05 14:34:59 martin Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/vnode_impl.h>
#include <sys/fcntl.h>
#include <sys/kmem.h>
#include <sys/poll.h>
#include <sys/mman.h>
#include <sys/file.h>
#include <sys/kauth.h>
#include <sys/stat.h>
#include <sys/extattr.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
#include <miscfs/specfs/specdev.h>

/*
 * Forward declarations of the kqueue filter callbacks defined later in
 * this file.  Every callback referenced by the filterops tables is
 * listed, including the write filter.
 */
static void filt_genfsdetach(struct knote *);
static int filt_genfsread(struct knote *, long);
static int filt_genfswrite(struct knote *, long);
static int filt_genfsvnode(struct knote *, long);

/*
 * Measure the first component of a path: count the characters of
 * a_name up to, but not including, the first '/' or the terminating
 * NUL, and store that count through a_retval.  Always returns 0.
 */
int
genfs_parsepath(void *v)
{
	struct vop_parsepath_args /* {
		struct vnode *a_dvp;
		const char *a_name;
		size_t *a_retval;
	} */ *ap = v;
	const char *cp;
	size_t len = 0;

	/* The directory vnode plays no part in the generic parser. */
	(void)ap->a_dvp;

	for (cp = ap->a_name; *cp != '\0' && *cp != '/'; cp++)
		len++;

	*ap->a_retval = len;
	return 0;
}

/*
 * Generic poll: every requested "normal" read/write event is reported
 * as ready.  Only the POLLIN, POLLOUT, POLLRDNORM and POLLWRNORM bits
 * of a_events are handed back; any other requested bit is dropped.
 */
int
genfs_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct lwp *a_l;
	} */ *ap = v;
	const int ready_mask = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;

	return ap->a_events & ready_mask;
}

/*
 * Generic seek validation: a negative new offset is rejected with
 * EINVAL, every other offset is accepted.  Only a_newoff is examined.
 */
int
genfs_seek(void *v)
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		kauth_cred_t cred;
	} */ *ap = v;

	return (ap->a_newoff < 0) ? EINVAL : 0;
}

/*
 * Generic abortop: performs no work and reports success.
 */
int
genfs_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	/* The arguments are intentionally unused. */
	(void)ap;
	return 0;
}

/*
 * Generic fcntl: F_SETFL is accepted (returns 0); every other command
 * fails with EOPNOTSUPP.  Only a_command is examined.
 */
int
genfs_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void *a_data;
		int a_fflag;
		kauth_cred_t a_cred;
		struct lwp *a_l;
	} */ *ap = v;

	return (ap->a_command == F_SETFL) ? 0 : EOPNOTSUPP;
}

/*
 * Placeholder for vnode operations that must never be invoked:
 * it calls panic() unconditionally.
 */
/*ARGSUSED*/
int
genfs_badop(void *v)
{

	(void)v;
	panic("genfs: bad op");
}

/*
 * Do-nothing vnode operation: ignores its argument and returns 0.
 */
/*ARGSUSED*/
int
genfs_nullop(void *v)
{

	(void)v;
	return 0;
}

/*
 * Vnode operation that ignores its argument and always fails with
 * EINVAL.
 */
/*ARGSUSED*/
int
genfs_einval(void *v)
{

	(void)v;
	return EINVAL;
}

/*
 * Link handler (also used for symlink) that always refuses with EROFS.
 * Before returning it passes the directory vnode and componentname to
 * VOP_ABORTOP().  Only a_dvp and a_cnp are read from the arguments.
 */
int
genfs_erofs_link(void *v)
{
	/* also for symlink */
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;

	VOP_ABORTOP(dvp, cnp);
	return EROFS;
}

/*
 * Generic handler for a vnode operation the file system does not
 * support.  Besides returning EOPNOTSUPP it cleans up on behalf of
 * the caller, driven by the operation descriptor found in the
 * arguments:
 *  - for every operation except rename, a componentname argument (if
 *    the descriptor has one) is handed to VOP_ABORTOP();
 *  - every non-NULL vnode argument whose descriptor flags say "will
 *    put" is vput(), and every one flagged "will rele" is vrele()d.
 * Must not be installed for lookup or abortop (asserted).
 */
int
genfs_eopnotsupp(void *v)
{
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		(operation-specific arguments follow)
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp, *vp_last = NULL;
	int flags, i, disp, offset_cnp, offset_vp;

	KASSERT(desc->vdesc_offset != VOP_LOOKUP_DESCOFFSET);
	KASSERT(desc->vdesc_offset != VOP_ABORTOP_DESCOFFSET);

	/*
	 * Abort any componentname that lookup potentially left state in.
	 *
	 * VOP_RENAME is skipped: its componentnames are handled by the
	 * caller of VOP_RENAME.
	 */
	if (desc->vdesc_offset != VOP_RENAME_DESCOFFSET) {
		offset_vp = desc->vdesc_vp_offsets[0];
		if (offset_vp != VDESC_NO_OFFSET) {
			offset_cnp = desc->vdesc_componentname_offset;
			if (offset_cnp != VDESC_NO_OFFSET) {
				struct componentname *cnp;
				struct vnode *dvp;

				dvp = *VOPARG_OFFSETTO(struct vnode **,
				    offset_vp, ap);
				cnp = *VOPARG_OFFSETTO(struct componentname **,
				    offset_cnp, ap);

				VOP_ABORTOP(dvp, cnp);
			}
		}
	}

	/*
	 * Walk the vnode arguments.  The flag word is shifted right by
	 * one bit per slot, so the VP0 mask always selects the bits that
	 * belong to the current slot.
	 */
	flags = desc->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
		offset_vp = desc->vdesc_vp_offsets[i];
		if (offset_vp == VDESC_NO_OFFSET)
			break;	/* end of the vnode list */

		disp = flags & VDESC_VP0_WILLPUT;
		if (disp == 0)
			continue;

		vp = *VOPARG_OFFSETTO(struct vnode **, offset_vp, ap);
		if (vp == NULL)
			continue;

		if (disp == VDESC_VP0_WILLPUT) {
			/*
			 * dvp == vp case: the same vnode was already
			 * vput() for the previous slot, so only drop
			 * the extra reference this time.
			 */
			if (vp == vp_last) {
				vrele(vp);
			} else {
				vput(vp);
				vp_last = vp;
			}
		} else if (disp == VDESC_VP0_WILLRELE) {
			vrele(vp);
		}
	}

	return EOPNOTSUPP;
}

/*
 * Vnode operation that ignores its argument and always fails with
 * EBADF.
 */
/*ARGSUSED*/
int
genfs_ebadf(void *v)
{

	(void)v;
	return EBADF;
}

/*
 * Generic ioctl for file systems without ioctls of their own: ignores
 * its argument and returns EPASSTHROUGH for every request.
 */
/* ARGSUSED */
int
genfs_enoioctl(void *v)
{

	(void)v;
	return EPASSTHROUGH;
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode, by handing the
 * vnode to vrevoke().  Always returns 0.  DIAGNOSTIC kernels panic if
 * REVOKEALL is missing from a_flags.
 */
int
genfs_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

#ifdef DIAGNOSTIC
	if (!(ap->a_flags & REVOKEALL))
		panic("genfs_revoke: not revokeall");
#endif
	vrevoke(vp);
	return 0;
}

/*
 * Lock the node (for deadfs).
 *
 * Without LK_RETRY the request fails at once with ENOENT.  Otherwise
 * the vnode's vi_lock is operated on according to a_flags:
 *  - LK_DOWNGRADE: downgrade the lock;
 *  - LK_UPGRADE (LK_NOWAIT must be set too): try to upgrade, EBUSY on
 *    failure;
 *  - LK_EXCLUSIVE or LK_SHARED: acquire as writer or reader; with
 *    LK_NOWAIT only a try is made and EBUSY returned on failure.
 * Returns 0 on success.
 */
int
genfs_deadlock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	const int flags = ap->a_flags;
	krw_t op;

	if (!ISSET(flags, LK_RETRY))
		return ENOENT;

	if (ISSET(flags, LK_DOWNGRADE)) {
		rw_downgrade(&vip->vi_lock);
	} else if (ISSET(flags, LK_UPGRADE)) {
		KASSERT(ISSET(flags, LK_NOWAIT));
		if (!rw_tryupgrade(&vip->vi_lock))
			return EBUSY;
	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
		if (ISSET(flags, LK_EXCLUSIVE))
			op = RW_WRITER;
		else
			op = RW_READER;

		if (!ISSET(flags, LK_NOWAIT))
			rw_enter(&vip->vi_lock, op);
		else if (!rw_tryenter(&vip->vi_lock, op))
			return EBUSY;
	}
	VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
	return 0;
}

/*
 * Unlock the node (for deadfs): release the vnode's vi_lock.
 * Always returns 0.
 */
int
genfs_deadunlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_impl_t *vip = VNODE_TO_VIMPL(ap->a_vp);

	rw_exit(&vip->vi_lock);
	return 0;
}

/*
 * Lock the node.
 *
 * The vnode's vi_lock is operated on according to a_flags:
 *  - LK_DOWNGRADE: downgrade the lock;
 *  - LK_UPGRADE (LK_NOWAIT must be set too): try to upgrade, EBUSY on
 *    failure;
 *  - LK_EXCLUSIVE or LK_SHARED: acquire as writer or reader; with
 *    LK_NOWAIT only a try is made and EBUSY returned on failure.
 * Returns 0 on success.
 */
int
genfs_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	const int flags = ap->a_flags;
	krw_t op;

	if (ISSET(flags, LK_DOWNGRADE)) {
		rw_downgrade(&vip->vi_lock);
	} else if (ISSET(flags, LK_UPGRADE)) {
		KASSERT(ISSET(flags, LK_NOWAIT));
		if (!rw_tryupgrade(&vip->vi_lock))
			return EBUSY;
	} else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
		if (ISSET(flags, LK_EXCLUSIVE))
			op = RW_WRITER;
		else
			op = RW_READER;

		if (!ISSET(flags, LK_NOWAIT))
			rw_enter(&vip->vi_lock, op);
		else if (!rw_tryenter(&vip->vi_lock, op))
			return EBUSY;
	}
	VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
	return 0;
}

/*
 * Unlock the node: release the vnode's vi_lock.  Always returns 0.
 */
int
genfs_unlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_impl_t *vip = VNODE_TO_VIMPL(ap->a_vp);

	rw_exit(&vip->vi_lock);
	return 0;
}

/*
 * Return whether or not the node is locked: LK_EXCLUSIVE when
 * rw_write_held() reports true for the vnode's vi_lock, LK_SHARED when
 * rw_read_held() does, and 0 otherwise.
 */
int
genfs_islocked(void *v)
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	vnode_impl_t *vip = VNODE_TO_VIMPL(ap->a_vp);

	if (rw_write_held(&vip->vi_lock))
		return LK_EXCLUSIVE;

	return rw_read_held(&vip->vi_lock) ? LK_SHARED : 0;
}

/*
 * Generic mmap: ignores its argument and returns 0.
 */
int
genfs_mmap(void *v)
{

	(void)v;
	return 0;
}

/*
 * VOP_PUTPAGES() for vnodes which never have pages.
 *
 * Asserts that the vnode's object holds no pages, releases the
 * object's vmobjlock and returns 0.  The offset range and flags are
 * not examined.
 * NOTE(review): the rw_exit() implies the caller enters with
 * vmobjlock held -- confirm against the VOP_PUTPAGES contract.
 */

int
genfs_null_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(vp->v_uobj.uo_npages == 0);
	rw_exit(vp->v_uobj.vmobjlock);

	return 0;
}

/*
 * Initialise the genfs part of a vnode: set up the node's g_glock
 * reader/writer lock and record the genfs operations vector.
 */
void
genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_init(&gnode->g_glock);
	gnode->g_op = ops;
}

/*
 * Tear down the genfs part of a vnode: destroy the node's g_glock.
 */
void
genfs_node_destroy(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_destroy(&gnode->g_glock);
}

/*
 * Round 'size' up to the next multiple of the file system block size
 * (1 << mnt_fs_bshift of the vnode's mount) and store the result in
 * *eobp.  'flags' is not used.
 */
void
genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	const int bsize = 1 << vp->v_mount->mnt_fs_bshift;
	const int bmask = bsize - 1;

	*eobp = (size + bsize - 1) & ~bmask;
}

/*
 * kqueue detach hook: remove the knote from the vnode stored in its
 * kn_hook.
 */
static void
filt_genfsdetach(struct knote *kn)
{

	vn_knote_detach((struct vnode *)kn->kn_hook, kn);
}

/*
 * EVFILT_READ event callback.
 *
 * kn_data is set to the distance between the vnode's size and the
 * file's current offset; the event fires when that distance is
 * non-zero.  With hint 0 the vnode interlock is taken here; with any
 * other hint it must already be held (asserted).  NOTE_REVOKE always
 * fires.
 */
static int
filt_genfsread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	int ready;

	if (hint == NOTE_REVOKE) {
		/*
		 * filesystem is gone, so set the EOF flag and schedule
		 * the knote for deletion.
		 */
		KASSERT(mutex_owned(vp->v_interlock));
		knote_set_eof(kn, EV_ONESHOT);
		return 1;
	}

	if (hint != 0) {
		KASSERT(mutex_owned(vp->v_interlock));
		kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
		return (kn->kn_data != 0);
	}

	mutex_enter(vp->v_interlock);
	kn->kn_data = vp->v_size - ((file_t *)kn->kn_obj)->f_offset;
	ready = (kn->kn_data != 0);
	mutex_exit(vp->v_interlock);

	return ready;
}

/*
 * EVFILT_WRITE event callback.
 *
 * Always fires.  For NOTE_REVOKE the knote is marked EOF; otherwise
 * kn_data is reset to 0.  With hint 0 the vnode interlock is taken
 * here; with any other hint it must already be held (asserted).
 */
static int
filt_genfswrite(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;

	if (hint == NOTE_REVOKE) {
		/*
		 * filesystem is gone, so set the EOF flag and schedule
		 * the knote for deletion.
		 */
		KASSERT(mutex_owned(vp->v_interlock));
		knote_set_eof(kn, EV_ONESHOT);
		return 1;
	}

	if (hint != 0) {
		KASSERT(mutex_owned(vp->v_interlock));
		kn->kn_data = 0;
		return 1;
	}

	mutex_enter(vp->v_interlock);
	kn->kn_data = 0;
	mutex_exit(vp->v_interlock);

	return 1;
}

/*
 * EVFILT_VNODE event callback.
 *
 * A non-zero hint is recorded in kn_fflags when it is also present in
 * kn_sfflags.  NOTE_REVOKE additionally marks the knote EOF and always
 * fires; otherwise the event fires when kn_fflags is non-zero.  With
 * hint 0 the vnode interlock is taken here just to sample kn_fflags;
 * with any other hint it must already be held (asserted).
 */
static int
filt_genfsvnode(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_hook;
	int fflags;

	if (hint == NOTE_REVOKE) {
		KASSERT(mutex_owned(vp->v_interlock));
		knote_set_eof(kn, 0);
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		return 1;
	}

	if (hint == 0) {
		mutex_enter(vp->v_interlock);
		fflags = kn->kn_fflags;
		mutex_exit(vp->v_interlock);
	} else {
		KASSERT(mutex_owned(vp->v_interlock));
		if ((kn->kn_sfflags & hint) != 0)
			kn->kn_fflags |= hint;
		fflags = kn->kn_fflags;
	}

	return (fflags != 0);
}

/*
 * kqueue filter operation tables installed by genfs_kqfilter().  All
 * three carry the same flags, have no attach hook and share the detach
 * routine; they differ only in the event callback.
 */

/* EVFILT_READ */
static const struct filterops genfsread_filtops = {
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
	.f_attach = NULL,
	.f_detach = filt_genfsdetach,
	.f_event = filt_genfsread,
};

/* EVFILT_WRITE */
static const struct filterops genfswrite_filtops = {
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
	.f_attach = NULL,
	.f_detach = filt_genfsdetach,
	.f_event = filt_genfswrite,
};

/* EVFILT_VNODE */
static const struct filterops genfsvnode_filtops = {
	.f_flags = FILTEROP_MPSAFE | FILTEROP_ISFD,
	.f_attach = NULL,
	.f_detach = filt_genfsdetach,
	.f_event = filt_genfsvnode,
};

/*
 * Generic kqfilter: attach a knote to a vnode.
 *
 * Picks the filter operations matching kn_filter (read, write or
 * vnode), stores the vnode in kn_hook and registers the knote with
 * vn_knote_attach().  Returns 0 on success; an unsupported filter
 * yields EINVAL and leaves the knote untouched.
 */
int
genfs_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode	*a_vp;
		struct knote	*a_kn;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;
	const struct filterops *fops;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		fops = &genfsread_filtops;
		break;
	case EVFILT_WRITE:
		fops = &genfswrite_filtops;
		break;
	case EVFILT_VNODE:
		fops = &genfsvnode_filtops;
		break;
	default:
		return EINVAL;
	}

	kn->kn_fop = fops;
	kn->kn_hook = vp;

	vn_knote_attach(vp, kn);

	return 0;
}

/*
 * Acquire the genfs node lock (g_glock) of a vnode as a writer.
 */
void
genfs_node_wrlock(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_enter(&gnode->g_glock, RW_WRITER);
}

/*
 * Acquire the genfs node lock (g_glock) of a vnode as a reader.
 */
void
genfs_node_rdlock(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_enter(&gnode->g_glock, RW_READER);
}

/*
 * Try to acquire the genfs node lock (g_glock) of a vnode as a reader
 * and return the result of rw_tryenter() unchanged.
 */
int
genfs_node_rdtrylock(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	return rw_tryenter(&gnode->g_glock, RW_READER);
}

/*
 * Release the genfs node lock (g_glock) of a vnode.
 */
void
genfs_node_unlock(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	rw_exit(&gnode->g_glock);
}

/*
 * Report whether the genfs node lock (g_glock) of a vnode is write
 * held, as determined by rw_write_held().
 */
int
genfs_node_wrlocked(struct vnode *vp)
{
	struct genfs_node *gnode = VTOG(vp);

	return rw_write_held(&gnode->g_glock);
}

/*
 * Common filesystem object access control check routine.  Accepts a
 * vnode, cred, uid, gid, mode, acl, requested access mode.
 * Returns 0 on success, or an errno on failure.
 *
 * Exactly one permission class of file_mode is consulted: the owner
 * bits when the effective uid equals file_uid (the owner is also
 * granted VADMIN), else the group bits when the credential is a member
 * of file_gid, else the "other" bits.  A write bit grants both VWRITE
 * and VAPPEND.  When the request is not fully covered the result is
 * EPERM if VADMIN was requested and EACCES otherwise.  A positive
 * error from kauth_cred_groupmember() is returned as is.  'vp' and
 * 'acl' are not examined.
 */
int
genfs_can_access(vnode_t *vp, kauth_cred_t cred, uid_t file_uid, gid_t file_gid,
    mode_t file_mode, struct acl *acl, accmode_t accmode)
{
	accmode_t dac_granted = 0;
	mode_t xbit, rbit, wbit;
	int error;

	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

	/*
	 * Select the single permission class that applies to this
	 * credential: owner first, then group, then everyone else.
	 */
	if (kauth_cred_geteuid(cred) == file_uid) {
		dac_granted = VADMIN;
		xbit = S_IXUSR;
		rbit = S_IRUSR;
		wbit = S_IWUSR;
	} else {
		error = kauth_cred_groupmember(cred, file_gid);
		if (error > 0)
			return error;
		if (error == 0) {
			xbit = S_IXGRP;
			rbit = S_IRGRP;
			wbit = S_IWGRP;
		} else {
			xbit = S_IXOTH;
			rbit = S_IROTH;
			wbit = S_IWOTH;
		}
	}

	/* Translate the selected mode bits into granted access bits. */
	if (file_mode & xbit)
		dac_granted |= VEXEC;
	if (file_mode & rbit)
		dac_granted |= VREAD;
	if (file_mode & wbit)
		dac_granted |= (VWRITE | VAPPEND);

	if ((accmode & dac_granted) != accmode)
		return (accmode & VADMIN) ? EPERM : EACCES;

	return 0;
}

/*
 * Implement a version of genfs_can_access() that understands POSIX.1e ACL
 * semantics;
 * the access ACL has already been prepared for evaluation by the file system
 * and is passed via 'uid', 'gid', and 'acl'.  Return 0 on success, else an
 * errno value.
 */
int
genfs_can_access_acl_posix1e(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
    gid_t file_gid, mode_t file_mode, struct acl *acl, accmode_t accmode)
{
	struct acl_entry *acl_other, *acl_mask;
	accmode_t dac_granted;
	accmode_t acl_mask_granted;
	int group_matched, i;
	int error;

	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0);
	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

	/*
	 * The owner matches if the effective uid associated with the
	 * credential matches that of the ACL_USER_OBJ entry.  While we're
	 * doing the first scan, also cache the location of the ACL_MASK and
	 * ACL_OTHER entries, preventing some future iterations.
	 */
	acl_mask = acl_other = NULL;
	for (i = 0; i < acl->acl_cnt; i++) {
		struct acl_entry *ae = &acl->acl_entry[i];
		switch (ae->ae_tag) {
		case ACL_USER_OBJ:
			if (kauth_cred_geteuid(cred) != file_uid)
				break;
			dac_granted = 0;
			dac_granted |= VADMIN;
			if (ae->ae_perm & ACL_EXECUTE)
				dac_granted |= VEXEC;
			if (ae->ae_perm & ACL_READ)
				dac_granted |= VREAD;
			if (ae->ae_perm & ACL_WRITE)
				dac_granted |= (VWRITE | VAPPEND);
			goto out;

		case ACL_MASK:
			acl_mask = ae;
			break;

		case ACL_OTHER:
			acl_other = ae;
			break;

		default:
			break;
		}
	}

	/*
	 * An ACL_OTHER entry should always exist in a valid access ACL.  If
	 * it doesn't, then generate a serious failure.	 For now, this means
	 * a debugging message and EPERM, but in the future should probably
	 * be a panic.
	 */
	if (acl_other == NULL) {
		/*
		 * XXX This should never happen
		 */
		printf("%s: ACL_OTHER missing\n", __func__);
		return EPERM;
	}

	/*
	 * Checks against ACL_USER, ACL_GROUP_OBJ, and ACL_GROUP fields are
	 * masked by an ACL_MASK entry, if any.	 As such, first identify the
	 * ACL_MASK field, then iterate through identifying potential user
	 * matches, then group matches.	 If there is no ACL_MASK, assume that
	 * the mask allows all requests to succeed.
	 */
	if (acl_mask != NULL) {
		acl_mask_granted = 0;
		if (acl_mask->ae_perm & ACL_EXECUTE)
			acl_mask_granted |= VEXEC;
		if (acl_mask->ae_perm & ACL_READ)
			acl_mask_granted |= VREAD;
		if (acl_mask->ae_perm & ACL_WRITE)
			acl_mask_granted |= (VWRITE | VAPPEND);
	} else
		acl_mask_granted = VEXEC | VREAD | VWRITE | VAPPEND;

	/*
	 * Check ACL_USER ACL entries.	There will either be one or no
	 * matches; if there is one, we accept or rejected based on the
	 * match; otherwise, we continue on to groups.
	 */
	for (i = 0; i < acl->acl_cnt; i++) {
		struct acl_entry *ae = &acl->acl_entry[i];
		switch (ae->ae_tag) {
		case ACL_USER:
			if (kauth_cred_geteuid(cred) != ae->ae_id)
				break;
			dac_granted = 0;
			if (ae->ae_perm & ACL_EXECUTE)
				dac_granted |= VEXEC;
			if (ae->ae_perm & ACL_READ)
				dac_granted |= VREAD;
			if (ae->ae_perm & ACL_WRITE)
				dac_granted |= (VWRITE | VAPPEND);
			dac_granted &= acl_mask_granted;
			goto out;
		}
	}

	/*
	 * Group match is best-match, not first-match, so find a "best"
	 * match.  Iterate across, testing each potential group match.	Make
	 * sure we keep track of whether we found a match or not, so that we
	 * know if we should try again with any available privilege, or if we
	 * should move on to ACL_OTHER.
	 */
	group_matched = 0;
	for (i = 0; i < acl->acl_cnt; i++) {
		struct acl_entry *ae = &acl->acl_entry[i];
		switch (ae->ae_tag) {
		case ACL_GROUP_OBJ:
			error = kauth_cred_groupmember(cred, file_gid);
			if (error > 0)
				return error;
			if (error)
				break;
			dac_granted = 0;
			if (ae->ae_perm & ACL_EXECUTE)
				dac_granted |= VEXEC;
			if (ae->ae_perm & ACL_READ)
				dac_granted |= VREAD;
			if (ae->ae_perm & ACL_WRITE)
				dac_granted |= (VWRITE | VAPPEND);
			dac_granted  &= acl_mask_granted;

			if ((accmode & dac_granted) == accmode)
				return 0;

			group_matched = 1;
			break;

		case ACL_GROUP:
			error = kauth_cred_groupmember(cred, ae->ae_id);
			if (error > 0)
				return error;
			if (error)
				break;
			dac_granted = 0;
			if (ae->ae_perm & ACL_EXECUTE)
				dac_granted |= VEXEC;
			if (ae->ae_perm & ACL_READ)
				dac_granted |= VREAD;
			if (ae->ae_perm & ACL_WRITE)
				dac_granted |= (VWRITE | VAPPEND);
			dac_granted  &= acl_mask_granted;

			if ((accmode & dac_granted) == accmode)
				return 0;

			group_matched = 1;
			break;

		default:
			break;
		}
	}

	if (group_matched == 1) {
		/*
		 * There was a match, but it did not grant rights via pure
		 * DAC.	 Try again, this time with privilege.
		 */
		for (i = 0; i < acl->acl_cnt; i++) {
			struct acl_entry *ae = &acl->acl_entry[i];
			switch (ae->ae_tag) {
			case ACL_GROUP_OBJ:
				error = kauth_cred_groupmember(cred, file_gid);
				if (error > 0)
					return error;
				if (error)
					break;
				dac_granted = 0;
				if (ae->ae_perm & ACL_EXECUTE)
					dac_granted |= VEXEC;
				if (ae->ae_perm & ACL_READ)
					dac_granted |= VREAD;
				if (ae->ae_perm & ACL_WRITE)
					dac_granted |= (VWRITE | VAPPEND);
				dac_granted &= acl_mask_granted;
				goto out;

			case ACL_GROUP:
				error = kauth_cred_groupmember(cred, ae->ae_id);
				if (error > 0)
					return error;
				if (error)
					break;
				dac_granted = 0;
				if (ae->ae_perm & ACL_EXECUTE)
				dac_granted |= VEXEC;
				if (ae->ae_perm & ACL_READ)
					dac_granted |= VREAD;
				if (ae->ae_perm & ACL_WRITE)
					dac_granted |= (VWRITE | VAPPEND);
				dac_granted &= acl_mask_granted;

				goto out;
			default:
				break;
			}
		}
		/*
		 * Even with privilege, group membership was not sufficient.
		 * Return failure.
		 */
		dac_granted = 0;
		goto out;
	}
		
	/*
	 * Fall back on ACL_OTHER.  ACL_MASK is not applied to ACL_OTHER.
	 */
	dac_granted = 0;
	if (acl_other->ae_perm & ACL_EXECUTE)
		dac_granted |= VEXEC;
	if (acl_other->ae_perm & ACL_READ)
		dac_granted |= VREAD;
	if (acl_other->ae_perm & ACL_WRITE)
		dac_granted |= (VWRITE | VAPPEND);

out:
	if ((accmode & dac_granted) == accmode)
		return 0;
	return (accmode & VADMIN) ? EPERM : EACCES;
}

/*
 * Translation table from accmode_t request bits to NFSv4 ACL
 * permission bits, scanned by _access_mask_from_accmode().  The
 * all-zero entry terminates the table.  The table is never modified,
 * hence const.
 */
static const struct {
	accmode_t accmode;
	int mask;
} accmode2mask[] = {
	{ VREAD, ACL_READ_DATA },
	{ VWRITE, ACL_WRITE_DATA },
	{ VAPPEND, ACL_APPEND_DATA },
	{ VEXEC, ACL_EXECUTE },
	{ VREAD_NAMED_ATTRS, ACL_READ_NAMED_ATTRS },
	{ VWRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS },
	{ VDELETE_CHILD, ACL_DELETE_CHILD },
	{ VREAD_ATTRIBUTES, ACL_READ_ATTRIBUTES },
	{ VWRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES },
	{ VDELETE, ACL_DELETE },
	{ VREAD_ACL, ACL_READ_ACL },
	{ VWRITE_ACL, ACL_WRITE_ACL },
	{ VWRITE_OWNER, ACL_WRITE_OWNER },
	{ VSYNCHRONIZE, ACL_SYNCHRONIZE },
	{ 0, 0 },
};

/*
 * Convert an accmode_t request into the equivalent set of NFSv4 ACL
 * permission bits by OR-ing together the mask of every accmode2mask[]
 * entry whose accmode bit is requested.
 */
static int
_access_mask_from_accmode(accmode_t accmode)
{
	int mask = 0;
	int idx = 0;

	while (accmode2mask[idx].accmode != 0) {
		if ((accmode & accmode2mask[idx].accmode) != 0)
			mask |= accmode2mask[idx].mask;
		idx++;
	}

	/*
	 * VAPPEND is just a modifier for VWRITE; if the caller asked
	 * for 'VAPPEND | VWRITE', we want to check for ACL_APPEND_DATA only.
	 */
	if ((mask & ACL_APPEND_DATA) != 0)
		mask &= ~ACL_WRITE_DATA;

	return mask;
}

/*
 * Evaluate an NFSv4 ACL against a set of requested permission bits.
 *
 * Entries are processed in order.  Only ALLOW and DENY entries that
 * are not inherit-only and that apply to the credential (owner, named
 * user, owning group, named group, or everyone) are considered.  A
 * DENY entry covering any still-outstanding bit ends the walk with a
 * denial and sets *denied_explicitly to 1; otherwise the entry's bits
 * are removed from the outstanding set, and access is allowed as soon
 * as that set is empty.
 *
 * Return 0, iff access is allowed, 1 otherwise.  A positive error from
 * kauth_cred_groupmember() is returned unchanged.  *denied_explicitly
 * (when the pointer is not NULL) is cleared on entry.
 */
static int
_acl_denies(const struct acl *aclp, int access_mask, kauth_cred_t cred,
    int file_uid, int file_gid, int *denied_explicitly)
{
	const struct acl_entry *ae;
	int idx, error;

	if (denied_explicitly != NULL)
		*denied_explicitly = 0;

	KASSERT(aclp->acl_cnt <= ACL_MAX_ENTRIES);

	for (idx = 0; idx < aclp->acl_cnt; idx++) {
		ae = &aclp->acl_entry[idx];

		/* Skip entry types that take no part in access checks. */
		if (ae->ae_entry_type != ACL_ENTRY_TYPE_ALLOW &&
		    ae->ae_entry_type != ACL_ENTRY_TYPE_DENY)
			continue;
		if (ae->ae_flags & ACL_ENTRY_INHERIT_ONLY)
			continue;

		/* Skip entries that do not apply to this credential. */
		switch (ae->ae_tag) {
		case ACL_USER_OBJ:
			if (kauth_cred_geteuid(cred) != file_uid)
				continue;
			break;
		case ACL_USER:
			if (kauth_cred_geteuid(cred) != ae->ae_id)
				continue;
			break;
		case ACL_GROUP_OBJ:
			error = kauth_cred_groupmember(cred, file_gid);
			if (error > 0)
				return error;
			if (error != 0)
				continue;
			break;
		case ACL_GROUP:
			error = kauth_cred_groupmember(cred, ae->ae_id);
			if (error > 0)
				return error;
			if (error != 0)
				continue;
			break;
		default:
			KASSERT(ae->ae_tag == ACL_EVERYONE);
		}

		if (ae->ae_entry_type == ACL_ENTRY_TYPE_DENY &&
		    (ae->ae_perm & access_mask) != 0) {
			if (denied_explicitly != NULL)
				*denied_explicitly = 1;
			return 1;
		}

		access_mask &= ~(ae->ae_perm);
		if (access_mask == 0)
			return 0;
	}

	/* Bits left over after the whole ACL mean the request fails. */
	return (access_mask == 0) ? 0 : 1;
}

/*
 * Access check with NFSv4 ACL semantics.
 *
 * The request in 'accmode' is converted to NFSv4 permission bits and
 * evaluated against 'aclp' with _acl_denies().  Special cases:
 *  - VSYNCHRONIZE is ignored;
 *  - the file owner is never asked for the read/write ACL and
 *    read/write attributes permissions;
 *  - for non-directories an append request is checked as a write
 *    request;
 *  - VADMIN additionally requires the caller to be the file owner;
 *  - VEXEC on a non-directory additionally requires at least one
 *    execute bit in the mode derived from the ACL;
 *  - with VEXPLICIT_DENY, a failure that is not an explicit deny is
 *    turned into success.
 * Any non-zero result of _acl_denies(), including an error from its
 * group membership lookup, is treated as a denial.
 *
 * Returns 0 when access is allowed.  On denial returns EPERM if any of
 * VADMIN_PERMS, VDELETE_CHILD or VDELETE was requested and EACCES
 * otherwise.  The incoming 'file_mode' value is discarded.
 */
int
genfs_can_access_acl_nfs4(vnode_t *vp, kauth_cred_t cred, uid_t file_uid,
    gid_t file_gid, mode_t file_mode, struct acl *aclp, accmode_t accmode)
{
	int denied, explicitly_denied, access_mask;
	const int must_be_owner = (accmode & VADMIN) != 0;
	const int is_directory = (vp != NULL && vp->v_type == VDIR);

	/* The mode is recomputed from the ACL further down. */
	file_mode = 0;

	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND |
	    VEXPLICIT_DENY | VREAD_NAMED_ATTRS | VWRITE_NAMED_ATTRS |
	    VDELETE_CHILD | VREAD_ATTRIBUTES | VWRITE_ATTRIBUTES | VDELETE |
	    VREAD_ACL | VWRITE_ACL | VWRITE_OWNER | VSYNCHRONIZE)) == 0);
	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE));

	/*
	 * Ignore VSYNCHRONIZE permission.
	 */
	accmode &= ~VSYNCHRONIZE;

	access_mask = _access_mask_from_accmode(accmode);

	/*
	 * File owner is always allowed to read and write the ACL
	 * and basic attributes.  This is to prevent a situation
	 * where user would change ACL in a way that prevents him
	 * from undoing the change.
	 */
	if (kauth_cred_geteuid(cred) == file_uid) {
		access_mask &= ~(ACL_READ_ACL | ACL_WRITE_ACL |
		    ACL_READ_ATTRIBUTES | ACL_WRITE_ATTRIBUTES);
	}

	/*
	 * Ignore append permission for regular files; use write
	 * permission instead.
	 */
	if (!is_directory && (access_mask & ACL_APPEND_DATA)) {
		access_mask &= ~ACL_APPEND_DATA;
		access_mask |= ACL_WRITE_DATA;
	}

	denied = _acl_denies(aclp, access_mask, cred, file_uid, file_gid,
	    &explicitly_denied);

	if (must_be_owner && kauth_cred_geteuid(cred) != file_uid)
		denied = EPERM;

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories. We have to check the mode here to stay
	 * consistent with execve(2). See the test in
	 * exec_check_permissions().
	 */
	__acl_nfs4_sync_mode_from_acl(&file_mode, aclp);
	if (!denied && !is_directory && (accmode & VEXEC) &&
	    (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
		denied = EACCES;

	if (!denied)
		return 0;

	/*
	 * Access failed.  Iff it was not denied explicitly and
	 * VEXPLICIT_DENY flag was specified, allow access.
	 */
	if ((accmode & VEXPLICIT_DENY) && explicitly_denied == 0)
		return 0;

	accmode &= ~VEXPLICIT_DENY;

	return (accmode & (VADMIN_PERMS | VDELETE_CHILD | VDELETE)) ?
	    EPERM : EACCES;
}

/*
 * Common routine to check if chmod() is allowed.
 *
 * Policy:
 *   - You must pass the VOP_ACCESSX() check for VWRITE_ACL, and
 *     - You must not set the "sticky" bit on a non-directory
 *       (meaningless, see chmod(2))
 *     - You must be a member of the file's group if you're trying to
 *       set the SGID bit
 *     - You must be the file owner if you're trying to set the SUID
 *       bit
 *
 * vp - vnode of the file-system object
 * cred - credentials of the invoker
 * cur_uid, cur_gid - current uid/gid of the file-system object
 * new_mode - new mode for the file-system object
 *
 * Returns 0 if the change is allowed, or an error value otherwise:
 * the VOP_ACCESSX() error, EFTYPE for the sticky bit, EPERM for the
 * SGID and SUID rules.
 */
int
genfs_can_chmod(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
    gid_t cur_gid, mode_t new_mode)
{
	int error, ismember;

	/*
	 * To modify the permissions on a file, must possess VWRITE_ACL
	 * for that file.
	 */
	error = VOP_ACCESSX(vp, VWRITE_ACL, cred);
	if (error != 0)
		return error;

	/*
	 * Unprivileged users can't set the sticky bit on files.
	 */
	if (vp->v_type != VDIR && (new_mode & S_ISTXT) != 0)
		return EFTYPE;

	/*
	 * If the invoker is trying to set the SGID bit on the file,
	 * check group membership.
	 */
	if ((new_mode & S_ISGID) != 0) {
		error = kauth_cred_ismember_gid(cred, cur_gid, &ismember);
		if (error != 0 || !ismember)
			return EPERM;
	}

	/*
	 * Deny setting setuid if we are not the file owner.
	 */
	if ((new_mode & S_ISUID) != 0 && cur_uid != kauth_cred_geteuid(cred))
		return EPERM;

	return 0;
}

/*
 * Common routine to check if chown() is allowed.
 *
 * Policy:
 *   - You must pass the VOP_ACCESSX() check for VWRITE_OWNER, and
 *   - You must own the file, and
 *     - You must not try to change ownership, and
 *     - The group must stay the same, or be your effective gid, or be
 *       a group you are a member of
 *
 * vp - vnode
 * cred - credentials of the invoker
 * cur_uid, cur_gid - current uid/gid of the file-system object
 * new_uid, new_gid - target uid/gid of the file-system object
 *
 * Returns 0 if the change is allowed, or an error value otherwise:
 * the VOP_ACCESSX() error, or EPERM for any policy violation.
 */
int
genfs_can_chown(vnode_t *vp, kauth_cred_t cred, uid_t cur_uid,
    gid_t cur_gid, uid_t new_uid, gid_t new_gid)
{
	int error, ismember;

	/*
	 * To modify the ownership of a file, must possess VWRITE_OWNER
	 * for that file.
	 */
	error = VOP_ACCESSX(vp, VWRITE_OWNER, cred);
	if (error != 0)
		return error;

	/* Only the current owner may proceed... */
	if (kauth_cred_geteuid(cred) != cur_uid)
		return EPERM;

	/* ...and may not give the file away. */
	if (new_uid != cur_uid)
		return EPERM;

	/* Leaving the group alone is a no-op and always fine. */
	if (new_gid == cur_gid)
		return 0;

	/* Changing to the effective gid is fine. */
	if (kauth_cred_getegid(cred) == new_gid)
		return 0;

	/* So is changing to any other group the caller belongs to. */
	ismember = 0;
	error = kauth_cred_ismember_gid(cred, new_gid, &ismember);
	if (error == 0 && ismember)
		return 0;

	return EPERM;
}

/*
 * Common routine to check if changing the timestamps is allowed.
 *
 * Grant permission if the caller passes the VOP_ACCESSX() check for
 * VWRITE_ATTRIBUTES on the file.  If VA_UTIMES_NULL is set in
 * 'vaflags' (the time pointer was null), then write permission on
 * the file is also sufficient.
 *
 * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes:
 * A user having ACL_WRITE_DATA or ACL_WRITE_ATTRIBUTES
 * will be allowed to set the times [..] to the current
 * server time.
 *
 * Returns 0 if allowed; otherwise EACCES when VA_UTIMES_NULL is set
 * and EPERM when it is not.  'owner_uid' is not examined here.
 */
int
genfs_can_chtimes(vnode_t *vp, kauth_cred_t cred, uid_t owner_uid,
    u_int vaflags)
{
	const int utimes_null = (vaflags & VA_UTIMES_NULL) != 0;
	int error;

	error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred);
	if (error != 0 && utimes_null)
		error = VOP_ACCESS(vp, VWRITE, cred);

	if (error == 0)
		return 0;

	return utimes_null ? EACCES : EPERM;
}

/*
 * Common routine to check if chflags() is allowed.
 *
 * Policy:
 *   - You must own the file, and
 *   - You must not change system flags, and
 *   - You must not change flags on character/block devices.
 *
 * vp - vnode
 * cred - credentials of the invoker
 * owner_uid - uid of the file-system object
 * changing_sysflags - true if the invoker wants to change system flags
 *
 * Returns 0 if the change is allowed, EPERM otherwise.
 */
int
genfs_can_chflags(vnode_t *vp, kauth_cred_t cred,
     uid_t owner_uid, bool changing_sysflags)
{

	/* The user must own the file. */
	if (kauth_cred_geteuid(cred) != owner_uid)
		return EPERM;

	/* System flags are off limits here. */
	if (changing_sysflags)
		return EPERM;

	/*
	 * Unprivileged users cannot change the flags on devices, even if they
	 * own them.
	 */
	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		return EPERM;
	default:
		return 0;
	}
}

/*
 * Common "sticky" policy.
 *
 * When a directory is "sticky" (as determined by the caller), this
 * function may help implementing the following policy:
 * - Renaming a file in it is only possible if the user owns the directory
 *   or the file being renamed.
 * - Deleting a file from it is only possible if the user owns the
 *   directory or the file being deleted.
 *
 * Returns 0 when the effective uid of 'cred' equals dir_uid or
 * file_uid, EPERM otherwise.  'vp' is not examined.
 */
int
genfs_can_sticky(vnode_t *vp, kauth_cred_t cred, uid_t dir_uid, uid_t file_uid)
{
	const uid_t euid = kauth_cred_geteuid(cred);

	if (euid == dir_uid || euid == file_uid)
		return 0;

	return EPERM;
}

/*
 * Common routine to check access to an extended attribute.
 *
 * - A NOCRED credential (kernel-invoked) always succeeds.
 * - The system namespace is decided by kauth_authorize_system() with
 *   KAUTH_SYSTEM_FS_EXTATTR on the vnode's mount.
 * - The user namespace is decided by VOP_ACCESS() with 'accmode'.
 * - Any other namespace is refused with EPERM.
 */
int
genfs_can_extattr(vnode_t *vp, kauth_cred_t cred, accmode_t accmode,
    int attrnamespace)
{
	/*
	 * Kernel-invoked always succeeds.
	 */
	if (cred == NOCRED)
		return 0;

	if (attrnamespace == EXTATTR_NAMESPACE_SYSTEM) {
		return kauth_authorize_system(cred, KAUTH_SYSTEM_FS_EXTATTR,
		    0, vp->v_mount, NULL, NULL);
	}

	if (attrnamespace == EXTATTR_NAMESPACE_USER)
		return VOP_ACCESS(vp, accmode, cred);

	return EPERM;
}

int
genfs_access(void *v)
{
	struct vop_access_args *ap = v;

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0);

	return VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred);
}

int
genfs_accessx(void *v)
{
	struct vop_accessx_args *ap = v;
	int error;
	accmode_t accmode = ap->a_accmode;
	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return error;

	if (accmode == 0)
		return 0;

	return VOP_ACCESS(ap->a_vp, accmode, ap->a_cred);
}

/*
 * genfs_pathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 *
 * Handles _PC_PATH_MAX (reports PATH_MAX) and _PC_ACL_EXTENDED /
 * _PC_ACL_NFS4 (both report 0); every other name yields EINVAL and
 * leaves *a_retval untouched.
 */
int
genfs_pathconf(void *v)
{
	struct vop_pathconf_args *ap = v;
	int error = 0;

	switch (ap->a_name) {
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		break;
	case _PC_ACL_EXTENDED:
	case _PC_ACL_NFS4:
		*ap->a_retval = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}