/*********************************************************
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

/*
 * af_vsock.c --
 *
 *      Linux socket module for the VMCI Sockets protocol family.
 */

/*
 * Implementation notes:
 *
 * - There are two kinds of sockets: those created by user action (such as
 *   calling socket(2)) and those created by incoming connection request
 *   packets.
 *
 * - There are two "global" tables, one for bound sockets (sockets that have
 *   specified an address that they are responsible for) and one for connected
 *   sockets (sockets that have established a connection with another socket).
 *   These tables are "global" in that all sockets on the system are placed
 *   within them.
 *
 *   - Note, though, that the bound table contains an extra entry for a list
 *     of unbound sockets, and SOCK_DGRAM sockets will always remain in that
 *     list.  The bound table is used solely for lookup of sockets when
 *     packets are received, and that's not necessary for SOCK_DGRAM sockets
 *     since we create a datagram handle for each and need not perform a
 *     lookup.  Keeping SOCK_DGRAM sockets out of the bound hash buckets will
 *     reduce the chance of collisions when looking for SOCK_STREAM sockets
 *     and prevents us from having to check the socket type in the hash table
 *     lookups.
 *
 * - Sockets created by user action will either be "client" sockets that
 *   initiate a connection or "server" sockets that listen for connections;
 *   we do not support simultaneous connects (two "client" sockets
 *   connecting).
 *
 * - "Server" sockets are referred to as listener sockets throughout this
 *   implementation because they are in the SS_LISTEN state.  When a
 *   connection request is received (the second kind of socket mentioned
 *   above), we create a new socket and refer to it as a pending socket.
 *   These pending sockets are placed on the pending connection list of the
 *   listener socket.  When future packets are received for the address the
 *   listener socket is bound to, we check if the source of the packet is
 *   from one that has an existing pending connection.  If it does, we
 *   process the packet for the pending socket.  When that socket reaches the
 *   connected state, it is removed from the listener socket's pending list
 *   and enqueued in the listener socket's accept queue.  Callers of
 *   accept(2) will accept connected sockets from the listener socket's
 *   accept queue.  If the socket cannot be accepted for some reason then it
 *   is marked rejected.  Once the connection is accepted, it is owned by the
 *   user process and the responsibility for cleanup falls with that user
 *   process.
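 *
 *   As an illustrative sketch (not part of this module), a user process
 *   drives the listener side with the ordinary sockets API, where af is the
 *   address family value this module registered at load time:
 *
 *      int fd = socket(af, SOCK_STREAM, 0);
 *      bind(fd, (struct sockaddr *)&addr, sizeof addr);  // addr: sockaddr_vm
 *      listen(fd, 5);
 *      int conn = accept(fd, NULL, NULL);  // dequeues from the accept queue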
 *
 * - It is possible that these pending sockets will never reach the connected
 *   state; in fact, we may never receive another packet after the connection
 *   request.  Because of this, we must schedule a cleanup function to run in
 *   the future, after some amount of time passes where a connection should
 *   have been established.  This function ensures that the socket is off all
 *   lists so it cannot be retrieved, then drops all references to the socket
 *   so it is cleaned up (sock_put() -> sk_free() -> our sk_destruct
 *   implementation).  Note this function will also clean up rejected
 *   sockets, those that reach the connected state but leave it before they
 *   have been accepted.
 *
 * - Sockets created by user action will be cleaned up when the user process
 *   calls close(2), causing our release implementation to be called.  Our
 *   release implementation will perform some cleanup then drop the last
 *   reference so our sk_destruct implementation is invoked.  Our sk_destruct
 *   implementation will perform additional cleanup that's common for both
 *   types of sockets.
 *
 * - A socket's reference count is what ensures that the structure won't be
 *   freed.  Each entry in a list (such as the "global" bound and connected
 *   tables and the listener socket's pending list and connected queue)
 *   ensures a reference.  When we defer work until process context and pass
 *   a socket as our argument, we must ensure the reference count is
 *   increased to ensure the socket isn't freed before the function is run;
 *   the deferred function will then drop the reference.
 */

#include "driver-config.h"

/*
 * Standard kernel headers for the socket, sk_buff, misc device, and poll
 * interfaces used throughout this file.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>

#if defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
#  include <asm/ioctl32.h>
# else
#  include <linux/ioctl32.h>
# endif
/* Use weak: not all kernels export sys_ioctl for use by modules */
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
asmlinkage __attribute__((weak)) long
sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
# else
asmlinkage __attribute__((weak)) int
sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
# endif
#endif

#include "compat_module.h"
#include "compat_kernel.h"
#include "compat_init.h"
#include "compat_sock.h"
#include "compat_wait.h"
#include "compat_version.h"
#include "compat_workqueue.h"
#include "compat_list.h"
#if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL)
# include "compat_semaphore.h"
#endif

#include "vmware.h"
#include "vsockCommon.h"
#include "vsockPacket.h"
#include "vsockVmci.h"
#include "vmci_defs.h"
#include "vmci_call_defs.h"
#include "vmci_iocontrols.h"
#ifdef VMX86_TOOLS
# include "vmciGuestKernelAPI.h"
#else
# include "vmciDatagram.h"
#endif
#include "af_vsock.h"
#include "util.h"
#include "vsock_version.h"
#include "driverLog.h"

#define VSOCK_INVALID_FAMILY        NPROTO
#define VSOCK_AF_IS_REGISTERED(val) ((val) >= 0 && (val) < NPROTO)

/*
 * Prototypes
 */
int VSockVmci_GetAFValue(void);

/*
 * Internal functions.
*/ static int VSockVmciRecvDgramCB(void *data, VMCIDatagram *dg); #ifdef VMX86_TOOLS static int VSockVmciRecvStreamCB(void *data, VMCIDatagram *dg); static void VSockVmciPeerAttachCB(VMCIId subId, VMCI_EventData *ed, void *clientData); static void VSockVmciPeerDetachCB(VMCIId subId, VMCI_EventData *ed, void *clientData); static int VSockVmciSendControlPktBH(struct sockaddr_vm *src, struct sockaddr_vm *dst, VSockPacketType type, uint64 size, uint64 mode, VSockWaitingInfo *wait, VMCIHandle handle); static int VSockVmciSendControlPkt(struct sock *sk, VSockPacketType type, uint64 size, uint64 mode, VSockWaitingInfo *wait, VMCIHandle handle); static void VSockVmciRecvPktWork(compat_work_arg work); static int VSockVmciRecvListen(struct sock *sk, VSockPacket *pkt); static int VSockVmciRecvConnectingServer(struct sock *sk, struct sock *pending, VSockPacket *pkt); static int VSockVmciRecvConnectingClient(struct sock *sk, VSockPacket *pkt); static int VSockVmciRecvConnectingClientNegotiate(struct sock *sk, VSockPacket *pkt); static int VSockVmciRecvConnected(struct sock *sk, VSockPacket *pkt); #endif static int __VSockVmciBind(struct sock *sk, struct sockaddr_vm *addr); #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14) static struct sock *__VSockVmciCreate(struct socket *sock, unsigned int priority, unsigned short type); #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) static struct sock *__VSockVmciCreate(struct socket *sock, gfp_t priority, unsigned short type); #else static struct sock *__VSockVmciCreate(struct net *net, struct socket *sock, gfp_t priority, unsigned short type); #endif static int VSockVmciRegisterAddressFamily(void); static void VSockVmciUnregisterAddressFamily(void); /* Some kernel versions don't define __user. Define it ourself if so. */ #ifndef __user #define __user #endif /* Socket operations. 
*/ static void VSockVmciSkDestruct(struct sock *sk); static int VSockVmciQueueRcvSkb(struct sock *sk, struct sk_buff *skb); static int VSockVmciRelease(struct socket *sock); static int VSockVmciBind(struct socket *sock, struct sockaddr *addr, int addrLen); static int VSockVmciDgramConnect(struct socket *sock, struct sockaddr *addr, int addrLen, int flags); #ifdef VMX86_TOOLS static int VSockVmciStreamConnect(struct socket *sock, struct sockaddr *addr, int addrLen, int flags); static int VSockVmciAccept(struct socket *sock, struct socket *newsock, int flags); #endif static int VSockVmciGetname(struct socket *sock, struct sockaddr *addr, int *addrLen, int peer); static unsigned int VSockVmciPoll(struct file *file, struct socket *sock, poll_table *wait); #ifdef VMX86_TOOLS static int VSockVmciListen(struct socket *sock, int backlog); #endif static int VSockVmciShutdown(struct socket *sock, int mode); #ifdef VMX86_TOOLS static int VSockVmciStreamSetsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen); static int VSockVmciStreamGetsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user * optlen); #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43) static int VSockVmciDgramSendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm); static int VSockVmciDgramRecvmsg(struct socket *sock, struct msghdr *msg, int len, int flags, struct scm_cookie *scm); # ifdef VMX86_TOOLS static int VSockVmciStreamSendmsg(struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm); static int VSockVmciStreamRecvmsg(struct socket *sock, struct msghdr *msg, int len, int flags, struct scm_cookie *scm); # endif #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65) static int VSockVmciDgramSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm); static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, int flags, struct scm_cookie *scm); # ifdef VMX86_TOOLS static int VSockVmciStreamSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, struct scm_cookie *scm); static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, int flags, struct scm_cookie *scm); # endif #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2) static int VSockVmciDgramSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len); static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, int flags); # ifdef VMX86_TOOLS static int VSockVmciStreamSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len); static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, int len, int flags); # endif #else static int VSockVmciDgramSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len); static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); # ifdef VMX86_TOOLS static int VSockVmciStreamSendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len); static int VSockVmciStreamRecvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); # endif #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) static int VSockVmciCreate(struct socket *sock, int protocol); #else static int VSockVmciCreate(struct net *net, struct socket *sock, int protocol); 
#endif /* * Device operations. */ int VSockVmciDevOpen(struct inode *inode, struct file *file); int VSockVmciDevRelease(struct inode *inode, struct file *file); static int VSockVmciDevIoctl(struct inode *inode, struct file *filp, u_int iocmd, unsigned long ioarg); #if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL) static long VSockVmciDevUnlockedIoctl(struct file *filp, u_int iocmd, unsigned long ioarg); #endif /* * Variables. */ /* Protocol family. We only use this for builds against 2.6.9 and later. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 9) static struct proto vsockVmciProto = { .name = "AF_VMCI", #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 10) /* Added in 2.6.10. */ .owner = THIS_MODULE, #endif /* * Before 2.6.9, each address family created their own slab (by calling * kmem_cache_create() directly). From 2.6.9 until 2.6.11, these address * families instead called sk_alloc_slab() and the allocated slab was * assigned to the slab variable in the proto struct and was created of size * slab_obj_size. As of 2.6.12 and later, this slab allocation was moved * into proto_register() and only done if you specified a non-zero value for * the second argument (alloc_slab); the size of the slab element was * changed to obj_size. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) .slab_obj_size = sizeof (VSockVmciSock), #else .obj_size = sizeof (VSockVmciSock), #endif }; #endif static struct net_proto_family vsockVmciFamilyOps = { .family = VSOCK_INVALID_FAMILY, .create = VSockVmciCreate, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69) .owner = THIS_MODULE, #endif }; /* Socket operations, split for DGRAM and STREAM sockets. */ static struct proto_ops vsockVmciDgramOps = { .family = VSOCK_INVALID_FAMILY, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69) .owner = THIS_MODULE, #endif .release = VSockVmciRelease, .bind = VSockVmciBind, .connect = VSockVmciDgramConnect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = VSockVmciGetname, .poll = VSockVmciPoll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = VSockVmciShutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = VSockVmciDgramSendmsg, .recvmsg = VSockVmciDgramRecvmsg, .mmap = sock_no_mmap, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 4) .sendpage = sock_no_sendpage, #endif }; #ifdef VMX86_TOOLS static struct proto_ops vsockVmciStreamOps = { .family = VSOCK_INVALID_FAMILY, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 69) .owner = THIS_MODULE, #endif .release = VSockVmciRelease, .bind = VSockVmciBind, .connect = VSockVmciStreamConnect, .socketpair = sock_no_socketpair, .accept = VSockVmciAccept, .getname = VSockVmciGetname, .poll = VSockVmciPoll, .ioctl = sock_no_ioctl, .listen = VSockVmciListen, .shutdown = VSockVmciShutdown, .setsockopt = VSockVmciStreamSetsockopt, .getsockopt = VSockVmciStreamGetsockopt, .sendmsg = VSockVmciStreamSendmsg, .recvmsg = VSockVmciStreamRecvmsg, .mmap = sock_no_mmap, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 4) .sendpage = sock_no_sendpage, #endif }; #endif static struct file_operations vsockVmciDeviceOps = { #ifdef HAVE_UNLOCKED_IOCTL .unlocked_ioctl = VSockVmciDevUnlockedIoctl, #else .ioctl = VSockVmciDevIoctl, #endif #ifdef HAVE_COMPAT_IOCTL .compat_ioctl = VSockVmciDevUnlockedIoctl, #endif .open = VSockVmciDevOpen, .release = VSockVmciDevRelease, }; static struct miscdevice vsockVmciDevice = { .name = "vsock", .minor = MISC_DYNAMIC_MINOR, .fops = 
&vsockVmciDeviceOps, }; typedef struct VSockRecvPktInfo { compat_work work; struct sock *sk; VSockPacket pkt; } VSockRecvPktInfo; static DECLARE_MUTEX(registrationMutex); static int devOpenCount = 0; static int vsockVmciSocketCount = 0; #ifdef VMX86_TOOLS static VMCIHandle vmciStreamHandle = { VMCI_INVALID_ID, VMCI_INVALID_ID }; static Bool vmciDevicePresent = FALSE; static VMCIId qpResumedSubId = VMCI_INVALID_ID; #endif /* Comment this out to compare with old protocol. */ #define VSOCK_OPTIMIZATION_WAITING_NOTIFY 1 #if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) /* Comment this out to remove flow control for "new" protocol */ # define VSOCK_OPTIMIZATION_FLOW_CONTROL 1 #endif /* Comment this out to turn off datagram counting. */ //#define VSOCK_CONTROL_PACKET_COUNT 1 #ifdef VSOCK_CONTROL_PACKET_COUNT uint64 controlPacketCount[VSOCK_PACKET_TYPE_MAX]; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) kmem_cache_t *vsockCachep; #endif #endif #define VSOCK_MAX_DGRAM_RESENDS 10 /* * 64k is hopefully a reasonable default, but we should do some real * benchmarks. There are also some issues with resource limits on ESX. */ #define VSOCK_DEFAULT_QP_SIZE_MIN 128 #define VSOCK_DEFAULT_QP_SIZE 65536 #define VSOCK_DEFAULT_QP_SIZE_MAX 262144 #define VSOCK_SEND_RESET_BH(_dst, _src, _pkt) \ ((_pkt)->type == VSOCK_PACKET_TYPE_RST) ? \ 0 : \ VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_RST, 0, \ 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_INVALID_BH(_dst, _src) \ VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_INVALID, 0, \ 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_WROTE_BH(_dst, _src) \ VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_WROTE, 0, \ 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_READ_BH(_dst, _src) \ VSockVmciSendControlPktBH(_dst, _src, VSOCK_PACKET_TYPE_READ, 0, \ 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_RESET(_sk, _pkt) \ ((_pkt)->type == VSOCK_PACKET_TYPE_RST) ? 
\ 0 : \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_RST, \ 0, 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_NEGOTIATE(_sk, _size) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_NEGOTIATE, \ _size, 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_QP_OFFER(_sk, _handle) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_OFFER, \ 0, 0, NULL, _handle) #define VSOCK_SEND_CONN_REQUEST(_sk, _size) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_REQUEST, \ _size, 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_ATTACH(_sk, _handle) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_ATTACH, \ 0, 0, NULL, _handle) #define VSOCK_SEND_WROTE(_sk) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WROTE, \ 0, 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_READ(_sk) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_READ, \ 0, 0, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_SHUTDOWN(_sk, _mode) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_SHUTDOWN, \ 0, _mode, NULL, VMCI_INVALID_HANDLE) #define VSOCK_SEND_WAITING_WRITE(_sk, _waitInfo) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WAITING_WRITE, \ 0, 0, _waitInfo, VMCI_INVALID_HANDLE) #define VSOCK_SEND_WAITING_READ(_sk, _waitInfo) \ VSockVmciSendControlPkt(_sk, VSOCK_PACKET_TYPE_WAITING_READ, \ 0, 0, _waitInfo, VMCI_INVALID_HANDLE) #ifdef VMX86_LOG # define LOG_PACKET(_pkt) VSockVmciLogPkt(__FUNCTION__, __LINE__, _pkt) #else # define LOG_PACKET(_pkt) #endif /* *---------------------------------------------------------------------------- * * VSockVmci_GetAFValue -- * * Returns the address family value being used. * * Results: * The address family on success, a negative error on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ int VSockVmci_GetAFValue(void) { int afvalue; down(®istrationMutex); afvalue = vsockVmciFamilyOps.family; if (!VSOCK_AF_IS_REGISTERED(afvalue)) { afvalue = VSockVmciRegisterAddressFamily(); } up(®istrationMutex); return afvalue; } /* *---------------------------------------------------------------------------- * * VSockVmciTestUnregister -- * * Tests if it's necessary to unregister the socket family, and does so. * * Note that this assumes the registration lock is held. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static inline void VSockVmciTestUnregister(void) { if (devOpenCount <= 0 && vsockVmciSocketCount <= 0) { if (VSOCK_AF_IS_REGISTERED(vsockVmciFamilyOps.family)) { VSockVmciUnregisterAddressFamily(); } } } /* * Helper functions. */ #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciNotifyWaitingWrite -- * * Determines if the conditions have been met to notify a waiting writer. * * Results: * TRUE if a notification should be sent, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static Bool VSockVmciNotifyWaitingWrite(VSockVmciSock *vsk) // IN { #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY Bool retval; uint64 notifyLimit; if (!vsk->peerWaitingWrite) { return FALSE; } #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL /* * When the sender blocks, we take that as a sign that the sender * is faster than the receiver. To reduce the transmit rate of the * sender, we delay the sending of the read notification by * decreasing the writeNotifyWindow. 
The notification is delayed * until the number of bytes used in the queue drops below the * writeNotifyWindow. */ if (!vsk->peerWaitingWriteDetected) { vsk->peerWaitingWriteDetected = TRUE; vsk->writeNotifyWindow -= PAGE_SIZE; if (vsk->writeNotifyWindow < vsk->writeNotifyMinWindow) { vsk->writeNotifyWindow = vsk->writeNotifyMinWindow; } } notifyLimit = vsk->consumeSize - vsk->writeNotifyWindow; #else notifyLimit = 0; #endif // VSOCK_OPTIMIZATION_FLOW_CONTROL /* * For now we ignore the wait information and just see if the free * space exceeds the notify limit. Note that improving this * function to be more intelligent will not require a protocol * change and will retain compatibility between endpoints with * mixed versions of this function. * * The notifyLimit is used to delay notifications in the case where * flow control is enabled. Below the test is expressed in terms of * free space in the queue: * if freeSpace > ConsumeSize - writeNotifyWindow then notify * An alternate way of expressing this is to rewrite the expression * to use the data ready in the receive queue: * if writeNotifyWindow > bufferReady then notify * as freeSpace == ConsumeSize - bufferReady. */ retval = VMCIQueue_FreeSpace(vsk->consumeQ, vsk->produceQ, vsk->consumeSize) > notifyLimit; #ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL if (retval) { /* * Once we notify the peer, we reset the detected flag so the * next wait will again cause a decrease in the window size. */ vsk->peerWaitingWriteDetected = FALSE; } #endif // VSOCK_OPTIMIZATION_FLOW_CONTROL return retval; #else return TRUE; #endif } /* *---------------------------------------------------------------------------- * * VSockVmciNotifyWaitingRead -- * * Determines if the conditions have been met to notify a waiting reader. * * Results: * TRUE if a notification should be sent, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static Bool VSockVmciNotifyWaitingRead(VSockVmciSock *vsk) // IN { #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY if (!vsk->peerWaitingRead) { return FALSE; } /* * For now we ignore the wait information and just see if there is any data * to read. Note that improving this function to be more intelligent will * not require a protocol change and will retain compatibility between * endpoints with mixed versions of this function. */ return VMCIQueue_BufReady(vsk->produceQ, vsk->consumeQ, vsk->produceSize) > 0; #else return TRUE; #endif } /* *---------------------------------------------------------------------------- * * VSockVmciHandleWaitingWrite -- * * Handles an incoming waiting write message. * * Results: * None. * * Side effects: * May send a notification to the peer, may update socket's wait info * structure. 
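 *
 *    Worked example for the window logic in VSockVmciNotifyWaitingWrite()
 *    above, assuming 4 KB pages and the default 64 KB queue: after one
 *    blocked write is detected, writeNotifyWindow drops from 65536 to 61440,
 *    so the READ notification is sent only once free space exceeds
 *    65536 - 61440 = 4096 bytes, rather than on every dequeue by the reader.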
* *---------------------------------------------------------------------------- */ static void VSockVmciHandleWaitingWrite(struct sock *sk, // IN VSockPacket *pkt, // IN Bool bottomHalf, // IN struct sockaddr_vm *dst, // IN struct sockaddr_vm *src) // IN { #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY VSockVmciSock *vsk; vsk = vsock_sk(sk); vsk->peerWaitingWrite = TRUE; memcpy(&vsk->peerWaitingWriteInfo, &pkt->u.wait, sizeof vsk->peerWaitingWriteInfo); if (VSockVmciNotifyWaitingWrite(vsk)) { Bool sent; if (bottomHalf) { sent = VSOCK_SEND_READ_BH(dst, src) > 0; } else { sent = VSOCK_SEND_READ(sk) > 0; } if (sent) { vsk->peerWaitingWrite = FALSE; } } #endif } /* *---------------------------------------------------------------------------- * * VSockVmciHandleWaitingRead -- * * Handles an incoming waiting read message. * * Results: * None. * * Side effects: * May send a notification to the peer, may update socket's wait info * structure. * *---------------------------------------------------------------------------- */ static void VSockVmciHandleWaitingRead(struct sock *sk, // IN VSockPacket *pkt, // IN Bool bottomHalf, // IN struct sockaddr_vm *dst, // IN struct sockaddr_vm *src) // IN { #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY VSockVmciSock *vsk; vsk = vsock_sk(sk); vsk->peerWaitingRead = TRUE; memcpy(&vsk->peerWaitingReadInfo, &pkt->u.wait, sizeof vsk->peerWaitingReadInfo); if (VSockVmciNotifyWaitingRead(vsk)) { Bool sent; if (bottomHalf) { sent = VSOCK_SEND_WROTE_BH(dst, src) > 0; } else { sent = VSOCK_SEND_WROTE(sk) > 0; } if (sent) { vsk->peerWaitingRead = FALSE; } } #endif } #endif /* *---------------------------------------------------------------------------- * * VSockVmciRecvDgramCB -- * * VMCI Datagram receive callback. This function is used specifically for * SOCK_DGRAM sockets. * * This is invoked as part of a tasklet that's scheduled when the VMCI * interrupt fires. This is run in bottom-half context and if it ever needs * to sleep it should defer that work to a work queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * An sk_buff is created and queued with this socket. * *---------------------------------------------------------------------------- */ static int VSockVmciRecvDgramCB(void *data, // IN VMCIDatagram *dg) // IN { struct sock *sk; size_t size; struct sk_buff *skb; ASSERT(dg); ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE); sk = (struct sock *)data; ASSERT(sk); /* XXX Figure out why sk->compat_sk_socket can be NULL. */ ASSERT(sk->compat_sk_socket ? sk->compat_sk_socket->type == SOCK_DGRAM : 1); size = VMCI_DG_SIZE(dg); /* * Attach the packet to the socket's receive queue as an sk_buff. */ skb = alloc_skb(size, GFP_ATOMIC); if (skb) { /* compat_sk_receive_skb() will do a sock_put(), so hold here. */ sock_hold(sk); skb_put(skb, size); memcpy(skb->data, dg, size); compat_sk_receive_skb(sk, skb, 0); } return 0; } #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciRecvStreamCB -- * * VMCI stream receive callback for control datagrams. This function is * used specifically for SOCK_STREAM sockets. * * This is invoked as part of a tasklet that's scheduled when the VMCI * interrupt fires. This is run in bottom-half context but it defers most * of its work to the packet handling work queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. 
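 *
 *    A control packet rides in the VMCI datagram itself, so the first
 *    sanity check below amounts to (sketch of the code that follows):
 *
 *       if (VMCI_DG_SIZE(dg) < sizeof(VSockPacket)) {
 *          return VMCI_ERROR_INVALID_ARGS;   // not a full VSock packet
 *       }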
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciRecvStreamCB(void *data,       // IN
                      VMCIDatagram *dg) // IN
{
   struct sock *sk;
   struct sockaddr_vm dst;
   struct sockaddr_vm src;
   VSockPacket *pkt;
   Bool processPkt;
   int err;

   ASSERT(dg);
   ASSERT(dg->payloadSize <= VMCI_MAX_DG_PAYLOAD_SIZE);

   sk = NULL;
   err = VMCI_SUCCESS;
   processPkt = TRUE;

   /*
    * Ignore incoming packets from contexts without sockets, or resources that
    * aren't vsock implementations.
    */
   if (!VSockAddr_SocketContext(VMCI_HANDLE_TO_CONTEXT_ID(dg->src)) ||
       VSOCK_PACKET_RID != VMCI_HANDLE_TO_RESOURCE_ID(dg->src)) {
      return VMCI_ERROR_NO_ACCESS;
   }

   if (VMCI_DG_SIZE(dg) < sizeof *pkt) {
      /* Drop datagrams that do not contain full VSock packets. */
      return VMCI_ERROR_INVALID_ARGS;
   }

   pkt = (VSockPacket *)dg;

   LOG_PACKET(pkt);

   /*
    * Find the socket that should handle this packet.  First we look for a
    * connected socket and if there is none we look for a socket bound to the
    * destination address.
    *
    * Note that we don't initialize the family member of the src and dst
    * sockaddr_vm since we don't want to call VMCISock_GetAFValue() and
    * possibly register the address family.
    */
   VSockAddr_InitNoFamily(&src,
                          VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
                          pkt->srcPort);
   VSockAddr_InitNoFamily(&dst,
                          VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst),
                          pkt->dstPort);

   sk = VSockVmciFindConnectedSocket(&src, &dst);
   if (!sk) {
      sk = VSockVmciFindBoundSocket(&dst);
      if (!sk) {
         /*
          * We could not find a socket for this specified address.  If this
          * packet is a RST, we just drop it.  If it is another packet, we send
          * a RST.  Note that we do not send a RST reply to RSTs so that we do
          * not continually send RSTs between two endpoints.
          *
          * Note that since this is a reply, dst is src and src is dst.
          */
         if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) {
            Log("unable to send reset.\n");
         }
         err = VMCI_ERROR_NOT_FOUND;
         goto out;
      }
   }

   /*
    * If the received packet type is beyond all types known to this
    * implementation, reply with an invalid message.  Hopefully this will help
    * when implementing backwards compatibility in the future.
    */
   if (pkt->type >= VSOCK_PACKET_TYPE_MAX) {
      if (VSOCK_SEND_INVALID_BH(&dst, &src) < 0) {
         Warning("unable to send reply for invalid packet.\n");
         err = VMCI_ERROR_INVALID_ARGS;
         goto out;
      }
   }

   /*
    * We do most everything in a work queue, but let's fast path the
    * notification of reads and writes to help data transfer performance.  We
    * can only do this if there is no process context code executing for this
    * socket since that may change the state.
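    *
    * (For example, if a process-context caller holds the socket lock inside
    * recvmsg(), touching socket state from this tasklet would race with it;
    * such packets take the work-queue path below instead.)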
*/ bh_lock_sock(sk); if (!compat_sock_owned_by_user(sk) && sk->compat_sk_state == SS_CONNECTED) { switch (pkt->type) { case VSOCK_PACKET_TYPE_WROTE: sk->compat_sk_data_ready(sk, 0); processPkt = FALSE; break; case VSOCK_PACKET_TYPE_READ: sk->compat_sk_write_space(sk); processPkt = FALSE; break; case VSOCK_PACKET_TYPE_WAITING_WRITE: VSockVmciHandleWaitingWrite(sk, pkt, TRUE, &dst, &src); processPkt = FALSE; break; case VSOCK_PACKET_TYPE_WAITING_READ: VSockVmciHandleWaitingRead(sk, pkt, TRUE, &dst, &src); processPkt = FALSE; break; } } bh_unlock_sock(sk); if (processPkt) { VSockRecvPktInfo *recvPktInfo; recvPktInfo = kmalloc(sizeof *recvPktInfo, GFP_ATOMIC); if (!recvPktInfo) { if (VSOCK_SEND_RESET_BH(&dst, &src, pkt) < 0) { Warning("unable to send reset\n"); } err = VMCI_ERROR_NO_MEM; goto out; } recvPktInfo->sk = sk; memcpy(&recvPktInfo->pkt, pkt, sizeof recvPktInfo->pkt); COMPAT_INIT_WORK(&recvPktInfo->work, VSockVmciRecvPktWork, recvPktInfo); compat_schedule_work(&recvPktInfo->work); /* * Clear sk so that the reference count incremented by one of the Find * functions above is not decremented below. We need that reference * count for the packet handler we've scheduled to run. */ sk = NULL; } out: if (sk) { sock_put(sk); } return err; } /* *---------------------------------------------------------------------------- * * VSockVmciPeerAttachCB -- * * Invoked when a peer attaches to a queue pair. * * Right now this does not do anything. * * Results: * None. * * Side effects: * May modify socket state and signal socket. * *---------------------------------------------------------------------------- */ static void VSockVmciPeerAttachCB(VMCIId subId, // IN VMCI_EventData *eData, // IN void *clientData) // IN { struct sock *sk; VMCIEventPayload_QP *ePayload; VSockVmciSock *vsk; ASSERT(eData); ASSERT(clientData); sk = (struct sock *)clientData; ePayload = VMCIEventDataPayload(eData); vsk = vsock_sk(sk); bh_lock_sock(sk); /* * XXX This is lame, we should provide a way to lookup sockets by qpHandle. */ if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) { /* * XXX This doesn't do anything, but in the future we may want to set * a flag here to verify the attach really did occur and we weren't just * sent a datagram claiming it was. */ goto out; } out: bh_unlock_sock(sk); } /* *---------------------------------------------------------------------------- * * VSockVmciHandleDetach -- * * Perform the work necessary when the peer has detached. * * Note that this assumes the socket lock is held. * * Results: * None. * * Side effects: * The socket's and its peer's shutdown mask will be set appropriately, * and any callers waiting on this socket will be awoken. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciHandleDetach(struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(sk); vsk = vsock_sk(sk); if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) { ASSERT(vsk->produceQ); ASSERT(vsk->consumeQ); #ifdef VMX86_TOOLS if (sk->compat_sk_type == SOCK_STREAM && sk->compat_sk_state == SS_CONNECTED) { compat_sock_set_done(sk); } #endif /* On a detach the peer will not be sending or receiving anymore. */ vsk->peerShutdown = SHUTDOWN_MASK; /* * We should not be sending anymore since the peer won't be there to * receive, but we can still receive if there is data left in our consume * queue. 
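 *
 * Concretely: peerShutdown becomes SHUTDOWN_MASK and SEND_SHUTDOWN is always
 * set locally, but RCV_SHUTDOWN (and the drop to SS_UNCONNECTED) only happens
 * below if VMCIQueue_BufReady() reports the consume queue already empty;
 * otherwise the reader is left free to drain the remaining data.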
*/ sk->compat_sk_shutdown |= SEND_SHUTDOWN; if (VMCIQueue_BufReady(vsk->consumeQ, vsk->produceQ, vsk->consumeSize) <= 0) { sk->compat_sk_shutdown |= RCV_SHUTDOWN; sk->compat_sk_state = SS_UNCONNECTED; } sk->compat_sk_state_change(sk); } } /* *---------------------------------------------------------------------------- * * VSockVmciPeerDetachCB -- * * Invoked when a peer detaches from a queue pair. * * Results: * None. * * Side effects: * May modify socket state and signal socket. * *---------------------------------------------------------------------------- */ static void VSockVmciPeerDetachCB(VMCIId subId, // IN VMCI_EventData *eData, // IN void *clientData) // IN { struct sock *sk; VMCIEventPayload_QP *ePayload; VSockVmciSock *vsk; ASSERT(eData); ASSERT(clientData); sk = (struct sock *)clientData; ePayload = VMCIEventDataPayload(eData); vsk = vsock_sk(sk); if (VMCI_HANDLE_INVALID(ePayload->handle)) { return; } /* * XXX This is lame, we should provide a way to lookup sockets by qpHandle. */ bh_lock_sock(sk); if (VMCI_HANDLE_EQUAL(vsk->qpHandle, ePayload->handle)) { VSockVmciHandleDetach(sk); } bh_unlock_sock(sk); } /* *---------------------------------------------------------------------------- * * VSockVmciQPResumedCB -- * * Invoked when a VM is resumed. We must mark all connected stream sockets * as detached. * * Results: * None. * * Side effects: * May modify socket state and signal socket. * *---------------------------------------------------------------------------- */ static void VSockVmciQPResumedCB(VMCIId subId, // IN VMCI_EventData *eData, // IN void *clientData) // IN { uint32 i; spin_lock_bh(&vsockTableLock); /* * XXX This loop should probably be provided by util.{h,c}, but that's for * another day. */ for (i = 0; i < ARRAYSIZE(vsockConnectedTable); i++) { VSockVmciSock *vsk; list_for_each_entry(vsk, &vsockConnectedTable[i], connectedTable) { struct sock *sk = sk_vsock(vsk); /* * XXX Technically this is racy but the resulting outcome from such * a race is relatively harmless. My next change will be a fix to * this. */ VSockVmciHandleDetach(sk); } } spin_unlock_bh(&vsockTableLock); } /* *---------------------------------------------------------------------------- * * VSockVmciPendingWork -- * * Releases the resources for a pending socket if it has not reached the * connected state and been accepted by a user process. * * Results: * None. * * Side effects: * The socket may be removed from the connected list and all its resources * freed. * *---------------------------------------------------------------------------- */ static void VSockVmciPendingWork(compat_delayed_work_arg work) // IN { struct sock *sk; struct sock *listener; VSockVmciSock *vsk; Bool cleanup; vsk = COMPAT_DELAYED_WORK_GET_DATA(work, VSockVmciSock, dwork); ASSERT(vsk); sk = sk_vsock(vsk); listener = vsk->listener; cleanup = TRUE; ASSERT(listener); lock_sock(listener); lock_sock(sk); /* * The socket should be on the pending list or the accept queue, but not * both. It's also possible that the socket isn't on either. */ ASSERT( ( VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk)) || (!VSockVmciIsPending(sk) && VSockVmciInAcceptQueue(sk)) || (!VSockVmciIsPending(sk) && !VSockVmciInAcceptQueue(sk))); if (VSockVmciIsPending(sk)) { VSockVmciRemovePending(listener, sk); } else if (!vsk->rejected) { /* * We are not on the pending list and accept() did not reject us, so we * must have been accepted by our user process. We just need to drop our * references to the sockets and be on our way. 
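 *
 * Reference bookkeeping for this path: VSockVmciRecvListen() took one
 * reference each on the listener and the pending socket for this delayed
 * work item, and the accept queue holds its own.  The accepted case below
 * returns only the two work-item references (the unconditional sock_put()
 * calls at the end); the cleanup case drops one more so the unaccepted
 * socket can be freed.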
*/ cleanup = FALSE; goto out; } listener->compat_sk_ack_backlog--; /* * We need to remove ourself from the global connected sockets list so * incoming packets can't find this socket, and to reduce the reference * count. */ if (VSockVmciInConnectedTable(sk)) { VSockVmciRemoveConnected(sk); } sk->compat_sk_state = SS_FREE; out: release_sock(sk); release_sock(listener); if (cleanup) { sock_put(sk); } sock_put(sk); sock_put(listener); } /* *---------------------------------------------------------------------------- * * VSockVmciRecvPktWork -- * * Handles an incoming control packet for the provided socket. This is the * state machine for our stream sockets. * * Results: * None. * * Side effects: * May set state and wakeup threads waiting for socket state to change. * *---------------------------------------------------------------------------- */ static void VSockVmciRecvPktWork(compat_work_arg work) // IN { int err; VSockRecvPktInfo *recvPktInfo; VSockPacket *pkt; VSockVmciSock *vsk; struct sock *sk; recvPktInfo = COMPAT_WORK_GET_DATA(work, VSockRecvPktInfo); ASSERT(recvPktInfo); err = 0; sk = recvPktInfo->sk; pkt = &recvPktInfo->pkt; vsk = vsock_sk(sk); ASSERT(vsk); ASSERT(pkt); ASSERT(pkt->type < VSOCK_PACKET_TYPE_MAX); lock_sock(sk); switch (sk->compat_sk_state) { case SS_LISTEN: err = VSockVmciRecvListen(sk, pkt); break; case SS_CONNECTING: /* * Processing of pending connections for servers goes through the * listening socket, so see VSockVmciRecvListen() for that path. */ err = VSockVmciRecvConnectingClient(sk, pkt); break; case SS_CONNECTED: err = VSockVmciRecvConnected(sk, pkt); break; default: /* * Because this function does not run in the same context as * VSockVmciRecvStreamCB it is possible that the socket * has closed. We need to let the other side know or it could * be sitting in a connect and hang forever. Send a reset to prevent * that. */ VSOCK_SEND_RESET(sk, pkt); goto out; } out: release_sock(sk); kfree(recvPktInfo); /* * Release reference obtained in the stream callback when we fetched this * socket out of the bound or connected list. */ sock_put(sk); } /* *---------------------------------------------------------------------------- * * VSockVmciRecvListen -- * * Receives packets for sockets in the listen state. * * Note that this assumes the socket lock is held. * * Results: * Zero on success, negative error code on failure. * * Side effects: * A new socket may be created and a negotiate control packet is sent. * *---------------------------------------------------------------------------- */ static int VSockVmciRecvListen(struct sock *sk, // IN VSockPacket *pkt) // IN { VSockVmciSock *vsk; struct sock *pending; VSockVmciSock *vpending; int err; uint64 qpSize; ASSERT(sk); ASSERT(pkt); ASSERT(sk->compat_sk_state == SS_LISTEN); vsk = vsock_sk(sk); err = 0; /* * Because we are in the listen state, we could be receiving a packet for * ourself or any previous connection requests that we received. If it's * the latter, we try to find a socket in our list of pending connections * and, if we do, call the appropriate handler for the state that that * socket is in. Otherwise we try to service the connection request. 
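 *
 * For example (addresses illustrative): an OFFER arriving from
 * (cid 5, port 1026) for a listener bound to port 80 matches the pending
 * socket created by that peer's earlier REQUEST and is handed to
 * VSockVmciRecvConnectingServer(); a REQUEST from a peer with no pending
 * socket falls through to the connection-request handling below.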
*/ pending = VSockVmciGetPending(sk, pkt); if (pending) { lock_sock(pending); switch (pending->compat_sk_state) { case SS_CONNECTING: err = VSockVmciRecvConnectingServer(sk, pending, pkt); break; default: VSOCK_SEND_RESET(pending, pkt); err = -EINVAL; } if (err < 0) { VSockVmciRemovePending(sk, pending); } release_sock(pending); VSockVmciReleasePending(pending); return err; } /* * The listen state only accepts connection requests. Reply with a reset * unless we received a reset. */ if (pkt->type != VSOCK_PACKET_TYPE_REQUEST || pkt->u.size == 0) { VSOCK_SEND_RESET(sk, pkt); return -EINVAL; } /* * If this socket can't accommodate this connection request, we send * a reset. Otherwise we create and initialize a child socket and reply * with a connection negotiation. */ if (sk->compat_sk_ack_backlog >= sk->compat_sk_max_ack_backlog) { VSOCK_SEND_RESET(sk, pkt); return -ECONNREFUSED; } #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) pending = __VSockVmciCreate(NULL, GFP_KERNEL, sk->compat_sk_type); #else pending = __VSockVmciCreate(compat_sock_net(sk), NULL, GFP_KERNEL, sk->compat_sk_type); #endif if (!pending) { VSOCK_SEND_RESET(sk, pkt); return -ENOMEM; } vpending = vsock_sk(pending); ASSERT(vpending); ASSERT(vsk->localAddr.svm_port == pkt->dstPort); VSockAddr_Init(&vpending->localAddr, VMCI_GetContextID(), pkt->dstPort); VSockAddr_Init(&vpending->remoteAddr, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src), pkt->srcPort); /* * If the proposed size fits within our min/max, accept * it. Otherwise propose our own size. */ if (pkt->u.size >= vsk->queuePairMinSize && pkt->u.size <= vsk->queuePairMaxSize) { qpSize = pkt->u.size; } else { qpSize = vsk->queuePairSize; } err = VSOCK_SEND_NEGOTIATE(pending, qpSize); if (err < 0) { VSOCK_SEND_RESET(sk, pkt); sock_put(pending); err = VSockVmci_ErrorToVSockError(err); goto out; } VSockVmciAddPending(sk, pending); sk->compat_sk_ack_backlog++; pending->compat_sk_state = SS_CONNECTING; vpending->produceSize = vpending->consumeSize = vpending->writeNotifyWindow = pkt->u.size; /* * We might never receive another message for this socket and it's not * connected to any process, so we have to ensure it gets cleaned up * ourself. Our delayed work function will take care of that. Note that we * do not ever cancel this function since we have few guarantees about its * state when calling cancel_delayed_work(). Instead we hold a reference on * the socket for that function and make it capable of handling cases where * it needs to do nothing but release that reference. */ vpending->listener = sk; sock_hold(sk); sock_hold(pending); COMPAT_INIT_DELAYED_WORK(&vpending->dwork, VSockVmciPendingWork, vpending); compat_schedule_delayed_work(&vpending->dwork, HZ); out: return err; } /* *---------------------------------------------------------------------------- * * VSockVmciRecvConnectingServer -- * * Receives packets for sockets in the connecting state on the server side. * * Connecting sockets on the server side can only receive queue pair offer * packets. All others should be treated as cause for closing the * connection. * * Note that this assumes the socket lock is held for both sk and pending. * * Results: * Zero on success, negative error code on failure. * * Side effects: * A queue pair may be created, an attach control packet may be sent, the * socket may transition to the connected state, and a pending caller in * accept() may be woken up. 
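 *
 *    The order below matters: subscribe to VMCI_EVENT_QP_PEER_DETACH first
 *    (so a detach cannot be missed), then VMCIQueuePair_Alloc() with
 *    VMCI_QPFLAG_ATTACH_ONLY on the handle taken from the OFFER, and only
 *    then VSOCK_SEND_ATTACH() to the peer; any failure along the way sends
 *    a reset and tears the pending socket down.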
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciRecvConnectingServer(struct sock *listener, // IN: the listening socket
                              struct sock *pending,  // IN: the pending connection
                              VSockPacket *pkt)      // IN: current packet
{
   VSockVmciSock *vpending;
   VMCIHandle handle;
   VMCIQueue *produceQ;
   VMCIQueue *consumeQ;
   Bool isLocal;
   uint32 flags;
   VMCIId detachSubId;
   int err;
   int skerr;

   ASSERT(listener);
   ASSERT(pkt);
   ASSERT(listener->compat_sk_state == SS_LISTEN);
   ASSERT(pending->compat_sk_state == SS_CONNECTING);

   vpending = vsock_sk(pending);
   detachSubId = VMCI_INVALID_ID;

   switch (pkt->type) {
   case VSOCK_PACKET_TYPE_OFFER:
      if (VMCI_HANDLE_INVALID(pkt->u.handle)) {
         VSOCK_SEND_RESET(pending, pkt);
         skerr = EPROTO;
         err = -EINVAL;
         goto destroy;
      }
      break;
   default:
      /* Close and cleanup the connection. */
      VSOCK_SEND_RESET(pending, pkt);
      skerr = EPROTO;
      err = pkt->type == VSOCK_PACKET_TYPE_RST ? 0 : -EINVAL;
      goto destroy;
   }

   ASSERT(pkt->type == VSOCK_PACKET_TYPE_OFFER);

   /*
    * In order to complete the connection we need to attach to the offered
    * queue pair and send an attach notification.  We also subscribe to the
    * detach event so we know when our peer goes away, and we do that before
    * attaching so we don't miss an event.  If all this succeeds, we update
    * our state and wakeup anything waiting in accept() for a connection.
    */

   /*
    * We don't care about attach since we ensure the other side has attached
    * by specifying the ATTACH_ONLY flag below.
    */
   err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH,
                             VSockVmciPeerDetachCB,
                             pending,
                             &detachSubId);
   if (err < VMCI_SUCCESS) {
      VSOCK_SEND_RESET(pending, pkt);
      err = VSockVmci_ErrorToVSockError(err);
      skerr = -err;
      goto destroy;
   }

   vpending->detachSubId = detachSubId;

   /* Now attach to the queue pair the client created. */
   handle = pkt->u.handle;
   isLocal = vpending->remoteAddr.svm_cid == vpending->localAddr.svm_cid;
   flags = VMCI_QPFLAG_ATTACH_ONLY;
   flags |= isLocal ? VMCI_QPFLAG_LOCAL : 0;

   err = VMCIQueuePair_Alloc(&handle,
                             &produceQ, vpending->produceSize,
                             &consumeQ, vpending->consumeSize,
                             VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src),
                             flags);
   if (err < 0) {
      /* We cannot complete this connection: send a reset and close. */
      Log("Could not attach to queue pair with %d\n", err);
      VSOCK_SEND_RESET(pending, pkt);
      err = VSockVmci_ErrorToVSockError(err);
      skerr = -err;
      goto destroy;
   }

   VMCIQueue_Init(handle, produceQ);

   ASSERT(VMCI_HANDLE_EQUAL(handle, pkt->u.handle));
   vpending->qpHandle = handle;
   vpending->produceQ = produceQ;
   vpending->consumeQ = consumeQ;

   /* Notify our peer of our attach. */
   err = VSOCK_SEND_ATTACH(pending, handle);
   if (err < 0) {
      Log("Could not send attach\n");
      VSOCK_SEND_RESET(pending, pkt);
      err = VSockVmci_ErrorToVSockError(err);
      skerr = -err;
      goto destroy;
   }

   /*
    * We have a connection.  Add our connection to the connected list so it
    * no longer goes through the listening socket, move it from the
    * listener's pending list to the accept queue so callers of accept() can
    * find it.  Note that enqueueing the socket increments the reference
    * count, so even if a reset comes before the connection is accepted, the
    * socket will be valid until it is removed from the queue.
    */
   pending->compat_sk_state = SS_CONNECTED;

   VSockVmciInsertConnected(vsockConnectedSocketsVsk(vpending), pending);

   VSockVmciRemovePending(listener, pending);
   VSockVmciEnqueueAccept(listener, pending);

   /*
    * Callers of accept() will be waiting on the listening socket, not the
    * pending socket.
    */
   listener->compat_sk_state_change(listener);

   return 0;

destroy:
   pending->compat_sk_err = skerr;
   pending->compat_sk_state = SS_UNCONNECTED;

   /*
    * As long as we drop our reference, all necessary cleanup will be handled
    * when the cleanup function drops its reference and our destruct
    * implementation is called.  Note that since the listen handler will
    * remove pending from the pending list upon our failure, the cleanup
    * function won't drop the additional reference, which is why we do it
    * here.
    */
   sock_put(pending);

   return err;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciRecvConnectingClient --
 *
 *    Receives packets for sockets in the connecting state on the client
 *    side.
 *
 *    Connecting sockets on the client side should only receive attach
 *    packets.  All others should be treated as cause for closing the
 *    connection.
 *
 *    Note that this assumes the socket lock is held for both sk and pending.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    The socket may transition to the connected state and wakeup the pending
 *    caller of connect().
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciRecvConnectingClient(struct sock *sk,  // IN: socket
                              VSockPacket *pkt) // IN: current packet
{
   VSockVmciSock *vsk;
   int err;
   int skerr;

   ASSERT(sk);
   ASSERT(pkt);
   ASSERT(sk->compat_sk_state == SS_CONNECTING);

   vsk = vsock_sk(sk);

   switch (pkt->type) {
   case VSOCK_PACKET_TYPE_ATTACH:
      if (VMCI_HANDLE_INVALID(pkt->u.handle) ||
          !VMCI_HANDLE_EQUAL(pkt->u.handle, vsk->qpHandle)) {
         skerr = EPROTO;
         err = -EINVAL;
         goto destroy;
      }

      /*
       * Signify the socket is connected and wakeup the waiter in connect().
       * Also place the socket in the connected table for accounting (it can
       * already be found since it's in the bound table).
       */
      sk->compat_sk_state = SS_CONNECTED;
      sk->compat_sk_socket->state = SS_CONNECTED;
      VSockVmciInsertConnected(vsockConnectedSocketsVsk(vsk), sk);
      sk->compat_sk_state_change(sk);
      break;
   case VSOCK_PACKET_TYPE_NEGOTIATE:
      if (pkt->u.size == 0 ||
          VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src) != vsk->remoteAddr.svm_cid ||
          pkt->srcPort != vsk->remoteAddr.svm_port ||
          !VMCI_HANDLE_INVALID(vsk->qpHandle) ||
          vsk->produceQ ||
          vsk->consumeQ ||
          vsk->produceSize != 0 ||
          vsk->consumeSize != 0 ||
          vsk->attachSubId != VMCI_INVALID_ID ||
          vsk->detachSubId != VMCI_INVALID_ID) {
         skerr = EPROTO;
         err = -EINVAL;
         goto destroy;
      }

      err = VSockVmciRecvConnectingClientNegotiate(sk, pkt);
      if (err) {
         skerr = -err;
         goto destroy;
      }
      break;
   case VSOCK_PACKET_TYPE_RST:
      skerr = ECONNRESET;
      err = 0;
      goto destroy;
   default:
      /* Close and cleanup the connection. */
      skerr = EPROTO;
      err = -EINVAL;
      goto destroy;
   }

   ASSERT(pkt->type == VSOCK_PACKET_TYPE_ATTACH ||
          pkt->type == VSOCK_PACKET_TYPE_NEGOTIATE);

   return 0;

destroy:
   VSOCK_SEND_RESET(sk, pkt);

   sk->compat_sk_state = SS_UNCONNECTED;
   sk->compat_sk_err = skerr;
   sk->compat_sk_error_report(sk);

   return err;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciRecvConnectingClientNegotiate --
 *
 *    Handles a negotiate packet for a client in the connecting state.
 *
 *    Note that this assumes the socket lock is held for both sk and pending.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    The socket may transition to the connected state and wakeup the pending
 *    caller of connect().
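 *
 *    For example (sizes illustrative): if connect() asked for 65536 bytes
 *    but the server counter-proposed 131072 in its NEGOTIATE, the value is
 *    accepted here only if it still falls within [queuePairMinSize,
 *    queuePairMaxSize]; the queue pair is then allocated at that size in
 *    both directions and offered back to the server, otherwise the
 *    connection is reset.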
* *---------------------------------------------------------------------------- */ static int VSockVmciRecvConnectingClientNegotiate(struct sock *sk, // IN: socket VSockPacket *pkt) // IN: current packet { int err; VSockVmciSock *vsk; VMCIHandle handle; VMCIQueue *produceQ; VMCIQueue *consumeQ; VMCIId attachSubId; VMCIId detachSubId; Bool isLocal; vsk = vsock_sk(sk); handle = VMCI_INVALID_HANDLE; attachSubId = VMCI_INVALID_ID; detachSubId = VMCI_INVALID_ID; ASSERT(sk); ASSERT(pkt); ASSERT(pkt->u.size > 0); ASSERT(vsk->remoteAddr.svm_cid == VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src)); ASSERT(vsk->remoteAddr.svm_port == pkt->srcPort); ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle)); ASSERT(vsk->produceQ == NULL); ASSERT(vsk->consumeQ == NULL); ASSERT(vsk->produceSize == 0); ASSERT(vsk->consumeSize == 0); ASSERT(vsk->attachSubId == VMCI_INVALID_ID); ASSERT(vsk->detachSubId == VMCI_INVALID_ID); /* Verify that we're OK with the proposed queue pair size */ if (pkt->u.size < vsk->queuePairMinSize || pkt->u.size > vsk->queuePairMaxSize) { err = -EINVAL; goto destroy; } /* * Subscribe to attach and detach events first. * * XXX We attach once for each queue pair created for now so it is easy * to find the socket (it's provided), but later we should only subscribe * once and add a way to lookup sockets by queue pair handle. */ err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_ATTACH, VSockVmciPeerAttachCB, sk, &attachSubId); if (err < VMCI_SUCCESS) { err = VSockVmci_ErrorToVSockError(err); goto destroy; } err = VMCIEvent_Subscribe(VMCI_EVENT_QP_PEER_DETACH, VSockVmciPeerDetachCB, sk, &detachSubId); if (err < VMCI_SUCCESS) { err = VSockVmci_ErrorToVSockError(err); goto destroy; } /* Make VMCI select the handle for us. */ handle = VMCI_INVALID_HANDLE; isLocal = vsk->remoteAddr.svm_cid == vsk->localAddr.svm_cid; err = VMCIQueuePair_Alloc(&handle, &produceQ, pkt->u.size, &consumeQ, pkt->u.size, vsk->remoteAddr.svm_cid, isLocal ? VMCI_QPFLAG_LOCAL : 0); if (err < VMCI_SUCCESS) { err = VSockVmci_ErrorToVSockError(err); goto destroy; } VMCIQueue_Init(handle, produceQ); err = VSOCK_SEND_QP_OFFER(sk, handle); if (err < 0) { err = VSockVmci_ErrorToVSockError(err); goto destroy; } vsk->qpHandle = handle; vsk->produceQ = produceQ; vsk->consumeQ = consumeQ; vsk->produceSize = vsk->consumeSize = vsk->writeNotifyWindow = pkt->u.size; vsk->attachSubId = attachSubId; vsk->detachSubId = detachSubId; return 0; destroy: if (attachSubId != VMCI_INVALID_ID) { VMCIEvent_Unsubscribe(attachSubId); ASSERT(vsk->attachSubId == VMCI_INVALID_ID); } if (detachSubId != VMCI_INVALID_ID) { VMCIEvent_Unsubscribe(detachSubId); ASSERT(vsk->detachSubId == VMCI_INVALID_ID); } if (!VMCI_HANDLE_INVALID(handle)) { VMCIQueuePair_Detach(handle); ASSERT(VMCI_HANDLE_INVALID(vsk->qpHandle)); } return err; } /* *---------------------------------------------------------------------------- * * VSockVmciRecvConnected -- * * Receives packets for sockets in the connected state. * * Connected sockets should only ever receive detach, wrote, read, or reset * control messages. Others are treated as errors that are ignored. * * Wrote and read signify that the peer has produced or consumed, * respectively. * * Detach messages signify that the connection is being closed cleanly and * reset messages signify that the connection is being closed in error. * * Note that this assumes the socket lock is held. * * Results: * Zero on success, negative error code on failure. 
 *
 * Side effects:
 *    The socket may transition to the disconnecting state, waiting threads
 *    may be awoken, and read/write notifications may be delivered.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciRecvConnected(struct sock *sk,  // IN
                       VSockPacket *pkt) // IN
{
   VSockVmciSock *vsk = vsock_sk(sk);

   ASSERT(sk);
   ASSERT(pkt);
   ASSERT(sk->compat_sk_state == SS_CONNECTED);

   /*
    * In cases where we are closing the connection, it's sufficient to mark
    * the state change (and maybe error) and wake up any waiting threads.
    * Since this is a connected socket, it's owned by a user process and will
    * be cleaned up when the failure is passed back on the current or next
    * system call.  Our system call implementations must therefore check for
    * error and state changes on entry and when being awoken.
    */
   switch (pkt->type) {
   case VSOCK_PACKET_TYPE_SHUTDOWN:
      if (pkt->u.mode) {
         vsk->peerShutdown |= pkt->u.mode;
         sk->compat_sk_state_change(sk);
      }
      break;

   case VSOCK_PACKET_TYPE_RST:
      /*
       * It is possible that we sent our peer a message (e.g. a WAITING_READ)
       * right before we got notified that the peer had detached.  If that
       * happens then we can get a RST pkt back from our peer even though
       * there is data available for us to read.  In that case, don't
       * shutdown the socket completely but instead allow the local client to
       * finish reading data off the queuepair.  Always treat a RST pkt in
       * connected mode like a clean shutdown.
       */
      vsk->peerShutdown = SHUTDOWN_MASK;
      sk->compat_sk_shutdown |= SEND_SHUTDOWN;
      if (VMCIQueue_BufReady(vsk->consumeQ,
                             vsk->produceQ, vsk->consumeSize) <= 0) {
         sk->compat_sk_state = SS_DISCONNECTING;
         sk->compat_sk_shutdown = SHUTDOWN_MASK;
      }
      sk->compat_sk_state_change(sk);
      break;

   case VSOCK_PACKET_TYPE_WROTE:
      sk->compat_sk_data_ready(sk, 0);
      break;

   case VSOCK_PACKET_TYPE_READ:
      sk->compat_sk_write_space(sk);
      break;

   case VSOCK_PACKET_TYPE_WAITING_WRITE:
      VSockVmciHandleWaitingWrite(sk, pkt, FALSE, NULL, NULL);
      break;

   case VSOCK_PACKET_TYPE_WAITING_READ:
      VSockVmciHandleWaitingRead(sk, pkt, FALSE, NULL, NULL);
      break;

   default:
      return -EINVAL;
   }

   return 0;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciSendControlPktBH --
 *
 *    Sends a control packet from bottom-half context.
 *
 * Results:
 *    Size of datagram sent on success, negative error code otherwise.  Note
 *    that we return a VMCI error code since that's what callers will need to
 *    provide.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciSendControlPktBH(struct sockaddr_vm *src, // IN
                          struct sockaddr_vm *dst, // IN
                          VSockPacketType type,    // IN
                          uint64 size,             // IN
                          uint64 mode,             // IN
                          VSockWaitingInfo *wait,  // IN
                          VMCIHandle handle)       // IN
{
   /*
    * Note that it is safe to use a single packet across all CPUs since two
    * tasklets of the same type are guaranteed to not ever run simultaneously.
    * If that ever changes, or VMCI stops using tasklets, we can use per-cpu
    * packets.
    */
   static VSockPacket pkt;

   VSockPacket_Init(&pkt, src, dst, type, size, mode, wait, handle);

   LOG_PACKET(&pkt);

#ifdef VSOCK_CONTROL_PACKET_COUNT
   controlPacketCount[pkt.type]++;
#endif

   return VMCIDatagram_Send(&pkt.dg);
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciSendControlPkt --
 *
 *    Sends a control packet.
 *
 * Results:
 *    Size of datagram sent on success, negative error on failure.
 *
 * Side effects:
 *    None.
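 *
 *    Unlike VSockVmciSendControlPktBH() above, this variant runs in process
 *    context, so the packet is kmalloc'd per call with GFP_KERNEL instead of
 *    reusing the single static packet that tasklet serialization makes safe
 *    in the bottom-half path.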
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciSendControlPkt(struct sock *sk,         // IN
                        VSockPacketType type,    // IN
                        uint64 size,             // IN
                        uint64 mode,             // IN
                        VSockWaitingInfo *wait,  // IN
                        VMCIHandle handle)       // IN
{
   VSockPacket *pkt;
   VSockVmciSock *vsk;
   int err;

   ASSERT(sk);
   /*
    * New sockets for connection establishment won't have socket structures
    * yet; if one exists, ensure it is of the proper type.
    */
   ASSERT(sk->compat_sk_socket ?
             sk->compat_sk_socket->type == SOCK_STREAM :
             1);

   vsk = vsock_sk(sk);

   if (!VSockAddr_Bound(&vsk->localAddr)) {
      return -EINVAL;
   }

   if (!VSockAddr_Bound(&vsk->remoteAddr)) {
      return -EINVAL;
   }

   pkt = kmalloc(sizeof *pkt, GFP_KERNEL);
   if (!pkt) {
      return -ENOMEM;
   }

   VSockPacket_Init(pkt, &vsk->localAddr, &vsk->remoteAddr,
                    type, size, mode, wait, handle);

   LOG_PACKET(pkt);

   err = VMCIDatagram_Send(&pkt->dg);
   kfree(pkt);
   if (err < 0) {
      return VSockVmci_ErrorToVSockError(err);
   }

#ifdef VSOCK_CONTROL_PACKET_COUNT
   /* Use the local type; pkt was freed above. */
   controlPacketCount[type]++;
#endif

   return err;
}
#endif


/*
 *----------------------------------------------------------------------------
 *
 * __VSockVmciBind --
 *
 *    Common functionality needed to bind the specified address to the
 *    VSocket.  If VMADDR_CID_ANY or VMADDR_PORT_ANY are specified, the
 *    context ID or port are selected automatically.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    On success, a new datagram handle is created.
 *
 *----------------------------------------------------------------------------
 */

static int
__VSockVmciBind(struct sock *sk,          // IN/OUT
                struct sockaddr_vm *addr) // IN
{
   static unsigned int port = LAST_RESERVED_PORT + 1;
   struct sockaddr_vm newAddr;
   VSockVmciSock *vsk;
   VMCIId cid;
   int err;

   ASSERT(sk);
   ASSERT(sk->compat_sk_socket);
   ASSERT(addr);

   vsk = vsock_sk(sk);

   /* First ensure this socket isn't already bound. */
   if (VSockAddr_Bound(&vsk->localAddr)) {
      return -EINVAL;
   }

   /*
    * Now bind to the provided address or select appropriate values if none
    * are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY).  Note that like
    * AF_INET prevents binding to a non-local IP address (in most cases), we
    * only allow binding to the local CID.
    */
   VSockAddr_Init(&newAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);

   cid = VMCI_GetContextID();
   if (addr->svm_cid != cid &&
       addr->svm_cid != VMADDR_CID_ANY) {
      return -EADDRNOTAVAIL;
   }

   newAddr.svm_cid = cid;

   switch (sk->compat_sk_socket->type) {
   case SOCK_STREAM:
      spin_lock_bh(&vsockTableLock);

      if (addr->svm_port == VMADDR_PORT_ANY) {
         Bool found = FALSE;
         unsigned int i;

         for (i = 0; i < MAX_PORT_RETRIES; i++) {
            if (port <= LAST_RESERVED_PORT) {
               port = LAST_RESERVED_PORT + 1;
            }

            newAddr.svm_port = port++;

            if (!__VSockVmciFindBoundSocket(&newAddr)) {
               found = TRUE;
               break;
            }
         }

         if (!found) {
            err = -EADDRNOTAVAIL;
            goto out;
         }
      } else {
         /*
          * If the port is in the reserved range, ensure the caller has the
          * necessary privileges.
          */
         if (addr->svm_port <= LAST_RESERVED_PORT &&
             !capable(CAP_NET_BIND_SERVICE)) {
            err = -EACCES;
            goto out;
         }

         newAddr.svm_port = addr->svm_port;
         if (__VSockVmciFindBoundSocket(&newAddr)) {
            err = -EADDRINUSE;
            goto out;
         }
      }
      break;

   case SOCK_DGRAM:
      /* VMCI will select a resource ID for us if we provide VMCI_INVALID_ID. */
      newAddr.svm_port = addr->svm_port == VMADDR_PORT_ANY ?
                            VMCI_INVALID_ID : addr->svm_port;

      if (newAddr.svm_port <= LAST_RESERVED_PORT &&
          !capable(CAP_NET_BIND_SERVICE)) {
         err = -EACCES;
         goto out;
      }

      err = VMCIDatagram_CreateHnd(newAddr.svm_port, 0,
                                   VSockVmciRecvDgramCB, sk,
                                   &vsk->dgHandle);
      if (err != VMCI_SUCCESS ||
          vsk->dgHandle.context == VMCI_INVALID_ID ||
          vsk->dgHandle.resource == VMCI_INVALID_ID) {
         err = VSockVmci_ErrorToVSockError(err);
         goto out;
      }

      newAddr.svm_port = VMCI_HANDLE_TO_RESOURCE_ID(vsk->dgHandle);
      break;

   default:
      err = -EINVAL;
      goto out;
   }

   VSockAddr_Init(&vsk->localAddr, newAddr.svm_cid, newAddr.svm_port);

   /*
    * Remove stream sockets from the unbound list and add them to the hash
    * table for easy lookup by address.  The unbound list is simply an extra
    * entry at the end of the hash table, a trick used by AF_UNIX.
    */
   if (sk->compat_sk_socket->type == SOCK_STREAM) {
      __VSockVmciRemoveBound(sk);
      __VSockVmciInsertBound(vsockBoundSockets(&vsk->localAddr), sk);
   }

   err = 0;

out:
   if (sk->compat_sk_socket->type == SOCK_STREAM) {
      spin_unlock_bh(&vsockTableLock);
   }
   return err;
}


#ifdef VMX86_TOOLS
/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciSendWaitingWrite --
 *
 *    Sends a waiting write notification to this socket's peer.
 *
 * Results:
 *    TRUE if the datagram is sent successfully, FALSE otherwise.
 *
 * Side effects:
 *    Our peer will notify us when there is room to write into our produce
 *    queue.
 *
 *----------------------------------------------------------------------------
 */

static Bool
VSockVmciSendWaitingWrite(struct sock *sk,   // IN
                          uint64 roomNeeded) // IN
{
#ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY
   VSockVmciSock *vsk;
   VSockWaitingInfo waitingInfo;
   uint64 tail;
   uint64 head;
   uint64 roomLeft;

   ASSERT(sk);

   vsk = vsock_sk(sk);

   VMCIQueue_GetPointers(vsk->produceQ, vsk->consumeQ, &tail, &head);
   roomLeft = vsk->produceSize - tail;
   if (roomNeeded + 1 >= roomLeft) {
      /* Wraps around to current generation. */
      waitingInfo.offset = roomNeeded + 1 - roomLeft;
      waitingInfo.generation = vsk->produceQGeneration;
   } else {
      waitingInfo.offset = tail + roomNeeded + 1;
      waitingInfo.generation = vsk->produceQGeneration - 1;
   }

   return VSOCK_SEND_WAITING_WRITE(sk, &waitingInfo) > 0;
#else
   return TRUE;
#endif
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciSendWaitingRead --
 *
 *    Sends a waiting read notification to this socket's peer.
 *
 * Results:
 *    TRUE if the datagram is sent successfully, FALSE otherwise.
 *
 * Side effects:
 *    Our peer will notify us when there is data to read from our consume
 *    queue.
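 *
 *    As a worked example of the wrap-around arithmetic below (numbers
 *    chosen purely for illustration): with consumeSize 65536, head at
 *    65000, and roomNeeded 1000, roomLeft is 536, so the waiting offset
 *    becomes 1000 - 536 = 464 and the generation is consumeQGeneration + 1.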
* *---------------------------------------------------------------------------- */ static Bool VSockVmciSendWaitingRead(struct sock *sk, // IN uint64 roomNeeded) // IN { #ifdef VSOCK_OPTIMIZATION_WAITING_NOTIFY VSockVmciSock *vsk; VSockWaitingInfo waitingInfo; uint64 tail; uint64 head; uint64 roomLeft; ASSERT(sk); vsk = vsock_sk(sk); if (vsk->writeNotifyWindow < vsk->consumeSize) { vsk->writeNotifyWindow = MIN(vsk->writeNotifyWindow + PAGE_SIZE, vsk->consumeSize); } VMCIQueue_GetPointers(vsk->consumeQ, vsk->produceQ, &tail, &head); roomLeft = vsk->consumeSize - head; if (roomNeeded >= roomLeft) { waitingInfo.offset = roomNeeded - roomLeft; waitingInfo.generation = vsk->consumeQGeneration + 1; } else { waitingInfo.offset = head + roomNeeded; waitingInfo.generation = vsk->consumeQGeneration; } return VSOCK_SEND_WAITING_READ(sk, &waitingInfo) > 0; #else return TRUE; #endif } /* *---------------------------------------------------------------------------- * * VSockVmciSendReadNotification -- * * Sends a read notification to this socket's peer. * * Results: * >= 0 if the datagram is sent successfully, negative error value * otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static int VSockVmciSendReadNotification(struct sock *sk) // IN { VSockVmciSock *vsk; Bool sentRead; unsigned int retries; int err; ASSERT(sk); vsk = vsock_sk(sk); sentRead = FALSE; retries = 0; err = 0; if (VSockVmciNotifyWaitingWrite(vsk)) { /* * Notify the peer that we have read, retrying the send on failure up to our * maximum value. XXX For now we just log the failure, but later we should * schedule a work item to handle the resend until it succeeds. That would * require keeping track of work items in the vsk and cleaning them up upon * socket close. */ while (!(vsk->peerShutdown & RCV_SHUTDOWN) && !sentRead && retries < VSOCK_MAX_DGRAM_RESENDS) { err = VSOCK_SEND_READ(sk); if (err >= 0) { sentRead = TRUE; } retries++; } if (retries >= VSOCK_MAX_DGRAM_RESENDS) { Warning("unable to send read notification to peer for socket %p.\n", sk); } else { #if defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) vsk->peerWaitingWrite = FALSE; #endif } } return err; } #endif // VMX86_TOOLS /* *---------------------------------------------------------------------------- * * __VSockVmciCreate -- * * Does the work to create the sock structure. * Note: If sock is NULL then the type field must be non-zero. * Otherwise, sock is non-NULL and the type of sock is used in the * newly created socket. * * Results: * sock structure on success, NULL on failure. * * Side effects: * Allocated sk is added to the unbound sockets list iff it is owned by * a struct socket. 
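 *
 *    There are two styles of caller; as a sketch only (ignoring the
 *    kernel-version variants of the signature, and with the second call's
 *    arguments assumed for illustration rather than taken from this file):
 *
 *       __VSockVmciCreate(sock, GFP_KERNEL, 0);           // socket(2) path
 *       __VSockVmciCreate(NULL, GFP_ATOMIC, SOCK_STREAM); // pending socket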
 *
 *----------------------------------------------------------------------------
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
static struct sock *
__VSockVmciCreate(struct socket *sock,   // IN: Owning socket, may be NULL
                  unsigned int priority, // IN: Allocation flags
                  unsigned short type)   // IN: Socket type if sock is NULL
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
static struct sock *
__VSockVmciCreate(struct socket *sock, // IN: Owning socket, may be NULL
                  gfp_t priority,      // IN: Allocation flags
                  unsigned short type) // IN: Socket type if sock is NULL
#else
static struct sock *
__VSockVmciCreate(struct net *net,     // IN: Network namespace
                  struct socket *sock, // IN: Owning socket, may be NULL
                  gfp_t priority,      // IN: Allocation flags
                  unsigned short type) // IN: Socket type if sock is NULL
#endif
{
   struct sock *sk;
   VSockVmciSock *vsk;

   ASSERT((sock && !type) || (!sock && type));

   vsk = NULL;

   /*
    * Before 2.5.5, sk_alloc() always used its own cache and protocol-specific
    * data was contained in the protinfo union.  We cannot use those other
    * structures so we allocate our own structure and attach it to the
    * user_data pointer that we don't otherwise need.  We must be sure to free
    * it later in our destruct routine.
    *
    * From 2.5.5 until 2.6.8, sk_alloc() offered to use a cache that the
    * caller provided.  After this, the cache was moved into the proto
    * structure, but you still had to specify the size and cache yourself
    * until 2.6.12.  Most recently (in 2.6.24), sk_alloc() was changed to
    * expect the network namespace, and the option to zero the sock was
    * dropped.
    */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
   sk = sk_alloc(vsockVmciFamilyOps.family, priority, 1);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9)
   sk = sk_alloc(vsockVmciFamilyOps.family, priority,
                 sizeof (VSockVmciSock), vsockCachep);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12)
   sk = sk_alloc(vsockVmciFamilyOps.family, priority,
                 vsockVmciProto.slab_obj_size, vsockVmciProto.slab);
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
   sk = sk_alloc(vsockVmciFamilyOps.family, priority, &vsockVmciProto, 1);
#else
   sk = sk_alloc(net, vsockVmciFamilyOps.family, priority, &vsockVmciProto);
#endif
   if (!sk) {
      return NULL;
   }

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
   sk->user_data = kmalloc(sizeof *vsk, priority);
   if (!vsock_sk(sk)) {
      sk_free(sk);
      return NULL;
   }
   sk_vsock(vsock_sk(sk)) = sk;
#endif

   /*
    * If we get this far, we know the socket family is registered, so there's
    * no need to register it now.
    */
   down(&registrationMutex);
   vsockVmciSocketCount++;
   up(&registrationMutex);

   sock_init_data(sock, sk);

   /*
    * sk->compat_sk_type is normally set in sock_init_data, but only if sock
    * is non-NULL.  We make sure that our sockets always have a type by
    * setting it here if needed.
*/ if (!sock) { sk->compat_sk_type = type; } vsk = vsock_sk(sk); VSockAddr_Init(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY); VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY); sk->compat_sk_destruct = VSockVmciSkDestruct; sk->compat_sk_backlog_rcv = VSockVmciQueueRcvSkb; sk->compat_sk_state = SS_UNCONNECTED; compat_sock_reset_done(sk); INIT_LIST_HEAD(&vsk->boundTable); INIT_LIST_HEAD(&vsk->connectedTable); vsk->dgHandle = VMCI_INVALID_HANDLE; #ifdef VMX86_TOOLS vsk->qpHandle = VMCI_INVALID_HANDLE; vsk->produceQ = vsk->consumeQ = NULL; vsk->produceQGeneration = vsk->consumeQGeneration = 0; vsk->produceSize = vsk->consumeSize = 0; vsk->writeNotifyWindow = 0; vsk->writeNotifyMinWindow = PAGE_SIZE; vsk->queuePairSize = VSOCK_DEFAULT_QP_SIZE; vsk->queuePairMinSize = VSOCK_DEFAULT_QP_SIZE_MIN; vsk->queuePairMaxSize = VSOCK_DEFAULT_QP_SIZE_MAX; vsk->peerWaitingRead = vsk->peerWaitingWrite = FALSE; vsk->peerWaitingWriteDetected = FALSE; memset(&vsk->peerWaitingReadInfo, 0, sizeof vsk->peerWaitingReadInfo); memset(&vsk->peerWaitingWriteInfo, 0, sizeof vsk->peerWaitingWriteInfo); vsk->listener = NULL; INIT_LIST_HEAD(&vsk->pendingLinks); INIT_LIST_HEAD(&vsk->acceptQueue); vsk->rejected = FALSE; vsk->attachSubId = vsk->detachSubId = VMCI_INVALID_ID; vsk->peerShutdown = 0; #endif if (sock) { VSockVmciInsertBound(vsockUnboundSockets, sk); } return sk; } /* *---------------------------------------------------------------------------- * * __VSockVmciRelease -- * * Releases the provided socket. * * Results: * None. * * Side effects: * Any pending sockets are also released. * *---------------------------------------------------------------------------- */ static void __VSockVmciRelease(struct sock *sk) // IN { if (sk) { struct sk_buff *skb; struct sock *pending; struct VSockVmciSock *vsk; vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ if (VSockVmciInBoundTable(sk)) { VSockVmciRemoveBound(sk); } if (VSockVmciInConnectedTable(sk)) { VSockVmciRemoveConnected(sk); } if (!VMCI_HANDLE_INVALID(vsk->dgHandle)) { VMCIDatagram_DestroyHnd(vsk->dgHandle); vsk->dgHandle = VMCI_INVALID_HANDLE; } lock_sock(sk); sock_orphan(sk); sk->compat_sk_shutdown = SHUTDOWN_MASK; while ((skb = skb_dequeue(&sk->compat_sk_receive_queue))) { kfree_skb(skb); } /* Clean up any sockets that never were accepted. */ #ifdef VMX86_TOOLS while ((pending = VSockVmciDequeueAccept(sk)) != NULL) { __VSockVmciRelease(pending); sock_put(pending); } #endif release_sock(sk); sock_put(sk); } } /* * Sock operations. */ /* *---------------------------------------------------------------------------- * * VSockVmciSkDestruct -- * * Destroys the provided socket. This is called by sk_free(), which is * invoked when the reference count of the socket drops to zero. * * Results: * None. * * Side effects: * Socket count is decremented. 
 *
 *----------------------------------------------------------------------------
 */

static void
VSockVmciSkDestruct(struct sock *sk) // IN
{
   VSockVmciSock *vsk;

   vsk = vsock_sk(sk);

#ifdef VMX86_TOOLS
   if (vsk->attachSubId != VMCI_INVALID_ID) {
      VMCIEvent_Unsubscribe(vsk->attachSubId);
      vsk->attachSubId = VMCI_INVALID_ID;
   }

   if (vsk->detachSubId != VMCI_INVALID_ID) {
      VMCIEvent_Unsubscribe(vsk->detachSubId);
      vsk->detachSubId = VMCI_INVALID_ID;
   }

   if (!VMCI_HANDLE_INVALID(vsk->qpHandle)) {
      VMCIQueuePair_Detach(vsk->qpHandle);
      vsk->qpHandle = VMCI_INVALID_HANDLE;
      vsk->produceQ = vsk->consumeQ = NULL;
      vsk->produceSize = vsk->consumeSize = 0;
   }
#endif

   /*
    * Each list entry holds a reference on the socket, so we should not even
    * be here if the socket is in one of our lists.  If we are, we have a
    * stray sock_put() that needs to go away.
    */
   ASSERT(!VSockVmciInBoundTable(sk));
   ASSERT(!VSockVmciInConnectedTable(sk));
#ifdef VMX86_TOOLS
   ASSERT(!VSockVmciIsPending(sk));
   ASSERT(!VSockVmciInAcceptQueue(sk));
#endif

   /*
    * When clearing these addresses, there's no need to set the family and
    * possibly register the address family with the kernel.
    */
   VSockAddr_InitNoFamily(&vsk->localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
   VSockAddr_InitNoFamily(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
   ASSERT(vsock_sk(sk) == vsk);
   kfree(vsock_sk(sk));
#endif

   down(&registrationMutex);
   vsockVmciSocketCount--;
   VSockVmciTestUnregister();
   up(&registrationMutex);

#ifdef VSOCK_CONTROL_PACKET_COUNT
   {
      uint32 index;
      for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) {
         Warning("Control packet count: Type = %u, Count = %"FMT64"u\n",
                 index, controlPacketCount[index]);
      }
   }
#endif
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciQueueRcvSkb --
 *
 *    Attempts to queue the given skb on the socket's receive queue, freeing
 *    the skb if queueing fails.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciQueueRcvSkb(struct sock *sk,     // IN
                     struct sk_buff *skb) // IN
{
   int err;

   err = sock_queue_rcv_skb(sk, skb);
   if (err) {
      kfree_skb(skb);
   }

   return err;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciRegisterProto --
 *
 *    Registers the vmci sockets protocol family.
 *
 * Results:
 *    Zero on success, error code on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static INLINE int
VSockVmciRegisterProto(void)
{
   int err;

   err = 0;

   /*
    * Before 2.6.9, each address family created its own slab (by calling
    * kmem_cache_create() directly).  From 2.6.9 until 2.6.11, these address
    * families instead called sk_alloc_slab() and the allocated slab was
    * assigned to the slab variable in the proto struct and was created with
    * size slab_obj_size.  As of 2.6.12 and later, this slab allocation was
    * moved into proto_register() and only done if you specified a non-zero
    * value for the second argument (alloc_slab); the size of the slab
    * element was changed to obj_size.
    */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5)
   /* Simply here for clarity and so the else case at the end implies > rest.
*/ #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) vsockCachep = kmem_cache_create("vsock", sizeof (VSockVmciSock), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!vsockCachep) { err = -ENOMEM; } #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) err = sk_alloc_slab(&vsockVmciProto, "vsock"); if (err != 0) { sk_alloc_slab_error(&vsockVmciProto); } #else /* Specify 1 as the second argument so the slab is created for us. */ err = proto_register(&vsockVmciProto, 1); #endif return err; } /* *---------------------------------------------------------------------------- * * VSockVmciUnregisterProto -- * * Unregisters the vmci sockets protocol family. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciUnregisterProto(void) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5) /* Simply here for clarity and so else case at end implies > rest. */ #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 9) kmem_cache_destroy(vsockCachep); #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) sk_free_slab(&vsockVmciProto); #else proto_unregister(&vsockVmciProto); #endif #ifdef VSOCK_CONTROL_PACKET_COUNT { uint32 index; for (index = 0; index < ARRAYSIZE(controlPacketCount); index++) { controlPacketCount[index] = 0; } } #endif } /* *---------------------------------------------------------------------------- * * VSockVmciRegisterAddressFamily -- * * Registers our socket address family with the kernel. * * Note that this assumes the registration lock is held. * * Results: * The address family value on success, negative error code on failure. * * Side effects: * Callers of socket operations with the returned value, on success, will * be able to use our socket implementation. * *---------------------------------------------------------------------------- */ static int VSockVmciRegisterAddressFamily(void) { int err = 0; int i; #ifdef VMX86_TOOLS /* * We don't call into the vmci module or register our socket family if the * vmci device isn't present. */ vmciDevicePresent = VMCI_DeviceGet(); if (!vmciDevicePresent) { Log("Could not register VMCI Sockets because VMCI device is not present.\n"); return -1; } /* * Create the datagram handle that we will use to send and receive all * VSocket control messages for this context. */ err = VMCIDatagram_CreateHnd(VSOCK_PACKET_RID, 0, VSockVmciRecvStreamCB, NULL, &vmciStreamHandle); if (err != VMCI_SUCCESS || vmciStreamHandle.context == VMCI_INVALID_ID || vmciStreamHandle.resource == VMCI_INVALID_ID) { Warning("Unable to create datagram handle. (%d)\n", err); return -ENOMEM; } err = VMCIEvent_Subscribe(VMCI_EVENT_QP_RESUMED, VSockVmciQPResumedCB, NULL, &qpResumedSubId); if (err < VMCI_SUCCESS) { Warning("Unable to subscribe to QP resumed event. (%d)\n", err); err = -ENOMEM; qpResumedSubId = VMCI_INVALID_ID; goto error; } #endif /* * Linux will not allocate an address family to code that is not part of the * kernel proper, so until that time comes we need a workaround. Here we * loop through the allowed values and claim the first one that's not * currently used. Users will then make an ioctl(2) into our module to * retrieve this value before calling socket(2). * * This is undesirable, but it's better than having users' programs break * when a hard-coded, currently-available value gets assigned to someone * else in the future. 
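    *
    * For illustration, the user-space side of that handshake might look
    * like the sketch below; the "/dev/vsock" device path and the header
    * providing IOCTL_VMCI_SOCKETS_GET_AF_VALUE are assumptions, not
    * defined in this file:
    *
    *    int fd = open("/dev/vsock", O_RDONLY);
    *    int af = -1;
    *    if (fd >= 0 &&
    *        ioctl(fd, IOCTL_VMCI_SOCKETS_GET_AF_VALUE, &af) == 0) {
    *       int sockFd = socket(af, SOCK_STREAM, 0); // never hard-code af
    *       ...
    *    }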
*/ for (i = NPROTO - 1; i >= 0; i--) { vsockVmciFamilyOps.family = i; err = sock_register(&vsockVmciFamilyOps); if (err) { Warning("Could not register address family %d.\n", i); vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY; } else { vsockVmciDgramOps.family = i; #ifdef VMX86_TOOLS vsockVmciStreamOps.family = i; #endif break; } } if (err) { goto error; } return vsockVmciFamilyOps.family; error: #ifdef VMX86_TOOLS if (qpResumedSubId != VMCI_INVALID_ID) { VMCIEvent_Unsubscribe(qpResumedSubId); qpResumedSubId = VMCI_INVALID_ID; } VMCIDatagram_DestroyHnd(vmciStreamHandle); #endif return err; } /* *---------------------------------------------------------------------------- * * VSockVmciUnregisterAddressFamily -- * * Unregisters the address family with the kernel. * * Note that this assumes the registration lock is held. * * Results: * None. * * Side effects: * Our socket implementation is no longer accessible. * *---------------------------------------------------------------------------- */ static void VSockVmciUnregisterAddressFamily(void) { #ifdef VMX86_TOOLS if (!vmciDevicePresent) { /* Nothing was registered. */ return; } if (!VMCI_HANDLE_INVALID(vmciStreamHandle)) { if (VMCIDatagram_DestroyHnd(vmciStreamHandle) != VMCI_SUCCESS) { Warning("Could not destroy VMCI datagram handle.\n"); } } if (qpResumedSubId != VMCI_INVALID_ID) { VMCIEvent_Unsubscribe(qpResumedSubId); qpResumedSubId = VMCI_INVALID_ID; } #endif if (vsockVmciFamilyOps.family != VSOCK_INVALID_FAMILY) { sock_unregister(vsockVmciFamilyOps.family); } vsockVmciDgramOps.family = vsockVmciFamilyOps.family = VSOCK_INVALID_FAMILY; #ifdef VMX86_TOOLS vsockVmciStreamOps.family = vsockVmciFamilyOps.family; #endif } /* * Socket operations. */ /* *---------------------------------------------------------------------------- * * VSockVmciRelease -- * * Releases the provided socket by freeing the contents of its queue. This * is called when a user process calls close(2) on the socket. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static int VSockVmciRelease(struct socket *sock) // IN { __VSockVmciRelease(sock->sk); sock->sk = NULL; sock->state = SS_FREE; return 0; } /* *---------------------------------------------------------------------------- * * VSockVmciBind -- * * Binds the provided address to the provided socket. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static int VSockVmciBind(struct socket *sock, // IN struct sockaddr *addr, // IN int addrLen) // IN { int err; struct sock *sk; struct sockaddr_vm *vmciAddr; sk = sock->sk; if (VSockAddr_Cast(addr, addrLen, &vmciAddr) != 0) { return -EINVAL; } lock_sock(sk); err = __VSockVmciBind(sk, vmciAddr); release_sock(sk); return err; } /* *---------------------------------------------------------------------------- * * VSockVmciDgramConnect -- * * Connects a datagram socket. This can be called multiple times to change * the socket's association and can be called with a sockaddr whose family * is set to AF_UNSPEC to dissolve any existing association. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. 
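 *
 *    For illustration, a user-space datagram client might associate with a
 *    peer and later dissolve the association as sketched below ("af" is
 *    the address family value obtained from the module's ioctl; the
 *    variable names are assumptions):
 *
 *       struct sockaddr_vm addr = { 0 };
 *       addr.svm_family = af;
 *       addr.svm_cid = peerCid;
 *       addr.svm_port = peerPort;
 *       connect(fd, (struct sockaddr *)&addr, sizeof addr); // associate
 *       addr.svm_family = AF_UNSPEC;
 *       connect(fd, (struct sockaddr *)&addr, sizeof addr); // dissolve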
* *---------------------------------------------------------------------------- */ static int VSockVmciDgramConnect(struct socket *sock, // IN struct sockaddr *addr, // IN int addrLen, // IN int flags) // IN { int err; struct sock *sk; VSockVmciSock *vsk; struct sockaddr_vm *remoteAddr; sk = sock->sk; vsk = vsock_sk(sk); err = VSockAddr_Cast(addr, addrLen, &remoteAddr); if (err == -EAFNOSUPPORT && remoteAddr->svm_family == AF_UNSPEC) { lock_sock(sk); VSockAddr_Init(&vsk->remoteAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY); sock->state = SS_UNCONNECTED; release_sock(sk); return 0; } else if (err != 0) { return -EINVAL; } lock_sock(sk); if (!VSockAddr_Bound(&vsk->localAddr)) { struct sockaddr_vm localAddr; VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY); if ((err = __VSockVmciBind(sk, &localAddr))) { goto out; } } memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr); sock->state = SS_CONNECTED; out: release_sock(sk); return err; } #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciStreamConnect -- * * Connects a stream socket. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static int VSockVmciStreamConnect(struct socket *sock, // IN struct sockaddr *addr, // IN int addrLen, // IN int flags) // IN { int err; struct sock *sk; VSockVmciSock *vsk; struct sockaddr_vm *remoteAddr; long timeout; COMPAT_DEFINE_WAIT(wait); err = 0; sk = sock->sk; vsk = vsock_sk(sk); lock_sock(sk); /* XXX AF_UNSPEC should make us disconnect like AF_INET. */ switch (sock->state) { case SS_CONNECTED: err = -EISCONN; goto out; case SS_DISCONNECTING: case SS_LISTEN: err = -EINVAL; goto out; case SS_CONNECTING: /* * This continues on so we can move sock into the SS_CONNECTED state once * the connection has completed (at which point err will be set to zero * also). Otherwise, we will either wait for the connection or return * -EALREADY should this be a non-blocking call. */ err = -EALREADY; break; default: ASSERT(sk->compat_sk_state == SS_FREE || sk->compat_sk_state == SS_UNCONNECTED); if (VSockAddr_Cast(addr, addrLen, &remoteAddr) != 0) { err = -EINVAL; goto out; } /* The hypervisor and well-known contexts do not have socket endpoints. */ if (!VSockAddr_SocketContext(remoteAddr->svm_cid)) { err = -ENETUNREACH; goto out; } /* Set the remote address that we are connecting to. */ memcpy(&vsk->remoteAddr, remoteAddr, sizeof vsk->remoteAddr); /* Autobind this socket to the local address if necessary. */ if (!VSockAddr_Bound(&vsk->localAddr)) { struct sockaddr_vm localAddr; VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY); if ((err = __VSockVmciBind(sk, &localAddr))) { goto out; } } sk->compat_sk_state = SS_CONNECTING; err = VSOCK_SEND_CONN_REQUEST(sk, vsk->queuePairSize); if (err < 0) { sk->compat_sk_state = SS_UNCONNECTED; goto out; } /* * Mark sock as connecting and set the error code to in progress in case * this is a non-blocking connect. */ sock->state = SS_CONNECTING; err = -EINPROGRESS; } /* * The receive path will handle all communication until we are able to enter * the connected state. Here we wait for the connection to be completed or * a notification of an error. 
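    *
    * For a non-blocking socket we return -EINPROGRESS below, and user
    * space is expected to complete the connect via poll(2) or select(2);
    * as a sketch of the caller's side (variable names assumed):
    *
    *    connect(fd, (struct sockaddr *)&addr, sizeof addr); // EINPROGRESS
    *    struct pollfd pfd;
    *    pfd.fd = fd;
    *    pfd.events = POLLOUT;
    *    poll(&pfd, 1, timeoutMs); // fd becomes writable once connected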
 */
   timeout = sock_sndtimeo(sk, flags & O_NONBLOCK);
   compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);

   while (sk->compat_sk_state != SS_CONNECTED && sk->compat_sk_err == 0) {
      if (timeout == 0) {
         /*
          * If we're not going to block, skip ahead to preserve the error
          * code set above.
          */
         goto outWait;
      }

      release_sock(sk);
      timeout = schedule_timeout(timeout);
      lock_sock(sk);

      if (signal_pending(current)) {
         err = sock_intr_errno(timeout);
         goto outWaitError;
      } else if (timeout == 0) {
         err = -ETIMEDOUT;
         goto outWaitError;
      }

      compat_cont_prepare_to_wait(sk->compat_sk_sleep,
                                  &wait, TASK_INTERRUPTIBLE);
   }

   if (sk->compat_sk_err) {
      err = -sk->compat_sk_err;
      goto outWaitError;
   } else {
      ASSERT(sk->compat_sk_state == SS_CONNECTED);
      err = 0;
   }

outWait:
   compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING);
out:
   release_sock(sk);
   return err;

outWaitError:
   sk->compat_sk_state = SS_UNCONNECTED;
   sock->state = SS_UNCONNECTED;
   goto outWait;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciAccept --
 *
 *    Accepts the next available connection request for this socket.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciAccept(struct socket *sock,    // IN
                struct socket *newsock, // IN/OUT
                int flags)              // IN
{
   struct sock *listener;
   int err;
   struct sock *connected;
   VSockVmciSock *vconnected;
   long timeout;
   COMPAT_DEFINE_WAIT(wait);

   err = 0;
   listener = sock->sk;

   lock_sock(listener);

   if (sock->type != SOCK_STREAM) {
      err = -EOPNOTSUPP;
      goto out;
   }

   if (listener->compat_sk_state != SS_LISTEN) {
      err = -EINVAL;
      goto out;
   }

   /*
    * Wait for child sockets to appear; these are the new sockets created
    * upon connection establishment.
    */
   timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
   compat_init_prepare_to_wait(listener->compat_sk_sleep, &wait,
                               TASK_INTERRUPTIBLE);

   while ((connected = VSockVmciDequeueAccept(listener)) == NULL &&
          listener->compat_sk_err == 0) {
      release_sock(listener);
      timeout = schedule_timeout(timeout);
      lock_sock(listener);

      if (signal_pending(current)) {
         err = sock_intr_errno(timeout);
         goto outWait;
      } else if (timeout == 0) {
         err = -ETIMEDOUT;
         goto outWait;
      }

      compat_cont_prepare_to_wait(listener->compat_sk_sleep, &wait,
                                  TASK_INTERRUPTIBLE);
   }

   if (listener->compat_sk_err) {
      err = -listener->compat_sk_err;
   }

   if (connected) {
      listener->compat_sk_ack_backlog--;

      lock_sock(connected);
      vconnected = vsock_sk(connected);

      /*
       * If the listener socket has received an error, then we should reject
       * this socket and return.  Note that we simply mark the socket
       * rejected, drop our reference, and let the cleanup function handle
       * the cleanup; the fact that we found it in the listener's accept
       * queue guarantees that the cleanup function hasn't run yet.
       */
      if (err) {
         vconnected->rejected = TRUE;
         release_sock(connected);
         sock_put(connected);
         goto outWait;
      }

      newsock->state = SS_CONNECTED;
      sock_graft(connected, newsock);
      release_sock(connected);
      sock_put(connected);
   }

outWait:
   compat_finish_wait(listener->compat_sk_sleep, &wait, TASK_RUNNING);
out:
   release_sock(listener);
   return err;
}
#endif


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciGetname --
 *
 *    Provides the local or remote address for the socket.
 *
 * Results:
 *    Zero on success, negative error code otherwise.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciGetname(struct socket *sock,   // IN
                 struct sockaddr *addr, // OUT
                 int *addrLen,          // OUT
                 int peer)              // IN
{
   int err;
   struct sock *sk;
   VSockVmciSock *vsk;
   struct sockaddr_vm *vmciAddr;

   sk = sock->sk;
   vsk = vsock_sk(sk);
   err = 0;

   lock_sock(sk);

   if (peer) {
      if (sock->state != SS_CONNECTED) {
         err = -ENOTCONN;
         goto out;
      }
      vmciAddr = &vsk->remoteAddr;
   } else {
      vmciAddr = &vsk->localAddr;
   }

   if (!vmciAddr) {
      err = -EINVAL;
      goto out;
   }

   /*
    * sys_getsockname() and sys_getpeername() pass us a MAX_SOCK_ADDR-sized
    * buffer and don't set addrLen.  Unfortunately that macro is defined in
    * socket.c instead of .h, so we hardcode its value here.
    */
   ASSERT_ON_COMPILE(sizeof *vmciAddr <= 128);
   memcpy(addr, vmciAddr, sizeof *vmciAddr);
   *addrLen = sizeof *vmciAddr;

out:
   release_sock(sk);
   return err;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciPoll --
 *
 *    Waits on the file for activity and then provides a mask indicating the
 *    state of the socket.
 *
 * Results:
 *    Mask of flags containing socket state.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static unsigned int
VSockVmciPoll(struct file *file,   // IN
              struct socket *sock, // IN
              poll_table *wait)    // IN
{
   struct sock *sk;
   unsigned int mask;

   sk = sock->sk;

   poll_wait(file, sk->compat_sk_sleep, wait);
   mask = 0;

   if (sk->compat_sk_err) {
      /* Signify that there has been an error on this socket. */
      mask |= POLLERR;
   }

   if (sk->compat_sk_shutdown == SHUTDOWN_MASK) {
      mask |= POLLHUP;
   }

   /* POLLRDHUP wasn't added until 2.6.17. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 17)
   if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
      mask |= POLLRDHUP;
   }
#endif

   if (sock->type == SOCK_DGRAM) {
      /*
       * For datagram sockets we can read if there is something in the queue
       * and write as long as the socket isn't shut down for sending.
       */
      if (!skb_queue_empty(&sk->compat_sk_receive_queue) ||
          (sk->compat_sk_shutdown & RCV_SHUTDOWN)) {
         mask |= POLLIN | POLLRDNORM;
      }

      if (!(sk->compat_sk_shutdown & SEND_SHUTDOWN)) {
         mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
      }
#ifdef VMX86_TOOLS
   } else if (sock->type == SOCK_STREAM) {
      VSockVmciSock *vsk;

      lock_sock(sk);

      vsk = vsock_sk(sk);

      /*
       * Listening sockets that have connections in their accept queue and
       * connected sockets that have consumable data can be read.  Sockets
       * whose connections have been closed, reset, or terminated should
       * also be considered readable, and we check the shutdown flag for
       * that.
       */
      if ((sk->compat_sk_state == SS_LISTEN &&
           !VSockVmciIsAcceptQueueEmpty(sk)) ||
          (!VMCI_HANDLE_INVALID(vsk->qpHandle) &&
           !(sk->compat_sk_shutdown & RCV_SHUTDOWN) &&
           VMCIQueue_BufReady(vsk->consumeQ,
                              vsk->produceQ, vsk->consumeSize)) ||
          sk->compat_sk_shutdown) {
         mask |= POLLIN | POLLRDNORM;
      }

      /*
       * Connected sockets that can produce data can be written.
       */
      if (sk->compat_sk_state == SS_CONNECTED &&
          !(sk->compat_sk_shutdown & SEND_SHUTDOWN) &&
          VMCIQueue_FreeSpace(vsk->produceQ,
                              vsk->consumeQ, vsk->produceSize) > 0) {
         mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
      }

      /*
       * Connected sockets also need to notify their peer that they are
       * waiting.  Optimally these calls would happen in the code that
       * decides whether the caller will wait or not, but that's core kernel
       * code and this is the best we can do.  If the caller doesn't sleep,
       * the worst that happens is a few extra datagrams are sent.
       */
      if (sk->compat_sk_state == SS_CONNECTED) {
         if (VMCIQueue_FreeSpace(vsk->produceQ,
                                 vsk->consumeQ, vsk->produceSize) == 0) {
            /*
             * Only send waiting write if the queue is full, otherwise we end
             * up in an infinite WAITING_WRITE, READ, WAITING_WRITE, READ,
             * etc. loop.  Treat failing to send the notification as a socket
             * error, passing that back through the mask.
             */
            if (!VSockVmciSendWaitingWrite(sk, 1)) {
               mask |= POLLERR;
            }
         }

         if (!VSockVmciSendWaitingRead(sk, 1)) {
            mask |= POLLERR;
         }
      }

      release_sock(sk);
#endif
   }

   return mask;
}


#ifdef VMX86_TOOLS
/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciListen --
 *
 *    Signify that this socket is listening for connection requests.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciListen(struct socket *sock, // IN
                int backlog)         // IN
{
   int err;
   struct sock *sk;
   VSockVmciSock *vsk;

   sk = sock->sk;

   lock_sock(sk);

   if (sock->type != SOCK_STREAM) {
      err = -EOPNOTSUPP;
      goto out;
   }

   if (sock->state != SS_UNCONNECTED) {
      err = -EINVAL;
      goto out;
   }

   vsk = vsock_sk(sk);

   if (!VSockAddr_Bound(&vsk->localAddr)) {
      err = -EINVAL;
      goto out;
   }

   sk->compat_sk_max_ack_backlog = backlog;
   sk->compat_sk_state = SS_LISTEN;

   err = 0;

out:
   release_sock(sk);
   return err;
}
#endif


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciShutdown --
 *
 *    Shuts down the provided socket using the provided mode.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

static int
VSockVmciShutdown(struct socket *sock, // IN
                  int mode)            // IN
{
   struct sock *sk;

   /*
    * User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
    * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode here
    * like the other address families do.  Note also that the increment makes
    * SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3), which is what we
    * want.
    */
   mode++;

   if ((mode & ~SHUTDOWN_MASK) || !mode) {
      return -EINVAL;
   }

   if (sock->state == SS_UNCONNECTED) {
      return -ENOTCONN;
   }

   sk = sock->sk;
   sock->state = SS_DISCONNECTING;

   /* Receive and send shutdowns are treated alike. */
   mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
   if (mode) {
      lock_sock(sk);
      sk->compat_sk_shutdown |= mode;
      sk->compat_sk_state_change(sk);
      release_sock(sk);
   }

#ifdef VMX86_TOOLS
   if (sk->compat_sk_type == SOCK_STREAM && mode) {
      compat_sock_reset_done(sk);
      VSOCK_SEND_SHUTDOWN(sk, mode);
   }
#endif

   return 0;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciDgramSendmsg --
 *
 *    Sends a datagram.
 *
 * Results:
 *    Number of bytes sent on success, negative error code on failure.
 *
 * Side effects:
 *    None.
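 *
 *    For illustration, the matching user-space call is an ordinary
 *    sendto(2) with a sockaddr_vm destination (a sketch only; the variable
 *    names are assumptions):
 *
 *       struct sockaddr_vm to = { 0 };
 *       to.svm_family = af;
 *       to.svm_cid = peerCid;
 *       to.svm_port = peerPort;
 *       sendto(fd, buf, bufLen, 0, (struct sockaddr *)&to, sizeof to);
 *
 *    Note that bufLen must not exceed VMCI_MAX_DG_PAYLOAD_SIZE or the send
 *    fails with EMSGSIZE.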
 *
 *----------------------------------------------------------------------------
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
static int
VSockVmciDgramSendmsg(struct socket *sock,    // IN: socket to send on
                      struct msghdr *msg,     // IN: message to send
                      int len,                // IN: length of message
                      struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
static int
VSockVmciDgramSendmsg(struct kiocb *kiocb,    // UNUSED
                      struct socket *sock,    // IN: socket to send on
                      struct msghdr *msg,     // IN: message to send
                      int len,                // IN: length of message
                      struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
static int
VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
                      struct socket *sock, // IN: socket to send on
                      struct msghdr *msg,  // IN: message to send
                      int len)             // IN: length of message
#else
static int
VSockVmciDgramSendmsg(struct kiocb *kiocb, // UNUSED
                      struct socket *sock, // IN: socket to send on
                      struct msghdr *msg,  // IN: message to send
                      size_t len)          // IN: length of message
#endif
{
   int err;
   struct sock *sk;
   VSockVmciSock *vsk;
   struct sockaddr_vm *remoteAddr;
   VMCIDatagram *dg;

   if (msg->msg_flags & MSG_OOB) {
      return -EOPNOTSUPP;
   }

   if (len > VMCI_MAX_DG_PAYLOAD_SIZE) {
      return -EMSGSIZE;
   }

   /* For now, MSG_DONTWAIT is always assumed... */
   err = 0;
   sk = sock->sk;
   vsk = vsock_sk(sk);

   lock_sock(sk);

   if (!VSockAddr_Bound(&vsk->localAddr)) {
      struct sockaddr_vm localAddr;

      VSockAddr_Init(&localAddr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
      if ((err = __VSockVmciBind(sk, &localAddr))) {
         goto out;
      }
   }

   /*
    * If the provided message contains an address, use that.  Otherwise fall
    * back on the socket's remote handle (if it has been connected).
    */
   if (msg->msg_name &&
       VSockAddr_Cast(msg->msg_name, msg->msg_namelen, &remoteAddr) == 0) {
      /* Ensure this address is of the right type and is a valid destination. */
      // XXXAB Temporary to handle test program
      if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
         remoteAddr->svm_cid = VMCI_GetContextID();
      }

      if (!VSockAddr_Bound(remoteAddr)) {
         err = -EINVAL;
         goto out;
      }
   } else if (sock->state == SS_CONNECTED) {
      remoteAddr = &vsk->remoteAddr;
      // XXXAB Temporary to handle test program
      if (remoteAddr->svm_cid == VMADDR_CID_ANY) {
         remoteAddr->svm_cid = VMCI_GetContextID();
      }

      /* XXX Should connect() or this function ensure remoteAddr is bound? */
      if (!VSockAddr_Bound(&vsk->remoteAddr)) {
         err = -EINVAL;
         goto out;
      }
   } else {
      err = -EINVAL;
      goto out;
   }

   /*
    * Allocate a buffer for the user's message and our packet header.
    */
   dg = kmalloc(len + sizeof *dg, GFP_KERNEL);
   if (!dg) {
      err = -ENOMEM;
      goto out;
   }

   /*
    * Copy the payload from the user's iovec, failing the send if the copy
    * faults (memcpy_fromiovec() returns a negative value on failure).
    */
   if (memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), msg->msg_iov, len) < 0) {
      kfree(dg);
      err = -EFAULT;
      goto out;
   }

   dg->dst = VMCI_MAKE_HANDLE(remoteAddr->svm_cid, remoteAddr->svm_port);
   dg->src = VMCI_MAKE_HANDLE(vsk->localAddr.svm_cid, vsk->localAddr.svm_port);
   dg->payloadSize = len;

   err = VMCIDatagram_Send(dg);
   kfree(dg);
   if (err < 0) {
      err = VSockVmci_ErrorToVSockError(err);
      goto out;
   }

   /*
    * err is the number of bytes sent on success.  We need to subtract the
    * VSock-specific header portions of what we've sent.
    */
   err -= sizeof *dg;

out:
   release_sock(sk);
   return err;
}


#ifdef VMX86_TOOLS
/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciStreamSetsockopt --
 *
 *    Set a socket option on a stream socket.
 *
 * Results:
 *    0 on success, negative error code on failure.
 *
 * Side effects:
 *    None.
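 *
 *    Note that the option level is the dynamically assigned address family
 *    value rather than SOL_SOCKET, and the option value is a uint64.  A
 *    minimal user-space sketch (assuming "af" came from the module's
 *    IOCTL_VMCI_SOCKETS_GET_AF_VALUE ioctl):
 *
 *       uint64 qpSize = 128 * 1024;
 *       setsockopt(fd, af, SO_VMCI_BUFFER_SIZE, &qpSize, sizeof qpSize);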
* *---------------------------------------------------------------------------- */ int VSockVmciStreamSetsockopt(struct socket *sock, // IN/OUT int level, // IN int optname, // IN char __user *optval, // IN int optlen) // IN { int err; struct sock *sk; VSockVmciSock *vsk; uint64 val; if (level != VSockVmci_GetAFValue()) { return -ENOPROTOOPT; } if (optlen < sizeof val) { return -EINVAL; } if (copy_from_user(&val, optval, sizeof val) != 0) { return -EFAULT; } err = 0; sk = sock->sk; vsk = vsock_sk(sk); ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize && vsk->queuePairSize <= vsk->queuePairMaxSize); lock_sock(sk); switch (optname) { case SO_VMCI_BUFFER_SIZE: if (val < vsk->queuePairMinSize || val > vsk->queuePairMaxSize) { err = -EINVAL; goto out; } vsk->queuePairSize = val; break; case SO_VMCI_BUFFER_MAX_SIZE: if (val < vsk->queuePairSize) { err = -EINVAL; goto out; } vsk->queuePairMaxSize = val; break; case SO_VMCI_BUFFER_MIN_SIZE: if (val > vsk->queuePairSize) { err = -EINVAL; goto out; } vsk->queuePairMinSize = val; break; default: err = -ENOPROTOOPT; break; } out: ASSERT(vsk->queuePairMinSize <= vsk->queuePairSize && vsk->queuePairSize <= vsk->queuePairMaxSize); release_sock(sk); return err; } #endif #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciStreamGetsockopt -- * * Get a socket option for a stream socket * * Results: * 0 on success, negative error code on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ int VSockVmciStreamGetsockopt(struct socket *sock, // IN int level, // IN int optname, // IN char __user *optval, // OUT int __user * optlen) // IN/OUT { int err; int len; struct sock *sk; VSockVmciSock *vsk; uint64 val; if (level != VSockVmci_GetAFValue()) { return -ENOPROTOOPT; } if ((err = get_user(len, optlen)) != 0) { return err; } if (len < sizeof val) { return -EINVAL; } len = sizeof val; err = 0; sk = sock->sk; vsk = vsock_sk(sk); switch (optname) { case SO_VMCI_BUFFER_SIZE: val = vsk->queuePairSize; break; case SO_VMCI_BUFFER_MAX_SIZE: val = vsk->queuePairMaxSize; break; case SO_VMCI_BUFFER_MIN_SIZE: val = vsk->queuePairMinSize; break; default: return -ENOPROTOOPT; } if ((err = copy_to_user(optval, &val, len)) != 0) { return -EFAULT; } if ((err = put_user(len, optlen)) != 0) { return -EFAULT; } return 0; } #endif #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciStreamSendmsg -- * * Sends a message on the socket. * * Results: * Number of bytes sent on success, negative error code on failure. * * Side effects: * None. 
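 *
 *    A send may write fewer bytes than requested (for example when
 *    interrupted by a signal after a partial enqueue), so user-space
 *    callers typically loop; a minimal sketch:
 *
 *       size_t done = 0;
 *       while (done < bufLen) {
 *          ssize_t n = send(fd, buf + done, bufLen - done, 0);
 *          if (n < 0) {
 *             break;  // inspect errno
 *          }
 *          done += n;
 *       }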
 *
 *----------------------------------------------------------------------------
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
static int
VSockVmciStreamSendmsg(struct socket *sock,    // IN: socket to send on
                       struct msghdr *msg,     // IN: message to send
                       int len,                // IN: length of message
                       struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
static int
VSockVmciStreamSendmsg(struct kiocb *kiocb,    // UNUSED
                       struct socket *sock,    // IN: socket to send on
                       struct msghdr *msg,     // IN: message to send
                       int len,                // IN: length of message
                       struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
static int
VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
                       struct socket *sock, // IN: socket to send on
                       struct msghdr *msg,  // IN: message to send
                       int len)             // IN: length of message
#else
static int
VSockVmciStreamSendmsg(struct kiocb *kiocb, // UNUSED
                       struct socket *sock, // IN: socket to send on
                       struct msghdr *msg,  // IN: message to send
                       size_t len)          // IN: length of message
#endif
{
   struct sock *sk;
   VSockVmciSock *vsk;
   ssize_t totalWritten;
   long timeout;
   int err;
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
   uint64 produceTail;
   uint64 consumeHead;
#endif
   COMPAT_DEFINE_WAIT(wait);

   sk = sock->sk;
   vsk = vsock_sk(sk);
   totalWritten = 0;
   err = 0;

   if (msg->msg_flags & MSG_OOB) {
      return -EOPNOTSUPP;
   }

   lock_sock(sk);

   /* Callers should not provide a destination with stream sockets. */
   if (msg->msg_namelen) {
      err = sk->compat_sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
      goto out;
   }

   if (sk->compat_sk_shutdown & SEND_SHUTDOWN) {
      err = -EPIPE;
      goto out;
   }

   if (sk->compat_sk_state != SS_CONNECTED ||
       !VSockAddr_Bound(&vsk->localAddr)) {
      err = -ENOTCONN;
      goto out;
   }

   if (!VSockAddr_Bound(&vsk->remoteAddr)) {
      err = -EDESTADDRREQ;
      goto out;
   }

   /*
    * Wait for room in the produce queue to enqueue our user's data.
    */
   timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
   compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);

   while (totalWritten < len) {
      Bool sentWrote;
      unsigned int retries;
      ssize_t written;

      sentWrote = FALSE;
      retries = 0;

      while (VMCIQueue_FreeSpace(vsk->produceQ,
                                 vsk->consumeQ, vsk->produceSize) == 0 &&
             sk->compat_sk_err == 0 &&
             !(sk->compat_sk_shutdown & SEND_SHUTDOWN) &&
             !(vsk->peerShutdown & RCV_SHUTDOWN)) {

         /* Don't wait for non-blocking sockets. */
         if (timeout == 0) {
            err = -EAGAIN;
            goto outWait;
         }

         /* Notify our peer that we are waiting for room to write. */
         if (!VSockVmciSendWaitingWrite(sk, 1)) {
            err = -EHOSTUNREACH;
            goto outWait;
         }

         release_sock(sk);
         timeout = schedule_timeout(timeout);
         lock_sock(sk);

         if (signal_pending(current)) {
            err = sock_intr_errno(timeout);
            goto outWait;
         } else if (timeout == 0) {
            err = -EAGAIN;
            goto outWait;
         }

         compat_cont_prepare_to_wait(sk->compat_sk_sleep, &wait,
                                     TASK_INTERRUPTIBLE);
      }

      /*
       * These checks occur both as part of and after the loop conditional
       * since we need to check before and after sleeping.
       */
      if (sk->compat_sk_err) {
         err = -sk->compat_sk_err;
         goto outWait;
      } else if ((sk->compat_sk_shutdown & SEND_SHUTDOWN) ||
                 (vsk->peerShutdown & RCV_SHUTDOWN)) {
         err = -EPIPE;
         goto outWait;
      }

      /*
       * Note that enqueue will only write as many bytes as are free in the
       * produce queue, so we don't need to ensure len is smaller than the
       * queue size.  It is the caller's responsibility to check how many
       * bytes we were able to send.
*/ #if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) VMCIQueue_GetPointers(vsk->produceQ, vsk->consumeQ, &produceTail, &consumeHead); #endif written = VMCIQueue_EnqueueV(vsk->produceQ, vsk->consumeQ, vsk->produceSize, msg->msg_iov, len - totalWritten); if (written < 0) { err = -ENOMEM; goto outWait; } #if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) /* * Detect a wrap-around to maintain queue generation. Note that this is * safe since we hold the socket lock across the two queue pair * operations. */ if (written >= vsk->produceSize - produceTail) { vsk->produceQGeneration++; } #endif totalWritten += written; if (VSockVmciNotifyWaitingRead(vsk)) { /* * Notify the peer that we have written, retrying the send on failure up to * our maximum value. See the XXX comment for the corresponding piece of * code in StreamRecvmsg() for potential improvements. */ while (!(vsk->peerShutdown & RCV_SHUTDOWN) && !sentWrote && retries < VSOCK_MAX_DGRAM_RESENDS) { err = VSOCK_SEND_WROTE(sk); if (err >= 0) { sentWrote = TRUE; } retries++; } if (retries >= VSOCK_MAX_DGRAM_RESENDS) { Warning("unable to send wrote notification to peer for socket %p.\n", sk); goto outWait; } else { #if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY) vsk->peerWaitingRead = FALSE; #endif } } } ASSERT(totalWritten <= INT_MAX); outWait: if (totalWritten > 0) { err = totalWritten; } compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING); out: release_sock(sk); return err; } #endif /* *---------------------------------------------------------------------------- * * VSockVmciDgramRecvmsg -- * * Receives a datagram and places it in the caller's msg. * * Results: * The size of the payload on success, negative value on failure. * * Side effects: * None. * *---------------------------------------------------------------------------- */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43) static int VSockVmciDgramRecvmsg(struct socket *sock, // IN: socket to receive from struct msghdr *msg, // IN/OUT: message to receive into int len, // IN: length of receive buffer int flags, // IN: receive flags struct scm_cookie *scm) // UNUSED #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65) static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED struct socket *sock, // IN: socket to receive from struct msghdr *msg, // IN/OUT: message to receive into int len, // IN: length of receive buffer int flags, // IN: receive flags struct scm_cookie *scm) // UNUSED #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2) static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED struct socket *sock, // IN: socket to receive from struct msghdr *msg, // IN/OUT: message to receive into int len, // IN: length of receive buffer int flags) // IN: receive flags #else static int VSockVmciDgramRecvmsg(struct kiocb *kiocb, // UNUSED struct socket *sock, // IN: socket to receive from struct msghdr *msg, // IN/OUT: message to receive into size_t len, // IN: length of receive buffer int flags) // IN: receive flags #endif { int err; int noblock; struct sock *sk; VMCIDatagram *dg; size_t payloadLen; struct sk_buff *skb; struct sockaddr_vm *vmciAddr; err = 0; sk = sock->sk; payloadLen = 0; noblock = flags & MSG_DONTWAIT; vmciAddr = (struct sockaddr_vm *)msg->msg_name; if (flags & MSG_OOB || flags & MSG_ERRQUEUE) { return -EOPNOTSUPP; } /* Retrieve the head sk_buff from the socket's receive queue. 
    */
   skb = skb_recv_datagram(sk, flags, noblock, &err);
   if (err) {
      return err;
   }

   if (!skb) {
      return -EAGAIN;
   }

   dg = (VMCIDatagram *)skb->data;
   if (!dg) {
      /* err is 0, meaning we read zero bytes. */
      goto out;
   }

   payloadLen = dg->payloadSize;

   /* Ensure the sk_buff matches the payload size claimed in the packet. */
   if (payloadLen != skb->len - sizeof *dg) {
      err = -EINVAL;
      goto out;
   }

   if (payloadLen > len) {
      payloadLen = len;
      msg->msg_flags |= MSG_TRUNC;
   }

   /* Place the datagram payload in the user's iovec. */
   err = skb_copy_datagram_iovec(skb, sizeof *dg, msg->msg_iov, payloadLen);
   if (err) {
      goto out;
   }

   msg->msg_namelen = 0;
   if (vmciAddr) {
      /* Provide the address of the sender. */
      VSockAddr_Init(vmciAddr,
                     VMCI_HANDLE_TO_CONTEXT_ID(dg->src),
                     VMCI_HANDLE_TO_RESOURCE_ID(dg->src));
      msg->msg_namelen = sizeof *vmciAddr;
   }
   err = payloadLen;

out:
   skb_free_datagram(sk, skb);
   return err;
}


#ifdef VMX86_TOOLS
/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciStreamRecvmsg --
 *
 *    Receives data from the connected stream socket's consume queue and
 *    places it in the caller's msg.
 *
 * Results:
 *    The size of the payload on success, negative value on failure.
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 43)
static int
VSockVmciStreamRecvmsg(struct socket *sock,    // IN: socket to receive from
                       struct msghdr *msg,     // IN/OUT: message to receive into
                       int len,                // IN: length of receive buffer
                       int flags,              // IN: receive flags
                       struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 65)
static int
VSockVmciStreamRecvmsg(struct kiocb *kiocb,    // UNUSED
                       struct socket *sock,    // IN: socket to receive from
                       struct msghdr *msg,     // IN/OUT: message to receive into
                       int len,                // IN: length of receive buffer
                       int flags,              // IN: receive flags
                       struct scm_cookie *scm) // UNUSED
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 2)
static int
VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
                       struct socket *sock, // IN: socket to receive from
                       struct msghdr *msg,  // IN/OUT: message to receive into
                       int len,             // IN: length of receive buffer
                       int flags)           // IN: receive flags
#else
static int
VSockVmciStreamRecvmsg(struct kiocb *kiocb, // UNUSED
                       struct socket *sock, // IN: socket to receive from
                       struct msghdr *msg,  // IN/OUT: message to receive into
                       size_t len,          // IN: length of receive buffer
                       int flags)           // IN: receive flags
#endif
{
   struct sock *sk;
   VSockVmciSock *vsk;
   int err;
   int target;
   int64 ready;
   long timeout;
   ssize_t copied;
   Bool sentRead;
   unsigned int retries;
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
   uint64 consumeHead;
   uint64 produceTail;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
   Bool notifyOnBlock;
#endif
#endif
   COMPAT_DEFINE_WAIT(wait);

   sk = sock->sk;
   vsk = vsock_sk(sk);
   err = 0;
   retries = 0;
   sentRead = FALSE;
#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
   notifyOnBlock = FALSE;
#endif

   lock_sock(sk);

   if (sk->compat_sk_state != SS_CONNECTED) {
      /*
       * Recvmsg is supposed to return 0 if a peer performs an orderly
       * shutdown.  Differentiate between that case and when a peer has not
       * connected or a local shutdown occurred with the SOCK_DONE flag.
       */
      if (compat_sock_test_done(sk)) {
         err = 0;
      } else {
         err = -ENOTCONN;
      }
      goto out;
   }

   if (flags & MSG_OOB) {
      err = -EOPNOTSUPP;
      goto out;
   }

   if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
      err = 0;
      goto out;
   }

   /*
    * We must not copy less than target bytes into the user's buffer before
    * returning successfully, so we wait for the consume queue to have that
    * much data to consume before dequeueing.  Note that this makes it
    * impossible to handle cases where target is greater than the queue size.
    */
   target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
   if (target >= vsk->consumeSize) {
      err = -ENOMEM;
      goto out;
   }
   timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
   copied = 0;

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
   if (vsk->writeNotifyMinWindow < target + 1) {
      ASSERT(target < vsk->consumeSize);
      vsk->writeNotifyMinWindow = target + 1;
      if (vsk->writeNotifyWindow < vsk->writeNotifyMinWindow) {
         /*
          * If the current window is smaller than the new minimal window
          * size, we need to reevaluate whether we need to notify the sender.
          * If the number of ready bytes is smaller than the new window, we
          * need to send a notification to the sender before we block.
          */
         vsk->writeNotifyWindow = vsk->writeNotifyMinWindow;
         notifyOnBlock = TRUE;
      }
   }
#endif

   compat_init_prepare_to_wait(sk->compat_sk_sleep, &wait, TASK_INTERRUPTIBLE);

   while ((ready = VMCIQueue_BufReady(vsk->consumeQ,
                                      vsk->produceQ,
                                      vsk->consumeSize)) < target &&
          sk->compat_sk_err == 0 &&
          !(sk->compat_sk_shutdown & RCV_SHUTDOWN) &&
          !(vsk->peerShutdown & SEND_SHUTDOWN)) {

      if (ready < 0) {
         /*
          * Invalid queue pair content.  XXX This should be changed to a
          * connection reset in a later change.
          */
         err = -ENOMEM;
         goto out;
      }

      /* Don't wait for non-blocking sockets. */
      if (timeout == 0) {
         err = -EAGAIN;
         goto outWait;
      }

      /* Notify our peer that we are waiting for data to read. */
      if (!VSockVmciSendWaitingRead(sk, target)) {
         err = -EHOSTUNREACH;
         goto outWait;
      }

#ifdef VSOCK_OPTIMIZATION_FLOW_CONTROL
      if (notifyOnBlock) {
         err = VSockVmciSendReadNotification(sk);
         if (err < 0) {
            goto outWait;
         }
         notifyOnBlock = FALSE;
      }
#endif

      release_sock(sk);
      timeout = schedule_timeout(timeout);
      lock_sock(sk);

      if (signal_pending(current)) {
         err = sock_intr_errno(timeout);
         goto outWait;
      } else if (timeout == 0) {
         err = -EAGAIN;
         goto outWait;
      }

      compat_cont_prepare_to_wait(sk->compat_sk_sleep, &wait,
                                  TASK_INTERRUPTIBLE);
   }

   if (sk->compat_sk_err) {
      err = -sk->compat_sk_err;
      goto outWait;
   } else if (sk->compat_sk_shutdown & RCV_SHUTDOWN) {
      err = 0;
      goto outWait;
   } else if ((vsk->peerShutdown & SEND_SHUTDOWN) &&
              VMCIQueue_BufReady(vsk->consumeQ,
                                 vsk->produceQ, vsk->consumeSize) < target) {
      err = 0;
      goto outWait;
   }

   /*
    * Now consume up to len bytes from the queue.  Note that since we have
    * the socket locked we should copy at least ready bytes.
    */
#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
   VMCIQueue_GetPointers(vsk->consumeQ, vsk->produceQ,
                         &produceTail, &consumeHead);
#endif

   copied = VMCIQueue_DequeueV(vsk->produceQ, vsk->consumeQ,
                               vsk->consumeSize, msg->msg_iov, len);
   if (copied < 0) {
      err = -ENOMEM;
      goto outWait;
   }

#if defined(VMX86_TOOLS) && defined(VSOCK_OPTIMIZATION_WAITING_NOTIFY)
   /*
    * Detect a wrap-around to maintain queue generation.  Note that this is
    * safe since we hold the socket lock across the two queue pair
    * operations.
    */
   if (copied >= vsk->consumeSize - consumeHead) {
      vsk->consumeQGeneration++;
   }
#endif

   ASSERT(copied >= target);

   /*
    * If the other side has shut down for sending and there is nothing more
    * to read, then set our socket's RCV_SHUTDOWN flag and modify the socket
    * state.
    */
   if (vsk->peerShutdown & SEND_SHUTDOWN) {
      if (VMCIQueue_BufReady(vsk->consumeQ,
                             vsk->produceQ, vsk->consumeSize) <= 0) {
         sk->compat_sk_shutdown |= RCV_SHUTDOWN;
         sk->compat_sk_state = SS_UNCONNECTED;
         compat_sock_set_done(sk);
         sk->compat_sk_state_change(sk);
      }
   }

   err = VSockVmciSendReadNotification(sk);
   if (err < 0) {
      goto outWait;
   }

   ASSERT(copied <= INT_MAX);
   err = copied;

outWait:
   compat_finish_wait(sk->compat_sk_sleep, &wait, TASK_RUNNING);
out:
   release_sock(sk);
   return err;
}
#endif


/*
 * Protocol operations.
 */


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciCreate --
 *
 *    Creates a VSocket socket.
 *
 * Results:
 *    Zero on success, negative error code on failure.
 *
 * Side effects:
 *    Socket count is incremented.
 *
 *----------------------------------------------------------------------------
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
static int
VSockVmciCreate(struct socket *sock, // IN
                int protocol)        // IN
#else
static int
VSockVmciCreate(struct net *net,     // IN
                struct socket *sock, // IN
                int protocol)        // IN
#endif
{
   if (!sock) {
      return -EINVAL;
   }

   if (protocol) {
      return -EPROTONOSUPPORT;
   }

   switch (sock->type) {
   case SOCK_DGRAM:
      sock->ops = &vsockVmciDgramOps;
      break;
# ifdef VMX86_TOOLS
   /*
    * Queue pairs are /currently/ only supported within guests, so stream
    * sockets are only supported within guests.
    */
   case SOCK_STREAM:
      sock->ops = &vsockVmciStreamOps;
      break;
# endif
   default:
      return -ESOCKTNOSUPPORT;
   }

   sock->state = SS_UNCONNECTED;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24)
   return __VSockVmciCreate(sock, GFP_KERNEL, 0) ? 0 : -ENOMEM;
#else
   return __VSockVmciCreate(net, sock, GFP_KERNEL, 0) ? 0 : -ENOMEM;
#endif
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockVmciIoctl32Handler --
 *
 *    Handler for 32-bit ioctl(2) calls on a 64-bit kernel.
 *
 * Results:
 *    Same as VSockVmciDevIoctl().
 *
 * Side effects:
 *    None.
 *
 *----------------------------------------------------------------------------
 */

#ifdef VM_X86_64
#ifndef HAVE_COMPAT_IOCTL
static int
VSockVmciIoctl32Handler(unsigned int fd,     // IN
                        unsigned int iocmd,  // IN
                        unsigned long ioarg, // IN/OUT
                        struct file *filp)   // IN
{
   int ret;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 26) || \
   (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 3))
   lock_kernel();
#endif
   ret = -ENOTTY;
   if (filp && filp->f_op && filp->f_op->ioctl == VSockVmciDevIoctl) {
      ret = VSockVmciDevIoctl(filp->f_dentry->d_inode, filp, iocmd, ioarg);
   }
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 26) || \
   (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 3))
   unlock_kernel();
#endif
   return ret;
}
#endif /* !HAVE_COMPAT_IOCTL */


/*
 *----------------------------------------------------------------------------
 *
 * register_ioctl32_handlers --
 *
 *    Registers the ioctl conversion handler.
 *
 * Results:
 *    Zero on success, error code otherwise.
 *
 * Side effects:
 *    None.
* *---------------------------------------------------------------------------- */ static int register_ioctl32_handlers(void) { #ifndef HAVE_COMPAT_IOCTL { int i; for (i = IOCTL_VMCI_SOCKETS_FIRST; i < IOCTL_VMCI_SOCKETS_LAST; i++) { int retval = register_ioctl32_conversion(i, VSockVmciIoctl32Handler); if (retval) { Warning("Failed to register ioctl32 conversion for cmd %d\n", i); return retval; } } } #endif /* !HAVE_COMPAT_IOCTL */ return 0; } /* *---------------------------------------------------------------------------- * * unregister_ioctl32_handlers -- * * Unregisters the ioctl conversion handler. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static void unregister_ioctl32_handlers(void) { #ifndef HAVE_COMPAT_IOCTL { int i; for (i = IOCTL_VMCI_SOCKETS_FIRST; i < IOCTL_VMCI_SOCKETS_LAST; i++) { int retval = unregister_ioctl32_conversion(i); if (retval) { Warning("Failed to unregister ioctl32 conversion for cmd %d\n", i); } } } #endif /* !HAVE_COMPAT_IOCTL */ } #else /* VM_X86_64 */ #define register_ioctl32_handlers() (0) #define unregister_ioctl32_handlers() do { } while (0) #endif /* VM_X86_64 */ /* * Device operations. */ /* *---------------------------------------------------------------------------- * * VSockVmciDevOpen -- * * Invoked when the device is opened. Simply maintains a count of open * instances. * * Results: * Zero on success, negative value otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ int VSockVmciDevOpen(struct inode *inode, // IN struct file *file) // IN { down(&registrationMutex); devOpenCount++; up(&registrationMutex); return 0; } /* *---------------------------------------------------------------------------- * * VSockVmciDevRelease -- * * Invoked when the device is closed. Updates the open instance count and * unregisters the socket family if this is the last user. * * Results: * Zero on success, negative value otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ int VSockVmciDevRelease(struct inode *inode, // IN struct file *file) // IN { down(&registrationMutex); devOpenCount--; VSockVmciTestUnregister(); up(&registrationMutex); return 0; } /* *---------------------------------------------------------------------------- * * VSockVmciDevIoctl -- * * ioctl(2) handler. * * Results: * Zero on success, negative error code otherwise. * * Side effects: * None.
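 *
 * For reference, userspace reaches this handler through the misc device
 * node. A minimal sketch (the device path is an illustrative assumption
 * and error handling is omitted):
 *
 *    int fd = open("/dev/vsock", O_RDONLY);
 *    int af = -1;
 *    VMCIId cid;
 *
 *    ioctl(fd, IOCTL_VMCI_SOCKETS_GET_AF_VALUE, &af);
 *    ioctl(fd, IOCTL_VMCI_SOCKETS_GET_LOCAL_CID, &cid);
 *    close(fd);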
* *---------------------------------------------------------------------------- */ static int VSockVmciDevIoctl(struct inode *inode, // IN struct file *filp, // IN u_int iocmd, // IN unsigned long ioarg) // IN/OUT { int retval; retval = 0; switch (iocmd) { case IOCTL_VMCI_SOCKETS_GET_AF_VALUE: { int family; family = VSockVmci_GetAFValue(); if (family < 0) { Warning("AF_VSOCK is not registered\n"); } if (copy_to_user((void *)ioarg, &family, sizeof family) != 0) { retval = -EFAULT; } break; } case IOCTL_VMCI_SOCKETS_GET_LOCAL_CID: { VMCIId cid = VMCI_GetContextID(); if (copy_to_user((void *)ioarg, &cid, sizeof cid) != 0) { retval = -EFAULT; } break; } default: Warning("Unknown ioctl %d\n", iocmd); retval = -EINVAL; } return retval; } #if defined(HAVE_COMPAT_IOCTL) || defined(HAVE_UNLOCKED_IOCTL) /* *----------------------------------------------------------------------------- * * VSockVmciDevUnlockedIoctl -- * * Wrapper for VSockVmciDevIoctl() supporting the compat_ioctl and * unlocked_ioctl methods that have signatures different from the * old ioctl. Used as the compat_ioctl method for 32-bit apps running * on a 64-bit kernel and as the unlocked_ioctl method on systems * supporting it. VSockVmciDevIoctl() may safely be called without * holding the BKL. * * Results: * Same as VSockVmciDevIoctl(). * * Side effects: * None. * *----------------------------------------------------------------------------- */ static long VSockVmciDevUnlockedIoctl(struct file *filp, // IN u_int iocmd, // IN unsigned long ioarg) // IN/OUT { return VSockVmciDevIoctl(NULL, filp, iocmd, ioarg); } #endif /* * Module operations. */ /* *---------------------------------------------------------------------------- * * VSockVmciInit -- * * Initialization routine for the VSockets module. * * Results: * Zero on success, error code on failure. * * Side effects: * The VSocket protocol family and socket operations are registered. * *---------------------------------------------------------------------------- */ static int __init VSockVmciInit(void) { int err; DriverLog_Init("VSock"); request_module("vmci"); err = misc_register(&vsockVmciDevice); if (err) { return -ENOENT; } err = register_ioctl32_handlers(); if (err) { misc_deregister(&vsockVmciDevice); return err; } err = VSockVmciRegisterProto(); if (err) { Warning("Cannot register vsock protocol.\n"); unregister_ioctl32_handlers(); misc_deregister(&vsockVmciDevice); return err; } VSockVmciInitTables(); return 0; } /* *---------------------------------------------------------------------------- * * VSockVmciExit -- * * VSockets module exit routine. * * Results: * None. * * Side effects: * Unregisters VSocket protocol family and socket operations. * *---------------------------------------------------------------------------- */ static void __exit VSockVmciExit(void) { unregister_ioctl32_handlers(); misc_deregister(&vsockVmciDevice); down(&registrationMutex); VSockVmciUnregisterAddressFamily(); up(&registrationMutex); VSockVmciUnregisterProto(); } module_init(VSockVmciInit); module_exit(VSockVmciExit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); MODULE_VERSION(VSOCK_DRIVER_VERSION_STRING); MODULE_LICENSE("GPL v2"); /* * Starting with SLE10sp2, Novell requires that IHVs sign a support agreement * with them and mark their kernel modules as externally supported via a * change to the module header. If this isn't done, the module will not load * by default (i.e., neither mkinitrd nor modprobe will accept it).
*/ MODULE_INFO(supported, "external"); vsock-only/linux/af_vsock.h0000444000000000000000000000602112025726714014766 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * af_vsock.h -- * * Definitions for Linux VSockets module. */ #ifndef __AF_VSOCK_H__ #define __AF_VSOCK_H__ #include "vsockCommon.h" #include "vsockPacket.h" #include "compat_workqueue.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5) # define vsock_sk(__sk) ((VSockVmciSock *)(__sk)->user_data) # define sk_vsock(__vsk) ((__vsk)->sk) #else # define vsock_sk(__sk) ((VSockVmciSock *)__sk) # define sk_vsock(__vsk) (&(__vsk)->sk) #endif typedef struct VSockVmciSock { #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 5) struct sock *sk; #else /* sk must be the first member. */ struct sock sk; #endif struct sockaddr_vm localAddr; struct sockaddr_vm remoteAddr; /* Links for the global tables of bound and connected sockets. */ struct list_head boundTable; struct list_head connectedTable; VMCIHandle dgHandle; /* For SOCK_DGRAM only. */ #ifdef VMX86_TOOLS /* Rest are SOCK_STREAM only. */ VMCIHandle qpHandle; VMCIQueue *produceQ; VMCIQueue *consumeQ; uint64 produceQGeneration; uint64 consumeQGeneration; uint64 produceSize; uint64 consumeSize; uint64 queuePairSize; uint64 queuePairMinSize; uint64 queuePairMaxSize; uint64 writeNotifyWindow; uint64 writeNotifyMinWindow; Bool peerWaitingRead; Bool peerWaitingWrite; Bool peerWaitingWriteDetected; VSockWaitingInfo peerWaitingReadInfo; VSockWaitingInfo peerWaitingWriteInfo; VMCIId attachSubId; VMCIId detachSubId; /* Listening socket that this came from. */ struct sock *listener; /* * Used for pending list and accept queue during connection handshake. The * listening socket is the head for both lists. Sockets created for * connection requests are placed in the pending list until they are * connected, at which point they are put in the accept queue list so they * can be accepted in accept(). If accept() cannot accept the connection, * it is marked as rejected so the cleanup function knows to clean up the * socket. */ struct list_head pendingLinks; struct list_head acceptQueue; Bool rejected; compat_delayed_work dwork; uint32 peerShutdown; #endif } VSockVmciSock; #endif /* __AF_VSOCK_H__ */ vsock-only/linux/util.c0000444000000000000000000004700112025726714014146 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * util.c -- * * Utility functions for Linux VSocket module. */ #include "driver-config.h" #include #include "compat_sock.h" #include "compat_list.h" #include "af_vsock.h" #include "util.h" struct list_head vsockBindTable[VSOCK_HASH_SIZE + 1]; struct list_head vsockConnectedTable[VSOCK_HASH_SIZE]; spinlock_t vsockTableLock = SPIN_LOCK_UNLOCKED; /* * snprintf() wasn't exported until 2.4.10: fall back on sprintf in those * cases. It's okay since this is only for the debug function for logging * packets. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 10) #define snprintf(str, size, fmt, args...) sprintf(str, fmt, ## args) #endif /* *---------------------------------------------------------------------------- * * VSockVmciLogPkt -- * * Logs the provided packet. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------------- */ void VSockVmciLogPkt(char const *function, // IN uint32 line, // IN VSockPacket *pkt) // IN { char buf[256]; char *cur = buf; int left = sizeof buf; int written = 0; char *typeStrings[] = { [VSOCK_PACKET_TYPE_INVALID] = "INVALID", [VSOCK_PACKET_TYPE_REQUEST] = "REQUEST", [VSOCK_PACKET_TYPE_NEGOTIATE] = "NEGOTIATE", [VSOCK_PACKET_TYPE_OFFER] = "OFFER", [VSOCK_PACKET_TYPE_ATTACH] = "ATTACH", [VSOCK_PACKET_TYPE_WROTE] = "WROTE", [VSOCK_PACKET_TYPE_READ] = "READ", [VSOCK_PACKET_TYPE_RST] = "RST", [VSOCK_PACKET_TYPE_SHUTDOWN] = "SHUTDOWN", [VSOCK_PACKET_TYPE_WAITING_WRITE] = "WAITING_WRITE", [VSOCK_PACKET_TYPE_WAITING_READ] = "WAITING_READ", }; written = snprintf(cur, left, "PKT: %u:%u -> %u:%u", VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src), pkt->srcPort, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst), pkt->dstPort); if (written >= left) { goto error; } left -= written; cur += written; switch (pkt->type) { case VSOCK_PACKET_TYPE_REQUEST: case VSOCK_PACKET_TYPE_NEGOTIATE: written = snprintf(cur, left, ", %s, size = %"FMT64"u", typeStrings[pkt->type], pkt->u.size); break; case VSOCK_PACKET_TYPE_OFFER: case VSOCK_PACKET_TYPE_ATTACH: written = snprintf(cur, left, ", %s, handle = %u:%u", typeStrings[pkt->type], VMCI_HANDLE_TO_CONTEXT_ID(pkt->u.handle), VMCI_HANDLE_TO_RESOURCE_ID(pkt->u.handle)); break; case VSOCK_PACKET_TYPE_WROTE: case VSOCK_PACKET_TYPE_READ: case VSOCK_PACKET_TYPE_RST: written = snprintf(cur, left, ", %s", typeStrings[pkt->type]); break; case VSOCK_PACKET_TYPE_SHUTDOWN: { Bool recv; Bool send; recv = pkt->u.mode & RCV_SHUTDOWN; send = pkt->u.mode & SEND_SHUTDOWN; written = snprintf(cur, left, ", %s, mode = %c%c", typeStrings[pkt->type], recv ? 'R' : ' ', send ? 
'S' : ' '); } break; case VSOCK_PACKET_TYPE_WAITING_WRITE: case VSOCK_PACKET_TYPE_WAITING_READ: written = snprintf(cur, left, ", %s, generation = %"FMT64"u, " "offset = %"FMT64"u", typeStrings[pkt->type], pkt->u.wait.generation, pkt->u.wait.offset); break; default: written = snprintf(cur, left, ", unrecognized type"); } if (written >= left) { goto error; } left -= written; cur += written; written = snprintf(cur, left, " [%s:%u]\n", function, line); if (written >= left) { goto error; } Log("%s", buf); return; error: Log("could not log packet\n"); } /* *---------------------------------------------------------------------------- * * VSockVmciInitTables -- * * Initializes the tables used for socket lookup. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------------- */ void VSockVmciInitTables(void) { uint32 i; for (i = 0; i < ARRAYSIZE(vsockBindTable); i++) { INIT_LIST_HEAD(&vsockBindTable[i]); } for (i = 0; i < ARRAYSIZE(vsockConnectedTable); i++) { INIT_LIST_HEAD(&vsockConnectedTable[i]); } } /* *---------------------------------------------------------------------------- * * __VSockVmciInsertBound -- * * Inserts socket into the bound table. * * Note that this assumes any necessary locks are held. * * Results: * None. * * Side effects: * The reference count for sk is incremented. * *---------------------------------------------------------------------------- */ void __VSockVmciInsertBound(struct list_head *list, // IN struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(list); ASSERT(sk); vsk = vsock_sk(sk); sock_hold(sk); list_add(&vsk->boundTable, list); } /* *---------------------------------------------------------------------------- * * __VSockVmciInsertConnected -- * * Inserts socket into the connected table. * * Note that this assumes any necessary locks are held. * * Results: * None. * * Side effects: * The reference count for sk is incremented. * *---------------------------------------------------------------------------- */ void __VSockVmciInsertConnected(struct list_head *list, // IN struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(list); ASSERT(sk); vsk = vsock_sk(sk); sock_hold(sk); list_add(&vsk->connectedTable, list); } /* *---------------------------------------------------------------------------- * * __VSockVmciRemoveBound -- * * Removes socket from the bound table. * * Note that this assumes any necessary locks are held. * * Results: * None. * * Side effects: * The reference count for sk is decremented. * *---------------------------------------------------------------------------- */ void __VSockVmciRemoveBound(struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(sk); ASSERT(__VSockVmciInBoundTable(sk)); vsk = vsock_sk(sk); list_del_init(&vsk->boundTable); sock_put(sk); } /* *---------------------------------------------------------------------------- * * __VSockVmciRemoveConnected -- * * Removes socket from the connected table. * * Note that this assumes any necessary locks are held. * * Results: * None. * * Side effects: * The reference count for sk is decremented. 
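 *
 * Insert and remove are strictly paired. A connected stream socket, for
 * example, passes through the table as in this sketch, using the lookup
 * macro from util.h and with vsockTableLock held by the locked wrappers
 * there:
 *
 *    __VSockVmciInsertConnected(vsockConnectedSocketsVsk(vsk), sk);
 *    ...
 *    __VSockVmciRemoveConnected(sk);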
* *---------------------------------------------------------------------------- */ void __VSockVmciRemoveConnected(struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(sk); ASSERT(__VSockVmciInConnectedTable(sk)); vsk = vsock_sk(sk); list_del_init(&vsk->connectedTable); sock_put(sk); } /* *---------------------------------------------------------------------------- * * __VSockVmciFindBoundSocket -- * * Finds the socket corresponding to the provided address in the bound * sockets hash table. * * Note that this assumes any necessary locks are held. * * Results: * The sock structure if found, NULL if not found. * * Side effects: * None. * *---------------------------------------------------------------------------- */ struct sock * __VSockVmciFindBoundSocket(struct sockaddr_vm *addr) // IN { VSockVmciSock *vsk; struct sock *sk; ASSERT(addr); list_for_each_entry(vsk, vsockBoundSockets(addr), boundTable) { if (VSockAddr_EqualsAddr(addr, &vsk->localAddr)) { sk = sk_vsock(vsk); /* We only store stream sockets in the bound table. */ ASSERT(sk->compat_sk_socket ? sk->compat_sk_socket->type == SOCK_STREAM : 1); goto found; } } sk = NULL; found: return sk; } /* *---------------------------------------------------------------------------- * * __VSockVmciFindConnectedSocket -- * * Finds the socket corresponding to the provided addresses in the connected * sockets hash table. * * Note that this assumes any necessary locks are held. * * Results: * The sock structure if found, NULL if not found. * * Side effects: * None. * *---------------------------------------------------------------------------- */ struct sock * __VSockVmciFindConnectedSocket(struct sockaddr_vm *src, // IN struct sockaddr_vm *dst) // IN { VSockVmciSock *vsk; struct sock *sk; ASSERT(src); ASSERT(dst); list_for_each_entry(vsk, vsockConnectedSockets(src, dst), connectedTable) { if (VSockAddr_EqualsAddr(src, &vsk->remoteAddr) && VSockAddr_EqualsAddr(dst, &vsk->localAddr)) { sk = sk_vsock(vsk); goto found; } } sk = NULL; found: return sk; } /* *---------------------------------------------------------------------------- * * __VSockVmciInBoundTable -- * * Determines whether the provided socket is in the bound table. * * Results: * TRUE if the socket is in the bound table, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ Bool __VSockVmciInBoundTable(struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(sk); vsk = vsock_sk(sk); return !list_empty(&vsk->boundTable); } /* *---------------------------------------------------------------------------- * * __VSockVmciInConnectedTable -- * * Determines whether the provided socket is in the connected table. * * Results: * TRUE if the socket is in the connected table, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ Bool __VSockVmciInConnectedTable(struct sock *sk) // IN { VSockVmciSock *vsk; ASSERT(sk); vsk = vsock_sk(sk); return !list_empty(&vsk->connectedTable); } #ifdef VMX86_TOOLS /* *---------------------------------------------------------------------------- * * VSockVmciGetPending -- * * Retrieves a pending connection that matches the addresses specified in * the provided packet. * * Assumes the socket lock is held for listener. * * Results: * Socket of the pending connection on success, NULL if not found. * * Side effects: * A reference is held on the socket until the release function is called.
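 *
 * Callers must balance this reference with VSockVmciReleasePending(). A
 * sketch of the expected pattern when a packet arrives for a listening
 * socket:
 *
 *    pending = VSockVmciGetPending(listener, pkt);
 *    if (pending) {
 *       ... process the packet for the pending connection ...
 *       VSockVmciReleasePending(pending);
 *    }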
* *---------------------------------------------------------------------------- */ struct sock * VSockVmciGetPending(struct sock *listener, // IN: listening socket VSockPacket *pkt) // IN: incoming packet { VSockVmciSock *vlistener; VSockVmciSock *vpending; struct sock *pending; ASSERT(listener); ASSERT(pkt); vlistener = vsock_sk(listener); list_for_each_entry(vpending, &vlistener->pendingLinks, pendingLinks) { struct sockaddr_vm src; struct sockaddr_vm dst; VSockAddr_Init(&src, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src), pkt->srcPort); VSockAddr_Init(&dst, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst), pkt->dstPort); if (VSockAddr_EqualsAddr(&src, &vpending->remoteAddr) && VSockAddr_EqualsAddr(&dst, &vpending->localAddr)) { pending = sk_vsock(vpending); sock_hold(pending); goto found; } } pending = NULL; found: return pending; } /* *---------------------------------------------------------------------------- * * VSockVmciReleasePending -- * * Releases the reference on a socket previously obtained by a call to * VSockVmciGetPending(). * * Results: * None. * * Side effects: * The socket may be freed if this was the last reference. * *---------------------------------------------------------------------------- */ void VSockVmciReleasePending(struct sock *pending) // IN: pending connection { ASSERT(pending); sock_put(pending); } /* *---------------------------------------------------------------------------- * * VSockVmciAddPending -- * * Adds a pending connection to the listener's pending list. * * Assumes the socket lock is held for listener. * Assumes the socket lock is held for pending. * * Results: * None. * * Side effects: * The reference count of the sockets is incremented. * *---------------------------------------------------------------------------- */ void VSockVmciAddPending(struct sock *listener, // IN: listening socket struct sock *pending) // IN: pending connection { VSockVmciSock *vlistener; VSockVmciSock *vpending; ASSERT(listener); ASSERT(pending); vlistener = vsock_sk(listener); vpending = vsock_sk(pending); sock_hold(pending); sock_hold(listener); list_add_tail(&vpending->pendingLinks, &vlistener->pendingLinks); } /* *---------------------------------------------------------------------------- * * VSockVmciRemovePending -- * * Removes a pending connection from the listener's pending list. * * Assumes the socket lock is held for listener. * Assumes the socket lock is held for pending. * * Results: * None. * * Side effects: * The reference count of the sockets is decremented. * *---------------------------------------------------------------------------- */ void VSockVmciRemovePending(struct sock *listener, // IN: listening socket struct sock *pending) // IN: pending connection { VSockVmciSock *vlistener; VSockVmciSock *vpending; ASSERT(listener); ASSERT(pending); vlistener = vsock_sk(listener); vpending = vsock_sk(pending); list_del_init(&vpending->pendingLinks); sock_put(listener); sock_put(pending); } /* *---------------------------------------------------------------------------- * * VSockVmciEnqueueAccept -- * * Enqueues the connected socket on the listening socket's accept * queue. * * Assumes the socket lock is held for listener. * Assumes the socket lock is held for connected. * * Results: * None. * * Side effects: * The sockets' reference counts are incremented.
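 *
 * Together with VSockVmciDequeueAccept() this forms the producer and
 * consumer halves of the listener's accept queue. The consumer side of
 * an accept(2) implementation looks roughly like the sketch below; the
 * caller drops the returned reference with sock_put() once it is done:
 *
 *    connected = VSockVmciDequeueAccept(listener);
 *    if (connected) {
 *       ... graft connected onto the newly allocated struct socket ...
 *       sock_put(connected);
 *    }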
* *---------------------------------------------------------------------------- */ void VSockVmciEnqueueAccept(struct sock *listener, // IN: listening socket struct sock *connected) // IN: connected socket { VSockVmciSock *vlistener; VSockVmciSock *vconnected; ASSERT(listener); ASSERT(connected); vlistener = vsock_sk(listener); vconnected = vsock_sk(connected); sock_hold(connected); sock_hold(listener); list_add_tail(&vconnected->acceptQueue, &vlistener->acceptQueue); } /* *---------------------------------------------------------------------------- * * VSockVmciDequeueAccept -- * * Dequeues the next connected socket from the listening socket's accept * queue. * * Assumes the socket lock is held for listener. * * Note that the caller must call sock_put() on the returned socket once it * is done with the socket. * * Results: * The next socket from the queue, or NULL if the queue is empty. * * Side effects: * The reference count of the listener is decremented. * *---------------------------------------------------------------------------- */ struct sock * VSockVmciDequeueAccept(struct sock *listener) // IN: listening socket { VSockVmciSock *vlistener; VSockVmciSock *vconnected; ASSERT(listener); vlistener = vsock_sk(listener); if (list_empty(&vlistener->acceptQueue)) { return NULL; } vconnected = list_entry(vlistener->acceptQueue.next, VSockVmciSock, acceptQueue); ASSERT(vconnected); list_del_init(&vconnected->acceptQueue); sock_put(listener); /* * The caller will need a reference on the connected socket so we let it * call sock_put(). */ ASSERT(sk_vsock(vconnected)); return sk_vsock(vconnected); } /* *---------------------------------------------------------------------------- * * VSockVmciRemoveAccept -- * * Removes a socket from the accept queue of a listening socket. * * Assumes the socket lock is held for listener. * Assumes the socket lock is held for connected. * * Results: * None. * * Side effects: * The sockets' reference counts are decremented. * *---------------------------------------------------------------------------- */ void VSockVmciRemoveAccept(struct sock *listener, // IN: listening socket struct sock *connected) // IN: connected socket { VSockVmciSock *vconnected; ASSERT(listener); ASSERT(connected); if (!VSockVmciInAcceptQueue(connected)) { return; } vconnected = vsock_sk(connected); ASSERT(vconnected->listener == listener); list_del_init(&vconnected->acceptQueue); sock_put(listener); sock_put(connected); } /* *---------------------------------------------------------------------------- * * VSockVmciInAcceptQueue -- * * Determines whether a socket is on an accept queue. * * Assumes the socket lock is held for sk. * * Results: * TRUE if the socket is in an accept queue, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ Bool VSockVmciInAcceptQueue(struct sock *sk) // IN: socket { ASSERT(sk); /* * If our accept queue isn't empty, it means we're linked into some listener * socket's accept queue. */ return !VSockVmciIsAcceptQueueEmpty(sk); } /* *---------------------------------------------------------------------------- * * VSockVmciIsAcceptQueueEmpty -- * * Determines whether the provided socket's accept queue is empty. * * Assumes the socket lock is held for sk. * * Results: * TRUE if the socket's accept queue is empty, FALSE otherwise. * * Side effects: * None.
* * *---------------------------------------------------------------------------- */ Bool VSockVmciIsAcceptQueueEmpty(struct sock *sk) // IN: socket { VSockVmciSock *vsk; ASSERT(sk); vsk = vsock_sk(sk); return list_empty(&vsk->acceptQueue); } /* *---------------------------------------------------------------------------- * * VSockVmciIsPending -- * * Determines whether a socket is pending. * * Assumes the socket lock is held for sk. * * Results: * TRUE if the socket is pending, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ Bool VSockVmciIsPending(struct sock *sk) // IN: socket { VSockVmciSock *vsk; ASSERT(sk); vsk = vsock_sk(sk); return !list_empty(&vsk->pendingLinks); } #endif vsock-only/linux/util.h0000444000000000000000000002556012025726714014161 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * util.h -- * * Utility functions for Linux VSocket module. */ #ifndef __UTIL_H__ #define __UTIL_H__ #include "driver-config.h" #include "compat_sock.h" #include "compat_spinlock.h" #include "vsockCommon.h" #include "vsockPacket.h" /* * Each bound VSocket is stored in the bind hash table and each connected * VSocket is stored in the connected hash table. * * Unbound sockets are all put on the same list attached to the end of the hash * table (vsockUnboundSockets). Bound sockets are added to the hash table in * the bucket that their local address hashes to (vsockBoundSockets(addr) * represents the list that addr hashes to). * * Specifically, we initialize the vsockBindTable array to a size of * VSOCK_HASH_SIZE + 1 so that vsockBindTable[0] through * vsockBindTable[VSOCK_HASH_SIZE - 1] are for bound sockets and * vsockBindTable[VSOCK_HASH_SIZE] is for unbound sockets. The hash function * mods with VSOCK_HASH_SIZE - 1 to ensure this. */ #define VSOCK_HASH_SIZE 251 #define LAST_RESERVED_PORT 1023 #define MAX_PORT_RETRIES 24 extern struct list_head vsockBindTable[VSOCK_HASH_SIZE + 1]; extern struct list_head vsockConnectedTable[VSOCK_HASH_SIZE]; extern spinlock_t vsockTableLock; #define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) #define vsockBoundSockets(addr) (&vsockBindTable[VSOCK_HASH(addr)]) #define vsockUnboundSockets (&vsockBindTable[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. */ #define VSOCK_CONN_HASH(src, dst) \ (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) #define vsockConnectedSockets(src, dst) \ (&vsockConnectedTable[VSOCK_CONN_HASH(src, dst)]) #define vsockConnectedSocketsVsk(vsk) \ vsockConnectedSockets(&(vsk)->remoteAddr, &(vsk)->localAddr) /* * Prototypes. 
*/ void VSockVmciLogPkt(char const *function, uint32 line, VSockPacket *pkt); void VSockVmciInitTables(void); void __VSockVmciInsertBound(struct list_head *list, struct sock *sk); void __VSockVmciInsertConnected(struct list_head *list, struct sock *sk); void __VSockVmciRemoveBound(struct sock *sk); void __VSockVmciRemoveConnected(struct sock *sk); struct sock *__VSockVmciFindBoundSocket(struct sockaddr_vm *addr); struct sock *__VSockVmciFindConnectedSocket(struct sockaddr_vm *src, struct sockaddr_vm *dst); Bool __VSockVmciInBoundTable(struct sock *sk); Bool __VSockVmciInConnectedTable(struct sock *sk); #ifdef VMX86_TOOLS struct sock *VSockVmciGetPending(struct sock *listener, VSockPacket *pkt); void VSockVmciReleasePending(struct sock *pending); void VSockVmciAddPending(struct sock *listener, struct sock *pending); void VSockVmciRemovePending(struct sock *listener, struct sock *pending); void VSockVmciEnqueueAccept(struct sock *listener, struct sock *connected); struct sock *VSockVmciDequeueAccept(struct sock *listener); void VSockVmciRemoveAccept(struct sock *listener, struct sock *connected); Bool VSockVmciInAcceptQueue(struct sock *sk); Bool VSockVmciIsAcceptQueueEmpty(struct sock *sk); Bool VSockVmciIsPending(struct sock *sk); #endif static INLINE void VSockVmciInsertBound(struct list_head *list, struct sock *sk); static INLINE void VSockVmciInsertConnected(struct list_head *list, struct sock *sk); static INLINE void VSockVmciRemoveBound(struct sock *sk); static INLINE void VSockVmciRemoveConnected(struct sock *sk); static INLINE struct sock *VSockVmciFindBoundSocket(struct sockaddr_vm *addr); static INLINE struct sock *VSockVmciFindConnectedSocket(struct sockaddr_vm *src, struct sockaddr_vm *dst); static INLINE Bool VSockVmciInBoundTable(struct sock *sk); static INLINE Bool VSockVmciInConnectedTable(struct sock *sk); /* *---------------------------------------------------------------------------- * * VSockVmciInsertBound -- * * Inserts socket into the bound table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * None. * * Side effects: * vsockTableLock is acquired and released. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciInsertBound(struct list_head *list, // IN struct sock *sk) // IN { ASSERT(list); ASSERT(sk); spin_lock_bh(&vsockTableLock); __VSockVmciInsertBound(list, sk); spin_unlock_bh(&vsockTableLock); } /* *---------------------------------------------------------------------------- * * VSockVmciInsertConnected -- * * Inserts socket into the connected table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * None. * * Side effects: * vsockTableLock is acquired and released. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciInsertConnected(struct list_head *list, // IN struct sock *sk) // IN { ASSERT(list); ASSERT(sk); spin_lock_bh(&vsockTableLock); __VSockVmciInsertConnected(list, sk); spin_unlock_bh(&vsockTableLock); } /* *---------------------------------------------------------------------------- * * VSockVmciRemoveBound -- * * Removes socket from the bound list. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * None. 
* * Side effects: * vsockTableLock is acquired and released. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciRemoveBound(struct sock *sk) // IN { ASSERT(sk); spin_lock_bh(&vsockTableLock); __VSockVmciRemoveBound(sk); spin_unlock_bh(&vsockTableLock); } /* *---------------------------------------------------------------------------- * * VSockVmciRemoveConnected -- * * Removes socket from the connected list. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * None. * * Side effects: * vsockTableLock is acquired and released. * *---------------------------------------------------------------------------- */ static INLINE void VSockVmciRemoveConnected(struct sock *sk) // IN { ASSERT(sk); spin_lock_bh(&vsockTableLock); __VSockVmciRemoveConnected(sk); spin_unlock_bh(&vsockTableLock); } /* *---------------------------------------------------------------------------- * * VSockVmciFindBoundSocket -- * * Finds the socket corresponding to the provided address in the bound * sockets hash table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these are called from tasklets. * * Results: * The sock structure if found, NULL on failure. * * Side effects: * vsockTableLock is acquired and released. * The socket's reference count is increased. * *---------------------------------------------------------------------------- */ static INLINE struct sock * VSockVmciFindBoundSocket(struct sockaddr_vm *addr) // IN { struct sock *sk; ASSERT(addr); spin_lock_bh(&vsockTableLock); sk = __VSockVmciFindBoundSocket(addr); if (sk) { sock_hold(sk); } spin_unlock_bh(&vsockTableLock); return sk; } /* *---------------------------------------------------------------------------- * * VSockVmciFindConnectedSocket -- * * Finds the socket corresponding to the provided addresses in the connected * sockets hash table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these are called from tasklets. * * Results: * The sock structure if found, NULL on failure. * * Side effects: * vsockTableLock is acquired and released. * The socket's reference count is increased. * *---------------------------------------------------------------------------- */ static INLINE struct sock * VSockVmciFindConnectedSocket(struct sockaddr_vm *src, // IN struct sockaddr_vm *dst) // IN { struct sock *sk; ASSERT(src); ASSERT(dst); spin_lock_bh(&vsockTableLock); sk = __VSockVmciFindConnectedSocket(src, dst); if (sk) { sock_hold(sk); } spin_unlock_bh(&vsockTableLock); return sk; } /* *---------------------------------------------------------------------------- * * VSockVmciInBoundTable -- * * Determines whether the provided socket is in the bound table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * TRUE if the socket is in the bound table, FALSE otherwise. * * Side effects: * vsockTableLock is acquired and released.
* *---------------------------------------------------------------------------- */ static INLINE Bool VSockVmciInBoundTable(struct sock *sk) // IN { Bool ret; ASSERT(sk); spin_lock_bh(&vsockTableLock); ret = __VSockVmciInBoundTable(sk); spin_unlock_bh(&vsockTableLock); return ret; } /* *---------------------------------------------------------------------------- * * VSockVmciInConnectedTable -- * * Determines whether the provided socket is in the connected table. * * Note that it is important to invoke the bottom-half versions of the * spinlock functions since these may be called from tasklets. * * Results: * TRUE is socket is in connected table, FALSE otherwise. * * Side effects: * vsockTableLock is acquired and released. * *---------------------------------------------------------------------------- */ static INLINE Bool VSockVmciInConnectedTable(struct sock *sk) // IN { Bool ret; ASSERT(sk); spin_lock_bh(&vsockTableLock); ret = __VSockVmciInConnectedTable(sk); spin_unlock_bh(&vsockTableLock); return ret; } #endif /* __UTIL_H__ */ vsock-only/linux/vsock_version.h0000444000000000000000000000221012025726714016061 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsock_version.h -- * * Version definitions for the Linux vsock driver. */ #ifndef _VSOCK_VERSION_H_ #define _VSOCK_VERSION_H_ #define VSOCK_DRIVER_VERSION 1.0.0.0 #define VSOCK_DRIVER_VERSION_COMMAS 1,0,0,0 #define VSOCK_DRIVER_VERSION_STRING "1.0.0.0" #endif /* _VSOCK_VERSION_H_ */ vsock-only/linux/vsockPacket.h0000444000000000000000000001772512025726714015465 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockPacket.h -- * * Packet constants, types and functions. 
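 *
 * For stream sockets these control packets carry the connection
 * handshake. A rough summary, restating the type and payload comments
 * below (the authoritative sequencing lives in af_vsock.c):
 *
 *    REQUEST   (u.size)   - propose a connection and a queue pair size
 *    NEGOTIATE (u.size)   - answer with an acceptable size
 *    OFFER     (u.handle) - offer the created queue pair
 *    ATTACH    (u.handle) - confirm attachment to the queue pair
 *
 * WROTE and READ then signal queue pair activity, while SHUTDOWN and RST
 * manage teardown.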
*/ #if defined(_WIN32) || defined(VMKERNEL) # include "vsockOSInt.h" #else # define VSockOS_ClearMemory(_dst, _sz) memset(_dst, 0, _sz) # define VSockOS_Memcpy(_dst, _src, _sz) memcpy(_dst, _src, _sz) #endif #ifndef _VSOCK_PACKET_H_ #define _VSOCK_PACKET_H_ /* If the packet format changes in a release then this should change too. */ #define VSOCK_PACKET_VERSION 1 /* The resource ID on which control packets are sent. */ #define VSOCK_PACKET_RID 1 /* Assert that the given packet is valid. */ #define VSOCK_PACKET_ASSERT(_p) \ ASSERT((_p)); \ ASSERT((_p)->type < VSOCK_PACKET_TYPE_MAX); \ ASSERT(0 == (_p)->_reserved1); \ ASSERT(0 == (_p)->_reserved2) typedef enum VSockPacketType { VSOCK_PACKET_TYPE_INVALID = 0, // Invalid type. VSOCK_PACKET_TYPE_REQUEST, // Connection request. VSOCK_PACKET_TYPE_NEGOTIATE, // Connection negotiate. VSOCK_PACKET_TYPE_OFFER, // Connection offer queue pair. VSOCK_PACKET_TYPE_ATTACH, // Connection attach. VSOCK_PACKET_TYPE_WROTE, // Wrote data to queue pair. VSOCK_PACKET_TYPE_READ, // Read data from queue pair. VSOCK_PACKET_TYPE_RST, // Reset. VSOCK_PACKET_TYPE_SHUTDOWN, // Shutdown the connection. VSOCK_PACKET_TYPE_WAITING_WRITE, // Notify peer we are waiting to write. VSOCK_PACKET_TYPE_WAITING_READ, // Notify peer we are waiting to read. VSOCK_PACKET_TYPE_MAX // Last message. } VSockPacketType; typedef struct VSockWaitingInfo { uint64 generation; // Generation of the queue. uint64 offset; // Offset within the queue. } VSockWaitingInfo; /* * Control packet type for STREAM sockets. DGRAMs have no control packets * nor special packet header for data packets, they are just raw VMCI DGRAM * messages. For STREAMs, control packets are sent over the control channel * while data is written and read directly from queue pairs with no packet * format. */ typedef struct VSockPacket { VMCIDatagram dg; // Datagram header. uint8 version; // Version. uint8 type; // Type of message. uint16 _reserved1; // Reserved. uint32 srcPort; // Source port. uint32 dstPort; // Destination port. uint32 _reserved2; // Reserved. union { uint64 size; // Size of queue pair for request/negotiation. uint64 mode; // Mode of shutdown for shutdown. VMCIHandle handle; // Queue pair handle once size negotiated. VSockWaitingInfo wait; // Information provided for wait notifications. } u; } VSockPacket; MY_ASSERTS(VSockPacketAsserts, ASSERT_ON_COMPILE(sizeof (VSockPacket) == 56); ) /* *----------------------------------------------------------------------------- * * VSockPacket_Init -- * * Initialize the given packet. The packet version is set and the fields * are filled out. Reserved fields are cleared. * * Results: * None. * * Side effects: * None. 
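 *
 * For example, a connection request proposing a queue pair size could be
 * built as in the following sketch (arguments not used by the chosen
 * type are simply ignored; VMCI_INVALID_HANDLE stands in for the unused
 * handle and is an assumption about the VMCI definitions):
 *
 *    VSockPacket pkt;
 *
 *    VSockPacket_Init(&pkt, &vsk->localAddr, &vsk->remoteAddr,
 *                     VSOCK_PACKET_TYPE_REQUEST, vsk->queuePairSize,
 *                     0, NULL, VMCI_INVALID_HANDLE);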
* *----------------------------------------------------------------------------- */ static INLINE void VSockPacket_Init(VSockPacket *pkt, // OUT struct sockaddr_vm *src, // IN struct sockaddr_vm *dst, // IN uint8 type, // IN uint64 size, // IN uint64 mode, // IN VSockWaitingInfo *wait, // IN VMCIHandle handle) // IN { ASSERT(pkt); VSOCK_ADDR_NOFAMILY_ASSERT(src); VSOCK_ADDR_NOFAMILY_ASSERT(dst); pkt->dg.src = VMCI_MAKE_HANDLE(src->svm_cid, VSOCK_PACKET_RID); pkt->dg.dst = VMCI_MAKE_HANDLE(dst->svm_cid, VSOCK_PACKET_RID); pkt->dg.payloadSize = sizeof *pkt - sizeof pkt->dg; pkt->version = VSOCK_PACKET_VERSION; pkt->type = type; pkt->srcPort = src->svm_port; pkt->dstPort = dst->svm_port; VSockOS_ClearMemory(&pkt->_reserved1, sizeof pkt->_reserved1); VSockOS_ClearMemory(&pkt->_reserved2, sizeof pkt->_reserved2); switch (pkt->type) { case VSOCK_PACKET_TYPE_INVALID: pkt->u.size = 0; break; case VSOCK_PACKET_TYPE_REQUEST: case VSOCK_PACKET_TYPE_NEGOTIATE: pkt->u.size = size; break; case VSOCK_PACKET_TYPE_OFFER: case VSOCK_PACKET_TYPE_ATTACH: pkt->u.handle = handle; break; case VSOCK_PACKET_TYPE_WROTE: case VSOCK_PACKET_TYPE_READ: case VSOCK_PACKET_TYPE_RST: pkt->u.size = 0; break; case VSOCK_PACKET_TYPE_SHUTDOWN: pkt->u.mode = mode; break; case VSOCK_PACKET_TYPE_WAITING_READ: case VSOCK_PACKET_TYPE_WAITING_WRITE: ASSERT(wait); VSockOS_Memcpy(&pkt->u.wait, wait, sizeof pkt->u.wait); break; } VSOCK_PACKET_ASSERT(pkt); } /* *----------------------------------------------------------------------------- * * VSockPacket_Validate -- * * Validate the given packet. * * Results: * 0 on success, EFAULT if the address is invalid, EINVAL if the packet * fields are invalid. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int32 VSockPacket_Validate(VSockPacket *pkt) { int32 err = EINVAL; if (NULL == pkt) { err = EFAULT; goto exit; } if (VMCI_HANDLE_INVALID(pkt->dg.src)) { goto exit; } if (VMCI_HANDLE_INVALID(pkt->dg.dst)) { goto exit; } if (VMCI_INVALID_ID == pkt->dstPort || VMCI_INVALID_ID == pkt->srcPort) { goto exit; } if (VSOCK_PACKET_VERSION != pkt->version) { goto exit; } if (0 != pkt->_reserved1 || 0 != pkt->_reserved2) { goto exit; } switch (pkt->type) { case VSOCK_PACKET_TYPE_INVALID: if (0 != pkt->u.size) { goto exit; } break; case VSOCK_PACKET_TYPE_REQUEST: case VSOCK_PACKET_TYPE_NEGOTIATE: if (0 == pkt->u.size) { goto exit; } break; case VSOCK_PACKET_TYPE_OFFER: case VSOCK_PACKET_TYPE_ATTACH: if (VMCI_HANDLE_INVALID(pkt->u.handle)) { goto exit; } break; case VSOCK_PACKET_TYPE_WROTE: case VSOCK_PACKET_TYPE_READ: case VSOCK_PACKET_TYPE_RST: if (0 != pkt->u.size) { goto exit; } break; } err = 0; exit: return sockerr2err(err); } /* *----------------------------------------------------------------------------- * * VSockPacket_GetAddresses -- * * Get the local and remote addresses from the given packet. * * Results: * None * * Side effects: * None. 
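 *
 * Note the deliberate swap performed below: this runs on the receive
 * path, so the packet's destination becomes the local address and its
 * source becomes the remote address. A sketch:
 *
 *    struct sockaddr_vm local, remote;
 *
 *    VSockPacket_GetAddresses(pkt, &local, &remote);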
* *----------------------------------------------------------------------------- */ static INLINE void VSockPacket_GetAddresses(VSockPacket *pkt, // IN struct sockaddr_vm *local, // OUT struct sockaddr_vm *remote) // OUT { VSOCK_PACKET_ASSERT(pkt); VSockAddr_Init(local, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.dst), pkt->dstPort); VSockAddr_Init(remote, VMCI_HANDLE_TO_CONTEXT_ID(pkt->dg.src), pkt->srcPort); } #endif // _VSOCK_PACKET_H_ vsock-only/linux/vsockVmci.h0000444000000000000000000000462712025726714015151 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockVmci.h -- * * VSockets VMCI constants, types and functions. */ #ifndef _VSOCK_VMCI_H_ #define _VSOCK_VMCI_H_ extern VMCIId VMCI_GetContextID(void); /* *----------------------------------------------------------------------------- * * VSockVmci_IsLocal -- * * Determine if the given handle points to the local context. * * Results: * TRUE if the given handle is for the local context, FALSE otherwise. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE Bool VSockVmci_IsLocal(VMCIHandle handle) // IN { return VMCI_GetContextID() == VMCI_HANDLE_TO_CONTEXT_ID(handle); } /* *---------------------------------------------------------------------------- * * VSockVmci_ErrorToVSockError -- * * Converts from a VMCI error code to a VSock error code. * * Results: * Appropriate error code. * * Side effects: * None. * *---------------------------------------------------------------------------- */ static INLINE int32 VSockVmci_ErrorToVSockError(int32 vmciError) // IN { int32 err; switch (vmciError) { case VMCI_ERROR_NO_MEM: #if defined(_WIN32) err = ENOBUFS; #else // _WIN32 err = ENOMEM; #endif // _WIN32 break; case VMCI_ERROR_DUPLICATE_ENTRY: err = EADDRINUSE; break; case VMCI_ERROR_NO_RESOURCES: err = ENOBUFS; break; case VMCI_ERROR_INVALID_ARGS: case VMCI_ERROR_INVALID_RESOURCE: default: err = EINVAL; } return sockerr2err(err); } #endif // _VSOCK_VMCI_H_ vsock-only/linux/vsockCommon.h0000444000000000000000000000660612025726714015502 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockCommon.h -- * * VSockets common constants, types and functions. */ #ifndef _VSOCK_COMMON_H_ #define _VSOCK_COMMON_H_ #if defined(_WIN32) # define VMCI_SOCKETS_AF_VALUE 28 # if defined(WINNT_DDK) # define _WIN2K_COMPAT_SLIST_USAGE /* * wdm.h has to come first, otherwise NTDDI_VERSION gets all confused * and we start pulling in the wrong versions of the Ke() routines. */ # include # include # include /* * Using ntifs.h for these functions does not play nicely with having * wdm.h first. So rather than include that header, we pull these in * directly. */ NTKERNELAPI HANDLE PsGetCurrentProcessId(VOID); NTKERNELAPI NTSTATUS PsSetCreateProcessNotifyRoutine( PCREATE_PROCESS_NOTIFY_ROUTINE, BOOLEAN); NTSYSAPI NTSTATUS NTAPI ZwWaitForSingleObject(HANDLE, BOOLEAN, PLARGE_INTEGER); # define _INC_WINDOWS # include "vmci_queue_pair.h" /* In the kernel we can't call into the provider. */ # define VMCISock_GetAFValue() VMCI_SOCKETS_AF_VALUE # else // WINNT_DDK # include # endif // WINNT_DDK # define Uint64ToPtr(_ui) ((void *)(uint64)(_ui)) # define PtrToUint64(_p) ((uint64)(_p)) #else #if defined(VMKERNEL) # include "uwvmkAPI.h" # define VMCI_SOCKETS_AF_VALUE AF_VMCI /* Defined in uwvmkAPI.h. */ /* The address family is fixed in the vmkernel. */ # define VMCISock_GetAFValue() VMCI_SOCKETS_AF_VALUE # include "vmci_queue_pair_vmk.h" # define Uint64ToPtr(_ui) ((void *)(uint64)(_ui)) # define PtrToUint64(_p) ((uint64)(_p)) #else #if defined(linux) # if defined(__KERNEL__) /* Include compat_page.h now so PAGE_SIZE and friends don't get redefined. */ # include "driver-config.h" # include "compat_page.h" # if defined(VMX86_TOOLS) # include "vmci_queue_pair.h" # endif /* * In the kernel we call back into af_vsock.c to get the address family * being used. Otherwise an ioctl(2) is performed (see vmci_sockets.h). */ extern int VSockVmci_GetAFValue(void); # define VMCISock_GetAFValue() VSockVmci_GetAFValue() # endif #endif // linux #endif // VMKERNEL #endif // _WIN32 #include "vmware.h" #include "vmware_pack_init.h" #include "vmci_defs.h" #include "vmci_call_defs.h" #include "vmci_sockets.h" #include "vsockAddr.h" #include "vsockSocketWrapper.h" /* Memory allocation flags. */ #define VSOCK_MEMORY_NORMAL 0 #define VSOCK_MEMORY_ATOMIC (1 << 0) #define VSOCK_MEMORY_NONPAGED (1 << 1) #endif // _VSOCK_COMMON_H_ vsock-only/linux/vsockAddr.h0000444000000000000000000000364212025726714015121 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockAddr.h -- * * VSockets address constants, types and functions. */ #ifndef _VSOCK_ADDR_H_ #define _VSOCK_ADDR_H_ /* Assert that the given address is valid. */ #define VSOCK_ADDR_ASSERT(_a) \ ASSERT(0 == VSockAddr_Validate((_a))) #define VSOCK_ADDR_NOFAMILY_ASSERT(_a) \ ASSERT(0 == VSockAddr_ValidateNoFamily((_a))) void VSockAddr_Init(struct sockaddr_vm *addr, uint32 cid, uint32 port); void VSockAddr_InitNoFamily(struct sockaddr_vm *addr, uint32 cid, uint32 port); int32 VSockAddr_Validate(const struct sockaddr_vm *addr); int32 VSockAddr_ValidateNoFamily(const struct sockaddr_vm *addr); Bool VSockAddr_Bound(struct sockaddr_vm *addr); void VSockAddr_Unbind(struct sockaddr_vm *addr); Bool VSockAddr_EqualsAddr(struct sockaddr_vm *addr, struct sockaddr_vm *other); Bool VSockAddr_EqualsHandlePort(struct sockaddr_vm *addr, VMCIHandle handle, uint32 port); int32 VSockAddr_Cast(const struct sockaddr *addr, int32 len, struct sockaddr_vm **outAddr); Bool VSockAddr_SocketContext(VMCIId cid); #endif // _VSOCK_ADDR_H_ vsock-only/linux/vsockSocketWrapper.h0000444000000000000000000001456712025726714017050 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockSocketWrapper.h -- * * Socket wrapper constants, types and functions. */ #ifndef _VSOCK_SOCKET_WRAPPER_H_ #define _VSOCK_SOCKET_WRAPPER_H_ /* * Socket states and flags. Note that MSG_WAITALL is only supported on 2K3, * XP-SP2 and above. Since we currently build for 2K to maintain backwards * compatibility, it will always be 0. */ #if defined(_WIN32) # define MSG_DONTWAIT 0 # define MSG_NOSIGNAL 0 # if (_WIN32_WINNT < 0x0502) # define MSG_WAITALL 0 # endif #endif #if defined(_WIN32) || defined(VMKERNEL) # define SS_FREE 0 # define SS_UNCONNECTED 1 # define SS_CONNECTING 2 # define SS_CONNECTED 3 # define SS_DISCONNECTING 4 # define RCV_SHUTDOWN 1 # define SEND_SHUTDOWN 2 # define SHUTDOWN_MASK 3 #endif // _WIN32 || VMKERNEL /* * Error codes. 
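 *
 * Note the convention established below: sockerr2err() maps a platform
 * error to the value the socket layer should return. On Linux positive
 * errno values become negative kernel-style returns (e.g. EINVAL maps
 * to -EINVAL), on Windows results are normalized to positive Winsock
 * codes, and on VMKERNEL the native status values pass through
 * unchanged.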
*/ #if defined(_WIN32) # if !defined(EINTR) # define EINTR WSAEINTR # endif # if !defined(EACCES) # define EACCES WSAEACCES # endif # if !defined(EFAULT) # define EFAULT WSAEFAULT # endif # if !defined(EINVAL) # define EINVAL WSAEINVAL # endif # define EWOULDBLOCK WSAEWOULDBLOCK # define EINPROGRESS WSAEINPROGRESS # define EALREADY WSAEALREADY # define ENOTSOCK WSAENOTSOCK # define EDESTADDRREQ WSAEDESTADDRREQ # define EMSGSIZE WSAEMSGSIZE # define EPROTOTYPE WSAEPROTOTYPE # define ENOPROTOOPT WSAENOPROTOOPT # define EPROTONOSUPPORT WSAEPROTONOSUPPORT # define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT # define EOPNOTSUPP WSAEOPNOTSUPP # define EPFNOSUPPORT WSAEPFNOSUPPORT # define EAFNOSUPPORT WSAEAFNOSUPPORT # define EADDRINUSE WSAEADDRINUSE # define EADDRNOTAVAIL WSAEADDRNOTAVAIL # define ENETDOWN WSAENETDOWN # define ENETUNREACH WSAENETUNREACH # define ENETRESET WSAENETRESET # define ECONNABORTED WSAECONNABORTED # define ECONNRESET WSAECONNRESET # define ENOBUFS WSAENOBUFS # define EISCONN WSAEISCONN # define ENOTCONN WSAENOTCONN # define ESHUTDOWN WSAESHUTDOWN # define ETIMEDOUT WSAETIMEDOUT # define ECONNREFUSED WSAECONNREFUSED # define EHOSTDOWN WSAEHOSTDOWN # define EHOSTUNREACH WSAEHOSTUNREACH # define __ELOCALSHUTDOWN ESHUTDOWN # define __EPEERSHUTDOWN ECONNABORTED #else #if defined(VMKERNEL) # define EINTR VMK_WAIT_INTERRUPTED # define EACCES VMK_NOACCESS # define EFAULT VMK_INVALID_ADDRESS # define EINVAL VMK_FAILURE # define EWOULDBLOCK VMK_WOULD_BLOCK # define EINPROGRESS VMK_EINPROGRESS # define EALREADY VMK_EALREADY # define ENOTSOCK VMK_NOT_A_SOCKET # define EDESTADDRREQ VMK_EDESTADDRREQ # define EMSGSIZE VMK_LIMIT_EXCEEDED # define EPROTOTYPE VMK_NOT_SUPPORTED # define ENOPROTOOPT VMK_NOT_SUPPORTED # define EPROTONOSUPPORT VMK_EPROTONOSUPPORT # define ESOCKTNOSUPPORT VMK_NOT_SUPPORTED # define EOPNOTSUPP VMK_EOPNOTSUPP # define EPFNOSUPPORT VMK_ADDRFAM_UNSUPP # define EAFNOSUPPORT VMK_ADDRFAM_UNSUPP # define EADDRINUSE VMK_EADDRINUSE # define EADDRNOTAVAIL VMK_EADDRNOTAVAIL # define ENETDOWN VMK_ENETDOWN # define ENETUNREACH VMK_ENETUNREACH # define ENETRESET VMK_ENETRESET # define ECONNABORTED VMK_ECONNABORTED # define ECONNRESET VMK_ECONNRESET # define ENOBUFS VMK_NO_MEMORY # define ENOMEM VMK_NO_MEMORY # define EISCONN VMK_ALREADY_CONNECTED # define ENOTCONN VMK_ENOTCONN # define ESHUTDOWN VMK_ESHUTDOWN # define ETIMEDOUT VMK_TIMEOUT # define ECONNREFUSED VMK_ECONNREFUSED # define EHOSTDOWN VMK_EHOSTDOWN # define EHOSTUNREACH VMK_EHOSTUNREACH # define EPIPE VMK_BROKEN_PIPE # define __ELOCALSHUTDOWN EPIPE # define __EPEERSHUTDOWN EPIPE #endif // VMKERNEL #endif // _WIN32 #if defined(_WIN32) # define sockerr() WSAGetLastError() # define sockerr2err(_e) (((_e) < 0) ? -(_e) : (_e)) # define sockcleanup() WSACleanup() typedef uint32 socklen_t; typedef uint32 in_addr_t; #else // _WIN32 #if defined(VMKERNEL) # define SOCKET_ERROR (-1) # define INVALID_SOCKET ((SOCKET) -1) # define sockerr() errno # define sockerr2err(_e) (_e) # define sockcleanup() do {} while (0) # define closesocket(_s) close((_s)) typedef int32 SOCKET; #else #if defined(linux) # define SOCKET_ERROR (-1) # define INVALID_SOCKET ((SOCKET) -1) # define sockerr() errno # define sockerr2err(_e) (((_e) > 0) ? -(_e) : (_e)) # define sockcleanup() do {} while (0) # define closesocket(_s) close((_s)) typedef int32 SOCKET; #endif // linux #endif // VMKERNEL #endif // _WIN32 /* * There is no SS_XXX state equivalent to TCP_LISTEN. 
Linux does have a flag * __SO_ACCEPTCON which some of the socket implementations use, but it does * not fit in the state field (although it is sometimes incorrectly used that * way). So we define our own listen state here for all platforms. */ #define SS_LISTEN 255 /* * Initialize sockets. This is really for platforms that do not have * on-by-default socket implementations like Windows. */ int sockinit(void); #endif // _VSOCK_SOCKET_WRAPPER_H_ vsock-only/linux/vsockAddr.c0000444000000000000000000002231612025726714015113 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vsockAddr.c -- * * VSockets address implementation. */ /* * These includes come before vsockCommon.h to ensure that VMware's ASSERT * macro is used instead of Linux's irda.h definition. */ #if defined(linux) && !defined(VMKERNEL) # if defined(__KERNEL__) # include "driver-config.h" # include # include "compat_sock.h" # else # include # include # endif #elif defined(VMKERNEL) # include "vm_libc.h" # include "return_status.h" #endif #include "vsockCommon.h" /* *----------------------------------------------------------------------------- * * VSockAddr_Init -- * * Initialize the given address with the given context id and port. This * will clear the address, set the correct family, and add the given * values. * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ void VSockAddr_Init(struct sockaddr_vm *addr, // OUT uint32 cid, // IN uint32 port) // IN { ASSERT(addr); memset(addr, 0, sizeof *addr); VSockAddr_InitNoFamily(addr, cid, port); addr->svm_family = VMCISock_GetAFValue(); VSOCK_ADDR_ASSERT(addr); } /* *----------------------------------------------------------------------------- * * VSockAddr_InitNoFamily -- * * Initialize the given address with the given context id and port. This * will clear the address and add the given values, but not set the * family. Note that this is needed because in some places we don't want * to re-register the address family in the Linux kernel and all we need * is to check the context id and port. * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ void VSockAddr_InitNoFamily(struct sockaddr_vm *addr, // OUT uint32 cid, // IN uint32 port) // IN { ASSERT(addr); memset(addr, 0, sizeof *addr); addr->svm_cid = cid; addr->svm_port = port; VSOCK_ADDR_NOFAMILY_ASSERT(addr); } /* *----------------------------------------------------------------------------- * * VSockAddr_Validate -- * * Try to validate the given address. The address must not be null and * must have the correct address family. Any reserved fields must be * zero. 
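 *
 *      Illustrative sketch (not part of the original comment): an address
 *      set up with VSockAddr_Init() above always passes validation:
 *
 *         struct sockaddr_vm addr;
 *
 *         VSockAddr_Init(&addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
 *         ASSERT(0 == VSockAddr_Validate(&addr));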
 *
 * Results:
 *      0 on success, EFAULT if the address is null, EAFNOSUPPORT if the
 *      address is of the wrong family, and EINVAL if the reserved fields are
 *      not zero.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

int32
VSockAddr_Validate(const struct sockaddr_vm *addr)    // IN
{
   int32 err;

   if (NULL == addr) {
      err = EFAULT;
      goto exit;
   }

   if (VMCISock_GetAFValue() != addr->svm_family) {
      err = EAFNOSUPPORT;
      goto exit;
   }

   if (0 != addr->svm_zero[0]) {
      err = EINVAL;
      goto exit;
   }

   err = 0;

exit:
   return sockerr2err(err);
}


/*
 *-----------------------------------------------------------------------------
 *
 * VSockAddr_ValidateNoFamily --
 *
 *      Try to validate the given address.  The address must not be null and
 *      any reserved fields must be zero, but the address family is not
 *      checked.  Note that this is needed because in some places we don't
 *      want to re-register the address family with the Linux kernel.
 *
 *      Also note that we duplicate the code from _Validate() since we want to
 *      retain the ordering of the error return values.
 *
 * Results:
 *      0 on success, EFAULT if the address is null and EINVAL if the reserved
 *      fields are not zero.
 *
 * Side effects:
 *      None.
 *
 *-----------------------------------------------------------------------------
 */

int32
VSockAddr_ValidateNoFamily(const struct sockaddr_vm *addr)    // IN
{
   int32 err;

   if (NULL == addr) {
      err = EFAULT;
      goto exit;
   }

   if (0 != addr->svm_zero[0]) {
      err = EINVAL;
      goto exit;
   }

   err = 0;

exit:
   return sockerr2err(err);
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockAddr_Bound --
 *
 *      Determines whether the provided address is bound.
 *
 * Results:
 *      TRUE if the address structure is bound, FALSE otherwise.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------------
 */

Bool
VSockAddr_Bound(struct sockaddr_vm *addr)    // IN: socket address to check
{
   ASSERT(addr);
   return addr->svm_cid != VMADDR_CID_ANY && addr->svm_port != VMADDR_PORT_ANY;
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockAddr_Unbind --
 *
 *      Unbind the given address.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------------
 */

void
VSockAddr_Unbind(struct sockaddr_vm *addr)    // IN
{
   VSockAddr_Init(addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockAddr_EqualsAddr --
 *
 *      Determine if the given addresses are equal.
 *
 * Results:
 *      TRUE if the addresses are equal, FALSE otherwise.
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------------
 */

Bool
VSockAddr_EqualsAddr(struct sockaddr_vm *addr,     // IN
                     struct sockaddr_vm *other)    // IN
{
   /*
    * XXX We don't ASSERT on the family here since this is used on the receive
    * path in Linux and we don't want to re-register the address family
    * unnecessarily.
    */
   VSOCK_ADDR_NOFAMILY_ASSERT(addr);
   VSOCK_ADDR_NOFAMILY_ASSERT(other);
   return (addr->svm_cid == other->svm_cid &&
           addr->svm_port == other->svm_port);
}


/*
 *----------------------------------------------------------------------------
 *
 * VSockAddr_EqualsHandlePort --
 *
 *      Determines if the given address matches the given handle and port.
 *
 * Results:
 *      TRUE if the address matches the handle and port, FALSE otherwise.
 *
 * Side effects:
 *      None.
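 *
 *      Illustrative sketch (not from the original comment; the handle and
 *      port values are hypothetical):
 *
 *         struct sockaddr_vm addr;
 *
 *         VSockAddr_Init(&addr, VMCI_HANDLE_TO_CONTEXT_ID(handle), 4000);
 *         ASSERT(VSockAddr_EqualsHandlePort(&addr, handle, 4000));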
* *---------------------------------------------------------------------------- */ Bool VSockAddr_EqualsHandlePort(struct sockaddr_vm *addr, // IN VMCIHandle handle, // IN uint32 port) // IN { VSOCK_ADDR_ASSERT(addr); return (addr->svm_cid == VMCI_HANDLE_TO_CONTEXT_ID(handle) && addr->svm_port == port); } /* *----------------------------------------------------------------------------- * * VSockAddr_Cast -- * * Try to cast the given generic address to a VM address. The given * length must match that of a VM address and the address must be valid. * The "outAddr" parameter contains the address if successful. * * Results: * 0 on success, EFAULT if the length is too small. See * VSockAddr_Validate() for other possible return codes. * * Side effects: * None. * *----------------------------------------------------------------------------- */ int32 VSockAddr_Cast(const struct sockaddr *addr, // IN int32 len, // IN struct sockaddr_vm **outAddr) // OUT { int32 err; ASSERT(outAddr); if (len < sizeof **outAddr) { err = EFAULT; goto exit; } *outAddr = (struct sockaddr_vm *) addr; err = VSockAddr_Validate(*outAddr); exit: return sockerr2err(err); } /* *---------------------------------------------------------------------------- * * VSockAddr_SocketContext -- * * Determines whether the provided context id represents a context that * contains socket endpoints. * * Results: * TRUE if the context does have socket endpoints, FALSE otherwise. * * Side effects: * None. * *---------------------------------------------------------------------------- */ Bool VSockAddr_SocketContext(uint32 cid) // IN { uint32 i; VMCIId nonSocketContexts[] = { VMCI_HYPERVISOR_CONTEXT_ID, VMCI_WELL_KNOWN_CONTEXT_ID, }; ASSERT_ON_COMPILE(sizeof cid == sizeof *nonSocketContexts); for (i = 0; i < ARRAYSIZE(nonSocketContexts); i++) { if (cid == nonSocketContexts[i]) { return FALSE; } } return TRUE; } vsock-only/linux/driverLog.c0000444000000000000000000001111212025726714015120 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * driverLog.c -- * * Common logging functions for Linux kernel modules. */ #include "driver-config.h" #include "compat_kernel.h" #include "compat_sched.h" #include #include "driverLog.h" #define LINUXLOG_BUFFER_SIZE 1024 static const char *driverLogPrefix = ""; /* * vsnprintf was born in 2.4.10. Fall back on vsprintf if we're * an older kernel. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 10) # define vsnprintf(str, size, fmt, args) vsprintf(str, fmt, args) #endif /* *---------------------------------------------------------------------------- * * DriverLog_Init -- * * Initializes the Linux logging. * * Results: * None. * * Side effects: * None. 
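 *
 *      Illustrative usage sketch (not from the original comment; the prefix
 *      and messages are hypothetical):
 *
 *         DriverLog_Init("VSockExample");
 *         Log("module loaded\n");
 *         Warning("unexpected packet type %d\n", type);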
* *---------------------------------------------------------------------------- */ void DriverLog_Init(const char *prefix) // IN { driverLogPrefix = prefix ? prefix : ""; } /* *---------------------------------------------------------------------- * * DriverLogPrint -- * * Log error message from a Linux module. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ static void DriverLogPrint(const char *level, // IN: KERN_* constant const char *fmt, // IN: error format string va_list args) // IN: arguments for format string { static char staticBuf[LINUXLOG_BUFFER_SIZE]; char stackBuf[128]; va_list args2; const char *buf; /* * By default, use a small buffer on the stack (thread safe). If it is too * small, fall back to a larger static buffer (not thread safe). */ va_copy(args2, args); if (vsnprintf(stackBuf, sizeof stackBuf, fmt, args2) < sizeof stackBuf) { buf = stackBuf; } else { vsnprintf(staticBuf, sizeof staticBuf, fmt, args); buf = staticBuf; } va_end(args2); printk("%s%s[%d]: %s", level, driverLogPrefix, current->pid, buf); } /* *---------------------------------------------------------------------- * * Warning -- * * Warning messages from kernel module: logged into kernel log * as warnings. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ void Warning(const char *fmt, ...) // IN: warning format string { va_list args; va_start(args, fmt); DriverLogPrint(KERN_WARNING, fmt, args); va_end(args); } /* *---------------------------------------------------------------------- * * Log -- * * Log messages from kernel module: logged into kernel log * as debug information. * * Results: * None. * * Side effects: * None. * *---------------------------------------------------------------------- */ void Log(const char *fmt, ...) // IN: log format string { va_list args; /* * Use the kernel log with at least a KERN_DEBUG level * so it doesn't garbage the screen at (re)boot time on RedHat 6.0. */ va_start(args, fmt); DriverLogPrint(KERN_DEBUG, fmt, args); va_end(args); } /* *---------------------------------------------------------------------- * * Panic -- * * ASSERTION failures and Panics from kernel module get here. * Message is logged to the kernel log and on console. * * Results: * None. * * Side effects: * Never returns * *---------------------------------------------------------------------- */ void Panic(const char *fmt, ...) // IN: panic format string { va_list args; va_start(args, fmt); DriverLogPrint(KERN_EMERG, fmt, args); va_end(args); #ifdef BUG BUG(); #else /* Should die with %cs unwritable, or at least with page fault. */ asm volatile("movb $0, %cs:(0)"); #endif while (1); } vsock-only/linux/driverLog.h0000444000000000000000000000223212025726714015130 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * driverLog.h -- * * Logging functions for Linux kernel modules. */ #ifndef __DRIVERLOG_H__ #define __DRIVERLOG_H__ /* * The definitions of Warning(), Log(), and Panic() come from vm_assert.h for * consistency. */ #include "vm_assert.h" void DriverLog_Init(const char *prefix); #endif /* __DRIVERLOG_H__ */ vsock-only/linux/vmci_sockets.h0000444000000000000000000001730512025726714015673 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmci_sockets.h -- * * VMCI sockets public constants and types. */ #ifndef _VMCI_SOCKETS_H_ #define _VMCI_SOCKETS_H_ #if defined(_WIN32) # include #else // _WIN32 #if defined(linux) && !defined(VMKERNEL) # if defined(__KERNEL__) # include "driver-config.h" # include "compat_sock.h" # else # include # endif // __KERNEL__ #endif // linux && !VMKERNEL #endif /* * We use the same value for the AF family and the socket option * level. To set options, use the value of VMCISock_GetAFValue for * 'level' and these constants for the optname. */ #define SO_VMCI_BUFFER_SIZE 0 #define SO_VMCI_BUFFER_MIN_SIZE 1 #define SO_VMCI_BUFFER_MAX_SIZE 2 /* * The VMCI sockets address equivalents of INADDR_ANY. The first works for * the svm_cid (context id) field of the address structure below and indicates * the current guest (or the host, if running outside a guest), while the * second indicates any available port. */ #define VMADDR_CID_ANY ((unsigned int) -1) #define VMADDR_PORT_ANY ((unsigned int) -1) #if defined(_WIN32) || defined(VMKERNEL) typedef unsigned short sa_family_t; #endif // _WIN32 #if defined(VMKERNEL) struct sockaddr { sa_family_t sa_family; char sa_data[14]; }; #endif /* * Address structure for VSockets VMCI sockets. The address family should be * set to AF_VMCI. */ struct sockaddr_vm { sa_family_t svm_family; // AF_VMCI. unsigned short svm_reserved1; // Reserved. unsigned int svm_port; // Port. unsigned int svm_cid; // Context id. unsigned char svm_zero[sizeof(struct sockaddr) - // Same size as sockaddr. 
sizeof(sa_family_t) - sizeof(unsigned short) - sizeof(unsigned int) - sizeof(unsigned int)]; }; #if defined(_WIN32) # if !defined(WINNT_DDK) # include # define VMCI_SOCKETS_DEVICE TEXT("\\\\.\\VMCI") # define VMCI_SOCKETS_GET_AF_VALUE 0x81032068 # define VMCI_SOCKETS_GET_LOCAL_CID 0x8103206c static __inline int VMCISock_GetAFValue(void) { HANDLE device = CreateFile(VMCI_SOCKETS_DEVICE, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL); if (INVALID_HANDLE_VALUE != device) { DWORD ioReturn; int afvalue; if (DeviceIoControl(device, VMCI_SOCKETS_GET_AF_VALUE, &afvalue, sizeof afvalue, &afvalue, sizeof afvalue, &ioReturn, NULL)) { CloseHandle(device); device = INVALID_HANDLE_VALUE; return afvalue; } CloseHandle(device); device = INVALID_HANDLE_VALUE; } return -1; } static __inline unsigned int VMCISock_GetLocalCID(void) { HANDLE device = CreateFile(VMCI_SOCKETS_DEVICE, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL); if (INVALID_HANDLE_VALUE != device) { DWORD ioReturn; unsigned int cid; if (DeviceIoControl(device, VMCI_SOCKETS_GET_LOCAL_CID, &cid, sizeof cid, &cid, sizeof cid, &ioReturn, NULL)) { CloseHandle(device); device = INVALID_HANDLE_VALUE; return cid; } CloseHandle(device); device = INVALID_HANDLE_VALUE; } return VMADDR_CID_ANY; } # endif // WINNT_DDK #else // _WIN32 #if defined(linux) && !defined(VMKERNEL) # ifndef __KERNEL__ # include # include # include # include # include # include # define VMCI_SOCKETS_DEFAULT_DEVICE "/dev/vsock" # define VMCI_SOCKETS_CLASSIC_ESX_DEVICE "/vmfs/devices/char/vsock/vsock" # define IOCTL_VMCI_SOCKETS_GET_AF_VALUE 1976 # define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID 1977 /* *---------------------------------------------------------------------------- * * VMCISock_GetAFValue and VMCISock_GetAFValueFd -- * * Returns the value to be used for the VMCI Sockets address family. * This value should be used as the domain argument to socket(2) (when * you might otherwise use AF_INET). For VMCI Socket-specific options, * this value should also be used for the level argument to * setsockopt(2) (when you might otherwise use SOL_TCP). * * This function leaves its descriptor to the vsock device open so that * the socket implementation knows that the socket family is still in * use. We do this because we register our address family with the * kernel on-demand and need a notification to unregister the address * family. * * For many programs this behavior is sufficient as is, but some may * wish to close this descriptor once they are done with VMCI Sockets. * For these programs, we provide a VMCISock_GetAFValueFd() that takes * an optional outFd argument. This value can be provided to * VMCISock_ReleaseAFValueFd() only after the program no longer will * use VMCI Sockets. Note that outFd is only valid in cases where * VMCISock_GetAFValueFd() returns a non-negative value. * * Results: * The address family value to use on success, negative error code on * failure. 
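 *
 *      Illustrative usage sketch (not part of the original comment):
 *
 *         int fd;
 *         int af = VMCISock_GetAFValueFd(&fd);
 *
 *         if (af >= 0) {
 *            int sock = socket(af, SOCK_STREAM, 0);
 *            ... bind(2)/connect(2) with a struct sockaddr_vm ...
 *            close(sock);
 *            VMCISock_ReleaseAFValueFd(fd);
 *         }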
* *---------------------------------------------------------------------------- */ static inline int VMCISock_GetAFValueFd(int *outFd) { int fd; int family; fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDWR); if (fd < 0) { fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDWR); if (fd < 0) { return -1; } } if (ioctl(fd, IOCTL_VMCI_SOCKETS_GET_AF_VALUE, &family) < 0) { family = -1; } if (family < 0) { close(fd); } else if (outFd) { *outFd = fd; } return family; } static inline int VMCISock_GetAFValue(void) { return VMCISock_GetAFValueFd(NULL); } static inline void VMCISock_ReleaseAFValueFd(int fd) { if (fd >= 0) { close(fd); } } static inline unsigned int VMCISock_GetLocalCID(void) { int fd; unsigned int contextId; fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDWR); if (fd < 0) { fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDWR); if (fd < 0) { return VMADDR_CID_ANY; } } if (ioctl(fd, IOCTL_VMCI_SOCKETS_GET_LOCAL_CID, &contextId) < 0) { contextId = VMADDR_CID_ANY; } close(fd); return contextId; } # endif // __KERNEL__ #endif // linux && !VMKERNEL #endif // _WIN32 #endif // _VMCI_SOCKETS_H_ vsock-only/include/0000755000000000000000000000000012025726714013311 5ustar rootrootvsock-only/include/vmware.h0000444000000000000000000000354112025726714014764 0ustar rootroot/********************************************************* * Copyright (C) 2003 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmware.h -- * * Standard include file for VMware source code. */ #ifndef _VMWARE_H_ #define _VMWARE_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMNIXMOD #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_VMK_MODULE #define INCLUDE_ALLOW_DISTRIBUTE #include "includeCheck.h" #include "vm_basic_types.h" #include "vm_basic_defs.h" #include "vm_assert.h" /* * Global error codes. Currently used internally, but may be exported * to customers one day, like VM_E_XXX in vmcontrol_constants.h */ typedef enum VMwareStatus { VMWARE_STATUS_SUCCESS, /* success */ VMWARE_STATUS_ERROR, /* generic error */ VMWARE_STATUS_NOMEM, /* generic memory allocation error */ VMWARE_STATUS_INSUFFICIENT_RESOURCES, /* internal or system resource limit exceeded */ VMWARE_STATUS_INVALID_ARGS /* invalid arguments */ } VMwareStatus; #define VMWARE_SUCCESS(s) ((s) == VMWARE_STATUS_SUCCESS) #endif // ifndef _VMWARE_H_ vsock-only/include/vm_assert.h0000444000000000000000000002422012025726714015463 0ustar rootroot/********************************************************* * Copyright (C) 1998-2004 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vm_assert.h -- * * The basic assertion facility for all VMware code. * * For proper use, see * http://vmweb.vmware.com/~mts/WebSite/guide/programming/asserts.html */ #ifndef _VM_ASSERT_H_ #define _VM_ASSERT_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMNIXMOD #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_VMKDRIVERS #define INCLUDE_ALLOW_VMK_MODULE #define INCLUDE_ALLOW_DISTRIBUTE #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMIROM #include "includeCheck.h" // XXX not necessary except some places include vm_assert.h improperly #include "vm_basic_types.h" #include "vm_basic_defs.h" /* * XXX old file code */ #ifdef FILECODEINT #error "Don't define FILECODEINT. It is obsolete." #endif #ifdef FILECODE #error "Don't define FILECODE. It is obsolete." #endif /* * Panic and log functions */ EXTERN void Log(const char *fmt, ...) PRINTF_DECL(1, 2); EXTERN void Warning(const char *fmt, ...) PRINTF_DECL(1, 2); EXTERN NORETURN void Panic(const char *fmt, ...) PRINTF_DECL(1, 2); EXTERN void LogThrottled(uint32 *count, const char *fmt, ...) PRINTF_DECL(2, 3); EXTERN void WarningThrottled(uint32 *count, const char *fmt, ...) PRINTF_DECL(2, 3); /* DB family: messages which are parsed by logfile database system */ #define WarningDB Warning #define LogDB Log #define WarningThrottledDB WarningThrottled #define LogThrottledDB LogThrottled /* * Stress testing: redefine ASSERT_IFNOT() to taste */ #ifndef ASSERT_IFNOT #ifdef __cplusplus #define ASSERT_IFNOT(cond, panic) (UNLIKELY(!(cond)) ? (panic) : (void)0) #else #define ASSERT_IFNOT(cond, panic) (UNLIKELY(!(cond)) ? (panic) : 0) #endif #endif /* * Assert, panic, and log macros * * Some of these are redefined below undef !VMX86_DEBUG. * ASSERT() is special cased because of interaction with Windows DDK. 
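 *
 * Illustrative use (not from the original comment): ASSERT() below is only
 * active when VMX86_DEBUG or ASSERT_ALWAYS_AVAILABLE is defined, e.g.
 *
 *    ASSERT(addr != NULL);
 *    ASSERT_ON_COMPILE(sizeof (uint32) == 4);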
*/ #if defined VMX86_DEBUG || defined ASSERT_ALWAYS_AVAILABLE #undef ASSERT #define ASSERT(cond) \ ASSERT_IFNOT(cond, _ASSERT_PANIC(AssertAssert)) #endif #define ASSERT_BUG(bug, cond) \ ASSERT_IFNOT(cond, _ASSERT_PANIC_BUG(bug, AssertAssert)) #define ASSERT_BUG_DEBUGONLY(bug, cond) ASSERT_BUG(bug, cond) #define PANIC() _ASSERT_PANIC(AssertPanic) #define PANIC_BUG(bug) _ASSERT_PANIC_BUG(bug, AssertPanic) #define ASSERT_NOT_IMPLEMENTED(cond) \ ASSERT_IFNOT(cond, NOT_IMPLEMENTED()) #define ASSERT_NOT_IMPLEMENTED_BUG(bug, cond) \ ASSERT_IFNOT(cond, NOT_IMPLEMENTED_BUG(bug)) #define NOT_IMPLEMENTED() _ASSERT_PANIC(AssertNotImplemented) #define NOT_IMPLEMENTED_BUG(bug) _ASSERT_PANIC_BUG(bug, AssertNotImplemented) #define NOT_REACHED() _ASSERT_PANIC(AssertNotReached) #define NOT_REACHED_BUG(bug) _ASSERT_PANIC_BUG(bug, AssertNotReached) #define ASSERT_MEM_ALLOC(cond) \ ASSERT_IFNOT(cond, _ASSERT_PANIC(AssertMemAlloc)) #ifdef VMX86_DEVEL #define ASSERT_LENGTH(real, expected) \ ASSERT_IFNOT((real) == (expected), \ Panic(AssertLengthFmt, __FILE__, __LINE__, real, expected)) #else #define ASSERT_LENGTH(real, expected) ASSERT((real) == (expected)) #endif #ifdef VMX86_DEVEL #define ASSERT_DEVEL(cond) ASSERT(cond) #else #define ASSERT_DEVEL(cond) ((void) 0) #endif #define ASSERT_NO_INTERRUPTS() ASSERT(!INTERRUPTS_ENABLED()) #define ASSERT_HAS_INTERRUPTS() ASSERT(INTERRUPTS_ENABLED()) #define ASSERT_LOG_UNEXPECTED(bug, cond) \ (UNLIKELY(!(cond)) ? LOG_UNEXPECTED(bug) : 0) #ifdef VMX86_DEVEL #define LOG_UNEXPECTED(bug) \ Warning(AssertUnexpectedFmt, __FILE__, __LINE__, bug) #else #define LOG_UNEXPECTED(bug) \ Log(AssertUnexpectedFmt, __FILE__, __LINE__, bug) #endif #define ASSERT_NOT_TESTED(cond) (UNLIKELY(!(cond)) ? NOT_TESTED() : 0) #ifdef VMX86_DEVEL #define NOT_TESTED() Warning(AssertNotTestedFmt, __FILE__, __LINE__) #else #define NOT_TESTED() Log(AssertNotTestedFmt, __FILE__, __LINE__) #endif #define NOT_TESTED_ONCE() \ do { \ static Bool alreadyPrinted = FALSE; \ if (UNLIKELY(!alreadyPrinted)) { \ alreadyPrinted = TRUE; \ NOT_TESTED(); \ } \ } while (0) #define NOT_TESTED_1024() \ do { \ static uint16 count = 0; \ if (UNLIKELY(count == 0)) { NOT_TESTED(); } \ count = (count + 1) & 1023; \ } while (0) #define LOG_ONCE(_s) \ do { \ static Bool logged = FALSE; \ if (!logged) { \ Log _s; \ logged = TRUE; \ } \ } while (0) /* * Redefine macros that are only in debug versions */ #if !defined VMX86_DEBUG && !defined ASSERT_ALWAYS_AVAILABLE // { #undef ASSERT #define ASSERT(cond) ((void) 0) #undef ASSERT_BUG_DEBUGONLY #define ASSERT_BUG_DEBUGONLY(bug, cond) ((void) 0) #undef ASSERT_LENGTH #define ASSERT_LENGTH(real, expected) ((void) 0) /* * Expand NOT_REACHED() as appropriate for each situation. * * Mainly, we want the compiler to infer the same control-flow * information as it would from Panic(). Otherwise, different * compilation options will lead to different control-flow-derived * errors, causing some make targets to fail while others succeed. * * VC++ has the __assume() built-in function which we don't trust * (see bug 43485); gcc has no such construct; we just panic in * userlevel code. The monitor doesn't want to pay the size penalty * (measured at 212 bytes for the release vmm for a minimal infinite * loop; panic would cost even more) so it does without and lives * with the inconsistency. 
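 *
 * Illustrative use (not from the original comment; the case labels are
 * hypothetical):
 *
 *    switch (pktType) {
 *    case EXAMPLE_TYPE_A:
 *       ...
 *       break;
 *    default:
 *       NOT_REACHED();
 *    }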
*/ #ifdef VMM #undef NOT_REACHED #define NOT_REACHED() ((void) 0) #else // keep debug definition #endif #undef ASSERT_LOG_UNEXPECTED #define ASSERT_LOG_UNEXPECTED(bug, cond) ((void) 0) #undef LOG_UNEXPECTED #define LOG_UNEXPECTED(bug) ((void) 0) #undef ASSERT_NOT_TESTED #define ASSERT_NOT_TESTED(cond) ((void) 0) #undef NOT_TESTED #define NOT_TESTED() ((void) 0) #undef NOT_TESTED_ONCE #define NOT_TESTED_ONCE() ((void) 0) #undef NOT_TESTED_1024 #define NOT_TESTED_1024() ((void) 0) #endif // !VMX86_DEBUG } /* * Compile-time assertions. * * ASSERT_ON_COMPILE does not use the common * switch (0) { case 0: case (e): ; } trick because some compilers (e.g. MSVC) * generate code for it. * * The implementation uses both enum and typedef because the typedef alone is * insufficient; gcc allows arrays to be declared with non-constant expressions * (even in typedefs, where it makes no sense). */ #define ASSERT_ON_COMPILE(e) \ do { \ enum { AssertOnCompileMisused = ((e) ? 1 : -1) }; \ typedef char AssertOnCompileFailed[AssertOnCompileMisused]; \ } while (0) /* * To put an ASSERT_ON_COMPILE() outside a function, wrap it * in MY_ASSERTS(). The first parameter must be unique in * each .c file where it appears. For example, * * MY_ASSERTS(FS3_INT, * ASSERT_ON_COMPILE(sizeof(FS3_DiskLock) == 128); * ASSERT_ON_COMPILE(sizeof(FS3_DiskLockReserved) == DISK_BLOCK_SIZE); * ASSERT_ON_COMPILE(sizeof(FS3_DiskBlock) == DISK_BLOCK_SIZE); * ASSERT_ON_COMPILE(sizeof(Hardware_DMIUUID) == 16); * ) * * Caution: ASSERT() within MY_ASSERTS() is silently ignored. * The same goes for anything else not evaluated at compile time. */ #define MY_ASSERTS(name, assertions) \ static INLINE void name(void) { \ assertions \ } /* * Internal macros, functions, and strings * * The monitor wants to save space at call sites, so it has specialized * functions for each situation. User level wants to save on implementation * so it uses generic functions. */ #if !defined VMM || defined MONITOR_APP // { #define _ASSERT_PANIC(name) \ Panic(_##name##Fmt "\n", __FILE__, __LINE__) #define _ASSERT_PANIC_BUG(bug, name) \ Panic(_##name##Fmt " bugNr=%d\n", __FILE__, __LINE__, bug) #define AssertLengthFmt _AssertLengthFmt #define AssertUnexpectedFmt _AssertUnexpectedFmt #define AssertNotTestedFmt _AssertNotTestedFmt #endif // } // these don't have newline so a bug can be tacked on #define _AssertPanicFmt "PANIC %s:%d" #define _AssertAssertFmt "ASSERT %s:%d" #define _AssertNotImplementedFmt "NOT_IMPLEMENTED %s:%d" #define _AssertNotReachedFmt "NOT_REACHED %s:%d" #define _AssertMemAllocFmt "MEM_ALLOC %s:%d" // these are complete formats with newline #define _AssertLengthFmt "LENGTH %s:%d r=%#x e=%#x\n" #define _AssertUnexpectedFmt "UNEXPECTED %s:%d bugNr=%d\n" #define _AssertNotTestedFmt "NOT_TESTED %s:%d\n" #endif /* ifndef _VM_ASSERT_H_ */ vsock-only/include/vm_basic_defs.h0000444000000000000000000003156512025726714016256 0ustar rootroot/********************************************************* * Copyright (C) 2003 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

/*
 * vm_basic_defs.h --
 *
 *      Standard macros for VMware source code.
 */

#ifndef _VM_BASIC_DEFS_H_
#define _VM_BASIC_DEFS_H_

#define INCLUDE_ALLOW_USERLEVEL
#define INCLUDE_ALLOW_VMMEXT
#define INCLUDE_ALLOW_MODULE
#define INCLUDE_ALLOW_VMMON
#define INCLUDE_ALLOW_VMNIXMOD
#define INCLUDE_ALLOW_VMKERNEL
#define INCLUDE_ALLOW_VMKDRIVERS
#define INCLUDE_ALLOW_VMK_MODULE
#define INCLUDE_ALLOW_DISTRIBUTE
#define INCLUDE_ALLOW_VMCORE
#define INCLUDE_ALLOW_VMIROM
#include "includeCheck.h"
#include "vm_basic_types.h" // For INLINE.

#if defined _WIN32 && defined USERLEVEL
#include <stddef.h>  /*
                      * We re-define the offsetof macro from stddef; make
                      * sure that it's already defined before we do it.
                      */
#include <windows.h> // for Sleep() and LOWORD() etc.
#endif

/*
 * Simple macros
 */

#if defined __APPLE__ && !defined KERNEL
#   include <stddef.h>
#else
// XXX the __cplusplus one matches that of VC++, to prevent redefinition warning
// XXX the other one matches that of gcc3.3.3/glibc2.2.4 to prevent redefinition warnings
#ifndef offsetof
#ifdef __cplusplus
#define offsetof(s,m)   (size_t)&(((s *)0)->m)
#else
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
#endif
#endif // __APPLE__

#ifndef ARRAYSIZE
#define ARRAYSIZE(a) (sizeof (a) / sizeof *(a))
#endif

#ifndef MIN
#define MIN(_a, _b)   (((_a) < (_b)) ? (_a) : (_b))
#endif

/* The Solaris 9 cross-compiler complains about these not being used */
#ifndef sun
static INLINE int
Min(int a, int b)
{
   return a < b ? a : b;
}
#endif

#ifndef MAX
#define MAX(_a, _b)   (((_a) > (_b)) ? (_a) : (_b))
#endif

#ifndef sun
static INLINE int
Max(int a, int b)
{
   return a > b ? a : b;
}
#endif

#define ROUNDUP(x,y)            (((x) + (y) - 1) / (y) * (y))
#define ROUNDDOWN(x,y)          ((x) / (y) * (y))
#define ROUNDUPBITS(x, bits)    (((uintptr_t) (x) + MASK(bits)) & ~MASK(bits))
#define ROUNDDOWNBITS(x, bits)  ((uintptr_t) (x) & ~MASK(bits))
#define CEILING(x, y)           (((x) + (y) - 1) / (y))

#if defined __APPLE__
#include
#undef MASK
#endif

#define MASK(n)        ((1 << (n)) - 1)    /* make an n-bit mask */

#define DWORD_ALIGN(x) ((((x) + 3) >> 2) << 2)
#define QWORD_ALIGN(x) ((((x) + 7) >> 3) << 3)

#define IMPLIES(a,b) (!(a) || (b))

/*
 * Not everybody (e.g., the monitor) has NULL
 */

#ifndef NULL
#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
#endif
#endif

/*
 * Token concatenation
 *
 * The C preprocessor doesn't prescan arguments when they are
 * concatenated or stringified.  So we need extra levels of
 * indirection to convince the preprocessor to expand its
 * arguments.
 */

#define CONC(x, y)     x##y
#define XCONC(x, y)    CONC(x, y)
#define XXCONC(x, y)   XCONC(x, y)
#define MAKESTR(x)     #x
#define XSTR(x)        MAKESTR(x)

/*
 * Page operations
 *
 * It has been suggested that these definitions belong elsewhere
 * (like x86types.h).  However, I deem them common enough
 * (since even regular user-level programs may want to do
 * page-based memory manipulation) to be here.
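 *
 * (Illustrative arithmetic, not in the original comment: with PAGE_SHIFT 12,
 * PAGE_SIZE is 4096, so BYTES_2_PAGES(8192) == 2 and
 * PAGES_2_BYTES(2) == 8192.)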
 * -- edward
 */

#ifndef PAGE_SHIFT // {
#if defined VM_I386
#define PAGE_SHIFT    12
#elif defined __APPLE__
#define PAGE_SHIFT    12
#else
#error
#endif
#endif // }

#ifndef PAGE_SIZE
#define PAGE_SIZE     (1 << PAGE_SHIFT)
#endif

#ifndef PAGE_NUMBER
#define PAGE_NUMBER(_addr)  ((uintptr_t)(_addr) >> PAGE_SHIFT)
#endif

#ifndef BYTES_2_PAGES
#define BYTES_2_PAGES(_nbytes)  ((_nbytes) >> PAGE_SHIFT)
#endif

#ifndef PAGES_2_BYTES
#define PAGES_2_BYTES(_npages)  (((uint64)(_npages)) << PAGE_SHIFT)
#endif

#ifndef MBYTES_2_PAGES
#define MBYTES_2_PAGES(_nbytes) ((_nbytes) << (20 - PAGE_SHIFT))
#endif

#ifndef PAGES_2_MBYTES
#define PAGES_2_MBYTES(_npages) ((_npages) >> (20 - PAGE_SHIFT))
#endif

#ifndef VM_PAE_LARGE_PAGE_SHIFT
#define VM_PAE_LARGE_PAGE_SHIFT 21
#endif

#ifndef VM_PAE_LARGE_PAGE_SIZE
#define VM_PAE_LARGE_PAGE_SIZE (1 << VM_PAE_LARGE_PAGE_SHIFT)
#endif

#ifndef VM_PAE_LARGE_PAGE_MASK
#define VM_PAE_LARGE_PAGE_MASK (VM_PAE_LARGE_PAGE_SIZE - 1)
#endif

#ifndef VM_PAE_LARGE_2_SMALL_PAGES
#define VM_PAE_LARGE_2_SMALL_PAGES (BYTES_2_PAGES(VM_PAE_LARGE_PAGE_SIZE))
#endif

/*
 * Word operations
 */

#ifndef LOWORD
#define LOWORD(_dw)   ((_dw) & 0xffff)
#endif
#ifndef HIWORD
#define HIWORD(_dw)   (((_dw) >> 16) & 0xffff)
#endif

#ifndef LOBYTE
#define LOBYTE(_w)    ((_w) & 0xff)
#endif
#ifndef HIBYTE
#define HIBYTE(_w)    (((_w) >> 8) & 0xff)
#endif

#define HIDWORD(_qw)    ((uint32)((_qw) >> 32))
#define LODWORD(_qw)    ((uint32)(_qw))
#define QWORD(_hi, _lo) ((((uint64)(_hi)) << 32) | ((uint32)(_lo)))

/*
 * Deposit a field _src at _pos bits from the right,
 * with a length of _len, into the integer _target.
 */

#define DEPOSIT_BITS(_src,_pos,_len,_target) { \
   unsigned mask = ((1 << _len) - 1); \
   unsigned shiftedmask = ((1 << _len) - 1) << _pos; \
   _target = (_target & ~shiftedmask) | ((_src & mask) << _pos); \
}

/*
 * Get return address.
 */

#ifdef _MSC_VER
#ifdef __cplusplus
extern "C"
#endif
void *_ReturnAddress(void);
#pragma intrinsic(_ReturnAddress)
#define GetReturnAddress() _ReturnAddress()
#elif __GNUC__
#define GetReturnAddress() __builtin_return_address(0)
#endif

#ifdef __GNUC__
#ifndef sun

/*
 * Get the frame pointer. We use this assembly hack instead of
 * __builtin_frame_address() due to a bug introduced in gcc 4.1.1
 */
static INLINE_SINGLE_CALLER uintptr_t
GetFrameAddr(void)
{
   uintptr_t bp;
#if (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0))
   bp = (uintptr_t)__builtin_frame_address(0);
#elif (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 3)
# if defined(VMM64) || defined(VM_X86_64)
   __asm__ __volatile__("movq %%rbp, %0\n" : "=g" (bp));
# else
   __asm__ __volatile__("movl %%ebp, %0\n" : "=g" (bp));
# endif
#else
   __asm__ __volatile__(
#ifdef __linux__
      ".print \"This newer version of GCC may or may not have the "
      "__builtin_frame_address bug. Need to update this. "
      "See bug 147638.\"\n"
      ".abort"
#else /* MacOS */
      ".abort \"This newer version of GCC may or may not have the "
      "__builtin_frame_address bug. Need to update this. "
      "See bug 147638.\"\n"
#endif
      : "=g" (bp)
   );
#endif
   return bp;
}

/*
 * Returns the frame pointer of the calling function.
 * Equivalent to __builtin_frame_address(1).
*/ static INLINE_SINGLE_CALLER uintptr_t GetCallerFrameAddr(void) { return *(uintptr_t*)GetFrameAddr(); } #endif // sun #endif // __GNUC__ /* * Data prefetch was added in gcc 3.1.1 * http://www.gnu.org/software/gcc/gcc-3.1/changes.html */ #ifdef __GNUC__ # if ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 1) || \ (__GNUC__ == 3 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 1)) # define PREFETCH_R(var) __builtin_prefetch((var), 0 /* read */, \ 3 /* high temporal locality */) # define PREFETCH_W(var) __builtin_prefetch((var), 1 /* write */, \ 3 /* high temporal locality */) # else # define PREFETCH_R(var) ((void)(var)) # define PREFETCH_W(var) ((void)(var)) # endif #endif /* __GNUC__ */ #ifdef USERLEVEL // { /* * Note this might be a problem on NT b/c while sched_yield guarantees it * moves you to the end of your priority list, Sleep(0) offers no such * guarantee. Bummer. --Jeremy. */ #if defined(N_PLAT_NLM) || defined(__FreeBSD__) /* We do not have YIELD() as we do not need it yet... */ #elif defined(_WIN32) # define YIELD() Sleep(0) #else # include // For sched_yield. Don't ask. --Jeremy. # define YIELD() sched_yield() #endif /* * Standardize some Posix names on Windows. */ #ifdef _WIN32 // { #define snprintf _snprintf #define vsnprintf _vsnprintf #define strtok_r strtok_s static INLINE void sleep(unsigned int sec) { Sleep(sec * 1000); } static INLINE void usleep(unsigned long usec) { Sleep(CEILING(usec, 1000)); } typedef int pid_t; #define F_OK 0 #define X_OK 1 #define W_OK 2 #define R_OK 4 #endif // } /* * Macro for username comparison. */ #ifdef _WIN32 // { #define USERCMP(x,y) Str_Strcasecmp(x,y) #else #define USERCMP(x,y) strcmp(x,y) #endif // } #endif // } #ifndef va_copy #ifdef _WIN32 /* * Windows needs va_copy. This works for both 32 and 64-bit Windows * based on inspection of how varags.h from the Visual C CRTL is * implemented. (Future versions of the RTL may break this). */ #define va_copy(dest, src) ((dest) = (src)) #elif defined(__APPLE__) && defined(KERNEL) /* * MacOS kernel-mode needs va_copy. Based on inspection of stdarg.h * from the MacOSX10.4u.sdk kernel framework, this should work. * (Future versions of the SDK may break this). */ #define va_copy(dest, src) ((dest) = (src)) #elif defined(__GNUC__) && (__GNUC__ < 3) /* * Old versions of gcc recognize __va_copy, but not va_copy. */ #define va_copy(dest, src) __va_copy(dest, src) #endif // _WIN32 #endif // va_copy /* * This one is outside USERLEVEL because it's used by * files compiled into the Windows hgfs driver or the display * driver. */ #ifdef _WIN32 #define PATH_MAX 256 #ifndef strcasecmp #define strcasecmp(_s1,_s2) _stricmp((_s1),(_s2)) #endif #ifndef strncasecmp #define strncasecmp(_s1,_s2,_n) _strnicmp((_s1),(_s2),(_n)) #endif #endif /* * Convenience macro for COMMUNITY_SOURCE */ #undef EXCLUDE_COMMUNITY_SOURCE #ifdef COMMUNITY_SOURCE #define EXCLUDE_COMMUNITY_SOURCE(x) #else #define EXCLUDE_COMMUNITY_SOURCE(x) x #endif #undef COMMUNITY_SOURCE_INTEL_SECRET #if !defined(COMMUNITY_SOURCE) || defined(INTEL_SOURCE) /* * It's ok to include INTEL_SECRET source code for non-commsrc, * or for drops directed at Intel. */ #define COMMUNITY_SOURCE_INTEL_SECRET #endif /* * Convenience macros and definitions. Can often be used instead of #ifdef. */ #undef DEBUG_ONLY #undef SL_DEBUG_ONLY #undef VMX86_SL_DEBUG #ifdef VMX86_DEBUG #define vmx86_debug 1 #define DEBUG_ONLY(x) x /* * Be very, very, very careful with SL_DEBUG. Pls ask ganesh or min before * using it. 
 */
*/ #define VMX86_SL_DEBUG #define vmx86_sl_debug 1 #define SL_DEBUG_ONLY(x) x #else #define vmx86_debug 0 #define DEBUG_ONLY(x) #define vmx86_sl_debug 0 #define SL_DEBUG_ONLY(x) #endif #ifdef VMX86_STATS #define vmx86_stats 1 #define STATS_ONLY(x) x #else #define vmx86_stats 0 #define STATS_ONLY(x) #endif #ifdef VMX86_DEVEL #define vmx86_devel 1 #define DEVEL_ONLY(x) x #else #define vmx86_devel 0 #define DEVEL_ONLY(x) #endif #ifdef VMX86_LOG #define vmx86_log 1 #define LOG_ONLY(x) x #else #define vmx86_log 0 #define LOG_ONLY(x) #endif #ifdef VMX86_VMM_SERIAL_LOGGING #define vmx86_vmm_serial_log 1 #define VMM_SERIAL_LOG_ONLY(x) x #else #define vmx86_vmm_serial_log 0 #define VMM_SERIAL_LOG_ONLY(x) #endif #ifdef VMX86_SERVER #define vmx86_server 1 #define SERVER_ONLY(x) x #define HOSTED_ONLY(x) #else #define vmx86_server 0 #define SERVER_ONLY(x) #define HOSTED_ONLY(x) x #endif #ifdef VMX86_WGS #define vmx86_wgs 1 #define WGS_ONLY(x) x #else #define vmx86_wgs 0 #define WGS_ONLY(x) #endif #ifdef VMKERNEL #define vmkernel 1 #define VMKERNEL_ONLY(x) x #else #define vmkernel 0 #define VMKERNEL_ONLY(x) #endif #ifdef _WIN32 #define WIN32_ONLY(x) x #define POSIX_ONLY(x) #else #define WIN32_ONLY(x) #define POSIX_ONLY(x) x #endif #ifdef VMM #define VMM_ONLY(x) x #define USER_ONLY(x) #else #define VMM_ONLY(x) #define USER_ONLY(x) x #endif /* VMVISOR ifdef only allowed in the vmkernel */ #ifdef VMKERNEL #ifdef VMVISOR #define vmvisor 1 #define VMVISOR_ONLY(x) x #else #define vmvisor 0 #define VMVISOR_ONLY(x) #endif #endif #ifdef _WIN32 #define VMW_INVALID_HANDLE INVALID_HANDLE_VALUE #else #define VMW_INVALID_HANDLE -1 #endif #endif // ifndef _VM_BASIC_DEFS_H_ vsock-only/include/vm_basic_types.h0000444000000000000000000005624212025726714016500 0ustar rootroot/********************************************************* * Copyright (C) 1998-2008 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * * vm_basic_types.h -- * * basic data types. */ #ifndef _VM_BASIC_TYPES_H_ #define _VM_BASIC_TYPES_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMNIXMOD #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_VMKDRIVERS #define INCLUDE_ALLOW_VMK_MODULE #define INCLUDE_ALLOW_DISTRIBUTE #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMIROM #include "includeCheck.h" /* STRICT ANSI means the Xserver build and X defines Bool differently. 
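 *
 * (Illustrative note, not in the original comment: Bool is meant to hold
 * only TRUE or FALSE; the IsBool() macro below checks this, e.g.
 * IsBool(TRUE) holds while IsBool(2) does not.)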
*/ #if !defined(__STRICT_ANSI__) || defined(__FreeBSD__) typedef char Bool; #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #define IsBool(x) (((x) & ~1) == 0) #define IsBool2(x, y) ((((x) | (y)) & ~1) == 0) /* * Macros __i386__ and __ia64 are intrinsically defined by GCC */ #ifdef __i386__ #define VM_I386 #endif #ifdef _WIN64 #define __x86_64__ #endif #ifdef __x86_64__ #define VM_X86_64 #define VM_I386 #define vm_x86_64 (1) #else #define vm_x86_64 (0) #endif #ifdef _WIN32 /* safe assumption for a while */ #define VM_I386 #endif #ifdef _MSC_VER typedef unsigned __int64 uint64; typedef signed __int64 int64; #pragma warning (3 :4505) // unreferenced local function #pragma warning (disable :4018) // signed/unsigned mismatch #pragma warning (disable :4761) // integral size mismatch in argument; conversion supplied #pragma warning (disable :4305) // truncation from 'const int' to 'short' #pragma warning (disable :4244) // conversion from 'unsigned short' to 'unsigned char' #pragma warning (disable :4267) // truncation of 'size_t' #if !defined VMX86_DEVEL // XXX until we clean up all the code -- edward #pragma warning (disable :4133) // incompatible types - from 'struct VM *' to 'int *' #pragma warning (disable :4047) // differs in levels of indirection #endif #pragma warning (disable :4146) // unary minus operator applied to unsigned type, result still unsigned #pragma warning (disable :4142) // benign redefinition of type #elif __GNUC__ /* The Xserver source compiles with -ansi -pendantic */ #ifndef __STRICT_ANSI__ #if defined(VM_X86_64) typedef unsigned long uint64; typedef long int64; #else typedef unsigned long long uint64; typedef long long int64; #endif #elif __FreeBSD__ typedef unsigned long long uint64; typedef long long int64; #endif #else #error - Need compiler define for int64/uint64 #endif typedef unsigned int uint32; typedef unsigned short uint16; typedef unsigned char uint8; typedef int int32; typedef short int16; typedef char int8; /* * FreeBSD (for the tools build) unconditionally defines these in * sys/inttypes.h so don't redefine them if this file has already * been included. [greg] * * This applies to Solaris as well. */ /* * Before trying to do the includes based on OS defines, see if we can use * feature-based defines to get as much functionality as possible */ #ifdef HAVE_INTTYPES_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_INTTYPES_H #include #endif #ifdef HAVE_STDINT_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #if !defined(USING_AUTOCONF) # if defined(__FreeBSD__) || defined(sun) # ifdef KLD_MODULE # include # else # if (BSD_VERSION >= 50) # include # include # else # include # endif # endif # elif defined __APPLE__ # if KERNEL # include # include /* mostly for size_t */ # include # else # include # include # include # include # endif # else # if !defined(__intptr_t_defined) && !defined(intptr_t) # define __intptr_t_defined # define intptr_t intptr_t # ifdef VM_I386 # ifdef VM_X86_64 typedef int64 intptr_t; # else typedef int32 intptr_t; # endif # endif # endif # ifndef _STDINT_H # ifdef VM_I386 # ifdef VM_X86_64 typedef uint64 uintptr_t; # else typedef uint32 uintptr_t; # endif # endif # endif # endif #endif /* * Time * XXX These should be cleaned up. 
-- edward */ typedef int64 VmTimeType; /* Time in microseconds */ typedef int64 VmTimeRealClock; /* Real clock kept in microseconds */ typedef int64 VmTimeVirtualClock; /* Virtual Clock kept in CPU cycles */ /* * Printf format specifiers for size_t and 64-bit number. * Use them like this: * printf("%"FMT64"d\n", big); * * FMTH is for handles/fds. */ #ifdef _MSC_VER #define FMT64 "I64" #ifdef VM_X86_64 #define FMTSZ "I64" #define FMTPD "I64" #define FMTH "I64" #else #define FMTSZ "I" #define FMTPD "I" #define FMTH "I" #endif #elif __GNUC__ #define FMTH "" #if defined(N_PLAT_NLM) || defined(sun) || \ (defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) < 5)) /* * Why (__FreeBSD__ + 0)? See bug 141008. * Yes, we really need to test both (__FreeBSD__ + 0) and * ((__FreeBSD__ + 0) < 5). No, we can't remove "+ 0" from * ((__FreeBSD__ + 0) < 5). */ #ifdef VM_X86_64 #define FMTSZ "l" #define FMTPD "l" #else #define FMTSZ "" #define FMTPD "" #endif #elif defined(__linux__) \ || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \ || (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L) \ || (defined(_POSIX2_VERSION) && _POSIX2_VERSION >= 200112L) /* BSD/Darwin, Linux */ #define FMTSZ "z" #define FMTPD "t" #else /* Systems with a pre-C99 libc */ #define FMTSZ "Z" #ifdef VM_X86_64 #define FMTPD "l" #else #define FMTPD "" #endif #endif #ifdef VM_X86_64 #define FMT64 "l" #elif defined(sun) || defined(__APPLE__) || defined(__FreeBSD__) #define FMT64 "ll" #else #define FMT64 "L" #endif #else #error - Need compiler define for FMT64 and FMTSZ #endif /* * Suffix for 64-bit constants. Use it like this: * CONST64(0x7fffffffffffffff) for signed or * CONST64U(0x7fffffffffffffff) for unsigned. * * 2004.08.30(thutt): * The vmcore/asm64/gen* programs are compiled as 32-bit * applications, but must handle 64 bit constants. If the * 64-bit-constant defining macros are already defined, the * definition will not be overwritten. */ #if !defined(CONST64) || !defined(CONST64U) #ifdef _MSC_VER #define CONST64(c) c##I64 #define CONST64U(c) c##uI64 #elif __GNUC__ #ifdef VM_X86_64 #define CONST64(c) c##L #define CONST64U(c) c##uL #else #define CONST64(c) c##LL #define CONST64U(c) c##uLL #endif #else #error - Need compiler define for CONST64 #endif #endif /* * Use CONST3264/CONST3264U if you want a constant to be * treated as a 32-bit number on 32-bit compiles and * a 64-bit number on 64-bit compiles. Useful in the case * of shifts, like (CONST3264U(1) << x), where x could be * more than 31 on a 64-bit compile. */ #ifdef VM_X86_64 #define CONST3264(a) CONST64(a) #define CONST3264U(a) CONST64U(a) #else #define CONST3264(a) (a) #define CONST3264U(a) (a) #endif #define MIN_INT32 ((int32)0x80000000) #define MAX_INT32 ((int32)0x7fffffff) #define MIN_UINT32 ((uint32)0) #define MAX_UINT32 ((uint32)0xffffffff) #define MIN_INT64 (CONST64(0x8000000000000000)) #define MAX_INT64 (CONST64(0x7fffffffffffffff)) #define MIN_UINT64 (CONST64U(0)) #define MAX_UINT64 (CONST64U(0xffffffffffffffff)) typedef uint8 *TCA; /* Pointer into TC (usually). 
*/ /* * Type big enough to hold an integer between 0..100 */ typedef uint8 Percent; #define AsPercent(v) ((Percent)(v)) #define CHOOSE_PERCENT AsPercent(101) typedef uintptr_t VA; typedef uintptr_t VPN; typedef uint64 PA; typedef uint32 PPN; typedef uint64 PhysMemOff; typedef uint64 PhysMemSize; /* The Xserver source compiles with -ansi -pendantic */ #ifndef __STRICT_ANSI__ typedef uint64 BA; #endif typedef uint32 BPN; typedef uint32 PageNum; typedef unsigned MemHandle; typedef int32 World_ID; #define INVALID_WORLD_ID ((World_ID)0) typedef World_ID User_CartelID; #define INVALID_CARTEL_ID INVALID_WORLD_ID typedef User_CartelID User_SessionID; #define INVALID_SESSION_ID INVALID_CARTEL_ID typedef User_CartelID User_CartelGroupID; #define INVALID_CARTELGROUP_ID INVALID_CARTEL_ID typedef uint32 Worldlet_ID; #define INVALID_WORLDLET_ID ((Worldlet_ID)0) /* world page number */ typedef uint32 WPN; /* The Xserver source compiles with -ansi -pendantic */ #ifndef __STRICT_ANSI__ typedef uint64 MA; typedef uint32 MPN; #endif /* * This type should be used for variables that contain sector * position/quantity. */ typedef uint64 SectorType; /* * Linear address */ typedef uintptr_t LA; typedef uintptr_t LPN; #define LA_2_LPN(_la) ((_la) >> PAGE_SHIFT) #define LPN_2_LA(_lpn) ((_lpn) << PAGE_SHIFT) #define LAST_LPN ((((LA) 1) << (8 * sizeof(LA) - PAGE_SHIFT)) - 1) #define LAST_LPN32 ((((LA32)1) << (8 * sizeof(LA32) - PAGE_SHIFT)) - 1) #define LAST_LPN64 ((((LA64)1) << (8 * sizeof(LA64) - PAGE_SHIFT)) - 1) /* Valid bits in a LPN. */ #define LPN_MASK LAST_LPN #define LPN_MASK32 LAST_LPN32 #define LPN_MASK64 LAST_LPN64 /* * On 64 bit platform, address and page number types default * to 64 bit. When we need to represent a 32 bit address, we use * types defined below. * * On 32 bit platform, the following types are the same as the * default types. */ typedef uint32 VA32; typedef uint32 VPN32; typedef uint32 LA32; typedef uint32 LPN32; typedef uint32 PA32; typedef uint32 PPN32; typedef uint32 MA32; typedef uint32 MPN32; /* * On 64 bit platform, the following types are the same as the * default types. */ typedef uint64 VA64; typedef uint64 VPN64; typedef uint64 LA64; typedef uint64 LPN64; typedef uint64 PA64; typedef uint64 PPN64; typedef uint64 MA64; typedef uint64 MPN64; /* * VA typedefs for user world apps. */ typedef VA32 UserVA32; typedef VA64 UserVA64; typedef UserVA32 UserVAConst; /* Userspace ptr to data that we may only read. */ typedef UserVA64 UserVA64Const; /* Used by 64-bit syscalls until conversion is finished. */ #ifdef VMKERNEL typedef UserVA32 UserVA; #else typedef void * UserVA; #endif /* * Maximal possible PPN value (errors too) that PhysMem can handle. * Must be at least as large as MAX_PPN which is the maximum PPN * for any region other than buserror. */ #define PHYSMEM_MAX_PPN ((PPN)0xffffffff) #define MAX_PPN ((PPN)0x1fffffff) /* Maximal observable PPN value. */ #define INVALID_PPN ((PPN)0xffffffff) #define INVALID_BPN ((BPN) 0x1fffffff) #define INVALID_MPN ((MPN)-1) #define MEMREF_MPN ((MPN)-2) #define RESERVED_MPN ((MPN) 0) /* Support 43 bits of address space. */ #define MAX_MPN ((MPN)0x7fffffff) #define INVALID_LPN ((LPN)-1) #define INVALID_VPN ((VPN)-1) #define INVALID_LPN64 ((LPN64)-1) #define INVALID_PAGENUM ((PageNum)-1) #define INVALID_WPN ((WPN) -1) /* * Format modifier for printing VA, LA, and VPN. 
* Use them like this: Log("%#"FMTLA"x\n", laddr) */ #if defined(VMM64) || defined(FROBOS64) || vm_x86_64 || defined __APPLE__ # define FMTLA "l" # define FMTVA "l" # define FMTVPN "l" #else # define FMTLA "" # define FMTVA "" # define FMTVPN "" #endif #define EXTERN extern #define CONST const #ifndef INLINE # ifdef _MSC_VER # define INLINE __inline # else # define INLINE inline # endif #endif /* * Annotation for data that may be exported into a DLL and used by other * apps that load that DLL and import the data. */ #if defined(_WIN32) && defined(VMX86_IMPORT_DLLDATA) # define VMX86_EXTERN_DATA extern __declspec(dllimport) #else // !_WIN32 # define VMX86_EXTERN_DATA extern #endif #if defined(_WIN32) && !defined(VMX86_NO_THREADS) #define THREADSPECIFIC __declspec(thread) #else #define THREADSPECIFIC #endif /* * Due to the wonderful "registry redirection" feature introduced in * 64-bit Windows, if you access any key under HKLM\Software in 64-bit * code, you need to open/create/delete that key with * VMKEY_WOW64_32KEY if you want a consistent view with 32-bit code. */ #ifdef _WIN32 #ifdef _WIN64 #define VMW_KEY_WOW64_32KEY KEY_WOW64_32KEY #else #define VMW_KEY_WOW64_32KEY 0x0 #endif #endif /* * Consider the following reasons functions are inlined: * * 1) inlined for performance reasons * 2) inlined because it's a single-use function * * Functions which meet only condition 2 should be marked with this * inline macro; It is not critical to be inlined (but there is a * code-space & runtime savings by doing so), so when other callers * are added the inline-ness should be removed. */ #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) /* * Starting at version 3.3, gcc does not always inline functions marked * 'inline' (it depends on their size). To force gcc to do so, one must use the * extra __always_inline__ attribute. */ # define INLINE_SINGLE_CALLER INLINE __attribute__((__always_inline__)) # if defined(VMM) \ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 1)) # warning Verify INLINE_SINGLE_CALLER '__always_inline__' attribute (did \ monitor size change?) # endif #else # define INLINE_SINGLE_CALLER INLINE #endif /* * Used when a hard guaranteed of no inlining is needed. Very few * instances need this since the absence of INLINE is a good hint * that gcc will not do inlining. */ #if defined(__GNUC__) && defined(VMM) #define ABSOLUTELY_NOINLINE __attribute__((__noinline__)) #endif /* * Attributes placed on function declarations to tell the compiler * that the function never returns. */ #ifdef _MSC_VER #define NORETURN __declspec(noreturn) #elif __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 9) #define NORETURN __attribute__((__noreturn__)) #else #define NORETURN #endif /* * GCC 3.2 inline asm needs the + constraint for input/ouput memory operands. * Older GCCs don't know about it --hpreg */ #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2) # define VM_ASM_PLUS 1 #else # define VM_ASM_PLUS 0 #endif /* * Branch prediction hints: * LIKELY(exp) - Expression exp is likely TRUE. * UNLIKELY(exp) - Expression exp is likely FALSE. * Usage example: * if (LIKELY(excCode == EXC_NONE)) { * or * if (UNLIKELY(REAL_MODE(vc))) { * * We know how to predict branches on gcc3 and later (hopefully), * all others we don't so we do nothing. */ #if (__GNUC__ >= 3) /* * gcc3 uses __builtin_expect() to inform the compiler of an expected value. * We use this to inform the static branch predictor. The '!!' in LIKELY * will convert any !=0 to a 1. 
*/ #define LIKELY(_exp) __builtin_expect(!!(_exp), 1) #define UNLIKELY(_exp) __builtin_expect((_exp), 0) #else #define LIKELY(_exp) (_exp) #define UNLIKELY(_exp) (_exp) #endif /* * GCC's argument checking for printf-like functions * This is conditional until we have replaced all `"%x", void *' * with `"0x%08x", (uint32) void *'. Note that %p prints different things * on different platforms. Argument checking is enabled for the * vmkernel, which has already been cleansed. * * fmtPos is the position of the format string argument, beginning at 1 * varPos is the position of the variable argument, beginning at 1 */ #if defined(__GNUC__) # define PRINTF_DECL(fmtPos, varPos) __attribute__((__format__(__printf__, fmtPos, varPos))) #else # define PRINTF_DECL(fmtPos, varPos) #endif #if defined(__GNUC__) # define SCANF_DECL(fmtPos, varPos) __attribute__((__format__(__scanf__, fmtPos, varPos))) #else # define SCANF_DECL(fmtPos, varPos) #endif /* * UNUSED_PARAM should surround the parameter name and type declaration, * e.g. "int MyFunction(int var1, UNUSED_PARAM(int var2))" * */ #ifndef UNUSED_PARAM # if defined(__GNUC__) # define UNUSED_PARAM(_parm) _parm __attribute__((__unused__)) # else # define UNUSED_PARAM(_parm) _parm # endif #endif /* * REGPARM defaults to REGPARM3, i.e., a request that gcc * put the first three arguments in registers. (It is fine * if the function has fewer than three args.) Gcc only. * Syntactically, put REGPARM where you'd put INLINE or NORETURN. */ #if defined(__GNUC__) # define REGPARM0 __attribute__((regparm(0))) # define REGPARM1 __attribute__((regparm(1))) # define REGPARM2 __attribute__((regparm(2))) # define REGPARM3 __attribute__((regparm(3))) # define REGPARM REGPARM3 #else # define REGPARM0 # define REGPARM1 # define REGPARM2 # define REGPARM3 # define REGPARM #endif /* * ALIGNED specifies a minimum alignment of "n" bytes. */ #ifdef __GNUC__ #define ALIGNED(n) __attribute__((__aligned__(n))) #else #define ALIGNED(n) #endif /* *********************************************************************** * STRUCT_OFFSET_CHECK -- */ /** * * \brief Check if the actual offset of a member in a structure * is what is expected * * * \param[in] STRUCT Structure the member is a part of. * \param[in] MEMBER Member to check the offset of. * \param[in] OFFSET Expected offset of MEMBER in STRUCT. * \param[in] DEBUG_EXTRA Additional bytes to be added to OFFSET to * compensate for extra info in debug builds. * *********************************************************************** */ #ifdef VMX86_DEBUG #define STRUCT_OFFSET_CHECK(STRUCT, MEMBER, OFFSET, DEBUG_EXTRA) \ ASSERT_ON_COMPILE(vmk_offsetof(STRUCT, MEMBER) == (OFFSET + DEBUG_EXTRA)) #else #define STRUCT_OFFSET_CHECK(STRUCT, MEMBER, OFFSET, DEBUG_EXTRA) \ ASSERT_ON_COMPILE(vmk_offsetof(STRUCT, MEMBER) == OFFSET) #endif /* *********************************************************************** * STRUCT_SIZE_CHECK -- */ /** * * \brief Check if the actual size of a structure is what is expected * * * \param[in] STRUCT Structure whose size is to be checked. * \param[in] SIZE Expected size of STRUCT. * \param[in] DEBUG_EXTRA Additional bytes to be added to SIZE to * compensate for extra info in debug builds.
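* * Example usage (a hypothetical struct, shown for illustration only; the * expected 16-byte size and zero debug padding are assumptions): * * typedef struct ExampleHdr { uint64 id; uint32 a; uint32 b; } ExampleHdr; * STRUCT_OFFSET_CHECK(ExampleHdr, a, 8, 0); * STRUCT_SIZE_CHECK(ExampleHdr, 16, 0);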
* *********************************************************************** */ #ifdef VMX86_DEBUG #define STRUCT_SIZE_CHECK(STRUCT, SIZE, DEBUG_EXTRA) \ ASSERT_ON_COMPILE(sizeof(STRUCT) == (SIZE + DEBUG_EXTRA)) #else #define STRUCT_SIZE_CHECK(STRUCT, SIZE, DEBUG_EXTRA) \ ASSERT_ON_COMPILE(sizeof(STRUCT) == SIZE) #endif /* * __func__ is a stringified function name that is part of the C99 standard. The block * below defines __func__ on older systems where the compiler does not support that * macro. */ #if defined(__GNUC__) \ && ((__GNUC__ == 2 && __GNUC_MINOR__ < 96) \ || (__GNUC__ < 2)) # define __func__ __FUNCTION__ #endif /* * Once upon a time, this was used to silence compiler warnings that * get generated when the compiler thinks that a function returns * when it is marked noreturn. Don't do it. Use NOT_REACHED(). */ #define INFINITE_LOOP() do { } while (1) /* * On FreeBSD (for the tools build), size_t is typedef'd if _BSD_SIZE_T_ * is defined. Use the same logic here so we don't define it twice. [greg] */ #ifdef __FreeBSD__ # ifdef _BSD_SIZE_T_ # undef _BSD_SIZE_T_ # ifdef VM_I386 # ifdef VM_X86_64 typedef uint64 size_t; # else typedef uint32 size_t; # endif # endif /* VM_I386 */ # endif # ifdef _BSD_SSIZE_T_ # undef _BSD_SSIZE_T_ # define _SSIZE_T # define __ssize_t_defined # define _SSIZE_T_DECLARED # ifdef VM_I386 # ifdef VM_X86_64 typedef int64 ssize_t; # else typedef int32 ssize_t; # endif # endif /* VM_I386 */ # endif #else # ifndef _SIZE_T # define _SIZE_T # ifdef VM_I386 # ifdef VM_X86_64 typedef uint64 size_t; # else typedef uint32 size_t; # endif # endif /* VM_I386 */ # endif # if !defined(FROBOS) && !defined(_SSIZE_T) && !defined(ssize_t) && \ !defined(__ssize_t_defined) && !defined(_SSIZE_T_DECLARED) # define _SSIZE_T # define __ssize_t_defined # define _SSIZE_T_DECLARED # ifdef VM_I386 # ifdef VM_X86_64 typedef int64 ssize_t; # else typedef int32 ssize_t; # endif # endif /* VM_I386 */ # endif #endif /* * Format modifier for printing pid_t. On sun the pid_t is a ulong, but on * Linux it's an int. * Use this like this: printf("The pid is %"FMTPID".\n", pid); */ #ifdef sun # ifdef VM_X86_64 # define FMTPID "d" # else # define FMTPID "lu" # endif #else # define FMTPID "d" #endif /* * Format modifier for printing uid_t. On sun the uid_t is a ulong, but on * Linux it's an int. * Use this like this: printf("The uid is %"FMTUID".\n", uid); */ #ifdef sun # ifdef VM_X86_64 # define FMTUID "u" # else # define FMTUID "lu" # endif #else # define FMTUID "u" #endif /* * Format modifier for printing mode_t. On sun the mode_t is a ulong, but on * Linux it's an int. * Use this like this: printf("The mode is %"FMTMODE".\n", mode); */ #ifdef sun # ifdef VM_X86_64 # define FMTMODE "o" # else # define FMTMODE "lo" # endif #else # define FMTMODE "o" #endif /* * Format modifier for printing time_t. Most platforms define a time_t to be * a long int, but on FreeBSD (as of 5.0, it seems), the time_t is a signed * size quantity. Refer to the definition of FMTSZ to see why we need silly * preprocessor arithmetic. * Use this like this: printf("The mode is %"FMTTIME".\n", time); */ #if defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) >= 5) # define FMTTIME FMTSZ"d" #else # define FMTTIME "ld" #endif /* * Define MXSemaHandle here so both vmmon and vmx see this definition. */ #ifdef _WIN32 typedef uintptr_t MXSemaHandle; #else typedef int MXSemaHandle; #endif /* * Define type for poll device handles.
*/ #ifdef _WIN32 typedef uintptr_t PollDevHandle; #else typedef int PollDevHandle; #endif /* * Define the utf16_t type. */ #if defined(_WIN32) && defined(_NATIVE_WCHAR_T_DEFINED) typedef wchar_t utf16_t; #else typedef uint16 utf16_t; #endif #endif /* _VM_BASIC_TYPES_H_ */ vsock-only/include/vmware_pack_init.h0000444000000000000000000000364412025726714017011 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __VMWARE_PACK_INIT_H__ # define __VMWARE_PACK_INIT_H__ /* * vmware_pack_init.h -- * * Platform-independent code to make the compiler pack (i.e. have them * occupy the smallest possible space) structure definitions. The following * constructs are known to work --hpreg * * #include "vmware_pack_begin.h" * struct foo { * ... * } * #include "vmware_pack_end.h" * ; * * typedef * #include "vmware_pack_begin.h" * struct foo { * ... * } * #include "vmware_pack_end.h" * foo; */ #ifdef _MSC_VER /* * MSVC 6.0 emits warning 4103 when the pack push and pop pragma pairing is * not balanced within 1 included file. That is annoying because our scheme * is based on the pairing being balanced between 2 included files. * * So we disable this warning, but this is safe because the compiler will also * emit warning 4161 when there are more pops than pushes within 1 main * file --hpreg */ # pragma warning(disable:4103) #elif __GNUC__ #else # error Compiler packing... #endif #endif /* __VMWARE_PACK_INIT_H__ */ vsock-only/include/vmware_pack_begin.h0000444000000000000000000000244412025726714017127 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmware_pack_begin.h -- * * Beginning of structure packing. See vmware_pack_init.h for details. * * Note that we do not use the following construct in this include file, * because we want to emit the code every time the file is included --hpreg * * #ifndef foo * # define foo * ...
* #endif * */ #include "vmware_pack_init.h" #ifdef _MSC_VER # pragma pack(push, 1) #elif __GNUC__ #else # error Compiler packing... #endif vsock-only/include/vmware_pack_end.h0000444000000000000000000000247012025726714016610 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmware_pack_end.h -- * * End of structure packing. See vmware_pack_init.h for details. * * Note that we do not use the following construct in this include file, * because we want to emit the code every time the file is included --hpreg * * #ifndef foo * # define foo * ... * #endif * */ #include "vmware_pack_init.h" #ifdef _MSC_VER # pragma pack(pop) #elif __GNUC__ __attribute__((__packed__)) #else # error Compiler packing... #endif vsock-only/include/vmci_defs.h0000444000000000000000000002224712025726714015426 0ustar rootroot/********************************************************* * Copyright (C) 2005-2008 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef _VMCI_DEF_H_ #define _VMCI_DEF_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMK_MODULE #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_DISTRIBUTE #include "includeCheck.h" #include "vm_basic_types.h" /* Register offsets. */ #define VMCI_STATUS_ADDR 0x00 #define VMCI_CONTROL_ADDR 0x04 #define VMCI_ICR_ADDR 0x08 #define VMCI_IMR_ADDR 0x0c #define VMCI_DATA_OUT_ADDR 0x10 #define VMCI_DATA_IN_ADDR 0x14 #define VMCI_CAPS_ADDR 0x18 #define VMCI_RESULT_LOW_ADDR 0x1c #define VMCI_RESULT_HIGH_ADDR 0x20 /* Max number of devices. */ #define VMCI_MAX_DEVICES 1 /* Status register bits. */ #define VMCI_STATUS_INT_ON 0x1 /* Control register bits. */ #define VMCI_CONTROL_RESET 0x1 #define VMCI_CONTROL_INT_ENABLE 0x2 #define VMCI_CONTROL_INT_DISABLE 0x4 /* Capabilities register bits. */ #define VMCI_CAPS_HYPERCALL 0x1 #define VMCI_CAPS_GUESTCALL 0x2 #define VMCI_CAPS_DATAGRAM 0x4 /* Interrupt Cause register bits. 
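* (These are read from the VMCI_ICR_ADDR register offset defined above; a * guest driver would typically test them in its interrupt handler to see * which sources are pending.)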
*/ #define VMCI_ICR_DATAGRAM 0x1 /* Interrupt Mask register bits. */ #define VMCI_IMR_DATAGRAM 0x1 /* * We have a fixed set of resource IDs available in the VMX. * This allows us to have a very simple implementation since we statically * know how many callers will create datagram handles. If a new caller arrives and * we have run out of slots we can manually increment the maximum size of * available resource IDs. */ typedef uint32 VMCI_Resource; /* VMCI reserved hypervisor datagram resource IDs. */ #define VMCI_RESOURCES_QUERY 0 #define VMCI_GET_CONTEXT_ID 1 #define VMCI_SHAREDMEM_CREATE 2 #define VMCI_SHAREDMEM_ATTACH 3 #define VMCI_SHAREDMEM_DETACH 4 #define VMCI_SHAREDMEM_QUERY 5 #define VMCI_DATAGRAM_REQUEST_MAP 6 #define VMCI_DATAGRAM_REMOVE_MAP 7 #define VMCI_EVENT_SUBSCRIBE 8 #define VMCI_EVENT_UNSUBSCRIBE 9 #define VMCI_QUEUEPAIR_ALLOC 10 #define VMCI_QUEUEPAIR_DETACH 11 #define VMCI_RESOURCE_MAX 12 /* VMCI Ids. */ typedef uint32 VMCIId; typedef struct VMCIHandle { VMCIId context; VMCIId resource; } VMCIHandle; static INLINE VMCIHandle VMCI_MAKE_HANDLE(VMCIId cid, VMCIId rid) { VMCIHandle h = {cid, rid}; return h; } #define VMCI_HANDLE_TO_CONTEXT_ID(_handle) ((_handle).context) #define VMCI_HANDLE_TO_RESOURCE_ID(_handle) ((_handle).resource) #define VMCI_HANDLE_EQUAL(_h1, _h2) ((_h1).context == (_h2).context && \ (_h1).resource == (_h2).resource) #define VMCI_INVALID_ID 0xFFFFFFFF static const VMCIHandle VMCI_INVALID_HANDLE = {VMCI_INVALID_ID, VMCI_INVALID_ID}; #define VMCI_HANDLE_INVALID(_handle) \ VMCI_HANDLE_EQUAL((_handle), VMCI_INVALID_HANDLE) /* * The defines below can be used to send anonymous requests; they also * indicate that no response is expected. */ #define VMCI_ANON_SRC_CONTEXT_ID VMCI_INVALID_ID #define VMCI_ANON_SRC_RESOURCE_ID VMCI_INVALID_ID #define VMCI_ANON_SRC_HANDLE VMCI_MAKE_HANDLE(VMCI_ANON_SRC_CONTEXT_ID, \ VMCI_ANON_SRC_RESOURCE_ID) /* The lowest 16 context ids are reserved for internal use. */ #define VMCI_RESERVED_CID_LIMIT 16 /* * Hypervisor context id, used for calling into hypervisor * supplied services from the VM. */ #define VMCI_HYPERVISOR_CONTEXT_ID 0 /* * Well-known context id, a logical context that contains * a set of well-known services. */ #define VMCI_WELL_KNOWN_CONTEXT_ID 1 /* Todo: Change host context id to dynamic/random id. */ #define VMCI_HOST_CONTEXT_ID 2 /* * The VMCI_CONTEXT_RESOURCE_ID is used together with VMCI_MAKE_HANDLE to make * handles that refer to a specific context. */ #define VMCI_CONTEXT_RESOURCE_ID 0 /* VMCI error codes.
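* Negative values indicate failure, zero and positive values indicate * success. (A typical caller check, sketched with only names defined here: * if (rv < VMCI_SUCCESS) { return rv; } * with any of the positive VMCI_SUCCESS_* codes still counting as success.)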
*/ #define VMCI_SUCCESS_QUEUEPAIR_ATTACH 5 #define VMCI_SUCCESS_QUEUEPAIR_CREATE 4 #define VMCI_SUCCESS_LAST_DETACH 3 #define VMCI_SUCCESS_ACCESS_GRANTED 2 #define VMCI_SUCCESS_ENTRY_DEAD 1 #define VMCI_SUCCESS 0 #define VMCI_ERROR_INVALID_RESOURCE -1 #define VMCI_ERROR_INVALID_ARGS -2 #define VMCI_ERROR_NO_MEM -3 #define VMCI_ERROR_DATAGRAM_FAILED -4 #define VMCI_ERROR_MORE_DATA -5 #define VMCI_ERROR_NO_MORE_DATAGRAMS -6 #define VMCI_ERROR_NO_ACCESS -7 #define VMCI_ERROR_NO_HANDLE -8 #define VMCI_ERROR_DUPLICATE_ENTRY -9 #define VMCI_ERROR_DST_UNREACHABLE -10 #define VMCI_ERROR_PAYLOAD_TOO_LARGE -11 #define VMCI_ERROR_INVALID_PRIV -12 #define VMCI_ERROR_GENERIC -13 #define VMCI_ERROR_PAGE_ALREADY_SHARED -14 #define VMCI_ERROR_CANNOT_SHARE_PAGE -15 #define VMCI_ERROR_CANNOT_UNSHARE_PAGE -16 #define VMCI_ERROR_NO_PROCESS -17 #define VMCI_ERROR_NO_DATAGRAM -18 #define VMCI_ERROR_NO_RESOURCES -19 #define VMCI_ERROR_UNAVAILABLE -20 #define VMCI_ERROR_NOT_FOUND -21 #define VMCI_ERROR_ALREADY_EXISTS -22 #define VMCI_ERROR_NOT_PAGE_ALIGNED -23 #define VMCI_ERROR_INVALID_SIZE -24 #define VMCI_ERROR_REGION_ALREADY_SHARED -25 #define VMCI_ERROR_TIMEOUT -26 #define VMCI_ERROR_DATAGRAM_INCOMPLETE -27 #define VMCI_ERROR_INCORRECT_IRQL -28 #define VMCI_ERROR_EVENT_UNKNOWN -29 #define VMCI_ERROR_OBSOLETE -30 #define VMCI_ERROR_QUEUEPAIR_MISMATCH -31 #define VMCI_ERROR_QUEUEPAIR_NOTSET -32 #define VMCI_ERROR_QUEUEPAIR_NOTOWNER -33 #define VMCI_ERROR_QUEUEPAIR_NOTATTACHED -34 #define VMCI_ERROR_QUEUEPAIR_NOSPACE -35 #define VMCI_ERROR_QUEUEPAIR_NODATA -36 #define VMCI_ERROR_BUSMEM_INVALIDATION -37 /* Internal error codes. */ #define VMCI_SHAREDMEM_ERROR_BAD_CONTEXT -1000 #define VMCI_PATH_MAX 256 /* VMCI reserved events. */ typedef uint32 VMCI_Event; #define VMCI_EVENT_CTX_ID_UPDATE 0 #define VMCI_EVENT_CTX_REMOVED 1 #define VMCI_EVENT_QP_RESUMED 2 #define VMCI_EVENT_QP_PEER_ATTACH 3 #define VMCI_EVENT_QP_PEER_DETACH 4 #define VMCI_EVENT_MAX 5 /* Reserved guest datagram resource ids. */ #define VMCI_EVENT_HANDLER 0 /* VMCI privileges. */ typedef enum VMCIResourcePrivilegeType { VMCI_PRIV_CH_PRIV, VMCI_PRIV_DESTROY_RESOURCE, VMCI_PRIV_ASSIGN_CLIENT, VMCI_PRIV_DG_CREATE, VMCI_PRIV_DG_SEND, VMCI_PRIV_SM_CREATE, VMCI_PRIV_SM_ATTACH, VMCI_NUM_PRIVILEGES, } VMCIResourcePrivilegeType; /* * VMCI coarse-grained privileges (per context or host * process/endpoint). An entity with the restricted flag is only * allowed to interact with the hypervisor and trusted entities. */ typedef uint32 VMCIPrivilegeFlags; #define VMCI_PRIVILEGE_FLAG_RESTRICTED 0x01 #define VMCI_PRIVILEGE_FLAG_TRUSTED 0x02 #define VMCI_PRIVILEGE_ALL_FLAGS (VMCI_PRIVILEGE_FLAG_RESTRICTED | \ VMCI_PRIVILEGE_FLAG_TRUSTED) #define VMCI_NO_PRIVILEGE_FLAGS 0x00 #define VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS VMCI_NO_PRIVILEGE_FLAGS #define VMCI_LEAST_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_RESTRICTED #define VMCI_MAX_PRIVILEGE_FLAGS VMCI_PRIVILEGE_FLAG_TRUSTED /* VMCI Discovery Service. */ /* Well-known handle to the discovery service. */ #define VMCI_DS_RESOURCE_ID 1 /* Reserved resource ID for discovery service. */ #define VMCI_DS_HANDLE VMCI_MAKE_HANDLE(VMCI_WELL_KNOWN_CONTEXT_ID, \ VMCI_DS_RESOURCE_ID) #define VMCI_DS_CONTEXT VMCI_MAKE_HANDLE(VMCI_WELL_KNOWN_CONTEXT_ID, \ VMCI_CONTEXT_RESOURCE_ID) /* Maximum length of a DS message. */ #define VMCI_DS_MAX_MSG_SIZE 300 /* Command actions.
*/ #define VMCI_DS_ACTION_LOOKUP 0 #define VMCI_DS_ACTION_REGISTER 1 #define VMCI_DS_ACTION_UNREGISTER 2 /* Defines the wire-protocol format for a request sent to the DS from a context. */ typedef struct VMCIDsRequestHeader { int32 action; int32 msgid; VMCIHandle handle; int32 nameLen; int8 name[1]; } VMCIDsRequestHeader; /* Defines the wire-protocol format for a reply sent from the DS to a context. */ typedef struct VMCIDsReplyHeader { int32 msgid; int32 code; VMCIHandle handle; int32 msgLen; int8 msg[1]; } VMCIDsReplyHeader; #define VMCI_PUBLIC_GROUP_NAME "vmci public group" /* 0 through VMCI_RESERVED_RESOURCE_ID_MAX are reserved. */ #define VMCI_RESERVED_RESOURCE_ID_MAX 1023 #define VMCI_DOMAIN_NAME_MAXLEN 32 #define VMCI_LGPFX "VMCI: " #endif vsock-only/include/vmci_call_defs.h0000444000000000000000000001752112025726714016420 0ustar rootroot/********************************************************* * Copyright (C) 2006-2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef _VMCI_CALL_DEFS_H_ #define _VMCI_CALL_DEFS_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMKMOD #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_DISTRIBUTE #include "includeCheck.h" #include "vm_basic_types.h" #include "vmci_defs.h" /* * All structs here are an integral size of their largest member, i.e. a struct * with at least one 8-byte member will have a size that is an integral of 8. * A struct which has a largest member of size 4 will have a size that is an * integral of 4. This is because Windows CL enforces this rule; 32 bit gcc * doesn't, e.g. 32 bit gcc can misalign an 8 byte member if it is preceded by * a 4 byte member. */ /* * Base struct for vmci datagrams. */ typedef struct VMCIDatagram { VMCIHandle dst; VMCIHandle src; uint64 payloadSize; } VMCIDatagram; typedef int (*VMCIDatagramRecvCB)(void *clientData, // IN: client data for handler VMCIDatagram *msg); // IN: /* Flag for creating a wellknown handle instead of a per context handle. */ #define VMCI_FLAG_WELLKNOWN_DG_HND 0x1 /* * Maximum supported size of a VMCI datagram for routable datagrams. * Datagrams going to the hypervisor are allowed to be larger. */ #define VMCI_MAX_DG_SIZE (17 * 4096) #define VMCI_MAX_DG_PAYLOAD_SIZE (VMCI_MAX_DG_SIZE - sizeof(VMCIDatagram)) #define VMCI_DG_PAYLOAD(_dg) (void *)((char *)(_dg) + sizeof(VMCIDatagram)) #define VMCI_DG_HEADERSIZE sizeof(VMCIDatagram) #define VMCI_DG_SIZE(_dg) (VMCI_DG_HEADERSIZE + (size_t)(_dg)->payloadSize) #define VMCI_DG_SIZE_ALIGNED(_dg) ((VMCI_DG_SIZE(_dg) + 7) & (size_t)CONST64U(0xfffffffffffffff8)) #define VMCI_MAX_DATAGRAM_QUEUE_SIZE (VMCI_MAX_DG_SIZE * 2) /* * Struct for sending VMCI_DATAGRAM_REQUEST_MAP and VMCI_DATAGRAM_REMOVE_MAP * datagrams.
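* (As an illustrative sketch only, where 'msg' is a * VMCIDatagramWellKnownMapMsg and 'id' is the well-known id being mapped: * msg.hdr.dst = VMCI_MAKE_HANDLE(VMCI_HYPERVISOR_CONTEXT_ID, * VMCI_DATAGRAM_REQUEST_MAP); * msg.hdr.src = VMCI_ANON_SRC_HANDLE; * msg.hdr.payloadSize = sizeof msg - VMCI_DG_HEADERSIZE; * msg.wellKnownID = id; * The send primitive itself is platform specific and not shown here.)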
Struct size is 32 bytes. All fields in struct are aligned to * their natural alignment. */ typedef struct VMCIDatagramWellKnownMapMsg { VMCIDatagram hdr; VMCIId wellKnownID; uint32 _pad; } VMCIDatagramWellKnownMapMsg; /* * Struct used for querying, via VMCI_RESOURCES_QUERY, the availability of * hypervisor resources. * Struct size is 16 bytes. All fields in struct are aligned to their natural * alignment. */ typedef struct VMCIResourcesQueryHdr { VMCIDatagram hdr; uint32 numResources; uint32 _padding; } VMCIResourcesQueryHdr; /* * Convenience struct for negotiating vectors. Must match layout of * VMCIResourcesQueryHdr minus the VMCIDatagram header. */ typedef struct VMCIResourcesQueryMsg { uint32 numResources; uint32 _padding; VMCI_Resource resources[1]; } VMCIResourcesQueryMsg; /* * The maximum number of resources that can be queried using * VMCI_RESOURCE_QUERY is 31, as the result is encoded in the lower 31 * bits of a positive return value. Negative values are reserved for * errors. */ #define VMCI_RESOURCE_QUERY_MAX_NUM 31 /* Maximum size for the VMCI_RESOURCE_QUERY request. */ #define VMCI_RESOURCE_QUERY_MAX_SIZE sizeof(VMCIResourcesQueryHdr) \ + VMCI_RESOURCE_QUERY_MAX_NUM * sizeof(VMCI_Resource) /* * Struct used for sending VMCI_SHAREDMEM_CREATE messages. Struct size is 24 bytes. * All fields in struct are aligned to their natural alignment. */ typedef struct VMCISharedMemCreateMsg { VMCIDatagram hdr; VMCIHandle handle; uint32 memSize; uint32 _padding; /* PPNs placed after struct. */ } VMCISharedMemCreateMsg; /* * Struct used for sending VMCI_SHAREDMEM_ATTACH messages. Same as struct used * for create messages. */ typedef VMCISharedMemCreateMsg VMCISharedMemAttachMsg; /* * Struct used for sending VMCI_SHAREDMEM_DETACH messages. Struct size is 16 * bytes. All fields in struct are aligned to their natural alignment. */ typedef struct VMCISharedMemDetachMsg { VMCIDatagram hdr; VMCIHandle handle; } VMCISharedMemDetachMsg; /* * Struct used for sending VMCI_SHAREDMEM_QUERY messages. Same as struct used * for detach messages. */ typedef VMCISharedMemDetachMsg VMCISharedMemQueryMsg; /* * This struct is used to contain data for events. Size of this struct is a * multiple of 8 bytes, and all fields are aligned to their natural alignment. */ typedef struct VMCI_EventData { VMCI_Event event; /* 4 bytes. */ uint32 _pad; /* * Event payload is put here. */ } VMCI_EventData; /* * We use the following inline function to access the payload data associated * with an event data. */ static INLINE void * VMCIEventDataPayload(VMCI_EventData *evData) // IN: { return (void *)((char *)evData + sizeof *evData); } /* * Define the different VMCI_EVENT payload data types here. All structs must * be a multiple of 8 bytes, and fields must be aligned to their natural * alignment. */ typedef struct VMCIEventPayload_Context { VMCIId contextID; /* 4 bytes. */ uint32 _pad; } VMCIEventPayload_Context; typedef struct VMCIEventPayload_QP { VMCIHandle handle; /* QueuePair handle. */ VMCIId peerId; /* Context id of attaching/detaching VM. */ uint32 _pad; } VMCIEventPayload_QP; /* * We define the following struct to get the size of the maximum event data * the hypervisor may send to the guest. If adding a new event payload type * above, add it to the following struct too (inside the union).
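* (Sketch of consuming a payload with the helper above, assuming the caller * has already checked that evData.event is VMCI_EVENT_QP_PEER_ATTACH or * VMCI_EVENT_QP_PEER_DETACH: * VMCIEventPayload_QP *p = VMCIEventDataPayload(&evData); * after which p->handle and p->peerId identify the affected queue pair.)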
*/ typedef struct VMCIEventData_Max { VMCI_EventData eventData; union { VMCIEventPayload_Context contextPayload; VMCIEventPayload_QP qpPayload; } evDataPayload; } VMCIEventData_Max; /* * Struct used for VMCI_EVENT_SUBSCRIBE/UNSUBSCRIBE and VMCI_EVENT_HANDLER * messages. Struct size is 32 bytes. All fields in struct are aligned to * their natural alignment. */ typedef struct VMCIEventMsg { VMCIDatagram hdr; VMCI_EventData eventData; /* Has event type and payload. */ /* * Payload gets put here. */ } VMCIEventMsg; /* * We use the following inline function to access the payload data associated * with an event message. */ static INLINE void * VMCIEventMsgPayload(VMCIEventMsg *eMsg) // IN: { return VMCIEventDataPayload(&eMsg->eventData); } /* Flags for VMCI QueuePair API. */ #define VMCI_QPFLAG_ATTACH_ONLY 0x1 /* Fail alloc if QP not created by peer. */ #define VMCI_QPFLAG_LOCAL 0x2 /* Only allow attaches from local context. */ /* Update the following (bitwise OR flags) while adding new flags. */ #define VMCI_QP_ALL_FLAGS (VMCI_QPFLAG_ATTACH_ONLY | VMCI_QPFLAG_LOCAL) /* * Structs used for QueuePair alloc and detach messages. We align fields of * these structs to 64bit boundaries. */ typedef struct VMCIQueuePairAllocMsg { VMCIDatagram hdr; VMCIHandle handle; VMCIId peer; /* 32bit field. */ uint32 flags; uint64 produceSize; uint64 consumeSize; uint64 numPPNs; /* List of PPNs placed here. */ } VMCIQueuePairAllocMsg; typedef struct VMCIQueuePairDetachMsg { VMCIDatagram hdr; VMCIHandle handle; } VMCIQueuePairDetachMsg; #endif vsock-only/include/vmci_infrastructure.h0000444000000000000000000000530712025726714017563 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmci_infrastructure.h -- * * This file implements the VMCI infrastructure. */ #ifndef _VMCI_INFRASTRUCTURE_H_ #define _VMCI_INFRASTRUCTURE_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_DISTRIBUTE #include "includeCheck.h" #include "vmware.h" #include "vmci_defs.h" typedef enum { VMCIOBJ_VMX_VM = 10, VMCIOBJ_CONTEXT, VMCIOBJ_PROCESS, VMCIOBJ_DATAGRAM_PROCESS, VMCIOBJ_NOT_SET, } VMCIObjType; /* Guestcalls currently support a maximum of 8 uint64 arguments. */ #define VMCI_GUESTCALL_MAX_ARGS_SIZE 64 /* Used to determine what checkpoint state to get and set. 
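* (One of these values travels as the cptType field of the VMCICptBufInfo * ioctl buffer defined in vmci_iocontrols.h.)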
*/ #define VMCI_NOTIFICATION_CPT_STATE 0x1 #define VMCI_WELLKNOWN_CPT_STATE 0x2 #define VMCI_QP_CPT_STATE 0x3 #define VMCI_QP_INFO_CPT_STATE 0x4 /* Used to control the VMCI device in the vmkernel */ #define VMCI_DEV_RESET 0x01 #define VMCI_DEV_QP_RESET 0x02 #define VMCI_DEV_QUIESCE 0x03 #define VMCI_DEV_UNQUIESCE 0x04 #define VMCI_DEV_QP_BREAK_SHARING 0x05 /* *------------------------------------------------------------------------- * * VMCI_Hash -- * * Hash function used by the Simple Datagram API. Based on the djb2 * hash function by Dan Bernstein. * * Result: * Returns the hash table bucket index for the given handle. * * Side effects: * None. * *------------------------------------------------------------------------- */ static INLINE int VMCI_Hash(VMCIHandle handle, // IN unsigned size) // IN { int i; int hash = 5381; uint64 handleValue = (uint64)handle.resource << 32 | handle.context; for (i = 0; i < sizeof handle; i++) { hash = ((hash << 5) + hash) + (uint8)(handleValue >> (i*8)); } return hash & (size - 1); } #endif // _VMCI_INFRASTRUCTURE_H_ vsock-only/include/vmci_kernel_if.h0000444000000000000000000002241212025726714016435 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmci_kernel_if.h -- * * This file defines helper functions for VMCI host _and_ guest * kernel code. It must work for windows, macosx, vmkernel and * linux kernel, i.e. using defines where necessary. */ #ifndef _VMCI_KERNEL_IF_H_ #define _VMCI_KERNEL_IF_H_ #if !defined(linux) && !defined(_WIN32) && !defined(__APPLE__) && \ !defined(VMKERNEL) && !defined(SOLARIS) #error "Platform not supported." #endif #if defined(_WIN32) #include #endif #if defined(linux) && !defined(VMKERNEL) # include "compat_version.h" # include "compat_wait.h" # include "compat_spinlock.h" # include "compat_semaphore.h" #endif // linux #ifdef __APPLE__ # include #include #include #endif #ifdef VMKERNEL #include "splock.h" #include "semaphore_ext.h" #endif #ifdef SOLARIS # include # include # include #endif #include "vm_basic_types.h" #include "vmci_defs.h" /* Flags for specifying memory type. */ #define VMCI_MEMORY_NORMAL 0x0 #define VMCI_MEMORY_ATOMIC 0x1 #define VMCI_MEMORY_NONPAGED 0x2 /* Platform specific type definitions. */ #if defined(VMKERNEL) typedef SP_SpinLock VMCILock; typedef SP_IRQL VMCILockFlags; typedef Semaphore VMCIEvent; typedef Semaphore VMCIMutex; #elif defined(linux) typedef spinlock_t VMCILock; typedef unsigned long VMCILockFlags; typedef wait_queue_head_t VMCIEvent; typedef struct semaphore VMCIMutex; typedef PPN *VMCIPpnList; /* List of PPNs in produce/consume queue.
*/ #elif defined(__APPLE__) typedef IOLock *VMCILock; typedef unsigned long VMCILockFlags; typedef semaphore_t VMCIEvent; typedef IOLock *VMCIMutex; #elif defined(_WIN32) typedef KSPIN_LOCK VMCILock; typedef KIRQL VMCILockFlags; typedef KEVENT VMCIEvent; typedef FAST_MUTEX VMCIMutex; typedef PMDL VMCIPpnList; /* MDL to map the produce/consume queue. */ #elif defined(SOLARIS) typedef kmutex_t VMCILock; typedef unsigned long VMCILockFlags; typedef ksema_t VMCIEvent; #endif // VMKERNEL /* Callback needed for correctly waiting on events. */ typedef int (*VMCIEventReleaseCB)(void *clientData); /* * The VMCI locks use a ranking scheme similar to the one used by * vmkernel. While holding a lock L1 with rank R1, only locks with * rank higher than R1 may be grabbed. The available ranks for VMCI * locks are (in descending order): * - VMCI_LOCK_RANK_HIGH_BH : to be used for locks grabbed while executing * in a bottom half and not held while grabbing other locks. * - VMCI_LOCK_RANK_MIDDLE_BH : to be used for locks grabbed while executing in a * bottom half and held while grabbing locks of rank VMCI_LOCK_RANK_HIGH_BH. * - VMCI_LOCK_RANK_LOW_BH : to be used for locks grabbed while executing in a * bottom half and held while grabbing locks of rank * VMCI_LOCK_RANK_MIDDLE_BH. * - VMCI_LOCK_RANK_HIGHEST : to be used for locks that are not held while * grabbing other locks except system locks with higher ranks and bottom * half locks. * - VMCI_LOCK_RANK_HIGHER : to be used for locks that are held while * grabbing locks of rank VMCI_LOCK_RANK_HIGHEST or higher. * - VMCI_LOCK_RANK_HIGH : to be used for locks that are held while * grabbing locks of rank VMCI_LOCK_RANK_HIGHER or higher. This is * the highest lock rank used by core VMCI services. * - VMCI_LOCK_RANK_MIDDLE : to be used for locks that are held while * grabbing locks of rank VMCI_LOCK_RANK_HIGH or higher. * - VMCI_LOCK_RANK_LOW : to be used for locks that are held while * grabbing locks of rank VMCI_LOCK_RANK_MIDDLE or higher. * - VMCI_LOCK_RANK_LOWEST : to be used for locks that are held while * grabbing locks of rank VMCI_LOCK_RANK_LOW or higher. */ #ifdef VMKERNEL typedef SP_Rank VMCILockRank; #define VMCI_LOCK_RANK_HIGH_BH SP_RANK_IRQ_LEAF #define VMCI_LOCK_RANK_MIDDLE_BH (SP_RANK_IRQ_LEAF-1) #define VMCI_LOCK_RANK_LOW_BH SP_RANK_IRQ_LOWEST #define VMCI_LOCK_RANK_HIGHEST SP_RANK_SHM_MGR-1 #else typedef unsigned long VMCILockRank; #define VMCI_LOCK_RANK_HIGH_BH 0x4000 #define VMCI_LOCK_RANK_MIDDLE_BH 0x2000 #define VMCI_LOCK_RANK_LOW_BH 0x1000 #define VMCI_LOCK_RANK_HIGHEST 0x0fff #endif // VMKERNEL #define VMCI_LOCK_RANK_HIGHER (VMCI_LOCK_RANK_HIGHEST-1) #define VMCI_LOCK_RANK_HIGH (VMCI_LOCK_RANK_HIGHER-1) #define VMCI_LOCK_RANK_MIDDLE_HIGH (VMCI_LOCK_RANK_HIGH-1) #define VMCI_LOCK_RANK_MIDDLE (VMCI_LOCK_RANK_MIDDLE_HIGH-1) #define VMCI_LOCK_RANK_MIDDLE_LOW (VMCI_LOCK_RANK_MIDDLE-1) #define VMCI_LOCK_RANK_LOW (VMCI_LOCK_RANK_MIDDLE_LOW-1) #define VMCI_LOCK_RANK_LOWEST (VMCI_LOCK_RANK_LOW-1) /* * In vmkernel, we try to reduce the amount of memory mapped into the * virtual address space by only mapping the memory of buffered * datagrams when copying from and to the guest. In other OSes, * regular kernel memory is used. VMCIBuffer is used to reference * possibly unmapped memory. */ #ifdef VMKERNEL typedef MPN VMCIBuffer; #define VMCI_BUFFER_INVALID INVALID_MPN #else typedef void * VMCIBuffer; #define VMCI_BUFFER_INVALID NULL #endif /* * Host specific struct used for signalling.
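* Each platform keeps whatever primitive it needs to wake a waiter: a wait * queue on Linux, an event handle on Windows, a pollhead on Solaris, and a * socket object on Mac OS, as the fields below show.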
*/ typedef struct VMCIHost { #if defined(VMKERNEL) World_ID vmmWorldID; #elif defined(linux) wait_queue_head_t waitQueue; #elif defined(__APPLE__) struct Socket *socket; /* vmci Socket object on Mac OS. */ #elif defined(_WIN32) KEVENT *callEvent; /* Ptr to userlevel event used when signalling * new pending guestcalls in kernel. */ #elif defined(SOLARIS) struct pollhead pollhead; /* Per datagram handle pollhead structure to * be treated as a black-box. None of its * fields should be referenced. */ #endif } VMCIHost; void VMCI_InitLock(VMCILock *lock, char *name, VMCILockRank rank); void VMCI_CleanupLock(VMCILock *lock); void VMCI_GrabLock(VMCILock *lock, VMCILockFlags *flags); void VMCI_ReleaseLock(VMCILock *lock, VMCILockFlags flags); void VMCI_GrabLock_BH(VMCILock *lock, VMCILockFlags *flags); void VMCI_ReleaseLock_BH(VMCILock *lock, VMCILockFlags flags); void VMCIHost_InitContext(VMCIHost *hostContext, uintptr_t eventHnd); void VMCIHost_ReleaseContext(VMCIHost *hostContext); void VMCIHost_SignalCall(VMCIHost *hostContext); void VMCIHost_ClearCall(VMCIHost *hostContext); Bool VMCIHost_WaitForCallLocked(VMCIHost *hostContext, VMCILock *lock, VMCILockFlags *flags, Bool useBH); void *VMCI_AllocKernelMem(size_t size, int flags); void VMCI_FreeKernelMem(void *ptr, size_t size); VMCIBuffer VMCI_AllocBuffer(size_t size, int flags); void *VMCI_MapBuffer(VMCIBuffer buf); void VMCI_ReleaseBuffer(void *ptr); void VMCI_FreeBuffer(VMCIBuffer buf, size_t size); #ifdef SOLARIS int VMCI_CopyToUser(void *dst, const void *src, unsigned int len, int mode); #else int VMCI_CopyToUser(void *dst, const void *src, unsigned int len); /* * Don't need the following for guests, hence no Solaris code for this * function. */ Bool VMCIWellKnownID_AllowMap(VMCIId wellKnownID, VMCIPrivilegeFlags privFlags); #endif void VMCI_CreateEvent(VMCIEvent *event); void VMCI_DestroyEvent(VMCIEvent *event); void VMCI_SignalEvent(VMCIEvent *event); void VMCI_WaitOnEvent(VMCIEvent *event, VMCIEventReleaseCB releaseCB, void *clientData); /* XXX TODO for VMKERNEL (host) and Solaris (guest). */ #if !defined(VMKERNEL) && (defined(__linux__) || defined(_WIN32) || \ defined(__APPLE__)) int VMCI_CopyFromUser(void *dst, const void *src, size_t len); #endif #if !defined(SOLARIS) int VMCIMutex_Init(VMCIMutex *mutex); void VMCIMutex_Destroy(VMCIMutex *mutex); void VMCIMutex_Acquire(VMCIMutex *mutex); void VMCIMutex_Release(VMCIMutex *mutex); #endif /* XXX TODO for Solaris (guest). */ #if !defined(VMKERNEL) && (defined(__linux__) || defined(_WIN32)) VA VMCI_AllocQueueKVA(uint64 size); void VMCI_FreeQueueKVA(VA va, uint64 size); typedef struct PPNSet { uint64 numProducePages; uint64 numConsumePages; VMCIPpnList producePPNs; VMCIPpnList consumePPNs; Bool initialized; } PPNSet; int VMCI_AllocPPNSet(VA produceVA, uint64 numProducePages, VA consumeVA, uint64 numConsumePages, PPNSet *ppnSet); void VMCI_FreePPNSet(PPNSet *ppnSet); int VMCI_PopulatePPNList(uint8 *callBuf, const PPNSet *ppnSet); #endif #endif // _VMCI_KERNEL_IF_H_ vsock-only/include/vmci_iocontrols.h0000444000000000000000000003326012025726714016675 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmci_iocontrols.h * * The VMCI driver io controls. */ #ifndef _VMCI_IOCONTROLS_H_ #define _VMCI_IOCONTROLS_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMKERNEL #include "includeCheck.h" #include "vmci_defs.h" /* * Driver version. * * Increment major version when you make an incompatible change. * Compatibility goes both ways (old driver with new executable * as well as new driver with old executable). */ #define VMCI_VERSION_SHIFT_WIDTH 16 /* Never change this. */ #define VMCI_MAJOR_VERSION_VALUE 8 /* Bump major version number here. */ #define VMCI_MINOR_VERSION_VALUE 0 /* Bump minor version number here. */ /* Don't modify the next three macros. */ #define VMCI_VERSION (VMCI_MAJOR_VERSION_VALUE << \ VMCI_VERSION_SHIFT_WIDTH | \ VMCI_MINOR_VERSION_VALUE) #define VMCI_VERSION_MAJOR(v) ((uint32) (v) >> VMCI_VERSION_SHIFT_WIDTH) #define VMCI_VERSION_MINOR(v) ((uint16) (v)) #if defined(__linux__) || defined(__APPLE__) || defined(SOLARIS) || defined(VMKERNEL) /* * Linux defines _IO* macros, but the core kernel code ignores the encoded * ioctl value. It is up to individual drivers to decode the value (for * example to look at the size of a structure to determine which version * of a specific command should be used) or not (which is what we * currently do, so right now the ioctl value for a given command is the * command itself). * * Hence, we just define the IOCTL_VMCI_foo values directly, with no * intermediate IOCTLCMD_ representation. */ # define IOCTLCMD(_cmd) IOCTL_VMCI_ ## _cmd #else // !defined(__linux__) /* * On platforms other than Linux, IOCTLCMD_foo values are just numbers, and * we build the IOCTL_VMCI_foo values around these using a platform-specific * format for encoding arguments and sizes. */ # define IOCTLCMD(_cmd) IOCTLCMD_VMCI_ ## _cmd #endif enum IOCTLCmd_VMCI { /* * We need to bracket the range of values used for ioctls, because x86_64 * Linux forces us to explicitly register ioctl handlers by value for * handling 32 bit ioctl syscalls. Hence FIRST and LAST. Pick something * for FIRST that doesn't collide with vmmon (2001+). */ #if defined(__linux__) IOCTLCMD(FIRST) = 1951, #else /* Start at 0.
*/ IOCTLCMD(FIRST), #endif IOCTLCMD(VERSION) = IOCTLCMD(FIRST), /* BEGIN VMCI */ IOCTLCMD(INIT_CONTEXT), IOCTLCMD(CREATE_PROCESS), IOCTLCMD(CREATE_DATAGRAM_PROCESS), IOCTLCMD(SHAREDMEM_CREATE), IOCTLCMD(SHAREDMEM_ATTACH), IOCTLCMD(SHAREDMEM_QUERY), IOCTLCMD(SHAREDMEM_DETACH), IOCTLCMD(VERSION2), IOCTLCMD(QUEUEPAIR_ALLOC), IOCTLCMD(QUEUEPAIR_SETPAGEFILE), IOCTLCMD(QUEUEPAIR_DETACH), IOCTLCMD(DATAGRAM_SEND), IOCTLCMD(DATAGRAM_RECEIVE), IOCTLCMD(DATAGRAM_REQUEST_MAP), IOCTLCMD(DATAGRAM_REMOVE_MAP), IOCTLCMD(CTX_ADD_NOTIFICATION), IOCTLCMD(CTX_REMOVE_NOTIFICATION), IOCTLCMD(CTX_GET_CPT_STATE), IOCTLCMD(CTX_SET_CPT_STATE), IOCTLCMD(GET_CONTEXT_ID), /* END VMCI */ /* * BEGIN VMCI SOCKETS * * We mark the end of the vmci commands and the start of the vmci sockets * commands since they are used in separate modules on Linux. * */ IOCTLCMD(LAST), IOCTLCMD(SOCKETS_FIRST) = IOCTLCMD(LAST), IOCTLCMD(SOCKETS_ACCEPT) = IOCTLCMD(SOCKETS_FIRST), IOCTLCMD(SOCKETS_BIND), IOCTLCMD(SOCKETS_CLOSE), IOCTLCMD(SOCKETS_CONNECT), /* * The next two values are public (vmci_sockets.h) and cannot be changed. * That means the number of values above these cannot be changed either * unless the base index (specified below) is updated accordingly. */ IOCTLCMD(SOCKETS_GET_AF_VALUE), IOCTLCMD(SOCKETS_GET_LOCAL_CID), IOCTLCMD(SOCKETS_GET_SOCK_NAME), IOCTLCMD(SOCKETS_GET_SOCK_OPT), IOCTLCMD(SOCKETS_GET_VM_BY_NAME), IOCTLCMD(SOCKETS_LISTEN), IOCTLCMD(SOCKETS_RECV), IOCTLCMD(SOCKETS_RECV_FROM), IOCTLCMD(SOCKETS_SELECT), IOCTLCMD(SOCKETS_SEND), IOCTLCMD(SOCKETS_SEND_TO), IOCTLCMD(SOCKETS_SET_SOCK_OPT), IOCTLCMD(SOCKETS_SHUTDOWN), IOCTLCMD(SOCKETS_SOCKET), /* END VMCI SOCKETS */ // Must be last. IOCTLCMD(SOCKETS_LAST) }; #if defined _WIN32 /* * Windows VMCI ioctl definitions. */ /* These values cannot be changed since some of the ioctl values are public. 
*/ #define FILE_DEVICE_VMCI 0x8103 #define VMCI_IOCTL_BASE_INDEX 0x801 #define VMCIIOCTL_BUFFERED(name) \ CTL_CODE(FILE_DEVICE_VMCI, \ VMCI_IOCTL_BASE_INDEX + IOCTLCMD_VMCI_ ## name, \ METHOD_BUFFERED, \ FILE_ANY_ACCESS) #define VMCIIOCTL_NEITHER(name) \ CTL_CODE(FILE_DEVICE_VMCI, \ VMCI_IOCTL_BASE_INDEX + IOCTLCMD_VMCI_ ## name, \ METHOD_NEITHER, \ FILE_ANY_ACCESS) #define IOCTL_VMCI_VERSION VMCIIOCTL_BUFFERED(VERSION) /* BEGIN VMCI */ #define IOCTL_VMCI_INIT_CONTEXT VMCIIOCTL_BUFFERED(INIT_CONTEXT) #define IOCTL_VMCI_CREATE_PROCESS VMCIIOCTL_BUFFERED(CREATE_PROCESS) #define IOCTL_VMCI_CREATE_DATAGRAM_PROCESS \ VMCIIOCTL_BUFFERED(CREATE_DATAGRAM_PROCESS) #define IOCTL_VMCI_HYPERCALL VMCIIOCTL_BUFFERED(HYPERCALL) #define IOCTL_VMCI_SHAREDMEM_CREATE \ VMCIIOCTL_BUFFERED(SHAREDMEM_CREATE) #define IOCTL_VMCI_SHAREDMEM_ATTACH \ VMCIIOCTL_BUFFERED(SHAREDMEM_ATTACH) #define IOCTL_VMCI_SHAREDMEM_QUERY \ VMCIIOCTL_BUFFERED(SHAREDMEM_QUERY) #define IOCTL_VMCI_SHAREDMEM_DETACH \ VMCIIOCTL_BUFFERED(SHAREDMEM_DETACH) #define IOCTL_VMCI_VERSION2 VMCIIOCTL_BUFFERED(VERSION2) #define IOCTL_VMCI_QUEUEPAIR_ALLOC \ VMCIIOCTL_BUFFERED(QUEUEPAIR_ALLOC) #define IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE \ VMCIIOCTL_BUFFERED(QUEUEPAIR_SETPAGEFILE) #define IOCTL_VMCI_QUEUEPAIR_DETACH \ VMCIIOCTL_BUFFERED(QUEUEPAIR_DETACH) #define IOCTL_VMCI_DATAGRAM_SEND VMCIIOCTL_BUFFERED(DATAGRAM_SEND) #define IOCTL_VMCI_DATAGRAM_RECEIVE VMCIIOCTL_NEITHER(DATAGRAM_RECEIVE) #define IOCTL_VMCI_DATAGRAM_REQUEST_MAP VMCIIOCTL_BUFFERED(DATAGRAM_REQUEST_MAP) #define IOCTL_VMCI_DATAGRAM_REMOVE_MAP VMCIIOCTL_BUFFERED(DATAGRAM_REMOVE_MAP) #define IOCTL_VMCI_CTX_ADD_NOTIFICATION VMCIIOCTL_BUFFERED(CTX_ADD_NOTIFICATION) #define IOCTL_VMCI_CTX_REMOVE_NOTIFICATION \ VMCIIOCTL_BUFFERED(CTX_REMOVE_NOTIFICATION) #define IOCTL_VMCI_CTX_GET_CPT_STATE \ VMCIIOCTL_BUFFERED(CTX_GET_CPT_STATE) #define IOCTL_VMCI_CTX_SET_CPT_STATE \ VMCIIOCTL_BUFFERED(CTX_SET_CPT_STATE) #define IOCTL_VMCI_GET_CONTEXT_ID \ VMCIIOCTL_BUFFERED(GET_CONTEXT_ID) /* END VMCI */ /* BEGIN VMCI SOCKETS */ #define IOCTL_VMCI_SOCKETS_ACCEPT \ VMCIIOCTL_BUFFERED(SOCKETS_ACCEPT) #define IOCTL_VMCI_SOCKETS_BIND \ VMCIIOCTL_BUFFERED(SOCKETS_BIND) #define IOCTL_VMCI_SOCKETS_CLOSE \ VMCIIOCTL_BUFFERED(SOCKETS_CLOSE) #define IOCTL_VMCI_SOCKETS_CONNECT \ VMCIIOCTL_BUFFERED(SOCKETS_CONNECT) #define IOCTL_VMCI_SOCKETS_GET_AF_VALUE \ VMCIIOCTL_BUFFERED(SOCKETS_GET_AF_VALUE) #define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID \ VMCIIOCTL_BUFFERED(SOCKETS_GET_LOCAL_CID) #define IOCTL_VMCI_SOCKETS_GET_SOCK_NAME \ VMCIIOCTL_BUFFERED(SOCKETS_GET_SOCK_NAME) #define IOCTL_VMCI_SOCKETS_GET_SOCK_OPT \ VMCIIOCTL_BUFFERED(SOCKETS_GET_SOCK_OPT) #define IOCTL_VMCI_SOCKETS_GET_VM_BY_NAME \ VMCIIOCTL_BUFFERED(SOCKETS_GET_VM_BY_NAME) #define IOCTL_VMCI_SOCKETS_LISTEN \ VMCIIOCTL_BUFFERED(SOCKETS_LISTEN) #define IOCTL_VMCI_SOCKETS_RECV \ VMCIIOCTL_BUFFERED(SOCKETS_RECV) #define IOCTL_VMCI_SOCKETS_RECV_FROM \ VMCIIOCTL_BUFFERED(SOCKETS_RECV_FROM) #define IOCTL_VMCI_SOCKETS_SELECT \ VMCIIOCTL_BUFFERED(SOCKETS_SELECT) #define IOCTL_VMCI_SOCKETS_SEND \ VMCIIOCTL_BUFFERED(SOCKETS_SEND) #define IOCTL_VMCI_SOCKETS_SEND_TO \ VMCIIOCTL_BUFFERED(SOCKETS_SEND_TO) #define IOCTL_VMCI_SOCKETS_SET_SOCK_OPT \ VMCIIOCTL_BUFFERED(SOCKETS_SET_SOCK_OPT) #define IOCTL_VMCI_SOCKETS_SHUTDOWN \ VMCIIOCTL_BUFFERED(SOCKETS_SHUTDOWN) #define IOCTL_VMCI_SOCKETS_SOCKET \ VMCIIOCTL_BUFFERED(SOCKETS_SOCKET) /* END VMCI SOCKETS */ #endif // _WIN32 /* * VMCI driver initialization. This block can also be used to * pass initial group membership etc.
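* (As a sketch, not a prescription: a user-level client typically fills in * this block and issues IOCTL_VMCI_INIT_CONTEXT on its open device handle; * the exact ioctl invocation is platform specific.)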
*/ typedef struct VMCIInitBlock { VMCIId cid; VMCIPrivilegeFlags flags; #ifdef _WIN32 uint64 event; /* Handle for signalling vmci calls on windows. */ #endif // _WIN32 } VMCIInitBlock; typedef struct VMCISharedMemInfo { VMCIHandle handle; uint32 size; uint32 result; VA64 va; /* Currently only used in the guest. */ char pageFileName[VMCI_PATH_MAX]; } VMCISharedMemInfo; typedef struct VMCIQueuePairAllocInfo { VMCIHandle handle; VMCIId peer; uint32 flags; uint64 produceSize; uint64 consumeSize; VA64 producePageFile; /* User VA. */ VA64 consumePageFile; /* User VA. */ uint64 producePageFileSize; /* Size of the file name array. */ uint64 consumePageFileSize; /* Size of the file name array. */ int32 result; uint32 _pad; } VMCIQueuePairAllocInfo; typedef struct VMCIQueuePairPageFileInfo { VMCIHandle handle; VA64 producePageFile; /* User VA. */ VA64 consumePageFile; /* User VA. */ uint64 producePageFileSize; /* Size of the file name array. */ uint64 consumePageFileSize; /* Size of the file name array. */ int32 result; uint32 _pad; } VMCIQueuePairPageFileInfo; typedef struct VMCIQueuePairDetachInfo { VMCIHandle handle; int32 result; uint32 _pad; } VMCIQueuePairDetachInfo; typedef struct VMCIDatagramSendRecvInfo { VA64 addr; uint32 len; int32 result; } VMCIDatagramSendRecvInfo; /* Used to create datagram endpoints in guest or host userlevel. */ typedef struct VMCIDatagramCreateInfo { VMCIId resourceID; uint32 flags; int eventHnd; int result; // Result of the handle create operation. VMCIHandle handle; // Handle if successful. } VMCIDatagramCreateInfo; /* Used to add/remove well-known datagram mappings. */ typedef struct VMCIDatagramMapInfo { VMCIId wellKnownID; int result; } VMCIDatagramMapInfo; /* Used to add/remove remote context notifications. */ typedef struct VMCINotifyAddRemoveInfo { VMCIId remoteCID; int result; } VMCINotifyAddRemoveInfo; /* Used to set/get current context's checkpoint state. */ typedef struct VMCICptBufInfo { VA64 cptBuf; uint32 cptType; uint32 bufSize; int32 result; uint32 _pad; } VMCICptBufInfo; #ifdef __APPLE__ /* * Mac OS ioctl definitions. * * Mac OS defines _IO* macros, and the core kernel code uses the size encoded * in the ioctl value to copy the memory back and forth (depending on the * direction encoded in the ioctl value) between the user and kernel address * spaces. * See iocontrolsMacOS.h for details on how this is done. We use sockets only * for vmci.
*/ #include enum VMCrossTalkSockOpt { VMCI_SO_VERSION = 0, VMCI_SO_CONTEXT = IOCTL_VMCI_INIT_CONTEXT, VMCI_SO_PROCESS = IOCTL_VMCI_CREATE_PROCESS, VMCI_SO_DATAGRAM_PROCESS = IOCTL_VMCI_CREATE_DATAGRAM_PROCESS, VMCI_SO_SHAREDMEM_CREATE = IOCTL_VMCI_SHAREDMEM_CREATE, VMCI_SO_SHAREDMEM_ATTACH = IOCTL_VMCI_SHAREDMEM_ATTACH, VMCI_SO_SHAREDMEM_QUERY = IOCTL_VMCI_SHAREDMEM_QUERY, VMCI_SO_SHAREDMEM_DETACH = IOCTL_VMCI_SHAREDMEM_DETACH, VMCI_SO_VERSION2 = IOCTL_VMCI_VERSION2, VMCI_SO_QUEUEPAIR_ALLOC = IOCTL_VMCI_QUEUEPAIR_ALLOC, VMCI_SO_QUEUEPAIR_SETPAGEFILE = IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE, VMCI_SO_QUEUEPAIR_DETACH = IOCTL_VMCI_QUEUEPAIR_DETACH, VMCI_SO_DATAGRAM_SEND = IOCTL_VMCI_DATAGRAM_SEND, VMCI_SO_DATAGRAM_RECEIVE = IOCTL_VMCI_DATAGRAM_RECEIVE, VMCI_SO_DATAGRAM_REQUEST_MAP = IOCTL_VMCI_DATAGRAM_REQUEST_MAP, VMCI_SO_DATAGRAM_REMOVE_MAP = IOCTL_VMCI_DATAGRAM_REMOVE_MAP, VMCI_SO_CTX_ADD_NOTIFICATION = IOCTL_VMCI_CTX_ADD_NOTIFICATION, VMCI_SO_CTX_REMOVE_NOTIFICATION = IOCTL_VMCI_CTX_REMOVE_NOTIFICATION, VMCI_SO_CTX_GET_CPT_STATE = IOCTL_VMCI_CTX_GET_CPT_STATE, VMCI_SO_CTX_SET_CPT_STATE = IOCTL_VMCI_CTX_SET_CPT_STATE, VMCI_SO_GET_CONTEXT_ID = IOCTL_VMCI_GET_CONTEXT_ID, VMCI_SO_USERFD, }; # define VMCI_MACOS_HOST_DEVICE_BASE "com.vmware.kext.vmci" # ifdef VMX86_DEVEL # define VMCI_MACOS_HOST_DEVICE VMCI_MACOS_HOST_DEVICE_BASE ".devel" # else # define VMCI_MACOS_HOST_DEVICE VMCI_MACOS_HOST_DEVICE_BASE # endif #endif /* Clean up helper macros */ #undef IOCTLCMD #endif // ifndef _VMCI_IOCONTROLS_H_ vsock-only/include/vmci_queue_pair.h0000444000000000000000000005113312025726714016640 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef _VMCI_QUEUE_PAIR_H_ #define _VMCI_QUEUE_PAIR_H_ /* * * vmci_queue_pair.h -- * * Defines queue layout in memory, and helper functions to enqueue and * dequeue items. XXX needs checksumming? */ #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMX #include "includeCheck.h" #include "vm_basic_defs.h" #include "vm_basic_types.h" #include "vm_atomic.h" #include "vmci_defs.h" #include "vm_assert.h" #if defined(__linux__) && defined(__KERNEL__) # include "vmci_kernel_if.h" #endif #if defined(__linux__) && defined(__KERNEL__) struct page; #endif /* * For a queue of buffer 'size' bytes, the tail and head pointers will be in * the range [0, size-1]. */ typedef struct VMCIQueueHeader { /* All fields are 64bit and aligned. */ VMCIHandle handle; /* Identifier. */ Atomic_uint64 producerTail; /* Offset in this queue. */ Atomic_uint64 consumerHead; /* Offset in peer queue. 
*/ } VMCIQueueHeader; typedef struct VMCIQueue { VMCIQueueHeader queueHeader; uint8 _padding[PAGE_SIZE - sizeof(VMCIQueueHeader)]; #if defined(__linux__) && defined(__KERNEL__) struct page *page[0]; /* List of pages containing queue data. */ #else uint8 buffer[0]; /* Buffer containing data. */ #endif } VMCIQueue; typedef int VMCIMemcpyToQueueFunc(VMCIQueue *queue, uint64 queueOffset, const void *src, size_t srcOffset, size_t size); typedef int VMCIMemcpyFromQueueFunc(void *dest, size_t destOffset, const VMCIQueue *queue, uint64 queueOffset, size_t size); #if defined(__linux__) && defined(__KERNEL__) int VMCIMemcpyToQueue(VMCIQueue *queue, uint64 queueOffset, const void *src, size_t srcOffset, size_t size); int VMCIMemcpyFromQueue(void *dest, size_t destOffset, const VMCIQueue *queue, uint64 queueOffset, size_t size); int VMCIMemcpyToQueueV(VMCIQueue *queue, uint64 queueOffset, const void *src, size_t srcOffset, size_t size); int VMCIMemcpyFromQueueV(void *dest, size_t destOffset, const VMCIQueue *queue, uint64 queueOffset, size_t size); #elif defined(_WIN32) && defined(WINNT_DDK) int VMCIMemcpyToQueue(VMCIQueue *queue, uint64 queueOffset, const void *src, size_t srcOffset, size_t size); int VMCIMemcpyFromQueue(void *dest, size_t destOffset, const VMCIQueue *queue, uint64 queueOffset, size_t size); #else /* *----------------------------------------------------------------------------- * * VMCIMemcpyToQueue -- * * Wrapper for memcpy --- copies from a given buffer to a VMCI Queue. * Assumes that offset + size does not wrap around in the queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int VMCIMemcpyToQueue(VMCIQueue *queue, // OUT: uint64 queueOffset, // IN: const void *src, // IN: size_t srcOffset, // IN: size_t size) // IN: { memcpy(queue->buffer + queueOffset, (uint8 *)src + srcOffset, size); return 0; } /* *----------------------------------------------------------------------------- * * VMCIMemcpyFromQueue -- * * Wrapper for memcpy --- copies to a given buffer from a VMCI Queue. * Assumes that offset + size does not wrap around in the queue. * * Results: * Zero on success, negative error code on failure. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int VMCIMemcpyFromQueue(void *dest, // OUT: size_t destOffset, // IN: const VMCIQueue *queue, // IN: uint64 queueOffset, // IN: size_t size) // IN: { memcpy((uint8 *)dest + destOffset, queue->buffer + queueOffset, size); return 0; } #endif /* __linux__ && __KERNEL__ */ /* * If one client of a QueuePair is a 32bit entity, we restrict the QueuePair * size to be less than 4GB, and use 32bit atomic operations on the head and * tail pointers. 64bit atomic read on a 32bit entity involves cmpxchg8b which * is an atomic read-modify-write. This will cause traces to fire when a 32bit * consumer tries to read the producer's tail pointer, for example, because the * consumer has read-only access to the producer's tail pointer. * * We provide the following macros to invoke 32bit or 64bit atomic operations * based on the architecture the code is being compiled on. */ /* Architecture independent maximum queue size. 
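* This is the largest queue size usable when either endpoint of the pair * may be a 32bit entity; see the 32bit/64bit atomics discussion above.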
*/ #define QP_MAX_QUEUE_SIZE_ARCH_ANY CONST64U(0xffffffff) #ifdef __x86_64__ # define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffffffffffff) # define QPAtomic_ReadOffset(x) Atomic_Read64(x) # define QPAtomic_WriteOffset(x, y) Atomic_Write64(x, y) #else # define QP_MAX_QUEUE_SIZE_ARCH CONST64U(0xffffffff) # define QPAtomic_ReadOffset(x) Atomic_Read32((Atomic_uint32 *)(x)) # define QPAtomic_WriteOffset(x, y) \ Atomic_Write32((Atomic_uint32 *)(x), (uint32)(y)) #endif /* *----------------------------------------------------------------------------- * * VMCIQueue_CheckAlignment -- * * Checks if the given queue is aligned to page boundary. * * Results: * TRUE or FALSE. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE Bool VMCIQueue_CheckAlignment(const VMCIQueue *queue) // IN: { return ((uintptr_t)queue & (PAGE_SIZE - 1)) == 0; } static INLINE void VMCIQueue_GetPointers(const VMCIQueue *produceQ, const VMCIQueue *consumeQ, uint64 *producerTail, uint64 *consumerHead) { *producerTail = QPAtomic_ReadOffset(&produceQ->queueHeader.producerTail); *consumerHead = QPAtomic_ReadOffset(&consumeQ->queueHeader.consumerHead); } /* *----------------------------------------------------------------------------- * * VMCIQueue_ResetPointers -- * * Reset the tail pointer (of "this" queue) and the head pointer (of * "peer" queue). * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE void VMCIQueue_ResetPointers(VMCIQueue *queue) // IN: { QPAtomic_WriteOffset(&queue->queueHeader.producerTail, CONST64U(0)); QPAtomic_WriteOffset(&queue->queueHeader.consumerHead, CONST64U(0)); } /* *----------------------------------------------------------------------------- * * VMCIQueue_Init -- * * Initializes a queue's state (head & tail pointers). * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE void VMCIQueue_Init(const VMCIHandle handle, // IN: VMCIQueue *queue) // IN: { ASSERT_NOT_IMPLEMENTED(VMCIQueue_CheckAlignment(queue)); queue->queueHeader.handle = handle; VMCIQueue_ResetPointers(queue); } /* *----------------------------------------------------------------------------- * * VMCIQueueFreeSpaceInt -- * * Finds available free space in a produce queue to enqueue more * data or reports an error if queue pair corruption is detected. * * Results: * Free space size in bytes. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int VMCIQueueFreeSpaceInt(const VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 produceQSize, // IN: uint64 *freeSpace) // OUT: { const uint64 tail = QPAtomic_ReadOffset(&produceQueue->queueHeader.producerTail); const uint64 head = QPAtomic_ReadOffset(&consumeQueue->queueHeader.consumerHead); ASSERT(freeSpace); if (tail >= produceQSize || head >= produceQSize) { return VMCI_ERROR_INVALID_SIZE; } /* * Deduct 1 to avoid tail becoming equal to head which causes ambiguity. If * head and tail are equal it means that the queue is empty. */ if (tail >= head) { *freeSpace = produceQSize - (tail - head) - 1; } else { *freeSpace = head - tail - 1; } return VMCI_SUCCESS; } /* *----------------------------------------------------------------------------- * * VMCIQueue_FreeSpace -- * * Finds available free space in a produce queue to enqueue more data. 
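 *
 *      Worked example (numbers are illustrative): with produceQSize = 16,
 *      tail = 10 and head = 4 we have tail >= head, so
 *      freeSpace = 16 - (10 - 4) - 1 = 9.  With tail = 3 and head = 12 the
 *      tail has already wrapped, so freeSpace = 12 - 3 - 1 = 8.  One byte
 *      is always sacrificed so that head == tail unambiguously means the
 *      queue is empty.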
* * Results: * On success, free space size in bytes (up to MAX_INT64). * On failure, appropriate error code. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int64 VMCIQueue_FreeSpace(const VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 produceQSize) // IN: { uint64 freeSpace; int retval; retval = VMCIQueueFreeSpaceInt(produceQueue, consumeQueue, produceQSize, &freeSpace); if (retval != VMCI_SUCCESS) { return retval; } return MIN(freeSpace, MAX_INT64); } /* *----------------------------------------------------------------------------- * * VMCIQueue_BufReady -- * * Finds available data to dequeue from a consume queue. * * Results: * On success, available data size in bytes (up to MAX_INT64). * On failure, appropriate error code. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE int64 VMCIQueue_BufReady(const VMCIQueue *consumeQueue, // IN: const VMCIQueue *produceQueue, // IN: const uint64 consumeQSize) // IN: { int retval; uint64 freeSpace; retval = VMCIQueueFreeSpaceInt(consumeQueue, produceQueue, consumeQSize, &freeSpace); if (retval != VMCI_SUCCESS) { return retval; } else { uint64 available = consumeQSize - freeSpace - 1; return MIN(available, MAX_INT64); } } /* *----------------------------------------------------------------------------- * * AddPointer -- * * Helper to add a given offset to a head or tail pointer. Wraps the value * of the pointer around the max size of the queue. * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE void AddPointer(Atomic_uint64 *var, // IN: size_t add, // IN: uint64 max) // IN: { uint64 newVal = QPAtomic_ReadOffset(var); if (newVal >= max - add) { newVal -= max; } newVal += add; QPAtomic_WriteOffset(var, newVal); } /* *----------------------------------------------------------------------------- * * __VMCIQueue_Enqueue -- * * Enqueues a given buffer to the produce queue using the provided * function. As many bytes as possible (space available in the queue) * are enqueued. * * Results: * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise, the number of bytes written to the queue is returned. * * Side effects: * Updates the tail pointer of the produce queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t __VMCIQueue_Enqueue(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 produceQSize, // IN: const void *buf, // IN: size_t bufSize, // IN: VMCIMemcpyToQueueFunc memcpyToQueue) // IN: { const int64 freeSpace = VMCIQueue_FreeSpace(produceQueue, consumeQueue, produceQSize); const uint64 tail = QPAtomic_ReadOffset(&produceQueue->queueHeader.producerTail); size_t written; if (!freeSpace) { return VMCI_ERROR_QUEUEPAIR_NOSPACE; } if (freeSpace < 0) { return (ssize_t)freeSpace; } written = (size_t)(freeSpace > bufSize ? bufSize : freeSpace); if (LIKELY(tail + written < produceQSize)) { memcpyToQueue(produceQueue, tail, buf, 0, written); } else { /* Tail pointer wraps around. 
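         For example (numbers are illustrative): with produceQSize = 16,
         tail = 14 and written = 5, tmp below is 2, so two bytes are
         copied to offsets 14-15 and the remaining three to offsets 0-2,
         after which AddPointer() wraps the tail around to 3.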
*/ const size_t tmp = (size_t)(produceQSize - tail); memcpyToQueue(produceQueue, tail, buf, 0, tmp); memcpyToQueue(produceQueue, 0, buf, tmp, written - tmp); } AddPointer(&produceQueue->queueHeader.producerTail, written, produceQSize); return written; } /* *----------------------------------------------------------------------------- * * VMCIQueue_Enqueue -- * * Enqueues a given buffer to the produce queue. As many bytes as possible * (space available in the queue) are enqueued. If bufSize is larger than * the maximum value of ssize_t the result is unspecified. * * Results: * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise, the number of bytes written to the queue is returned. * * Side effects: * Updates the tail pointer of the produce queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t VMCIQueue_Enqueue(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 produceQSize, // IN: const void *buf, // IN: size_t bufSize) // IN: { return __VMCIQueue_Enqueue(produceQueue, consumeQueue, produceQSize, buf, bufSize, VMCIMemcpyToQueue); } #if defined(__linux__) && defined(__KERNEL__) /* *----------------------------------------------------------------------------- * * VMCIQueue_EnqueueV -- * * Enqueues a given iovec to the produce queue. As many bytes as possible * (space available in the queue) are enqueued. If bufSize is larger than * the maximum value of ssize_t the result is unspecified. * * Results: * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue data. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise, the number of bytes written to the queue is returned. * * Side effects: * Updates the tail pointer of the produce queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t VMCIQueue_EnqueueV(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 produceQSize, // IN: struct iovec *iov, // IN: size_t iovSize) // IN: { return __VMCIQueue_Enqueue(produceQueue, consumeQueue, produceQSize, (void *)iov, iovSize, VMCIMemcpyToQueueV); } #endif /* *----------------------------------------------------------------------------- * * __VMCIQueue_Dequeue -- * * Dequeues data (if available) from the given consume queue. Writes data * to the user provided buffer using the provided function. * * Results: * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise the number of bytes dequeued is returned. * * Side effects: * Updates the head pointer of the consume queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t __VMCIQueue_Dequeue(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 consumeQSize, // IN: void *buf, // IN: size_t bufSize, // IN: VMCIMemcpyFromQueueFunc memcpyFromQueue) // IN: { const int64 bufReady = VMCIQueue_BufReady(consumeQueue, produceQueue, consumeQSize); const uint64 head = QPAtomic_ReadOffset(&produceQueue->queueHeader.consumerHead); size_t written; if (!bufReady) { return VMCI_ERROR_QUEUEPAIR_NODATA; } if (bufReady < 0) { return (ssize_t)bufReady; } written = (size_t)(bufReady > bufSize ? 
bufSize : bufReady); if (LIKELY(head + written < consumeQSize)) { memcpyFromQueue(buf, 0, consumeQueue, head, written); } else { /* Head pointer wraps around. */ const size_t tmp = (size_t)(consumeQSize - head); memcpyFromQueue(buf, 0, consumeQueue, head, tmp); memcpyFromQueue(buf, tmp, consumeQueue, 0, written - tmp); } AddPointer(&produceQueue->queueHeader.consumerHead, written, consumeQSize); return written; } /* *----------------------------------------------------------------------------- * * VMCIQueue_Dequeue -- * * Dequeues data (if available) from the given consume queue. Writes data * to the user provided buffer. If bufSize is larger than the maximum * value of ssize_t the result is unspecified. * * Results: * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise the number of bytes dequeued is returned. * * Side effects: * Updates the head pointer of the consume queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t VMCIQueue_Dequeue(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 consumeQSize, // IN: void *buf, // IN: size_t bufSize) // IN: { return __VMCIQueue_Dequeue(produceQueue, consumeQueue, consumeQSize, buf, bufSize, VMCIMemcpyFromQueue); } #if defined(__linux__) && defined(__KERNEL__) /* *----------------------------------------------------------------------------- * * VMCIQueue_DequeueV -- * * Dequeues data (if available) from the given consume queue. Writes data * to the user provided iovec. If bufSize is larger than the maximum * value of ssize_t the result is unspecified. * * Results: * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * Otherwise the number of bytes dequeued is returned. * * Side effects: * Updates the head pointer of the consume queue. * *----------------------------------------------------------------------------- */ static INLINE ssize_t VMCIQueue_DequeueV(VMCIQueue *produceQueue, // IN: const VMCIQueue *consumeQueue, // IN: const uint64 consumeQSize, // IN: struct iovec *iov, // IN: size_t iovSize) // IN: { return __VMCIQueue_Dequeue(produceQueue, consumeQueue, consumeQSize, (void *)iov, iovSize, VMCIMemcpyFromQueueV); } #endif #endif /* !_VMCI_QUEUE_PAIR_H_ */ vsock-only/include/vm_atomic.h0000444000000000000000000015202112025726715015440 0ustar rootroot/********************************************************* * Copyright (C) 1998 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vm_atomic.h -- * * Atomic power */ #ifndef _ATOMIC_H_ #define _ATOMIC_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMEXT #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMNIXMOD #define INCLUDE_ALLOW_VMKDRIVERS #define INCLUDE_ALLOW_VMK_MODULE #define INCLUDE_ALLOW_VMKERNEL #define INCLUDE_ALLOW_DISTRIBUTE #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_VMIROM #include "includeCheck.h" #include "vm_basic_types.h" /* Basic atomic type: 32 bits */ typedef struct Atomic_uint32 { volatile uint32 value; } Atomic_uint32; /* Basic atomic type: 64 bits */ typedef struct Atomic_uint64 { volatile uint64 value; } Atomic_uint64 ALIGNED(8); /* * Prototypes for msft atomics. These are defined & inlined by the * compiler so no function definition is needed. The prototypes are * needed for c++. Since the amd64 compiler doesn't support inline asm we * have to use these. Unfortunately, we still have to use some inline asm * for the 32 bit code since the and/or/xor implementations didn't show up * until xp or 2k3. * * The declarations for the intrinsic functions were taken from ntddk.h * in the DDK. The declarations must match, otherwise the 64-bit c++ * compiler will complain about second linkage of the intrinsic functions. * We define the intrinsic using the basic types corresponding to the * Windows typedefs. This avoids having to include windows header files * to get to the windows types. */ #if defined(_MSC_VER) && _MSC_VER >= 1310 #ifdef __cplusplus extern "C" { #endif long _InterlockedExchange(long volatile*, long); long _InterlockedCompareExchange(long volatile*, long, long); long _InterlockedExchangeAdd(long volatile*, long); long _InterlockedDecrement(long volatile*); long _InterlockedIncrement(long volatile*); #pragma intrinsic(_InterlockedExchange, _InterlockedCompareExchange) #pragma intrinsic(_InterlockedExchangeAdd, _InterlockedDecrement) #pragma intrinsic(_InterlockedIncrement) #if defined(VM_X86_64) long _InterlockedAnd(long volatile*, long); __int64 _InterlockedAnd64(__int64 volatile*, __int64); long _InterlockedOr(long volatile*, long); __int64 _InterlockedOr64(__int64 volatile*, __int64); long _InterlockedXor(long volatile*, long); __int64 _InterlockedXor64(__int64 volatile*, __int64); __int64 _InterlockedExchangeAdd64(__int64 volatile*, __int64); __int64 _InterlockedIncrement64(__int64 volatile*); __int64 _InterlockedDecrement64(__int64 volatile*); __int64 _InterlockedExchange64(__int64 volatile*, __int64); __int64 _InterlockedCompareExchange64(__int64 volatile*, __int64, __int64); #if !defined(_WIN64) #pragma intrinsic(_InterlockedAnd, _InterlockedAnd64) #pragma intrinsic(_InterlockedOr, _InterlockedOr64) #pragma intrinsic(_InterlockedXor, _InterlockedXor64) #pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedIncrement64) #pragma intrinsic(_InterlockedDecrement64, _InterlockedExchange64) #pragma intrinsic(_InterlockedCompareExchange64) #endif /* !_WIN64 */ #endif /* VM_X86_64 */ #ifdef __cplusplus } #endif #endif /* _MSC_VER */ /* Convert a volatile int to Atomic_uint32.
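   The cast below is valid because Atomic_uint32 wraps exactly one
   volatile uint32.  A minimal use sketch (legacyCounter is
   illustrative):

      volatile uint32 legacyCounter;
      ...
      Atomic_Inc(Atomic_VolatileToAtomic(&legacyCounter));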
*/ static INLINE Atomic_uint32 * Atomic_VolatileToAtomic(volatile uint32 *var) { return (Atomic_uint32 *)var; } /* *----------------------------------------------------------------------------- * * Atomic_Init, Atomic_SetFence, AtomicUseFence -- * * Determine whether an lfence instruction is executed after * every locked instruction. * * Certain AMD processors have a bug (see bug 107024) that * requires an lfence after every locked instruction. * * The global variable AtomicUseFence controls whether lfence * is used (see AtomicEpilogue). * * Atomic_SetFence sets AtomicUseFence to the given value. * * Atomic_Init computes and sets AtomicUseFence. * It does not take into account the number of processors. * * The rationale for all this complexity is that Atomic_Init * is the easy-to-use interface. It can be called a number * of times cheaply, and does not depend on other libraries. * However, because the number of CPUs is difficult to compute, * it does without it and always assumes there is more than one. * * For programs that care or have special requirements, * Atomic_SetFence can be called directly, in addition to Atomic_Init. * It overrides the effect of Atomic_Init, and can be called * before, after, or between calls to Atomic_Init. * *----------------------------------------------------------------------------- */ // The freebsd assembler doesn't know the lfence instruction #if defined(__GNUC__) && \ __GNUC__ >= 3 && \ !defined(BSD_VERSION) && \ (!defined(MODULE) || defined(__VMKERNEL_MODULE__)) && \ !defined(__APPLE__) /* PR136775 */ #define ATOMIC_USE_FENCE #endif #if defined(VMATOMIC_IMPORT_DLLDATA) VMX86_EXTERN_DATA Bool AtomicUseFence; #else EXTERN Bool AtomicUseFence; #endif EXTERN Bool atomicFenceInitialized; void AtomicInitFence(void); static INLINE void Atomic_Init(void) { #ifdef ATOMIC_USE_FENCE if (!atomicFenceInitialized) { AtomicInitFence(); } #endif } static INLINE void Atomic_SetFence(Bool fenceAfterLock) /* IN: TRUE to enable lfence */ /* FALSE to disable. */ { AtomicUseFence = fenceAfterLock; #if defined(__VMKERNEL__) extern void Atomic_SetFenceVMKAPI(Bool fenceAfterLock); Atomic_SetFenceVMKAPI(fenceAfterLock); #endif atomicFenceInitialized = TRUE; } /* Conditionally execute fence after interlocked instruction. */ static INLINE void AtomicEpilogue(void) { #ifdef ATOMIC_USE_FENCE if (UNLIKELY(AtomicUseFence)) { asm volatile ("lfence" ::: "memory"); } #endif } /* * All the assembly code is tricky and written conservatively. * For example, to make sure gcc won't introduce copies, * we force the addressing mode like this: * * "xchgl %0, (%1)" * : "=r" (val) * : "r" (&var->value), * "0" (val) * : "memory" * * - edward * * Actually - turns out that gcc never generates memory aliases (it * still does generate register aliases though), so we can be a bit * more aggressive with the memory constraints. The code above can be * modified like this: * * "xchgl %0, %1" * : "=r" (val), * "=m" (var->value), * : "0" (val), * "1" (var->value) * * The advantages are that gcc can use whatever addressing mode it * likes to access the memory value, and that we don't have to use a * way-too-generic "memory" clobber as there is now an explicit * declaration that var->value is modified. * * see also /usr/include/asm/atomic.h to convince yourself this is a * valid optimization. * * - walken */ /* *----------------------------------------------------------------------------- * * Atomic_Read -- * * Read * * Results: * The value of the atomic variable. * * Side effects: * None.
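 *
 *      The implementation is a plain volatile load: atomic on x86 for an
 *      aligned uint32, but with no ordering guarantees of its own.  A
 *      minimal use sketch (stopFlag is illustrative), with another CPU
 *      setting the flag via Atomic_Write():
 *
 *         while (Atomic_Read(&stopFlag) == 0) {
 *            ... do work ...
 *         }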
* *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_Read(Atomic_uint32 const *var) // IN { return var->value; } #define Atomic_Read32 Atomic_Read /* *----------------------------------------------------------------------------- * * Atomic_Write -- * * Write * * Results: * None. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Write(Atomic_uint32 *var, // IN uint32 val) // IN { var->value = val; } #define Atomic_Write32 Atomic_Write /* *----------------------------------------------------------------------------- * * Atomic_ReadWrite -- * * Read followed by write * * Results: * The value of the atomic variable before the write. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_ReadWrite(Atomic_uint32 *var, // IN uint32 val) // IN #ifdef __GNUC__ { /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "xchgl %0, %1" # if VM_ASM_PLUS : "=r" (val), "+m" (var->value) : "0" (val) # else : "=r" (val), "=m" (var->value) : "0" (val), "1" (var->value) # endif ); AtomicEpilogue(); return val; } #elif _MSC_VER >= 1310 { return _InterlockedExchange((long *)&var->value, (long)val); } #elif _MSC_VER #pragma warning(push) #pragma warning(disable : 4035) // disable no-return warning { __asm mov eax, val __asm mov ebx, var __asm xchg [ebx]Atomic_uint32.value, eax // eax is the return value, this is documented to work - edward } #pragma warning(pop) #else #error No compiler defined for Atomic_ReadWrite #endif #define Atomic_ReadWrite32 Atomic_ReadWrite /* *----------------------------------------------------------------------------- * * Atomic_ReadIfEqualWrite -- * * Compare exchange: Read variable, if equal to oldVal, write newVal * * Results: * The value of the atomic variable before the write. * * Side effects: * The variable may be modified. * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_ReadIfEqualWrite(Atomic_uint32 *var, // IN uint32 oldVal, // IN uint32 newVal) // IN #ifdef __GNUC__ { uint32 val; /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; cmpxchgl %2, %1" # if VM_ASM_PLUS : "=a" (val), "+m" (var->value) : "r" (newVal), "0" (oldVal) # else : "=a" (val), "=m" (var->value) : "r" (newVal), "0" (oldVal) /* * "1" (var->value): results in inconsistent constraints on gcc 2.7.2.3 * when compiling enterprise-2.2.17-14-RH7.0-update. * The constraint has been commented out for now. We may consider doing * this systematically, but we need to be sure it is the right thing to * do. However, it is also possible that the offending use of this asm * function will be removed in the near future in which case we may * decide to reintroduce the constraint instead. hpreg & agesen. 
*/ # endif : "cc" ); AtomicEpilogue(); return val; } #elif _MSC_VER >= 1310 { return _InterlockedCompareExchange((long *)&var->value, (long)newVal, (long)oldVal); } #elif _MSC_VER #pragma warning(push) #pragma warning(disable : 4035) // disable no-return warning { __asm mov eax, oldVal __asm mov ebx, var __asm mov ecx, newVal __asm lock cmpxchg [ebx]Atomic_uint32.value, ecx // eax is the return value, this is documented to work - edward } #pragma warning(pop) #else #error No compiler defined for Atomic_ReadIfEqualWrite #endif #define Atomic_ReadIfEqualWrite32 Atomic_ReadIfEqualWrite #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_ReadIfEqualWrite64 -- * * Compare exchange: Read variable, if equal to oldVal, write newVal * * Results: * The value of the atomic variable before the write. * * Side effects: * The variable may be modified. * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadIfEqualWrite64(Atomic_uint64 *var, // IN uint64 oldVal, // IN uint64 newVal) // IN { #if defined(__GNUC__) uint64 val; /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; cmpxchgq %2, %1" : "=a" (val), "+m" (var->value) : "r" (newVal), "0" (oldVal) : "cc" ); AtomicEpilogue(); return val; #elif _MSC_VER return _InterlockedCompareExchange64((__int64 *)&var->value, (__int64)newVal, (__int64)oldVal); #else #error No compiler defined for Atomic_ReadIfEqualWrite64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_And -- * * Atomic read, bitwise AND with a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_And(Atomic_uint32 *var, // IN uint32 val) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; andl %1, %0" # if VM_ASM_PLUS : "+m" (var->value) : "ri" (val) # else : "=m" (var->value) : "ri" (val), "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER #if defined(__x86_64__) _InterlockedAnd((long *)&var->value, (long)val); #else __asm mov eax, val __asm mov ebx, var __asm lock and [ebx]Atomic_uint32.value, eax #endif #else #error No compiler defined for Atomic_And #endif } #define Atomic_And32 Atomic_And #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_And64 -- * * Atomic read, bitwise AND with a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_And64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; andq %1, %0" : "+m" (var->value) : "ri" (val) : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedAnd64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_And64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Or -- * * Atomic read, bitwise OR with a value, write. 
* * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Or(Atomic_uint32 *var, // IN uint32 val) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; orl %1, %0" # if VM_ASM_PLUS : "+m" (var->value) : "ri" (val) # else : "=m" (var->value) : "ri" (val), "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER #if defined(__x86_64__) _InterlockedOr((long *)&var->value, (long)val); #else __asm mov eax, val __asm mov ebx, var __asm lock or [ebx]Atomic_uint32.value, eax #endif #else #error No compiler defined for Atomic_Or #endif } #define Atomic_Or32 Atomic_Or #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Or64 -- * * Atomic read, bitwise OR with a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Or64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; orq %1, %0" : "+m" (var->value) : "ri" (val) : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedOr64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_Or64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Xor -- * * Atomic read, bitwise XOR with a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Xor(Atomic_uint32 *var, // IN uint32 val) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; xorl %1, %0" # if VM_ASM_PLUS : "+m" (var->value) : "ri" (val) # else : "=m" (var->value) : "ri" (val), "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER #if defined(__x86_64__) _InterlockedXor((long *)&var->value, (long)val); #else __asm mov eax, val __asm mov ebx, var __asm lock xor [ebx]Atomic_uint32.value, eax #endif #else #error No compiler defined for Atomic_Xor #endif } #define Atomic_Xor32 Atomic_Xor #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Xor64 -- * * Atomic read, bitwise XOR with a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Xor64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; xorq %1, %0" : "+m" (var->value) : "ri" (val) : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedXor64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_Xor64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Add -- * * Atomic read, add a value, write. 
* * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Add(Atomic_uint32 *var, // IN uint32 val) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; addl %1, %0" # if VM_ASM_PLUS : "+m" (var->value) : "ri" (val) # else : "=m" (var->value) : "ri" (val), "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER >= 1310 _InterlockedExchangeAdd((long *)&var->value, (long)val); #elif _MSC_VER __asm mov eax, val __asm mov ebx, var __asm lock add [ebx]Atomic_uint32.value, eax #else #error No compiler defined for Atomic_Add #endif } #define Atomic_Add32 Atomic_Add #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Add64 -- * * Atomic read, add a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Add64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; addq %1, %0" : "+m" (var->value) : "ri" (val) : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_Add64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Sub -- * * Atomic read, subtract a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Sub(Atomic_uint32 *var, // IN uint32 val) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; subl %1, %0" # if VM_ASM_PLUS : "+m" (var->value) : "ri" (val) # else : "=m" (var->value) : "ri" (val), "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER >= 1310 _InterlockedExchangeAdd((long *)&var->value, (long)-val); #elif _MSC_VER __asm mov eax, val __asm mov ebx, var __asm lock sub [ebx]Atomic_uint32.value, eax #else #error No compiler defined for Atomic_Sub #endif } #define Atomic_Sub32 Atomic_Sub #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Sub64 -- * * Atomic read, subtract a value, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Sub64(Atomic_uint64 *var, // IN uint64 val) // IN { #ifdef __GNUC__ /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; subq %1, %0" : "+m" (var->value) : "ri" (val) : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)-val); #else #error No compiler defined for Atomic_Sub64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Inc -- * * Atomic read, increment, write. 
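 *
 *      Commonly paired with Atomic_FetchAndDec() below for reference
 *      counting (a sketch; obj and refCount are illustrative):
 *
 *         Atomic_Inc(&obj->refCount);
 *         ...
 *         if (Atomic_FetchAndDec(&obj->refCount) == 1) {
 *            ... last reference dropped, free obj ...
 *         }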
* * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Inc(Atomic_uint32 *var) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; incl %0" # if VM_ASM_PLUS : "+m" (var->value) : # else : "=m" (var->value) : "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER >= 1310 _InterlockedIncrement((long *)&var->value); #elif _MSC_VER __asm mov ebx, var __asm lock inc [ebx]Atomic_uint32.value #else #error No compiler defined for Atomic_Inc #endif } #define Atomic_Inc32 Atomic_Inc #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Inc64 -- * * Atomic read, increment, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Inc64(Atomic_uint64 *var) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; incq %0" : "+m" (var->value) : : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedIncrement64((__int64 *)&var->value); #else #error No compiler defined for Atomic_Inc64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_Dec -- * * Atomic read, decrement, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Dec(Atomic_uint32 *var) // IN { #ifdef __GNUC__ /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( "lock; decl %0" # if VM_ASM_PLUS : "+m" (var->value) : # else : "=m" (var->value) : "0" (var->value) # endif : "cc" ); AtomicEpilogue(); #elif _MSC_VER >= 1310 _InterlockedDecrement((long *)&var->value); #elif _MSC_VER __asm mov ebx, var __asm lock dec [ebx]Atomic_uint32.value #else #error No compiler defined for Atomic_Dec #endif } #define Atomic_Dec32 Atomic_Dec #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_Dec64 -- * * Atomic read, decrement, write. * * Results: * None * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Dec64(Atomic_uint64 *var) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; decq %0" : "+m" (var->value) : : "cc" ); AtomicEpilogue(); #elif _MSC_VER _InterlockedDecrement64((__int64 *)&var->value); #else #error No compiler defined for Atomic_Dec64 #endif } #endif /* * Note that the technique below can be used to implement ReadX(), where X is * an arbitrary mathematical function. */ /* *----------------------------------------------------------------------------- * * Atomic_FetchAndOr -- * * Atomic read (returned), bitwise OR with a value, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndOr(Atomic_uint32 *var, // IN uint32 val) // IN { uint32 res; do { res = var->value; } while (res != Atomic_ReadIfEqualWrite(var, res, res | val)); return res; } /* *----------------------------------------------------------------------------- * * Atomic_FetchAndAnd -- * * Atomic read (returned), bitwise And with a value, write. 
* * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndAnd(Atomic_uint32 *var, // IN uint32 val) // IN { uint32 res; do { res = var->value; } while (res != Atomic_ReadIfEqualWrite(var, res, res & val)); return res; } #define Atomic_ReadOr32 Atomic_FetchAndOr #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_ReadOr64 -- * * Atomic read (returned), bitwise OR with a value, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadOr64(Atomic_uint64 *var, // IN uint64 val) // IN { uint64 res; do { res = var->value; } while (res != Atomic_ReadIfEqualWrite64(var, res, res | val)); return res; } #endif /* *----------------------------------------------------------------------------- * * Atomic_FetchAndAddUnfenced -- * * Atomic read (returned), add a value, write. * * If you have to implement FetchAndAdd() on an architecture other than * x86 or x86-64, you might want to consider doing something similar to * Atomic_FetchAndOr(). * * The "Unfenced" version of Atomic_FetchAndInc never executes * "lfence" after the interlocked operation. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndAddUnfenced(Atomic_uint32 *var, // IN uint32 val) // IN #ifdef __GNUC__ { /* Checked against the Intel manual and GCC --walken */ __asm__ __volatile__( # if VM_ASM_PLUS "lock; xaddl %0, %1" : "=r" (val), "+m" (var->value) : "0" (val) : "cc" # else "lock; xaddl %0, (%1)" : "=r" (val) : "r" (&var->value), "0" (val) : "cc", "memory" # endif ); return val; } #elif _MSC_VER >= 1310 { return _InterlockedExchangeAdd((long *)&var->value, (long)val); } #elif _MSC_VER #pragma warning(push) #pragma warning(disable : 4035) // disable no-return warning { __asm mov eax, val __asm mov ebx, var __asm lock xadd [ebx]Atomic_uint32.value, eax } #pragma warning(pop) #else #error No compiler defined for Atomic_FetchAndAdd #endif #define Atomic_ReadAdd32 Atomic_FetchAndAdd /* *----------------------------------------------------------------------------- * * Atomic_FetchAndAdd -- * * Atomic read (returned), add a value, write. * * If you have to implement FetchAndAdd() on an architecture other than * x86 or x86-64, you might want to consider doing something similar to * Atomic_FetchAndOr(). * * Unlike "Unfenced" version, this one may execute the "lfence" after * interlocked operation. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndAdd(Atomic_uint32 *var, // IN uint32 val) // IN #ifdef __GNUC__ { val = Atomic_FetchAndAddUnfenced(var, val); AtomicEpilogue(); return val; } #else { return Atomic_FetchAndAddUnfenced(var, val); } #endif #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_ReadAdd64 -- * * Atomic read (returned), add a value, write. * * Results: * The value of the variable before the operation. 
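 *
 *      Because the pre-add value is returned, this is the natural
 *      primitive for handing out unique slots or IDs (a sketch; nextId is
 *      illustrative):
 *
 *         uint32 myId = Atomic_FetchAndAdd(&nextId, 1);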
* * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadAdd64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "lock; xaddq %0, %1" : "=r" (val), "+m" (var->value) : "0" (val) : "cc" ); AtomicEpilogue(); return val; #elif _MSC_VER return _InterlockedExchangeAdd64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_ReadAdd64 #endif } #endif /* *----------------------------------------------------------------------------- * * Atomic_FetchAndInc -- * * Atomic read (returned), increment, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndInc(Atomic_uint32 *var) // IN { return Atomic_FetchAndAdd(var, 1); } #define Atomic_ReadInc32 Atomic_FetchAndInc #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_ReadInc64 -- * * Atomic read (returned), increment, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadInc64(Atomic_uint64 *var) // IN { return Atomic_ReadAdd64(var, 1); } #endif /* *----------------------------------------------------------------------------- * * Atomic_FetchAndDec -- * * Atomic read (returned), decrement, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint32 Atomic_FetchAndDec(Atomic_uint32 *var) // IN { return Atomic_FetchAndAdd(var, (uint32)-1); } #define Atomic_ReadDec32 Atomic_FetchAndDec #if defined(__x86_64__) /* *----------------------------------------------------------------------------- * * Atomic_ReadDec64 -- * * Atomic read (returned), decrement, write. * * Results: * The value of the variable before the operation. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadDec64(Atomic_uint64 *var) // IN { return Atomic_ReadAdd64(var, CONST64U(-1)); } #endif /* * Usage of this helper struct is strictly reserved to the following * function. --hpreg */ typedef struct { uint32 lowValue; uint32 highValue; } S_uint64; /* *----------------------------------------------------------------------------- * * Atomic_CMPXCHG64 -- * * Compare exchange: Read variable, if equal to oldVal, write newVal * * XXX: Ensure that if this function is to be inlined by gcc, it is * compiled with -fno-strict-aliasing. Otherwise it will break. * Unfortunately we know that gcc 2.95.3 (used to build the FreeBSD 3.2 * Tools) does not honor -fno-strict-aliasing. As a workaround, we avoid * inlining the function entirely for versions of gcc under 3.0. 
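 *
 *      Typical retry-loop use, and essentially how Atomic_FetchAndAdd64()
 *      below is built (a sketch; var is an Atomic_uint64 and candidate is
 *      illustrative):
 *
 *         uint64 oldVal, newVal;
 *         do {
 *            oldVal = Atomic_Read64(&var);
 *            newVal = MIN(oldVal, candidate);
 *         } while (!Atomic_CMPXCHG64(&var, &oldVal, &newVal));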
* * Results: * TRUE if equal, FALSE if not equal * * Side effects: * None * *----------------------------------------------------------------------------- */ #if defined(__GNUC__) && __GNUC__ < 3 static Bool #else static INLINE Bool #endif Atomic_CMPXCHG64(Atomic_uint64 *var, // IN/OUT uint64 const *oldVal, // IN uint64 const *newVal) // IN #ifdef __GNUC__ { Bool equal; /* Checked against the Intel manual and GCC --walken */ #ifdef VMM64 uint64 dummy; __asm__ __volatile__( "lock; cmpxchgq %3, %0" "\n\t" "sete %1" : "+m" (*var), "=qm" (equal), "=a" (dummy) : "r" (*newVal), "2" (*oldVal) : "cc" ); #else /* 32-bit version */ int dummy1, dummy2; # if defined __PIC__ && !vm_x86_64 // %ebx is reserved by the compiler. # if defined __GNUC__ && __GNUC__ < 3 // Part of #188541 - for RHL 6.2 etc. __asm__ __volatile__( "xchg %%ebx, %6\n\t" "mov (%%ebx), %%ecx\n\t" "mov (%%ebx), %%ebx\n\t" "lock; cmpxchg8b (%3)\n\t" "xchg %%ebx, %6\n\t" "sete %0" : "=a" (equal), "=d" (dummy2), "=D" (dummy1) : "S" (var), "0" (((S_uint64 const *)oldVal)->lowValue), "1" (((S_uint64 const *)oldVal)->highValue), "D" (newVal) : "ecx", "cc", "memory" ); # else __asm__ __volatile__( "xchgl %%ebx, %6" "\n\t" // %3 is a register to make sure it cannot be %ebx-relative. "lock; cmpxchg8b (%3)" "\n\t" "xchgl %%ebx, %6" "\n\t" // Must come after restoring %ebx: %0 could be %ebx-relative. "sete %0" : "=qm" (equal), "=a" (dummy1), "=d" (dummy2) : "r" (var), "1" (((S_uint64 const *)oldVal)->lowValue), "2" (((S_uint64 const *)oldVal)->highValue), // Cannot use "m" here: 'newVal' is read-only. "r" (((S_uint64 const *)newVal)->lowValue), "c" (((S_uint64 const *)newVal)->highValue) : "cc", "memory" ); # endif # else __asm__ __volatile__( "lock; cmpxchg8b %0" "\n\t" "sete %1" # if VM_ASM_PLUS : "+m" (*var), # else : "=m" (*var), # endif "=qm" (equal), "=a" (dummy1), "=d" (dummy2) : "2" (((S_uint64 const *)oldVal)->lowValue), "3" (((S_uint64 const *)oldVal)->highValue), "b" (((S_uint64 const *)newVal)->lowValue), "c" (((S_uint64 const *)newVal)->highValue) : "cc" ); # endif #endif AtomicEpilogue(); return equal; } #elif _MSC_VER #if defined(__x86_64__) { return *oldVal == _InterlockedCompareExchange64((__int64 *)&var->value, (__int64)*newVal, (__int64)*oldVal); } #else #pragma warning(push) #pragma warning(disable : 4035) // disable no-return warning { __asm mov esi, var __asm mov edx, oldVal __asm mov ecx, newVal __asm mov eax, [edx]S_uint64.lowValue __asm mov edx, [edx]S_uint64.highValue __asm mov ebx, [ecx]S_uint64.lowValue __asm mov ecx, [ecx]S_uint64.highValue __asm lock cmpxchg8b [esi] __asm sete al __asm movzx eax, al // eax is the return value, this is documented to work - edward } #pragma warning(pop) #endif #else #error No compiler defined for Atomic_CMPXCHG64 #endif /* *----------------------------------------------------------------------------- * * Atomic_CMPXCHG32 -- * * Compare exchange: Read variable, if equal to oldVal, write newVal * * Results: * TRUE if equal, FALSE if not equal * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE Bool Atomic_CMPXCHG32(Atomic_uint32 *var, // IN/OUT uint32 oldVal, // IN uint32 newVal) // IN { #ifdef __GNUC__ Bool equal; uint32 dummy; __asm__ __volatile__( "lock; cmpxchgl %3, %0" "\n\t" "sete %1" # if VM_ASM_PLUS : "+m" (*var), "=qm" (equal), "=a" (dummy) : "r" (newVal), "2" (oldVal) # else : "=m" (*var), "=qm" (equal), "=a" (dummy) : /*"0" (*var), */ "r" (newVal), "2" (oldVal) # endif : "cc" ); AtomicEpilogue(); return 
equal; #else return (Atomic_ReadIfEqualWrite(var, oldVal, newVal) == oldVal); #endif } /* *----------------------------------------------------------------------------- * * Atomic_Read64 -- * * Read and return. * * Results: * The value of the atomic variable. * * Side effects: * None. * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_Read64(Atomic_uint64 const *var) // IN #if defined(__x86_64__) { return var->value; } #elif defined(__GNUC__) && defined(__i386__) /* GCC on x86 */ { uint64 value; /* * Since cmpxchg8b will replace the contents of EDX:EAX with the * value in memory if there is no match, we need only execute the * instruction once in order to atomically read 64 bits from * memory. The only constraint is that ECX:EBX must have the same * value as EDX:EAX so that if the comparison succeeds, the value * written back is the one already in memory. We * intentionally don't tell gcc that we are using ebx and ecx as we * don't modify them and do not care what value they store. */ __asm__ __volatile__( "mov %%ebx, %%eax" "\n\t" "mov %%ecx, %%edx" "\n\t" "lock; cmpxchg8b %1" : "=&A" (value) : "m" (*var) : "cc" ); AtomicEpilogue(); return value; } #elif _MSC_VER /* MSC (assume on x86 for now) */ # pragma warning(push) # pragma warning(disable : 4035) // disable no-return warning { __asm mov ecx, var __asm mov edx, ecx __asm mov eax, ebx __asm lock cmpxchg8b [ecx] // edx:eax is the return value; this is documented to work. --mann } # pragma warning(pop) #else # error No compiler defined for Atomic_Read64 #endif /* *---------------------------------------------------------------------- * * Atomic_FetchAndAdd64 -- * * Atomically adds a 64-bit integer to another * * Results: * Returns the old value just prior to the addition * * Side effects: * None * *---------------------------------------------------------------------- */ static INLINE uint64 Atomic_FetchAndAdd64(Atomic_uint64 *var, // IN/OUT uint64 addend) // IN { uint64 oldVal; uint64 newVal; do { oldVal = var->value; newVal = oldVal + addend; } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); return oldVal; } /* *---------------------------------------------------------------------- * * Atomic_FetchAndInc64 -- * * Atomically increments a 64-bit integer * * Results: * Returns the old value just prior to incrementing * * Side effects: * None * *---------------------------------------------------------------------- */ static INLINE uint64 Atomic_FetchAndInc64(Atomic_uint64 *var) // IN/OUT { return Atomic_FetchAndAdd64(var, 1); } /* *---------------------------------------------------------------------- * * Atomic_FetchAndDec64 -- * * Atomically decrements a 64-bit integer * * Results: * Returns the old value just prior to decrementing * * Side effects: * None * *---------------------------------------------------------------------- */ static INLINE uint64 Atomic_FetchAndDec64(Atomic_uint64 *var) // IN/OUT { uint64 oldVal; uint64 newVal; do { oldVal = var->value; newVal = oldVal - 1; } while (!Atomic_CMPXCHG64(var, &oldVal, &newVal)); return oldVal; } /* *----------------------------------------------------------------------------- * * Atomic_ReadWrite64 -- * * Read followed by write * * Results: * The value of the atomic variable before the write.
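 *
 *      On x86-64 this is a single xchgq; on 32-bit builds it falls back
 *      to the Atomic_CMPXCHG64() retry loop visible in the code below.
 *      A hand-off sketch (latest and now are illustrative):
 *
 *         uint64 prev = Atomic_ReadWrite64(&latest, now);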
* * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE uint64 Atomic_ReadWrite64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__x86_64__) #if defined(__GNUC__) /* Checked against the AMD manual and GCC --hpreg */ __asm__ __volatile__( "xchgq %0, %1" : "=r" (val), "+m" (var->value) : "0" (val) ); AtomicEpilogue(); return val; #elif _MSC_VER return _InterlockedExchange64((__int64 *)&var->value, (__int64)val); #else #error No compiler defined for Atomic_ReadWrite64 #endif #else uint64 oldVal; do { oldVal = var->value; } while (!Atomic_CMPXCHG64(var, &oldVal, &val)); return oldVal; #endif } /* *----------------------------------------------------------------------------- * * Atomic_Write64 -- * * Write * * Results: * None. * * Side effects: * None * *----------------------------------------------------------------------------- */ static INLINE void Atomic_Write64(Atomic_uint64 *var, // IN uint64 val) // IN { #if defined(__x86_64__) var->value = val; #else (void)Atomic_ReadWrite64(var, val); #endif } /* * Template code for the Atomic_ type and its operators. * * The cast argument is an intermediate type cast to make some * compilers stop complaining about casting uint32 <-> void *, * even though we only do it in the 32-bit case so they are always * the same size. So for val of type uint32, instead of * (void *)val, we have (void *)(uintptr_t)val. * The specific problem case is the Windows ddk compiler * (as used by the SVGA driver). -- edward */ #define MAKE_ATOMIC_TYPE(name, size, in, out, cast) \ typedef Atomic_uint ## size Atomic_ ## name; \ \ \ static INLINE out \ Atomic_Read ## name(Atomic_ ## name const *var) \ { \ return (out)(cast)Atomic_Read ## size(var); \ } \ \ \ static INLINE void \ Atomic_Write ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_Write ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE out \ Atomic_ReadWrite ## name(Atomic_ ## name *var, \ in val) \ { \ return (out)(cast)Atomic_ReadWrite ## size(var, \ (uint ## size)(cast)val); \ } \ \ \ static INLINE out \ Atomic_ReadIfEqualWrite ## name(Atomic_ ## name *var, \ in oldVal, \ in newVal) \ { \ return (out)(cast)Atomic_ReadIfEqualWrite ## size(var, \ (uint ## size)(cast)oldVal, (uint ## size)(cast)newVal); \ } \ \ \ static INLINE void \ Atomic_And ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_And ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE void \ Atomic_Or ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_Or ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE void \ Atomic_Xor ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_Xor ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE void \ Atomic_Add ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_Add ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE void \ Atomic_Sub ## name(Atomic_ ## name *var, \ in val) \ { \ Atomic_Sub ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE void \ Atomic_Inc ## name(Atomic_ ## name *var) \ { \ Atomic_Inc ## size(var); \ } \ \ \ static INLINE void \ Atomic_Dec ## name(Atomic_ ## name *var) \ { \ Atomic_Dec ## size(var); \ } \ \ \ static INLINE out \ Atomic_ReadOr ## name(Atomic_ ## name *var, \ in val) \ { \ return (out)(cast)Atomic_ReadOr ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE out \ Atomic_ReadAdd ## name(Atomic_ ## name *var, \ in val) \ { \ return (out)(cast)Atomic_ReadAdd ## size(var, (uint ## size)(cast)val); \ } \ \ \ static INLINE out
\ Atomic_ReadInc ## name(Atomic_ ## name *var) \ { \ return (out)(cast)Atomic_ReadInc ## size(var); \ } \ \ \ static INLINE out \ Atomic_ReadDec ## name(Atomic_ ## name *var) \ { \ return (out)(cast)Atomic_ReadDec ## size(var); \ } /* * Since we use a macro to generate these definitions, it is hard to look for * them. So DO NOT REMOVE THIS COMMENT and keep it up-to-date. --hpreg * * Atomic_Ptr * Atomic_ReadPtr -- * Atomic_WritePtr -- * Atomic_ReadWritePtr -- * Atomic_ReadIfEqualWritePtr -- * Atomic_AndPtr -- * Atomic_OrPtr -- * Atomic_XorPtr -- * Atomic_AddPtr -- * Atomic_SubPtr -- * Atomic_IncPtr -- * Atomic_DecPtr -- * Atomic_ReadOrPtr -- * Atomic_ReadAddPtr -- * Atomic_ReadIncPtr -- * Atomic_ReadDecPtr -- * * Atomic_Int * Atomic_ReadInt -- * Atomic_WriteInt -- * Atomic_ReadWriteInt -- * Atomic_ReadIfEqualWriteInt -- * Atomic_AndInt -- * Atomic_OrInt -- * Atomic_XorInt -- * Atomic_AddInt -- * Atomic_SubInt -- * Atomic_IncInt -- * Atomic_DecInt -- * Atomic_ReadOrInt -- * Atomic_ReadAddInt -- * Atomic_ReadIncInt -- * Atomic_ReadDecInt -- */ #if defined(__x86_64__) MAKE_ATOMIC_TYPE(Ptr, 64, void const *, void *, uintptr_t) MAKE_ATOMIC_TYPE(Int, 64, int, int, int) #else MAKE_ATOMIC_TYPE(Ptr, 32, void const *, void *, uintptr_t) MAKE_ATOMIC_TYPE(Int, 32, int, int, int) #endif /* *----------------------------------------------------------------------------- * * Atomic_MFence -- * * Implements mfence in terms of a lock xor. The reason for implementing * our own mfence is that not all of our supported cpus have an assembly * mfence (P3, Athlon). We put it here to avoid duplicating code which is * also why it is prefixed with "Atomic_". * * Results: * None. * * Side effects: * Causes loads and stores prior to this to be globally * visible. * *----------------------------------------------------------------------------- */ static INLINE void Atomic_MFence(void) { Atomic_uint32 fence; Atomic_Xor(&fence, 0x1); } #endif // ifndef _ATOMIC_H_ vsock-only/include/vmciGuestKernelAPI.h0000444000000000000000000000517212025726715017127 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * vmciGuestKernelAPI.h -- * * Kernel API exported from the VMCI guest driver. */ #ifndef __VMCI_GUESTKERNELAPI_H__ #define __VMCI_GUESTKERNELAPI_H__ /* VMCI guest kernel API version number. */ #define VMCI_GUEST_KERNEL_API_VERSION 1 /* Macros to operate on the driver version number. */ #define VMCI_MAJOR_VERSION(v) (((v) >> 16) & 0xffff) #define VMCI_MINOR_VERSION(v) ((v) & 0xffff) #define INCLUDE_ALLOW_MODULE #include "includeCheck.h" #include "vmci_defs.h" #include "vmci_call_defs.h" #if defined(__linux__) || defined(_WIN32) /* XXX TODO for other guests. */ # include "vmci_queue_pair.h" #endif /* VMCI Device Usage API.
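   The expected bracketing, inferred from the names rather than stated
   anywhere in this header, is:

      if (!VMCI_DeviceGet()) {
         return error;                    // no VMCI device available
      }
      ... use the datagram / queue pair APIs below ...
      VMCI_DeviceRelease();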
*/ Bool VMCI_DeviceGet(void); void VMCI_DeviceRelease(void); /* VMCI Datagram API. */ int VMCIDatagram_CreateHnd(VMCIId resourceID, uint32 flags, VMCIDatagramRecvCB recvCB, void *clientData, VMCIHandle *outHandle); int VMCIDatagram_DestroyHnd(VMCIHandle handle); int VMCIDatagram_Send(VMCIDatagram *msg); /* VMCI Utility API. */ VMCIId VMCI_GetContextID(void); uint32 VMCI_Version(void); /* VMCI Event API. */ typedef void (*VMCI_EventCB)(VMCIId subID, VMCI_EventData *ed, void *clientData); int VMCIEvent_Subscribe(VMCI_Event event, VMCI_EventCB callback, void *callbackData, VMCIId *subID); int VMCIEvent_Unsubscribe(VMCIId subID); /* VMCI Discovery Service API. */ int VMCIDs_Lookup(const char *name, VMCIHandle *out); #if defined(__linux__) || defined(_WIN32) /* VMCI QueuePair API. XXX TODO for other guests. */ int VMCIQueuePair_Alloc(VMCIHandle *handle, VMCIQueue **produceQ, uint64 produceSize, VMCIQueue **consumeQ, uint64 consumeSize, VMCIId peer, uint32 flags); int VMCIQueuePair_Detach(VMCIHandle handle); #endif #endif /* !__VMCI_GUESTKERNELAPI_H__ */ vsock-only/include/circList.h0000444000000000000000000002400112025726715015232 0ustar rootroot/********************************************************* * Copyright (C) 1998 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * circList.h -- * * macros, prototypes and struct definitions for double-linked * circular lists. */ #ifndef _CIRCLIST_H_ #define _CIRCLIST_H_ #define INCLUDE_ALLOW_USERLEVEL #define INCLUDE_ALLOW_VMMON #define INCLUDE_ALLOW_VMCORE #define INCLUDE_ALLOW_MODULE #define INCLUDE_ALLOW_VMKERNEL #include "includeCheck.h" #include "vmware.h" typedef struct ListItem { struct ListItem *prev; struct ListItem *next; } ListItem; /* A list with no elements is a null pointer. */ #define LIST_ITEM_DEF(name) \ ListItem * name = NULL #define LIST_EMPTY(l) ((l) == NULL) /* initialize list item */ #define INIT_LIST_ITEM(p) \ do { \ (p)->prev = (p)->next = (p); \ } while (0) /* check if initialized */ #define IS_LIST_ITEM_INITIALIZED(li) \ (((li) == (li)->prev) && ((li) == (li)->next)) /* return first element in the list */ #define LIST_FIRST(l) (l) #define LIST_FIRST_CHK(l) (l) /* return last element in the list */ #define LIST_LAST(l) ((l)->prev) #define LIST_LAST_CHK(l) (LIST_EMPTY(l) ? NULL : LIST_LAST(l)) /* * LIST_CONTAINER - get the struct for this entry (like list_entry) * @ptr: the &struct ListItem pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list struct within the struct. */ #define LIST_CONTAINER(ptr, type, member) \ ((type *)((char *)(ptr) - offsetof(type, member))) /* * delete item from the list */ #define LIST_DEL DelListItem /* * link two lists together */ #define LIST_SPLICE SpliceLists /* * Split a list into two lists */ #define LIST_SPLIT SplitLists /* * Add item to front of stack. 
 *      List pointer points to new head.
 */
#define LIST_PUSH PushListItem

/*
 * Add item at back of queue. List pointer only changes if list was empty.
 */
#define LIST_QUEUE QueueListItem

/*
 * Get the list size.
 */
#define LIST_SIZE GetListSize

/*
 * LIST_SCAN_FROM scans the list from "from" up until "until".
 * The loop variable p should not be destroyed in the process.
 * "from" is an element in the list where to start scanning.
 * "until" is the element where search should stop.
 * member is the field to use for the search - either "next" or "prev".
 */
#define LIST_SCAN_FROM(p, from, until, member)                        \
   for (p = (from); (p) != NULL;                                      \
        (p) = (((p)->member == (until)) ? NULL : (p)->member))

/* scan the entire list (non-destructively) */
#define LIST_SCAN(p, l)                                               \
   LIST_SCAN_FROM(p, LIST_FIRST(l), LIST_FIRST(l), next)

/* scan a list backward from last element to first (non-destructively) */
#define LIST_SCAN_BACK(p, l)                                          \
   LIST_SCAN_FROM(p, LIST_LAST_CHK(l), LIST_LAST(l), prev)

/* scan the entire list where loop element may be destroyed */
#define LIST_SCAN_SAFE(p, pn, l)                                      \
   if (!LIST_EMPTY(l))                                                \
      for (p = (l), (pn) = NextListItem(p, l); (p) != NULL;           \
           (p) = (pn), (pn) = NextListItem(p, l))

/* scan the entire list backwards where loop element may be destroyed */
#define LIST_SCAN_BACK_SAFE(p, pn, l)                                 \
   if (!LIST_EMPTY(l))                                                \
      for (p = LIST_LAST(l), (pn) = PrevListItem(p, l); (p) != NULL;  \
           (p) = (pn), (pn) = PrevListItem(p, l))


/* function definitions */

/*
 *----------------------------------------------------------------------
 *
 * NextListItem --
 *
 *      Returns the next member of a doubly linked list, or NULL if last.
 *      Assumes: p is member of the list headed by head.
 *
 * Result
 *      If head or p is NULL, return NULL. Otherwise,
 *      next list member (or null if last).
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

static INLINE ListItem *
NextListItem(ListItem *p,       // IN
             ListItem *head)    // IN
{
   if (head == NULL || p == NULL) {
      return NULL;
   }
   /* both p and head are non-null */
   p = p->next;
   return p == head ? NULL : p;
}


/*
 *----------------------------------------------------------------------
 *
 * PrevListItem --
 *
 *      Returns the prev member of a doubly linked list, or NULL if first.
 *      Assumes: p is member of the list headed by head.
 *
 * Result
 *      If head or p is NULL, return NULL. Otherwise,
 *      prev list member (or null if first).
 *
 * Side effects:
 *      None.
 *
 *----------------------------------------------------------------------
 */

static INLINE ListItem *
PrevListItem(ListItem *p,       // IN
             ListItem *head)    // IN
{
   if (head == NULL || p == NULL) {
      return NULL;
   }
   /* both p and head are non-null */
   return p == head ? NULL : p->prev;
}


/*
 *----------------------------------------------------------------------
 *
 * DelListItem --
 *
 *      Deletes a member of a doubly linked list, possibly modifies the
 *      list header itself.
 *      Assumes neither p nor headp is null and p is a member of *headp.
 *
 * Result
 *      None
 *
 * Side effects:
 *      Modifies *headp.
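 */

/*
 * Illustrative sketch, not part of the original header: a client embeds
 * ListItem in its own structure and walks the list with LIST_SCAN and
 * LIST_CONTAINER. "MyElem" and "MyElemSum" are hypothetical names
 * invented for this example.
 */
typedef struct MyElem {
   int value;
   ListItem links;           /* embedded list linkage */
} MyElem;

static INLINE int
MyElemSum(ListItem *head)    // IN: list of MyElem.links, may be NULL
{
   ListItem *li;
   int sum = 0;

   LIST_SCAN(li, head) {
      sum += LIST_CONTAINER(li, MyElem, links)->value;
   }
   return sum;
}

/*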
* *---------------------------------------------------------------------- */ static INLINE void DelListItem(ListItem *p, // IN ListItem **headp) // IN/OUT { ListItem *next; ASSERT(p); ASSERT(headp); next = p->next; if (p == next) { *headp = NULL; } else { next->prev = p->prev; p->prev->next = next; if (*headp == p) { *headp = next; } } } /* *---------------------------------------------------------------------- * * QueueListItem -- * * Adds a new member to the back of a doubly linked list (queue) * Assumes neither p nor headp is null and p is not a member of *headp. * * Result * None * * Side effects: * Modifies *headp. * *---------------------------------------------------------------------- */ static INLINE void QueueListItem(ListItem *p, // IN ListItem **headp) // IN/OUT { ListItem *head; head = *headp; if (LIST_EMPTY(head)) { INIT_LIST_ITEM(p); *headp = p; } else { p->prev = head->prev; p->next = head; p->prev->next = p; head->prev = p; } } /* *---------------------------------------------------------------------- * * PushListItem -- * * Adds a new member to the front of a doubly linked list (stack) * Assumes neither p nor headp is null and p is not a member of *headp. * * Result * None * * Side effects: * Modifies *headp. * *---------------------------------------------------------------------- */ static INLINE void PushListItem(ListItem *p, // IN ListItem **headp) // IN/OUT { QueueListItem(p, headp); *headp = p; } /* *---------------------------------------------------------------------- * * SpliceLists -- * * Make a single list {l1 l2} from {l1} and {l2} and return it. * It is okay for one or both lists to be NULL. * No checking is done. It is assumed that l1 and l2 are two * distinct lists. * * Result * A list { l1 l2 }. * * Side effects: * Modifies l1 and l2 list pointers. * *---------------------------------------------------------------------- */ static INLINE ListItem * SpliceLists(ListItem *l1, // IN ListItem *l2) // IN { ListItem *l1Last, *l2Last; if (LIST_EMPTY(l1)) { return l2; } if (LIST_EMPTY(l2)) { return l1; } l1Last = l1->prev; /* last elem of l1 */ l2Last = l2->prev; /* last elem of l2 */ /* * l1 -> ... -> l1Last l2 -> ... l2Last */ l1Last->next = l2; l2->prev = l1Last; l1->prev = l2Last; l2Last->next = l1; return l1; } /* *---------------------------------------------------------------------- * * SplitLists -- * * Make a list l = {l1 l2} into two separate lists {l1} and {l2}, where: * l = { ... x -> p -> ... } split into: * l1 = { ... -> x } * l2 = { p -> ... } * Assumes neither p nor l is null and p is a member of l. * If p is the first element of l, then l1 will be NULL. * * Result * None. * * Side effects: * Sets *l1p and *l2p to the resulting two lists. * Modifies l's pointers. * *---------------------------------------------------------------------- */ static INLINE void SplitLists(ListItem *p, // IN ListItem *l, // IN ListItem **l1p, // OUT ListItem **l2p) // OUT { ListItem *last; if (p == LIST_FIRST(l)) { /* first element */ *l1p = NULL; *l2p = l; return; } last = l->prev; *l1p = l; p->prev->next = l; l->prev = p->prev; *l2p = p; p->prev = last; last->next = p; } /* *---------------------------------------------------------------------- * * GetListSize -- * * Return the number of items in the list. * * Result: * The number of items in the list. * * Side effects: * None. 
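 */

/*
 * Illustrative sketch, not part of the original header: SpliceLists()
 * moves every element of one list onto the tail of another; SplitLists()
 * is its inverse at the splice point. "MyListMoveAll" is a hypothetical
 * name invented for this example.
 */
static INLINE void
MyListMoveAll(ListItem **destp,   // IN/OUT: destination list head
              ListItem **srcp)    // IN/OUT: source list head, emptied
{
   /* Result is {dest} followed by {src}; the old source head is dead. */
   *destp = SpliceLists(*destp, *srcp);
   *srcp = NULL;
}

/*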
 *
 *----------------------------------------------------------------------
 */

static INLINE int
GetListSize(ListItem *head)   // IN
{
   ListItem *li;
   int ret = 0;

   LIST_SCAN(li, head) {
      ret++;
   }
   return ret;
}

#endif /* _CIRCLIST_H_ */

vsock-only/include/driver-config.h0000444000000000000000000000425012025726715016220 0ustar rootroot
/*********************************************************
 * Copyright (C) 1998 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

/*
 * Sets the proper defines from the Linux header files
 *
 * This file must be included before the inclusion of any kernel header file,
 * with the exception of linux/autoconf.h and linux/version.h --hpreg
 */

#ifndef __VMX_CONFIG_H__
#define __VMX_CONFIG_H__

#define INCLUDE_ALLOW_VMCORE
#define INCLUDE_ALLOW_VMMON
#define INCLUDE_ALLOW_MODULE
#define INCLUDE_ALLOW_VMNIXMOD
#include "includeCheck.h"

#include
#include "compat_version.h"

/*
 * We rely on Kernel Module support. Check here.
 */
#ifndef CONFIG_MODULES
#   error "No Module support in this kernel. Please configure with CONFIG_MODULES"
#endif

/*
 * 2.2 kernels still use __SMP__ (derived from CONFIG_SMP
 * in the main Makefile), so we do it here.
 */
#ifdef CONFIG_SMP
#   define __SMP__ 1
#endif

#if defined(CONFIG_MODVERSIONS) && defined(KERNEL_2_1)
#   if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,60)
/*
 * MODVERSIONS might be already defined when using kernel's Makefiles.
 */
#      ifndef MODVERSIONS
#         define MODVERSIONS
#      endif
#      include <linux/modversions.h>
#   endif
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
/*
 * Force the uintptr_t definition to come from linux/types.h instead of
 * vm_basic_types.h.
 */
#   include <linux/types.h>
#   define _STDINT_H 1
#endif

#ifndef __KERNEL__
#   define __KERNEL__
#endif

#endif

vsock-only/include/compat_completion.h0000444000000000000000000001371012025726715017177 0ustar rootroot
/*********************************************************
 * Copyright (C) 2004 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_COMPLETION_H__
# define __COMPAT_COMPLETION_H__

/*
 * The kernel's completion objects were made available for module use in 2.4.9.
 *
 * Between 2.4.0 and 2.4.9, we implement completions on our own using
 * waitqueues and counters. This was done so that we could safely support
 * functions like complete_all(), which cannot be implemented using
 * semaphores.
 *
 * Prior to that, the waitqueue API is substantially different, and since none
 * of our modules that are built against older kernels need complete_all(),
 * we fall back on a simple semaphore-based implementation.
 */

/*
 * Native completions.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 9)

#include <linux/completion.h>
#define compat_completion struct completion
#define compat_init_completion(comp) init_completion(comp)
#define COMPAT_DECLARE_COMPLETION DECLARE_COMPLETION
#define compat_wait_for_completion(comp) wait_for_completion(comp)
#define compat_complete(comp) complete(comp)

/* complete_all() was exported in 2.6.6. */
# if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 6)
#  include "compat_wait.h"
#  include "compat_list.h"
#  include "compat_spinlock.h"
#  include "compat_sched.h"
#  define compat_complete_all(x)                                          \
      ({                                                                  \
         struct list_head *currLinks;                                     \
         spin_lock(&(x)->wait.lock);                                      \
         (x)->done += UINT_MAX/2;                                         \
                                                                          \
         list_for_each(currLinks, &(x)->wait.task_list) {                 \
            wait_queue_t *currQueue =                                     \
               list_entry(currLinks, wait_queue_t, task_list);            \
            wake_up_process(currQueue->task);                             \
         }                                                                \
         spin_unlock(&(x)->wait.lock);                                    \
      })
# else
#  define compat_complete_all complete_all
# endif

/*
 * Completions via waitqueues.
 */
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0)

/*
 * Kernel completions in 2.4.9 and beyond use a counter and a waitqueue, and
 * our implementation is quite similar. Because __wake_up_common() is not
 * exported, our implementations of compat_complete() and compat_complete_all()
 * are somewhat racy: the counter is incremented outside of the waitqueue's
 * lock.
 *
 * As a result, our completion cannot guarantee in-order wake ups. For example,
 * suppose thread A is entering compat_complete(), thread B is sleeping inside
 * compat_wait_for_completion(), and thread C is just now entering
 * compat_wait_for_completion(). If thread A is scheduled first and increments
 * the counter, then gets swapped out, thread C may get scheduled and will
 * quickly go through compat_wait_for_completion() (since done != 0) while
 * thread B continues to sleep, even though thread B should have been the one
 * to wake up.
 */

#include
#include "compat_sched.h"
#include "compat_list.h"
#include <linux/smp_lock.h> // for lock_kernel()/unlock_kernel()
#include "compat_wait.h"

typedef struct compat_completion {
   unsigned int done;
   wait_queue_head_t wq;
} compat_completion;

#define compat_init_completion(comp) do {   \
   (comp)->done = 0;                        \
   init_waitqueue_head(&(comp)->wq);        \
} while (0)
#define COMPAT_DECLARE_COMPLETION(comp)                  \
   compat_completion comp = {                            \
      .done = 0,                                         \
      .wq = __WAIT_QUEUE_HEAD_INITIALIZER((comp).wq),    \
   }

/*
 * Locking and unlocking the kernel lock here ensures that the thread
 * is no longer running in module code: compat_complete_and_exit
 * performs the sequence { lock_kernel(); up(comp); compat_exit(); }, with
 * the final unlock_kernel performed implicitly by the resident kernel
 * in do_exit.
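 */

/*
 * Illustrative sketch, not part of the original header: the intended
 * usage pattern. A parent spawns a worker thread and blocks until the
 * worker signals that it has finished initializing. "MyWorkerThread"
 * and "MyStartWorker" are hypothetical names invented for this example.
 */
#if 0   /* Example only; on pre-2.4.9 kernels the macros appear below. */
static compat_completion myWorkerReady;

static int
MyWorkerThread(void *unused)   // IN: thread argument
{
   /* ... set up worker state ... */
   compat_complete(&myWorkerReady);   /* wake the waiting parent */
   /* ... worker main loop ... */
   return 0;
}

static void
MyStartWorker(void)
{
   compat_init_completion(&myWorkerReady);
   kernel_thread(MyWorkerThread, NULL, 0);
   compat_wait_for_completion(&myWorkerReady);   /* sleep until ready */
}
#endif

/*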
*/ #define compat_wait_for_completion(comp) do { \ spin_lock_irq(&(comp)->wq.lock); \ if (!(comp)->done) { \ DECLARE_WAITQUEUE(wait, current); \ wait.flags |= WQ_FLAG_EXCLUSIVE; \ __add_wait_queue_tail(&(comp)->wq, &wait); \ do { \ __set_current_state(TASK_UNINTERRUPTIBLE); \ spin_unlock_irq(&(comp)->wq.lock); \ schedule(); \ spin_lock_irq(&(comp)->wq.lock); \ } while (!(comp)->done); \ __remove_wait_queue(&(comp)->wq, &wait); \ } \ (comp)->done--; \ spin_unlock_irq(&(comp)->wq.lock); \ lock_kernel(); \ unlock_kernel(); \ } while (0) /* XXX: I don't think I need to touch the BKL. */ #define compat_complete(comp) do { \ unsigned long flags; \ spin_lock_irqsave(&(comp)->wq.lock, flags); \ (comp)->done++; \ spin_unlock_irqrestore(&(comp)->wq.lock, flags); \ wake_up(&(comp)->wq); \ } while (0) #define compat_complete_all(comp) do { \ unsigned long flags; \ spin_lock_irqsave(&(comp)->wq.lock, flags); \ (comp)->done += UINT_MAX / 2; \ spin_unlock_irqrestore(&(comp)->wq.lock, flags); \ wake_up_all(&(comp)->wq); \ } while (0) /* * Completions via semaphores. */ #else #include "compat_semaphore.h" #define compat_completion struct semaphore #define compat_init_completion(comp) init_MUTEX_LOCKED(comp) #define COMPAT_DECLARE_COMPLETION(comp) DECLARE_MUTEX_LOCKED(comp) #define compat_wait_for_completion(comp) do { \ down(comp); \ lock_kernel(); \ unlock_kernel(); \ } while (0) #define compat_complete(comp) up(comp) #endif #endif /* __COMPAT_COMPLETION_H__ */ vsock-only/include/compat_file.h0000444000000000000000000000352312025726715015746 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_FILE_H__ # define __COMPAT_FILE_H__ /* The fput() API is modified in 2.2.0 --hpreg */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 0) # define compat_fput(_file) fput(_file) #else # define compat_fput(_file) fput(_file, (_file)->f_inode) #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0) # define compat_get_file(_file) get_file(_file) # define compat_file_count(_file) file_count(_file) #else # define compat_get_file(_file) (_file)->f_count++ # define compat_file_count(_file) (_file)->f_count #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 4) # define compat_filp_close(_file, _files) filp_close(_file, _files) #else static inline void compat_filp_close(struct file* filp, fl_owner_t files) { if (filp->f_op && filp->f_op->flush) { filp->f_op->flush(filp); } /* * Hopefully there are no locks to release on this filp. * locks_remove_posix is not exported so we cannot use it... 
*/ fput(filp); } #endif #endif /* __COMPAT_FILE_H__ */ vsock-only/include/compat_fs.h0000444000000000000000000002317212025726715015441 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_FS_H__ # define __COMPAT_FS_H__ #include /* * 2.6.5+ kernels define FS_BINARY_MOUNTDATA. Since it didn't exist and * wasn't used prior, it's safe to define it to zero. */ #ifndef FS_BINARY_MOUNTDATA #define FS_BINARY_MOUNTDATA 0 #endif /* * MAX_LFS_FILESIZE wasn't defined until 2.5.4. */ #ifndef MAX_LFS_FILESIZE # include # if BITS_PER_LONG == 32 # define MAX_LFS_FILESIZE (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG - 1)) - 1) # elif BITS_PER_LONG == 64 # define MAX_LFS_FILESIZE 0x7fffffffffffffffUL # endif #endif /* * sendfile as a VFS op was born in 2.5.30. Unfortunately, it also changed * signatures, first in 2.5.47, then again in 2.5.70, then again in 2.6.8. * Luckily, the 2.6.8+ signature is the same as the 2.5.47 signature. And * as of 2.6.23-rc1 sendfile is gone, replaced by splice_read... * * Let's not support sendfile from 2.5.30 to 2.5.47, because the 2.5.30 * signature is much different and file_send_actor isn't externed. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) #define VMW_SENDFILE_NONE #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 8) #define VMW_SENDFILE_NEW #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 70) #define VMW_SENDFILE_OLD #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 47) #define VMW_SENDFILE_NEW #else #define VMW_SENDFILE_NONE #endif /* * splice_read is there since 2.6.17, but let's avoid 2.6.17-rcX kernels... * After all nobody is using splice system call until 2.6.23 using it to * implement sendfile. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18) #define VMW_SPLICE_READ 1 #endif /* * Filesystems wishing to use generic page cache read/write routines are * supposed to implement aio_read and aio_write (calling into * generic_file_aio_read() and generic_file_aio_write() if necessary). * * The VFS exports do_sync_read() and do_sync_write() as the "new" * generic_file_read() and generic_file_write(), but filesystems need not * actually implement read and write- the VFS will automatically call * do_sync_write() and do_sync_read() when applications invoke the standard * read() and write() system calls. * * In 2.6.19, generic_file_read() and generic_file_write() were removed, * necessitating this change. AIO dates as far back as 2.5.42, but the API has * changed over time, so for simplicity, we'll only enable it from 2.6.19 and * on. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 19) # define VMW_USE_AIO #endif /* * The alloc_inode and destroy_inode VFS ops didn't exist prior to 2.4.21. * Without these functions, file systems can't embed inodes. 
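 */

/*
 * Illustrative sketch, not part of the original header, of what
 * "embedding" an inode means: the filesystem wraps the VFS inode in its
 * own structure and recovers the wrapper with container_of-style
 * pointer arithmetic. "MyInodeInfo" and "MY_INODE" are hypothetical
 * names invented for this example.
 */
#if 0   /* Example only. */
typedef struct MyInodeInfo {
   unsigned long myPrivateData;   /* filesystem-specific state */
   struct inode vfsInode;         /* embedded VFS inode */
} MyInodeInfo;

static inline MyInodeInfo *
MY_INODE(struct inode *inode)     // IN: inode embedded in MyInodeInfo
{
   return (MyInodeInfo *)((char *)inode - offsetof(MyInodeInfo, vfsInode));
}
#endif

/*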
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 21)
# define VMW_EMBED_INODE
#endif

/*
 * iget() was removed from the VFS as of 2.6.25-rc1. The replacement for iget()
 * is iget_locked() which was added in 2.5.17.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 17)
# define VMW_USE_IGET_LOCKED
#endif

/*
 * parent_ino was born in 2.5.5. For older kernels, let's use 2.5.5
 * implementation. It uses the dcache lock which is OK because per-dentry
 * locking appeared after 2.5.5.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5)
#define compat_parent_ino(dentry) parent_ino(dentry)
#else
#define compat_parent_ino(dentry)                                     \
({                                                                    \
   ino_t res;                                                         \
   spin_lock(&dcache_lock);                                           \
   res = dentry->d_parent->d_inode->i_ino;                            \
   spin_unlock(&dcache_lock);                                         \
   res;                                                               \
})
#endif

/*
 * putname changed to __putname in 2.6.6.
 */
#define compat___getname() __getname()
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 6)
#define compat___putname(name) putname(name)
#else
#define compat___putname(name) __putname(name)
#endif

/*
 * inc_nlink, drop_nlink, and clear_nlink were added in 2.6.19.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
#define compat_inc_nlink(inode) ((inode)->i_nlink++)
#define compat_drop_nlink(inode) ((inode)->i_nlink--)
#define compat_clear_nlink(inode) ((inode)->i_nlink = 0)
#else
#define compat_inc_nlink(inode) inc_nlink(inode)
#define compat_drop_nlink(inode) drop_nlink(inode)
#define compat_clear_nlink(inode) clear_nlink(inode)
#endif

/*
 * i_size_write and i_size_read were introduced in 2.6.0-test1
 * (though we'll look for them as of 2.6.1). They employ slightly different
 * locking in order to guarantee atomicity, depending on the length of a long,
 * whether the kernel is SMP, or whether the kernel is preemptible. Prior to
 * i_size_write and i_size_read, there was no such locking, so that's the
 * behavior we'll emulate.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 1)
#define compat_i_size_read(inode) ((inode)->i_size)
#define compat_i_size_write(inode, size) ((inode)->i_size = size)
#else
#define compat_i_size_read(inode) i_size_read(inode)
#define compat_i_size_write(inode, size) i_size_write(inode, size)
#endif

/*
 * filemap_fdatawrite was introduced in 2.5.12. Prior to that, modules used
 * filemap_fdatasync instead. In 2.4.18, both filemap_fdatawrite and
 * filemap_fdatawait began returning status codes. Prior to that, they were
 * void functions, so we'll just have them return 0.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 18)
#define compat_filemap_fdatawrite(mapping)                            \
({                                                                    \
   int result = 0;                                                    \
   filemap_fdatasync(mapping);                                        \
   result;                                                            \
})
#define compat_filemap_fdatawait(mapping)                             \
({                                                                    \
   int result = 0;                                                    \
   filemap_fdatawait(mapping);                                        \
   result;                                                            \
})
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 12)
#define compat_filemap_fdatawrite(mapping) filemap_fdatasync(mapping)
#define compat_filemap_fdatawait(mapping) filemap_fdatawait(mapping)
#else
#define compat_filemap_fdatawrite(mapping) filemap_fdatawrite(mapping)
#define compat_filemap_fdatawait(mapping) filemap_fdatawait(mapping)
#endif

/*
 * filemap_write_and_wait was introduced in 2.6.6 and exported for module use
 * in 2.6.16. It's really just a simple wrapper around filemap_fdatawrite
 * and filemap_fdatawait, which initiates a flush of all dirty pages, then
 * waits for the pages to flush. The implementation here is a simplified form
 * of the one found in 2.6.20-rc3.
 *
 * Unfortunately, it just isn't possible to implement this prior to 2.4.5, when
 * neither filemap_fdatawait nor filemap_fdatasync were exported for module
 * use.
So we'll define it out and hope for the best. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 5) #define compat_filemap_write_and_wait(mapping) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 16) #define compat_filemap_write_and_wait(mapping) \ ({ \ int result = 0; \ if (mapping->nrpages) { \ result = compat_filemap_fdatawrite(mapping); \ if (result != -EIO) { \ int result2 = compat_filemap_fdatawait(mapping); \ if (!result) { \ result = result2; \ } \ } \ } \ result; \ }) #else #define compat_filemap_write_and_wait(mapping) filemap_write_and_wait(mapping) #endif /* * invalidate_remote_inode was introduced in 2.6.0-test5. Prior to that, * filesystems wishing to invalidate pages belonging to an inode called * invalidate_inode_pages. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) #define compat_invalidate_remote_inode(inode) invalidate_inode_pages(inode) #else #define compat_invalidate_remote_inode(inode) invalidate_remote_inode(inode) #endif #endif /* __COMPAT_FS_H__ */ vsock-only/include/compat_init.h0000444000000000000000000000235512025726715015774 0ustar rootroot/********************************************************* * Copyright (C) 1999 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * compat_init.h: Initialization compatibility wrappers. */ #ifndef __COMPAT_INIT_H__ #define __COMPAT_INIT_H__ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 0) #include #endif #ifndef module_init #define module_init(x) int init_module(void) { return x(); } #endif #ifndef module_exit #define module_exit(x) void cleanup_module(void) { x(); } #endif #endif /* __COMPAT_INIT_H__ */ vsock-only/include/compat_list.h0000444000000000000000000000357512025726715016011 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_LIST_H__ # define __COMPAT_LIST_H__ #include /* * list_add_tail is with us since 2.4.0, or something like that. 
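 */

/*
 * Illustrative sketch, not part of the original header: the embedded
 * list pattern these wrappers support on every kernel version. "MyNode"
 * and "MyNodeTotal" are hypothetical names invented for this example.
 */
#if 0   /* Example only; the compat fallbacks are defined below. */
struct MyNode {
   int payload;
   struct list_head links;   /* embedded linkage */
};

static inline int
MyNodeTotal(struct list_head *head)   // IN: list of MyNode.links
{
   struct MyNode *node;
   int total = 0;

   list_for_each_entry(node, head, links) {
      total += node->payload;
   }
   return total;
}

/* To append: list_add_tail(&node->links, head); */
#endif

/*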
*/ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) #define list_add_tail(newe, head) do { \ struct list_head *__h = (head); \ __list_add((newe), __h->prev, __h); \ } while (0) #endif /* * list_for_each_safe() showed up in 2.4.10, but it may be backported so we * just check for its existence. */ #ifndef list_for_each_safe # define list_for_each_safe(pos, n, head) \ for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) #endif /* * list_for_each_entry() showed up in 2.4.20, but it may be backported so we * just check for its existence. */ #ifndef list_for_each_entry # define list_for_each_entry(pos, head, member) \ for (pos = list_entry((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member)) #endif #endif /* __COMPAT_LIST_H__ */ vsock-only/include/compat_kernel.h0000444000000000000000000000514312025726715016307 0ustar rootroot/********************************************************* * Copyright (C) 2004 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_KERNEL_H__ # define __COMPAT_KERNEL_H__ #include #include /* * container_of was introduced in 2.5.28 but it's easier to check like this. */ #ifndef container_of #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) #endif /* * wait_for_completion and friends did not exist before 2.4.9. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 9) #define compat_complete_and_exit(comp, status) complete_and_exit(comp, status) #else #include "compat_completion.h" /* * Used by _syscallX macros. Note that this is global variable, so * do not rely on its contents too much. As exit() is only function * we use, and we never check return value from exit(), we have * no problem... */ extern int errno; /* * compat_exit() provides an access to the exit() function. It must * be named compat_exit(), as exit() (with different signature) is * provided by x86-64, arm and other (but not by i386). */ #define __NR_compat_exit __NR_exit static inline _syscall1(int, compat_exit, int, exit_code); /* * See compat_wait_for_completion in compat_completion.h. * compat_exit implicitly performs an unlock_kernel, in resident code, * ensuring that the thread is no longer running in module code when the * module is unloaded. */ #define compat_complete_and_exit(comp, status) do { \ lock_kernel(); \ compat_complete(comp); \ compat_exit(status); \ } while (0) #endif /* * vsnprintf became available in 2.4.10. For older kernels, just fall back on * vsprintf. 
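 */

/*
 * Illustrative sketch, not part of the original header: with the
 * pre-2.4.10 fallback defined below, the size argument is silently
 * ignored, so callers must size their buffers for the worst case rather
 * than rely on truncation. "MyFormat" is a hypothetical name invented
 * for this example.
 */
#if 0   /* Example only; the fallback is defined below. */
static void
MyFormat(char *buf,          // OUT: destination buffer
         size_t size,        // IN: destination size
         const char *fmt,    // IN: printf-style format
         ...)
{
   va_list args;

   va_start(args, fmt);
   /*
    * On 2.4.10+ this truncates at size; on older kernels it degrades to
    * vsprintf(), which ignores size and can overrun a small buffer.
    */
   vsnprintf(buf, size, fmt, args);
   va_end(args);
}
#endif

/*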
*/ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 10) #define vsnprintf(str, size, fmt, args) vsprintf(str, fmt, args) #endif #endif /* __COMPAT_KERNEL_H__ */ vsock-only/include/compat_mm.h0000444000000000000000000001020712025726715015435 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_MM_H__ # define __COMPAT_MM_H__ #include /* The get_page() API appeared in 2.3.7 --hpreg */ /* Sometime during development it became function instead of macro --petr */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) && !defined(get_page) # define get_page(_page) atomic_inc(&(_page)->count) /* The __free_page() API is exported in 2.1.67 --hpreg */ # if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 67) # define put_page __free_page # else # include "compat_page.h" # define page_to_phys(_page) (page_to_pfn(_page) << PAGE_SHIFT) # define put_page(_page) free_page(page_to_phys(_page)) # endif #endif /* page_count() is 2.4.0 invention. Unfortunately unavailable in some RedHat * kernels (for example 2.4.21-4-RHEL3). */ /* It is function since 2.6.0, and hopefully RedHat will not play silly games * with mm_inline.h again... */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(page_count) # define page_count(page) atomic_read(&(page)->count) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) # define compat_vm_pgoff(vma) ((vma)->vm_offset >> PAGE_SHIFT) static inline unsigned long compat_do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { unsigned long ret = -EINVAL; if (pgoff < 1 << (32 - PAGE_SHIFT)) { ret = do_mmap(file, addr, len, prot, flag, pgoff << PAGE_SHIFT); } return ret; } #else # define compat_vm_pgoff(vma) (vma)->vm_pgoff # ifdef VMW_SKAS_MMAP # define compat_do_mmap_pgoff(f, a, l, p, g, o) \ do_mmap_pgoff(current->mm, f, a, l, p, g, o) # else # define compat_do_mmap_pgoff(f, a, l, p, g, o) \ do_mmap_pgoff(f, a, l, p, g, o) # endif #endif /* 2.2.x uses 0 instead of some define */ #ifndef NOPAGE_SIGBUS #define NOPAGE_SIGBUS (0) #endif /* 2.2.x does not have HIGHMEM support */ #ifndef GFP_HIGHUSER #define GFP_HIGHUSER (GFP_USER) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) #include "compat_page.h" static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) { unsigned long addr; addr = __get_free_pages(gfp_mask, order); if (!addr) { return NULL; } return virt_to_page(addr); } #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #endif /* * In 2.4.14, the logic behind the UnlockPage macro was moved to the * unlock_page() function. Later (in 2.5.12), the UnlockPage macro was removed * altogether, and nowadays everyone uses unlock_page(). 
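 */

/*
 * Illustrative sketch, not part of the original header: the page
 * reference-counting pattern that the wrappers above normalize across
 * kernel versions. "MyGrabPage" is a hypothetical name invented for
 * this example.
 */
#if 0   /* Example only. */
static struct page *
MyGrabPage(void)
{
   struct page *page = alloc_page(GFP_KERNEL);   /* refcount == 1 */

   if (page != NULL) {
      get_page(page);   /* take an extra reference: refcount == 2 */
      put_page(page);   /* drop it again: refcount == 1 */
   }
   return page;         /* caller eventually drops the last reference */
}
#endif

/*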
*/ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 14) #define compat_unlock_page(page) UnlockPage(page) #else #define compat_unlock_page(page) unlock_page(page) #endif /* * In 2.4.10, vmtruncate was changed from returning void to returning int. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 10) #define compat_vmtruncate(inode, size) \ ({ \ int result = 0; \ vmtruncate(inode, size); \ result; \ }) #else #define compat_vmtruncate(inode, size) vmtruncate(inode, size) #endif #endif /* __COMPAT_MM_H__ */ vsock-only/include/compat_module.h0000444000000000000000000000437212025726715016317 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * compat_module.h -- */ #ifndef __COMPAT_MODULE_H__ # define __COMPAT_MODULE_H__ #include /* * Modules wishing to use the GPL license are required to include a * MODULE_LICENSE definition in their module source as of 2.4.10. */ #ifndef MODULE_LICENSE #define MODULE_LICENSE(license) #endif /* * To make use of our own home-brewed MODULE_INFO, we need macros to * concatenate two expressions to "__mod_", and and to convert an * expression into a string. I'm sure we've got these in our codebase, * but I'd rather not introduce such a dependency in a compat header. */ #ifndef __module_cat #define __module_cat_1(a, b) __mod_ ## a ## b #define __module_cat(a, b) __module_cat_1(a, b) #endif #ifndef __stringify #define __stringify_1(x) #x #define __stringify(x) __stringify_1(x) #endif /* * MODULE_INFO was born in 2.5.69. */ #ifndef MODULE_INFO #define MODULE_INFO(tag, info) \ static const char __module_cat(tag, __LINE__)[] \ __attribute__((section(".modinfo"), unused)) = __stringify(tag) "=" info #endif /* * MODULE_VERSION was born in 2.6.4. The earlier form appends a long "\0xxx" * string to the module's version, but that was removed in 2.6.10, so we'll * ignore it in our wrapper. */ #ifndef MODULE_VERSION #define MODULE_VERSION(_version) MODULE_INFO(version, _version) #endif #endif /* __COMPAT_MODULE_H__ */ vsock-only/include/compat_namei.h0000444000000000000000000000410612025726715016116 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_NAMEI_H__
# define __COMPAT_NAMEI_H__

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 18)
#include <linux/namei.h>
#endif

/*
 * In 2.6.25-rc2, dentry and mount objects were removed from the nameidata
 * struct. They were both replaced with a struct path.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
#define compat_vmw_nd_to_dentry(nd) (nd).path.dentry
#else
#define compat_vmw_nd_to_dentry(nd) (nd).dentry
#endif

/* In 2.6.25-rc2, path_release(&nd) was replaced with path_put(&nd.path). */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 25)
#define compat_path_release(nd) path_put(&(nd)->path)
#else
#define compat_path_release(nd) path_release(nd)
#endif

/* path_lookup was exported in 2.4.25 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 25)
#define compat_path_lookup(path, flags, nd) path_lookup(path, flags, nd)
#else
#define compat_path_lookup(path, flags, nd)                           \
   ({                                                                 \
      int ret = 0;                                                    \
      if (path_init(path, flags, nd)) {                               \
         ret = path_walk(path, nd);                                   \
      }                                                               \
      ret;                                                            \
   })
#endif

#endif /* __COMPAT_NAMEI_H__ */

vsock-only/include/compat_page.h0000444000000000000000000000466312025726715015745 0ustar rootroot
/*********************************************************
 * Copyright (C) 2002 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_PAGE_H__
# define __COMPAT_PAGE_H__

#include <linux/mm.h>
#include <asm/page.h>

/*
 * The pfn_to_page() API appeared in 2.5.14 and changed to a function
 * during 2.6.x
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) && !defined(pfn_to_page)
#   define pfn_to_page(_pfn) (mem_map + (_pfn))
#   define page_to_pfn(_page) ((_page) - mem_map)
#endif

/* The virt_to_page() API appeared in 2.4.0 --hpreg */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) && !defined(virt_to_page)
#   define virt_to_page(_kvAddr) pfn_to_page(MAP_NR(_kvAddr))
#endif

/*
 * The get_order() API appeared at some point in 2.3.x, and was then backported
 * in 2.2.17-21mdk and in the stock 2.2.18. Because we can only detect its
 * definition through makefile tricks, we provide our own for now --hpreg
 */
static inline int
compat_get_order(unsigned long size) // IN
{
   int order;

   size = (size - 1) >> (PAGE_SHIFT - 1);
   order = -1;
   do {
      size >>= 1;
      order++;
   } while (size);

   return order;
}

/*
 * BUG() was added to <asm/page.h> in 2.2.18, and was moved to <asm/bug.h>
 * in 2.5.58.
 *
 * XXX: Technically, this belongs in some sort of "compat_asm_page.h" file, but
 * since our compatibility wrappers don't distinguish between <asm/page.h> and
 * <asm/bug.h>, putting it here is reasonable.
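 */

/*
 * Illustrative sketch, not part of the original header: pairing a byte
 * count with the page-order interface of the page allocator via
 * compat_get_order() above. "MyAllocBuffer" is a hypothetical name
 * invented for this example.
 */
static inline unsigned long
MyAllocBuffer(unsigned long bytes)   // IN: size in bytes, must be > 0
{
   /* PAGE_SIZE yields order 0, 2 * PAGE_SIZE yields order 1, ... */
   return __get_free_pages(GFP_KERNEL, compat_get_order(bytes));
}

/*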
 */
#ifndef BUG
#define BUG() do {                                                     \
   printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__);               \
   __asm__ __volatile__(".byte 0x0f,0x0b");                            \
} while (0)
#endif

#endif /* __COMPAT_PAGE_H__ */

vsock-only/include/compat_sched.h0000444000000000000000000002425212025726715016117 0ustar rootroot
/*********************************************************
 * Copyright (C) 2002 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_SCHED_H__
# define __COMPAT_SCHED_H__

#include <linux/sched.h>

/* CLONE_KERNEL available in 2.5.35 and higher. */
#ifndef CLONE_KERNEL
#define CLONE_KERNEL CLONE_FILES | CLONE_FS | CLONE_SIGHAND
#endif

/* TASK_COMM_LEN became available in 2.6.11. */
#ifndef TASK_COMM_LEN
#define TASK_COMM_LEN 16
#endif

/* The capable() API appeared in 2.1.92 --hpreg */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 1, 92)
#   define capable(_capability) suser()
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 0)
#   define need_resched() need_resched
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 3)
#   define need_resched() (current->need_resched)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 3)
#   define cond_resched() (need_resched() ? schedule() : (void) 0)
#endif

/* Oh well. We need yield... Happy us! */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 20)
#   ifdef __x86_64__
#      define compat_yield() there_is_nothing_like_yield()
#   else
#      include
#      include

/*
 * Used by _syscallX macros. Note that this is a global variable, so
 * do not rely on its contents too much. As compat_yield() is the only
 * function we use, and we never check its return value, we have
 * no problem...
 */
extern int errno;

/*
 * compat_yield() provides an access to the sched_yield() system call;
 * it carries the compat_ prefix for consistency with the other
 * wrappers in this header.
 */
#      define __NR_compat_yield __NR_sched_yield
static inline _syscall0(int, compat_yield);
#   endif
#else
#   define compat_yield() yield()
#endif

/*
 * Since 2.5.34 there are two methods to enumerate tasks:
 * for_each_process(p) { ... } which enumerates only tasks and
 * do_each_thread(g,t) { ... } while_each_thread(g,t) which also
 * enumerates threads even if they share the same pid.
 */
#ifndef for_each_process
#   define for_each_process(p) for_each_task(p)
#endif

#ifndef do_each_thread
#   define do_each_thread(g, t) for_each_task(g) { t = g; do
#   define while_each_thread(g, t) while (0) }
#endif

/*
 * Lock for signal mask is a moving target...
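 */

/*
 * Illustrative sketch, not part of the original header: a kernel thread
 * draining its pending signals with the wrappers defined below (the
 * same pattern compat_allow_signal() and compat_flush_signals() use).
 * "MyDrainSignals" is a hypothetical name invented for this example.
 */
#if 0   /* Example only; compat_sigmask_lock et al. are defined below. */
static void
MyDrainSignals(void)
{
   siginfo_t info;

   spin_lock_irq(&current->compat_sigmask_lock);
   while (signal_pending(current)) {
      compat_dequeue_signal_current(&info);   /* discard one signal */
   }
   compat_recalc_sigpending();
   spin_unlock_irq(&current->compat_sigmask_lock);
}
#endif

/*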
*/ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 40) && defined(CLONE_PID) /* 2.4.x without NPTL patches or early 2.5.x */ #define compat_sigmask_lock sigmask_lock #define compat_dequeue_signal_current(siginfo_ptr) \ dequeue_signal(¤t->blocked, (siginfo_ptr)) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 60) && !defined(INIT_SIGHAND) /* RedHat's 2.4.x with first version of NPTL support, or 2.5.40 to 2.5.59 */ #define compat_sigmask_lock sig->siglock #define compat_dequeue_signal_current(siginfo_ptr) \ dequeue_signal(¤t->blocked, (siginfo_ptr)) #else /* RedHat's 2.4.x with second version of NPTL support, or 2.5.60+. */ #define compat_sigmask_lock sighand->siglock #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 0) #define compat_dequeue_signal_current(siginfo_ptr) \ dequeue_signal(¤t->blocked, (siginfo_ptr)) #else #define compat_dequeue_signal_current(siginfo_ptr) \ dequeue_signal(current, ¤t->blocked, (siginfo_ptr)) #endif #endif /* * recalc_sigpending() had task argument in the past */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 29) && defined(CLONE_PID) /* 2.4.x without NPTL patches or early 2.5.x */ #define compat_recalc_sigpending() recalc_sigpending(current) #else /* RedHat's 2.4.x with NPTL support, or 2.5.29+ */ #define compat_recalc_sigpending() recalc_sigpending() #endif /* * reparent_to_init() was introduced in 2.4.8. In 2.5.38 (or possibly * earlier, but later than 2.5.31) a call to it was added into * daemonize(), so compat_daemonize no longer needs to call it. * * In 2.4.x kernels reparent_to_init() forgets to do correct refcounting * on current->user. It is better to count one too many than one too few... */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 38) #define compat_reparent_to_init() do { \ reparent_to_init(); \ atomic_inc(¤t->user->__count); \ } while (0) #else #define compat_reparent_to_init() do {} while (0) #endif /* * daemonize appeared in 2.2.18. Except 2.2.17-4-RH7.0, which has it too. * Fortunately 2.2.17-4-RH7.0 uses versioned symbols, so we can check * its existence with defined(). */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 18)) && !defined(daemonize) static inline void daemonize(void) { struct fs_struct *fs; exit_mm(current); current->session = 1; current->pgrp = 1; exit_fs(current); fs = init_task.fs; current->fs = fs; atomic_inc(&fs->count); } #endif /* * flush_signals acquires sighand->siglock since 2.5.61... Verify RH's kernels! */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) #define compat_flush_signals(task) do { \ spin_lock_irq(&task->compat_sigmask_lock); \ flush_signals(task); \ spin_unlock_irq(&task->compat_sigmask_lock); \ } while (0) #else #define compat_flush_signals(task) flush_signals(task) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) #define compat_allow_signal(signr) do { \ spin_lock_irq(¤t->compat_sigmask_lock); \ sigdelset(¤t->blocked, signr); \ compat_recalc_sigpending(); \ spin_unlock_irq(¤t->compat_sigmask_lock); \ } while (0) #else #define compat_allow_signal(signr) allow_signal(signr) #endif /* * daemonize can set process name since 2.5.61. Prior to 2.5.61, daemonize * didn't block signals on our behalf. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) #define compat_daemonize(x...) \ ({ \ /* Beware! No snprintf here, so verify arguments! */ \ sprintf(current->comm, x); \ \ /* Block all signals. 
*/ \ spin_lock_irq(¤t->compat_sigmask_lock); \ sigfillset(¤t->blocked); \ compat_recalc_sigpending(); \ spin_unlock_irq(¤t->compat_sigmask_lock); \ compat_flush_signals(current); \ \ daemonize(); \ compat_reparent_to_init(); \ }) #else #define compat_daemonize(x...) daemonize(x) #endif /* * set priority for specified thread. Exists on 2.6.x kernels and some * 2.4.x vendor's kernels. */ #if defined(VMW_HAVE_SET_USER_NICE) #define compat_set_user_nice(task, n) set_user_nice((task), (n)) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 0) #define compat_set_user_nice(task, n) do { (task)->priority = 20 - (n); } while (0) #elif !defined(VMW_HAVE_SET_USER_NICE) #define compat_set_user_nice(task, n) do { (task)->nice = (n); } while (0) #endif /* * try to freeze a process. For kernels 2.6.11 or newer, we know how to choose * the interface. The problem is that the oldest interface, introduced in * 2.5.18, was backported to 2.4.x kernels. So if we're older than 2.6.11, * we'll decide what to do based on whether or not swsusp was configured * for the kernel. For kernels 2.6.20 and newer, we'll also need to include * freezer.h since the try_to_freeze definition was pulled out of sched.h. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) #include #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 13) || defined(VMW_TL10S64_WORKAROUND) #define compat_try_to_freeze() try_to_freeze() #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 11) #define compat_try_to_freeze() try_to_freeze(PF_FREEZE) #elif defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_SOFTWARE_SUSPEND2) #include "compat_mm.h" #include #include static inline int compat_try_to_freeze(void) { if (current->flags & PF_FREEZE) { refrigerator(PF_FREEZE); return 1; } else { return 0; } } #else static inline int compat_try_to_freeze(void) { return 0; } #endif /* * As of 2.6.23-rc1, kernel threads are no longer freezable by * default. Instead, kernel threads that need to be frozen must opt-in * by calling set_freezable() as soon as the thread is created. */ #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 22) #define compat_set_freezable() do { set_freezable(); } while (0) #else #define compat_set_freezable() do {} while (0) #endif /* * Since 2.6.27-rc2 kill_proc() is gone... Replacement (GPL-only!) * API is available since 2.6.19. Use them from 2.6.27-rc1 up. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) typedef int compat_pid; #define compat_find_get_pid(pid) (pid) #define compat_put_pid(pid) do { } while (0) #define compat_kill_pid(pid, sig, flag) kill_proc(pid, sig, flag) #else typedef struct pid * compat_pid; #define compat_find_get_pid(pid) find_get_pid(pid) #define compat_put_pid(pid) put_pid(pid) #define compat_kill_pid(pid, sig, flag) kill_pid(pid, sig, flag) #endif #endif /* __COMPAT_SCHED_H__ */ vsock-only/include/compat_semaphore.h0000444000000000000000000000314212025726715017007 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. 
* * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_SEMAPHORE_H__ # define __COMPAT_SEMAPHORE_H__ /* <= 2.6.25 have asm only, 2.6.26 has both, and 2.6.27-rc2+ has linux only. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) # include #else # include #endif /* * The init_MUTEX_LOCKED() API appeared in 2.2.18, and is also in * 2.2.17-21mdk --hpreg */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 18) #ifndef init_MUTEX_LOCKED #define init_MUTEX_LOCKED(_sem) *(_sem) = MUTEX_LOCKED #endif #ifndef DECLARE_MUTEX #define DECLARE_MUTEX(name) struct semaphore name = MUTEX #endif #ifndef DECLARE_MUTEX_LOCKED #define DECLARE_MUTEX_LOCKED(name) struct semaphore name = MUTEX_LOCKED #endif #endif #endif /* __COMPAT_SEMAPHORE_H__ */ vsock-only/include/compat_slab.h0000444000000000000000000000665312025726715015757 0ustar rootroot/********************************************************* * Copyright (C) 2005 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_SLAB_H__ # define __COMPAT_SLAB_H__ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 0) # include #else # include #endif /* * Before 2.6.20, kmem_cache_t was the accepted way to refer to a kmem_cache * structure. Prior to 2.6.15, this structure was called kmem_cache_s, and * afterwards it was renamed to kmem_cache. Here we keep things simple and use * the accepted typedef until it became deprecated, at which point we switch * over to the kmem_cache name. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) # define compat_kmem_cache struct kmem_cache #else # define compat_kmem_cache kmem_cache_t #endif /* * Up to 2.6.22 kmem_cache_create has 6 arguments - name, size, alignment, flags, * constructor, and destructor. Then for some time kernel was asserting that * destructor is NULL, and since 2.6.23-pre1 kmem_cache_create takes only 5 * arguments - destructor is gone. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) || defined(VMW_KMEMCR_HAS_DTOR) #define compat_kmem_cache_create(name, size, align, flags, ctor) \ kmem_cache_create(name, size, align, flags, ctor, NULL) #else #define compat_kmem_cache_create(name, size, align, flags, ctor) \ kmem_cache_create(name, size, align, flags, ctor) #endif /* * Up to 2.6.23 kmem_cache constructor has three arguments - pointer to block to * prepare (aka "this"), from which cache it came, and some unused flags. After * 2.6.23 flags were removed, and order of "this" and cache parameters was swapped... * Since 2.6.27-rc2 everything is different again, and ctor has only one argument. * * HAS_3_ARGS has precedence over HAS_2_ARGS if both are defined. 
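 */

/*
 * Illustrative sketch, not part of the original header: a constructor
 * written against COMPAT_KMEM_CACHE_CTOR_ARGS (defined below) builds
 * unchanged with any of the three ctor signatures. "MyObject",
 * "MyObjectCtor" and "MyCacheCreate" are hypothetical names invented
 * for this example.
 */
#if 0   /* Example only; the argument macros are defined below. */
typedef struct MyObject {
   int state;
} MyObject;

static void
MyObjectCtor(COMPAT_KMEM_CACHE_CTOR_ARGS(buf))   // IN: object to initialize
{
   MyObject *obj = buf;

   obj->state = 0;
}

static compat_kmem_cache *
MyCacheCreate(void)
{
   return compat_kmem_cache_create("my_cache", sizeof (MyObject), 0,
                                   SLAB_HWCACHE_ALIGN, MyObjectCtor);
}
#endif

/*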
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23) && !defined(VMW_KMEMCR_CTOR_HAS_3_ARGS)
# define VMW_KMEMCR_CTOR_HAS_3_ARGS
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26) && !defined(VMW_KMEMCR_CTOR_HAS_2_ARGS)
# define VMW_KMEMCR_CTOR_HAS_2_ARGS
#endif

#if defined(VMW_KMEMCR_CTOR_HAS_3_ARGS)
typedef void compat_kmem_cache_ctor(void *, compat_kmem_cache *, unsigned long);
#define COMPAT_KMEM_CACHE_CTOR_ARGS(arg) void *arg,                \
                                         compat_kmem_cache *cache, \
                                         unsigned long flags
#elif defined(VMW_KMEMCR_CTOR_HAS_2_ARGS)
typedef void compat_kmem_cache_ctor(compat_kmem_cache *, void *);
#define COMPAT_KMEM_CACHE_CTOR_ARGS(arg) compat_kmem_cache *cache, \
                                         void *arg
#else
typedef void compat_kmem_cache_ctor(void *);
#define COMPAT_KMEM_CACHE_CTOR_ARGS(arg) void *arg
#endif

#endif /* __COMPAT_SLAB_H__ */
vsock-only/include/compat_spinlock.h0000444000000000000000000000460612025726715016654 0ustar rootroot/*********************************************************
 * Copyright (C) 2005 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_SPINLOCK_H__
# define __COMPAT_SPINLOCK_H__

/*
 * The spin_lock() API appeared in 2.1.25 in asm/smp_lock.h
 * It moved in 2.1.30 to asm/spinlock.h
 * It moved again in 2.3.18 to linux/spinlock.h
 *
 * --hpreg
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 18)
# include <linux/spinlock.h>
#else
# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 30)
#  include <asm/spinlock.h>
# else
typedef struct {} spinlock_t;
#  define spin_lock_init(lock)
#  define spin_lock(lock)
#  define spin_unlock(lock)
#  define spin_lock_irqsave(lock, flags) do {      \
      save_flags(flags);                           \
      cli();                                       \
      spin_lock(lock);                             \
   } while (0)
#  define spin_unlock_irqrestore(lock, flags) do { \
      spin_unlock(lock);                           \
      restore_flags(flags);                        \
   } while (0)
# endif
#endif

/*
 * Preempt support was added during the 2.5.x development cycle, and later
 * it was backported to 2.4.x. In the 2.4.x backport these definitions
 * live in linux/spinlock.h, which is why we put them here (in 2.6.x they
 * are defined in linux/preempt.h, which is included by linux/spinlock.h).
 */
#ifdef CONFIG_PREEMPT
#define compat_preempt_disable() preempt_disable()
#define compat_preempt_enable()  preempt_enable()
#else
#define compat_preempt_disable() do { } while (0)
#define compat_preempt_enable()  do { } while (0)
#endif

#endif /* __COMPAT_SPINLOCK_H__ */
vsock-only/include/compat_statfs.h0000444000000000000000000000230612025726715016331 0ustar rootroot/*********************************************************
 * Copyright (C) 2006 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_STATFS_H__
# define __COMPAT_STATFS_H__

/* vfs.h simply includes statfs.h, but it knows what directory statfs.h is in. */
#include <linux/vfs.h>

/* 2.5.74 renamed struct statfs to kstatfs. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 74)
#define compat_kstatfs kstatfs
#else
#define compat_kstatfs statfs
#endif

#endif /* __COMPAT_STATFS_H__ */
vsock-only/include/compat_string.h0000444000000000000000000000356312025726715016337 0ustar rootroot/*********************************************************
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_STRING_H__
# define __COMPAT_STRING_H__

#include <linux/string.h>

/*
 * kstrdup was born in 2.6.13. This implementation is almost identical to the
 * one found there.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 13)
#define compat_kstrdup(s, gfp) kstrdup(s, gfp)
#else
#define compat_kstrdup(s, gfp)                                                \
({                                                                            \
   size_t len;                                                                \
   char *buf;                                                                 \
   len = strlen(s) + 1;                                                       \
   buf = kmalloc(len, gfp);                                                   \
   if (buf) {            /* guard against kmalloc() failure, as kstrdup does */ \
      memcpy(buf, s, len);                                                    \
   }                                                                          \
   buf;                                                                       \
})
#endif

#endif /* __COMPAT_STRING_H__ */
vsock-only/include/compat_uaccess.h0000444000000000000000000000606212025726715016456 0ustar rootroot/*********************************************************
 * Copyright (C) 2002 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_UACCESS_H__
# define __COMPAT_UACCESS_H__

/* User space access functions moved in 2.1.7 to asm/uaccess.h --hpreg */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 7)
# include <asm/uaccess.h>
#else
# include <asm/segment.h>
#endif

/* get_user() API modified in 2.1.4 to take 2 arguments --hpreg */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 4)
# define compat_get_user get_user
#else
/*
 * We assign 0 to the variable in case of failure to prevent "`_var' might be
 * used uninitialized in this function" compiler warnings. I think it is OK,
 * because the hardware-based version in newer kernels probably has the same
 * semantics and does not guarantee that the value of _var will not be
 * modified, should the access fail --hpreg
 */
# define compat_get_user(_var, _uvAddr) ({                            \
   int _status;                                                       \
                                                                      \
   _status = verify_area(VERIFY_READ, _uvAddr, sizeof(*(_uvAddr)));   \
   if (_status == 0) {                                                \
      (_var) = get_user(_uvAddr);                                     \
   } else {                                                           \
      (_var) = 0;                                                     \
   }                                                                  \
   _status;                                                           \
})
#endif

/*
 * The copy_from_user() API appeared in 2.1.4
 *
 * The emulation is not perfect here, but it is conservative: on failure, we
 * always return the total size, instead of the potentially smaller faulty
 * size --hpreg
 *
 * Since 2.5.55 copy_from_user() is no longer a macro.
 */
#if !defined(copy_from_user) && LINUX_VERSION_CODE < KERNEL_VERSION(2, 2, 0)
# define copy_from_user(_to, _from, _size) (   \
   verify_area(VERIFY_READ, _from, _size)      \
   ? (_size)                                   \
   : (memcpy_fromfs(_to, _from, _size), 0)     \
)
# define copy_to_user(_to, _from, _size) (     \
   verify_area(VERIFY_WRITE, _to, _size)       \
   ? (_size)                                   \
   : (memcpy_tofs(_to, _from, _size), 0)       \
)
#endif

#endif /* __COMPAT_UACCESS_H__ */
vsock-only/include/compat_version.h0000444000000000000000000000616512025726715016511 0ustar rootroot/*********************************************************
 * Copyright (C) 1998 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_VERSION_H__
# define __COMPAT_VERSION_H__

#define INCLUDE_ALLOW_VMMON
#define INCLUDE_ALLOW_MODULE
#define INCLUDE_ALLOW_VMCORE
#define INCLUDE_ALLOW_VMNIXMOD
#define INCLUDE_ALLOW_DISTRIBUTE
#include "includeCheck.h"

#ifndef __linux__
# error "linux-version.h"
#endif

#include <linux/version.h>

/* Appeared in 2.1.90 --hpreg */
#ifndef KERNEL_VERSION
# define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#endif

/*
 * Distinguish relevant classes of Linux kernels.
 *
 * The convention is that version X defines all
 * the KERNEL_Y symbols where Y <= X.
 *
 * XXX Do not add more definitions here.
This way of doing things does not * scale, and we are going to phase it out soon --hpreg */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 0) # define KERNEL_2_1 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 2, 0) # define KERNEL_2_2 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 1) # define KERNEL_2_3_1 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 15) /* new networking */ # define KERNEL_2_3_15 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 25) /* new procfs */ # define KERNEL_2_3_25 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 29) /* even newer procfs */ # define KERNEL_2_3_29 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 43) /* softnet changes */ # define KERNEL_2_3_43 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 47) /* more softnet changes */ # define KERNEL_2_3_47 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 99) /* name in netdevice struct is array and not pointer */ # define KERNEL_2_3_99 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0) /* New 'owner' member at the beginning of struct file_operations */ # define KERNEL_2_4_0 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 8) /* New netif_rx_ni() --hpreg */ # define KERNEL_2_4_8 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 22) /* New vmap() */ # define KERNEL_2_4_22 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 2) /* New kdev_t, major()/minor() API --hpreg */ # define KERNEL_2_5_2 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 5) /* New sk_alloc(), pte_offset_map()/pte_unmap() --hpreg */ # define KERNEL_2_5_5 #endif #endif /* __COMPAT_VERSION_H__ */ vsock-only/include/compat_wait.h0000444000000000000000000001564212025726715016000 0ustar rootroot/********************************************************* * Copyright (C) 2002 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_WAIT_H__ # define __COMPAT_WAIT_H__ #include #include #include #include "compat_file.h" /* * The DECLARE_WAITQUEUE() API appeared in 2.3.1 * It was back ported in 2.2.18 * * --hpreg */ #ifndef DECLARE_WAITQUEUE typedef struct wait_queue *wait_queue_head_t; # define init_waitqueue_head(_headPtr) *(_headPtr) = NULL # define DECLARE_WAITQUEUE(_var, _task) \ struct wait_queue _var = {_task, NULL, } typedef struct wait_queue wait_queue_t; # define init_waitqueue_entry(_wait, _task) ((_wait)->task = (_task)) #endif /* * The 'struct poll_wqueues' appeared in 2.5.48, when global * /dev/epoll interface was added. It was backported to the * 2.4.20-wolk4.0s. 
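 *
 * Either way, the compat_poll_initwait()/compat_poll_freewait() wrappers
 * below hide the difference. A minimal sketch of their intended use
 * (the local names are assumed):
 *
 *    poll_table *wait;
 *    compat_poll_wqueues table;
 *
 *    compat_poll_initwait(wait, &table);
 *    ... pass "wait" to the f_op->poll() handlers of interest ...
 *    compat_poll_freewait(wait, &table);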
*/ #ifdef VMW_HAVE_EPOLL // { #define compat_poll_wqueues struct poll_wqueues #else // } { #define compat_poll_wqueues poll_table #endif // } #ifdef VMW_HAVE_EPOLL // { /* If prototype does not match, build will abort here */ extern void poll_initwait(compat_poll_wqueues *); #define compat_poll_initwait(wait, table) ( \ poll_initwait((table)), \ (wait) = &(table)->pt \ ) #define compat_poll_freewait(wait, table) ( \ poll_freewait((table)) \ ) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 0) // { /* If prototype does not match, build will abort here */ extern void poll_initwait(compat_poll_wqueues *); #define compat_poll_initwait(wait, table) ( \ (wait) = (table), \ poll_initwait(wait) \ ) #define compat_poll_freewait(wait, table) ( \ poll_freewait((table)) \ ) #else // } { #define compat_poll_initwait(wait, table) ( \ (wait) = (table), /* confuse compiler */ \ (wait) = (poll_table *) __get_free_page(GFP_KERNEL), \ (wait)->nr = 0, \ (wait)->entry = (struct poll_table_entry *)((wait) + 1), \ (wait)->next = NULL \ ) static inline void poll_freewait(poll_table *wait) { while (wait) { struct poll_table_entry * entry; poll_table *old; entry = wait->entry + wait->nr; while (wait->nr > 0) { wait->nr--; entry--; remove_wait_queue(entry->wait_address, &entry->wait); compat_fput(entry->filp); } old = wait; wait = wait->next; free_page((unsigned long) old); } } #define compat_poll_freewait(wait, table) ( \ poll_freewait((wait)) \ ) #endif // } /* * The wait_event_interruptible_timeout() interface is not * defined in pre-2.6 kernels. */ #ifndef wait_event_interruptible_timeout #define __wait_event_interruptible_timeout(wq, condition, ret) \ do { \ wait_queue_t __wait; \ init_waitqueue_entry(&__wait, current); \ \ add_wait_queue(&wq, &__wait); \ for (;;) { \ set_current_state(TASK_INTERRUPTIBLE); \ if (condition) \ break; \ if (!signal_pending(current)) { \ ret = schedule_timeout(ret); \ if (!ret) \ break; \ continue; \ } \ ret = -ERESTARTSYS; \ break; \ } \ set_current_state(TASK_RUNNING); \ remove_wait_queue(&wq, &__wait); \ } while (0) #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!(condition)) \ __wait_event_interruptible_timeout(wq, condition, __ret); \ __ret; \ }) #endif /* * The wait_event_timeout() interface is not * defined in pre-2.6 kernels. */ #ifndef wait_event_timeout #define __wait_event_timeout(wq, condition, ret) \ do { \ wait_queue_t __wait; \ init_waitqueue_entry(&__wait, current); \ \ add_wait_queue(&wq, &__wait); \ for (;;) { \ set_current_state(TASK_UNINTERRUPTIBLE); \ if (condition) \ break; \ ret = schedule_timeout(ret); \ if (!ret) \ break; \ } \ set_current_state(TASK_RUNNING); \ remove_wait_queue(&wq, &__wait); \ } while (0) #define wait_event_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!(condition)) \ __wait_event_timeout(wq, condition, __ret); \ __ret; \ }) #endif /* * DEFINE_WAIT() and friends were added in 2.5.39 and backported to 2.4.28. * * Unfortunately it is not true. While some distros may have done it the * change has never made it into vanilla 2.4 kernel. Instead of testing * particular kernel versions let's just test for presence of DEFINE_WAIT * when figuring out whether we need to provide replacement implementation * or simply alias existing one. 
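 *
 * Whichever implementation is chosen, a sleep loop written against these
 * wrappers might look like this (a sketch; "wq" and "condition" are
 * assumed):
 *
 *    wait_queue_head_t wq;
 *    COMPAT_DEFINE_WAIT(wait);
 *
 *    compat_init_prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
 *    while (!condition) {
 *       schedule();
 *       compat_cont_prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
 *    }
 *    compat_finish_wait(&wq, &wait, TASK_RUNNING);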
*/ #ifndef DEFINE_WAIT # define COMPAT_DEFINE_WAIT(_wait) \ DECLARE_WAITQUEUE(_wait, current) # define compat_init_prepare_to_wait(_sleep, _wait, _state) \ do { \ __set_current_state(_state); \ add_wait_queue(_sleep, _wait); \ } while (0) # define compat_cont_prepare_to_wait(_sleep, _wait, _state) \ set_current_state(_state) # define compat_finish_wait(_sleep, _wait, _state) \ do { \ __set_current_state(_state); \ remove_wait_queue(_sleep, _wait); \ } while (0) #else # define COMPAT_DEFINE_WAIT(_wait) \ DEFINE_WAIT(_wait) # define compat_init_prepare_to_wait(_sleep, _wait, _state) \ prepare_to_wait(_sleep, _wait, _state) # define compat_cont_prepare_to_wait(_sleep, _wait, _state) \ prepare_to_wait(_sleep, _wait, _state) # define compat_finish_wait(_sleep, _wait, _state) \ finish_wait(_sleep, _wait) #endif /* #ifndef DEFINE_WAIT */ #endif /* __COMPAT_WAIT_H__ */ vsock-only/include/compat_workqueue.h0000444000000000000000000001431112025726715017053 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #ifndef __COMPAT_WORKQUEUE_H__ # define __COMPAT_WORKQUEUE_H__ #include #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 5, 41) # include #endif /* * * Work queues and delayed work queues. * * Prior to 2.5.41, the notion of work queues did not exist. Taskqueues are * used for work queues and timers are used for delayed work queues. * * After 2.6.20, normal work structs ("work_struct") and delayed work * ("delayed_work") structs were separated so that the work_struct could be * slimmed down. The interface was also changed such that the address of the * work_struct itself is passed in as the argument to the work function. This * requires that one embed the work struct in the larger struct containing the * information necessary to complete the work and use container_of() to obtain * the address of the containing structure. * * Users of these macros should embed a compat_work or compat_delayed_work in * a larger structure, then specify the larger structure as the _data argument * for the initialization functions, specify the work function to take * a compat_work_arg or compat_delayed_work_arg, then use the appropriate * _GET_DATA macro to obtain the reference to the structure passed in as _data. * An example is below. * * * typedef struct WorkData { * int data; * compat_work work; * } WorkData; * * * void * WorkFunc(compat_work_arg data) * { * WorkData *workData = COMPAT_WORK_GET_DATA(data, WorkData, work); * * ... 
* } * * * { * WorkData *workData = kmalloc(sizeof *workData, GFP_EXAMPLE); * if (!workData) { * return -ENOMEM; * } * * COMPAT_INIT_WORK(&workData->work, WorkFunc, workData); * compat_schedule_work(&workData->work); * } */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 41) /* { */ typedef struct tq_struct compat_work; typedef struct compat_delayed_work { struct tq_struct work; struct timer_list timer; } compat_delayed_work; typedef void * compat_work_arg; typedef void * compat_delayed_work_arg; /* * Delayed work queues need to run at some point in the future in process * context, but task queues don't support delaying the task one is scheduling. * Timers allow us to delay the execution of our work queue until the future, * but timer handlers run in bottom-half context. As such, we use both a timer * and task queue and use the timer handler below to schedule the task in * process context immediately. The timer lets us delay execution, and the * task queue lets us run in process context. * * Note that this is similar to how delayed_work is implemented with work * queues in later kernel versions. */ static inline void __compat_delayed_work_timer(unsigned long arg) { compat_delayed_work *dwork = (compat_delayed_work *)arg; if (dwork) { schedule_task(&dwork->work); } } # define COMPAT_INIT_WORK(_work, _func, _data) \ INIT_LIST_HEAD(&(_work)->list); \ (_work)->sync = 0; \ (_work)->routine = _func; \ (_work)->data = _data # define COMPAT_INIT_DELAYED_WORK(_work, _func, _data) \ COMPAT_INIT_WORK(&(_work)->work, _func, _data); \ init_timer(&(_work)->timer); \ (_work)->timer.expires = 0; \ (_work)->timer.function = __compat_delayed_work_timer; \ (_work)->timer.data = (unsigned long)_work # define compat_schedule_work(_work) \ schedule_task(_work) # define compat_schedule_delayed_work(_work, _delay) \ (_work)->timer.expires = jiffies + _delay; \ add_timer(&(_work)->timer) # define COMPAT_WORK_GET_DATA(_p, _type) \ (_type *)(_p) # define COMPAT_DELAYED_WORK_GET_DATA(_p, _type, _member) \ (_type *)(_p) #elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) /* } { */ typedef struct work_struct compat_work; typedef struct work_struct compat_delayed_work; typedef void * compat_work_arg; typedef void * compat_delayed_work_arg; # define COMPAT_INIT_WORK(_work, _func, _data) \ INIT_WORK(_work, _func, _data) # define COMPAT_INIT_DELAYED_WORK(_work, _func, _data) \ INIT_WORK(_work, _func, _data) # define compat_schedule_work(_work) \ schedule_work(_work) # define compat_schedule_delayed_work(_work, _delay) \ schedule_delayed_work(_work, _delay) # define COMPAT_WORK_GET_DATA(_p, _type) \ (_type *)(_p) # define COMPAT_DELAYED_WORK_GET_DATA(_p, _type, _member) \ (_type *)(_p) #else /* } Linux >= 2.6.20 { */ typedef struct work_struct compat_work; typedef struct delayed_work compat_delayed_work; typedef struct work_struct * compat_work_arg; typedef struct work_struct * compat_delayed_work_arg; # define COMPAT_INIT_WORK(_work, _func, _data) \ INIT_WORK(_work, _func) # define COMPAT_INIT_DELAYED_WORK(_work, _func, _data) \ INIT_DELAYED_WORK(_work, _func) # define compat_schedule_work(_work) \ schedule_work(_work) # define compat_schedule_delayed_work(_work, _delay) \ schedule_delayed_work(_work, _delay) # define COMPAT_WORK_GET_DATA(_p, _type) \ container_of(_p, _type, work) # define COMPAT_DELAYED_WORK_GET_DATA(_p, _type, _member) \ container_of(_p, _type, _member.work) #endif /* } */ #endif /* __COMPAT_WORKQUEUE_H__ */ vsock-only/include/compat_sock.h0000444000000000000000000002201712025726715015765 0ustar 
rootroot/*********************************************************
 * Copyright (C) 2003 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 *********************************************************/

#ifndef __COMPAT_SOCK_H__
# define __COMPAT_SOCK_H__

#include <linux/stddef.h> /* for NULL */
#include <net/sock.h>

/*
 * Between 2.5.70 and 2.5.71 all sock members were renamed from XXX to sk_XXX.
 *
 * VMW_HAVE_SK_WMEM_ALLOC is defined in the module Makefile if the kernel's
 * struct sock has an sk_wmem_alloc member. See vmnet's Makefile.kernel for
 * details. It also means that all modules including this file should do
 *
 * EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/socket.c, -DVMW_HAVE_SK_WMEM_ALLOC, )
 *
 * in their Makefiles.
 */
#ifndef VMW_HAVE_SK_WMEM_ALLOC
# define sk_wmem_alloc wmem_alloc
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 71)
# define compat_sk_backlog_rcv        backlog_rcv
# define compat_sk_destruct           destruct
# define compat_sk_shutdown           shutdown
# define compat_sk_receive_queue      receive_queue
# define compat_sk_sleep              sleep
# define compat_sk_err                err
# define compat_sk_state_change       state_change
# define compat_sk_data_ready         data_ready
# define compat_sk_write_space        write_space
# define compat_sk_error_report       error_report
# define compat_sk_type               type
# define compat_sk_refcnt             refcnt
# define compat_sk_state              state
# define compat_sk_socket             socket
# define compat_sk_ack_backlog        ack_backlog
# define compat_sk_max_ack_backlog    max_ack_backlog
#else
# define compat_sk_backlog_rcv        sk_backlog_rcv
# define compat_sk_destruct           sk_destruct
# define compat_sk_shutdown           sk_shutdown
# define compat_sk_receive_queue      sk_receive_queue
# define compat_sk_sleep              sk_sleep
# define compat_sk_err                sk_err
# define compat_sk_state_change       sk_state_change
# define compat_sk_data_ready         sk_data_ready
# define compat_sk_write_space        sk_write_space
# define compat_sk_error_report       sk_error_report
# define compat_sk_type               sk_type
# define compat_sk_refcnt             sk_refcnt
# define compat_sk_state              sk_state
# define compat_sk_socket             sk_socket
# define compat_sk_ack_backlog        sk_ack_backlog
# define compat_sk_max_ack_backlog    sk_max_ack_backlog
#endif

/*
 * Prior to 2.5.65, struct sock contained individual fields for certain
 * socket flags including SOCK_DONE. Between 2.5.65 and 2.5.71 these were
 * replaced with a bitmask but the generic bit test functions were used.
 * In 2.5.71, these were replaced with socket specific functions.
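 *
 * Callers only ever see the three wrappers below, e.g. (a sketch):
 *
 *    compat_sock_set_done(sk);
 *    ...
 *    if (compat_sock_test_done(sk)) {
 *       ... tear the connection down ...
 *    }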
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 71)
# define compat_sock_test_done(sk)   sock_flag(sk, SOCK_DONE)
# define compat_sock_set_done(sk)    sock_set_flag(sk, SOCK_DONE)
# define compat_sock_reset_done(sk)  sock_reset_flag(sk, SOCK_DONE)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 65)
# define compat_sock_test_done(sk)   test_bit(SOCK_DONE, &(sk)->flags)
# define compat_sock_set_done(sk)    __set_bit(SOCK_DONE, &(sk)->flags)
# define compat_sock_reset_done(sk)  __clear_bit(SOCK_DONE, &(sk)->flags)
#else
# define compat_sock_test_done(sk)   (sk)->done
# define compat_sock_set_done(sk)    ((sk)->done = 1)
# define compat_sock_reset_done(sk)  ((sk)->done = 0)
#endif

/*
 * Prior to 2.6.24, there was no sock network namespace member. In 2.6.26, it
 * was hidden behind accessor functions so that its behavior could vary
 * depending on the value of CONFIG_NET_NS.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 26)
# define compat_sock_net(sk)         sock_net(sk)
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24)
# define compat_sock_net(sk)         ((sk)->sk_net)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 42)
# define compat_sock_owned_by_user(sk) ((sk)->lock.users != 0)
#else
# define compat_sock_owned_by_user(sk) sock_owned_by_user(sk)
#endif

/*
 * Up until 2.4.21 for the 2.4 series and 2.5.60 for the 2.5 series,
 * sk_filter() calls were protected with CONFIG_FILTER. Wrapping our compat
 * definition in a similar check allows us to build on those kernels.
 */
#ifdef CONFIG_FILTER
/*
 * Unfortunately backports for certain kernels require the use of an autoconf
 * program to check the interface for sk_filter().
 */
# ifndef VMW_HAVE_NEW_SKFILTER
/*
 * Up until 2.4.21 for the 2.4 series and 2.5.60 for the 2.5 series,
 * callers of sk->filter were responsible for ensuring that the filter
 * was not NULL.
 * Additionally, the new version of sk_filter returns 0 or -EPERM on error
 * while the old function returned 0 or 1. Return -EPERM here as well to
 * be consistent.
 */
#  define compat_sk_filter(sk, skb, needlock)            \
   ({                                                    \
      int rc = 0;                                        \
                                                         \
      if ((sk)->filter) {                                \
         rc = sk_filter(skb, (sk)->filter);              \
         if (rc) {                                       \
            rc = -EPERM;                                 \
         }                                               \
      }                                                  \
                                                         \
      rc;                                                \
   })
# else
#  define compat_sk_filter(sk, skb, needlock) sk_filter(sk, skb, needlock)
# endif
#else
# define compat_sk_filter(sk, skb, needlock) 0
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 16)
/* Taken from 2.6.16's sock.h and modified for use as a macro. */
# define compat_sk_receive_skb(sk, skb, nested)          \
   ({                                                    \
      int rc = NET_RX_SUCCESS;                           \
                                                         \
      if (compat_sk_filter(sk, skb, 0)) {                \
         kfree_skb(skb);                                 \
         sock_put(sk);                                   \
      } else {                                           \
         skb->dev = NULL;                                \
         bh_lock_sock(sk);                               \
         if (!compat_sock_owned_by_user(sk)) {           \
            rc = (sk)->compat_sk_backlog_rcv(sk, skb);   \
         } else {                                        \
            sk_add_backlog(sk, skb);                     \
         }                                               \
         bh_unlock_sock(sk);                             \
         sock_put(sk);                                   \
      }                                                  \
                                                         \
      rc;                                                \
   })
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20)
# define compat_sk_receive_skb(sk, skb, nested) sk_receive_skb(sk, skb)
#else
# define compat_sk_receive_skb(sk, skb, nested) sk_receive_skb(sk, skb, nested)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 72)
/*
 * Before 2.5.72, the helper socket functions for hlist management did not
 * exist, so we use the sklist_ functions instead. These are not ideal since
 * they grab a system-wide sklist lock despite not needing it since we provide
 * our own list.
 */
#define compat_sk_next next /* for when we find out it became sk_next */
# define compat_sklist_table          struct sock *
/* This isn't really used in the iterator, but we need something.
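 *
 * With either set of definitions, walking one of our tables is written the
 * same way, e.g. (a sketch; the table and bucket names are assumed):
 *
 *    compat_sklist_table table[SIZE];
 *    struct sock *sk;
 *    compat_sklist_table_entry *node;
 *
 *    compat_sk_for_each(sk, node, &table[bucket]) {
 *       ... check whether "sk" matches ...
 *    }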
*/ # define compat_sklist_table_entry struct sock # define compat_sk_for_each(sk, node, list) \ for (sk = *(list), node = NULL; sk != NULL; sk = (sk)->compat_sk_next) # define compat_sk_add_node(sk, list) sklist_insert_socket(list, sk) # define compat_sk_del_node_init(sk, list) sklist_remove_socket(list, sk) #else # define compat_sklist_table struct hlist_head # define compat_sklist_table_entry struct hlist_node # define compat_sk_for_each(sk, node, list) sk_for_each(sk, node, list) # define compat_sk_add_node(sk, list) sk_add_node(sk, list) # define compat_sk_del_node_init(sk, list) sk_del_node_init(sk) #endif #endif /* __COMPAT_SOCK_H__ */ vsock-only/include/includeCheck.h0000444000000000000000000000000012025726715016030 0ustar rootrootvsock-only/autoconf/0000755000000000000000000000000012025726714013504 5ustar rootrootvsock-only/autoconf/geninclude.c0000444000000000000000000000226412025726715015770 0ustar rootroot/********************************************************* * Copyright (C) 2003 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #include #ifdef CONFIG_X86_VOYAGER APATH/mach-voyager #endif #ifdef CONFIG_X86_VISWS APATH/mach-visws #endif #ifdef CONFIG_X86_NUMAQ APATH/mach-numaq #endif #ifdef CONFIG_X86_BIGSMP APATH/mach-bigsmp #endif #ifdef CONFIG_X86_SUMMIT APATH/mach-summit #endif #ifdef CONFIG_X86_GENERICARCH APATH/mach-generic #endif APATH/mach-default vsock-only/autoconf/cachecreate.c0000444000000000000000000000320612025726715016077 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #include #include /* * All kernels before 2.6.22 take 6 arguments. All kernels since * 2.6.23-rc1 take 5 arguments. Only kernels between 2.6.22 and * 2.6.23-rc1 are questionable - we could ignore them if we wanted, * nobody cares about them even now. But unfortunately RedHat is * re-releasing 2.6.X-rc kernels under 2.6.(X-1) name, so they * are releasing 2.6.23-rc1 as 2.6.22-5055-something, so we have * to do autodetection for them. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* Success... 
*/ #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) #error "This test intentionally fails on 2.6.23 and newer kernels." #else #include struct kmem_cache *kmemtest(void) { return kmem_cache_create("test", 12, 0, 0, NULL, NULL); } #endif vsock-only/autoconf/epoll.c0000444000000000000000000000235512025726715014767 0ustar rootroot/********************************************************* * Copyright (C) 2004 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * Detect whether we have 'struct poll_wqueues' * 2.6.x kernels always had this struct. Stock 2.4.x kernels * never had it, but some distros backported epoll patch. */ #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0) #include void poll_test(void) { struct poll_wqueues test; return poll_initwait(&test); } #endif vsock-only/autoconf/filldir1.c0000444000000000000000000000325612025726715015363 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) #include #include /* loff_t */ #include /* NULL */ /* * After 2.6.18, filldir and statfs were changed to send 64-bit inode * numbers to user space. Red Hat backported this behavior into a 2.6.17 * kernel. * * This test will fail on a kernel with such a patch. */ static int LinuxDriverFilldir(void *buf, const char *name, int namelen, loff_t offset, ino_t ino, unsigned int d_type) { return 0; } void test(void) { vfs_readdir(NULL, LinuxDriverFilldir, NULL); } #else #error "This test intentionally fails on 2.6.20 and newer kernels." #endif vsock-only/autoconf/getsb1.c0000444000000000000000000000307412025726715015040 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19) #include /* * Around 2.6.18, a pointer to a vfsmount was added to get_sb. Red Hat * backported this behavior into a 2.6.17 kernel. * * This test will fail on a kernel with such a patch. */ static struct super_block * LinuxDriverGetSb(struct file_system_type *fs_type, int flags, const char *dev_name, void *rawData) { return 0; } struct file_system_type fs_type = { .get_sb = LinuxDriverGetSb }; #else #error "This test intentionally fails on 2.6.19 or newer kernels." #endif vsock-only/autoconf/setnice.c0000444000000000000000000000216312025726715015303 0ustar rootroot/********************************************************* * Copyright (C) 2005 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * set_user_nice appeared in 2.4.21. But some distros * backported it to older kernels. */ #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 21) #include void test(void) { set_user_nice(current, -20); } #endif vsock-only/autoconf/skas1.c0000444000000000000000000000235312025726715014674 0ustar rootroot/********************************************************* * Copyright (C) 2004 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * SKAS patch adds 'struct mm *mm' as first argument to do_mmap_pgoff. * This patch never hit mainstream kernel. 
*/ #include unsigned long check_do_mmap_pgoff(struct mm_struct *mm, struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { return do_mmap_pgoff(mm, file, addr, len, prot, flag, pgoff); } vsock-only/autoconf/statfs1.c0000444000000000000000000000266712025726715015247 0ustar rootroot/********************************************************* * Copyright (C) 2006 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19) #include /* * Around 2.6.18, the super_block pointer in statfs was changed to a dentry * pointer. Red Hat backported this behavior into a 2.6.17 kernel. * * This test will fail on a kernel with such a patch. */ static int LinuxDriverStatFs(struct super_block *sb, struct kstatfs *stat) { return 0; } struct super_operations super_ops = { .statfs = LinuxDriverStatFs }; #else #error "This test intentionally fails on 2.6.19 and newer kernels." #endif vsock-only/autoconf/sk_filter.c0000444000000000000000000000340412025726715015632 0ustar rootroot/********************************************************* * Copyright (C) 2007 VMware, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2 and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * *********************************************************/ /* * Detect whether the old or new sk_filter() interface is used. This was * changed in 2.4.21, but it's backported to some distro kernels. * * This test will fail to build on kernels with the new interface. */ #include #include /* * We'd restrict this test to 2.4.21 and earlier kernels, but Mandrake's * enterprise-2.4.21-013mdk-9.1 appears to really be 2.4.20 with some patches, * and not the patches we care about, so let's test on 2.4.21 kernels too. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 4, 22) #include #include #include #include struct sk_buff test_skbuff; struct sk_filter test_filter; int sk_filter_test(void) { struct sk_buff *skb = &test_skbuff; struct sk_filter *filter = &test_filter; return sk_filter(skb, filter); } #else #error "This test intentionally fails on 2.4.22 or newer kernels." 
#endif
vsock-only/Makefile.normal0000444000000000000000000001007112025726715014613 0ustar rootroot#!/usr/bin/make -f
##########################################################
# Copyright (C) 2007 VMware, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation version 2 and no later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
##########################################################

vm_check_build = $(shell if $(CC) $(CC_OPTS) $(INCLUDE) -Werror -S -o /dev/null -xc $(1) \
	> /dev/null 2>&1; then echo "$(2)"; else echo "$(3)"; fi)

vm_product_defines = $(if $(findstring tools,$(1)), -DVMX86_TOOLS,)

####
#### DESTDIR is where the module, object files, and dependencies are built
####
DESTDIR := driver-$(VM_UNAME)

####
#### DRIVERNAME should be untouched unless you have a good reason to change
#### it. The form below is how the scripts expect it.
####
DRIVERNAME := $(DRIVER)-xxx-$(VM_UNAME)

ifneq (,$(filter x86_64%, $(shell $(CC) -dumpmachine)))
MACHINE := x86_64
else
MACHINE := x386
endif

ifdef QUIET
ECHO := @true
else
ECHO := @echo
endif

####
#### You must compile with at least -O level of optimization
#### or the module won't load.
#### If desperate, I think that bringing in <linux/config.h> might
#### suffice.
####
CC_WARNINGS := -Wall -Wstrict-prototypes
# Don't use -pipe or egcs-2.91.66 (shipped with RedHat) will die
CC_KFLAGS := -D__KERNEL__ -fno-strength-reduce -fno-omit-frame-pointer \
	-fno-common -DKBUILD_MODNAME=$(DRIVER)
CC_KFLAGS += $(call vm_check_gcc,-falign-loops=2 -falign-jumps=2 -falign-functions=2, \
	-malign-loops=2 -malign-jumps=2 -malign-functions=2)
CC_KFLAGS += $(call vm_check_gcc,-fno-strict-aliasing,)
CC_KFLAGS += $(call vm_product_defines, $(PRODUCT))
ifeq ($(MACHINE),x86_64)
CC_KFLAGS += -mno-red-zone -mcmodel=kernel
else
# Gcc 3.0 deprecates -m486 --hpreg
CC_KFLAGS += -DCPU=586 $(call vm_check_gcc,-march=i586,-m486)
endif

CC_OPTS := -O2 -DMODULE $(GLOBAL_DEFS) $(CC_KFLAGS) $(CC_WARNINGS)

INCLUDE := -I$(SRCROOT)/include
INCLUDE += -I$(SRCROOT)/linux
INCLUDE += -I$(SRCROOT)/common
INCLUDE += -I$(HEADER_DIR)
INCLUDE += $(shell $(CC) $(INCLUDE) -E $(SRCROOT)/autoconf/geninclude.c \
	| sed -n -e 's!^APATH!-I$(HEADER_DIR)/asm!p')

CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/skas1.c, -DVMW_SKAS_MMAP, )
CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/epoll.c, -DVMW_HAVE_EPOLL, )
CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/setnice.c, -DVMW_HAVE_SET_USER_NICE, )
# This test is inverted.
CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/sk_filter.c,, -DVMW_HAVE_NEW_SKFILTER )
CC_OPTS += -DVMW_KMEMCR_HAS_DTOR

LINUX_OBJS := af_vsock.o
LINUX_OBJS += vsockAddr.o
LINUX_OBJS += util.o
LINUX_OBJS += driverLog.o
LINUX_DEPS := ${LINUX_OBJS:.o=.d}
OBJS := $(LINUX_OBJS)

####
#### Make Targets are beneath here.
####

driver: setup deps
	$(MAKE) -C $(DESTDIR) -f ../Makefile SRCROOT=../$(SRCROOT) $(DRIVERNAME) \
	  INCLUDE_DEPS=1

setup:
	@if [ -d $(DESTDIR) ] ; then true ; else mkdir $(DESTDIR); chmod 755 $(DESTDIR) ; fi

$(DRIVER): $(DRIVERNAME)
	cp -f $< $@

$(DRIVERNAME): $(OBJS)
	$(ECHO) "Building $(DRIVERNAME)"
	ld -r -o $(DRIVERNAME) $^

auto-build:
	$(MAKE) driver QUIET=1
	cp -f $(DESTDIR)/$(DRIVERNAME) $(SRCROOT)/../$(DRIVER).o

$(LINUX_OBJS): %.o: $(SRCROOT)/linux/%.c
	$(ECHO) "Compiling $<"
	$(CC) $(CC_OPTS) $(INCLUDE) -c $<

clean:
	rm -rf $(DESTDIR)/

$(LINUX_DEPS): %.d: $(SRCROOT)/linux/%.c
	$(ECHO) "Dependencies for $<"
	$(CC) -MM $(CC_OPTS) $(INCLUDE) $< > $@

deps: setup
	$(MAKE) -C $(DESTDIR) -f ../Makefile SRCROOT=../$(SRCROOT) driver_deps

driver_deps: ${OBJS:.o=.d}

ifdef INCLUDE_DEPS
include ${OBJS:.o=.d}
endif

.SILENT:
vsock-only/Makefile.kernel0000444000000000000000000000434012025726715014605 0ustar rootroot#!/usr/bin/make -f
##########################################################
# Copyright (C) 2007 VMware, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation version 2 and no later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
##########################################################

vm_product_defines = $(if $(findstring tools,$(1)), -DVMX86_TOOLS,)

CC_OPTS += $(call vm_product_defines, $(PRODUCT))

INCLUDE := -I.
INCLUDE += -I$(SRCROOT)/include
INCLUDE += -I$(SRCROOT)/linux
INCLUDE += -I$(SRCROOT)/common

EXTRA_CFLAGS := $(CC_OPTS) $(INCLUDE)
EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/autoconf/epoll.c, -DVMW_HAVE_EPOLL, )
EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/autoconf/setnice.c, -DVMW_HAVE_SET_USER_NICE, )
# This test is inverted.
EXTRA_CFLAGS += $(call vm_check_build, $(SRCROOT)/autoconf/sk_filter.c,, -DVMW_HAVE_NEW_SKFILTER )

T := /tmp
MODPOST_VMCI_SYMVERS := $(wildcard $(T)/VMwareVMCIModule.symvers)

obj-m += $(DRIVER).o
$(DRIVER)-y := $(subst $(SRCROOT)/, , $(patsubst %.c, %.o, $(wildcard $(SRCROOT)/linux/*.c)))

clean:
	rm -rf $(wildcard $(DRIVER).mod.c $(DRIVER).ko .tmp_versions \
	       Module.symvers Modules.symvers Module.markers modules.order \
	       $(foreach dir,./ linux/ \
	       ,$(addprefix $(dir),.*.cmd .*.o.flags *.o)))

#
# The VSock kernel module uses symbols from the VMCI kernel module. Copy the
# Module.symvers file here so that the VSock module knows about the VMCI version.
#
prebuild::
ifeq ($(MODPOST_VMCI_SYMVERS),)
	$(shell echo >&2 "Building VMCI Sockets without VMCI module symbols.")
else
	$(shell echo >&2 "Building VMCI Sockets with VMCI module symbols.")
	cp -f $(MODPOST_VMCI_SYMVERS) $(SRCROOT)/Module.symvers
endif
vsock-only/README0000444000000000000000000000075512025726714012553 0ustar rootrootThe files in this directory and its subdirectories make up the kernel module for the VMware VSockets module. In order to build, make certain the Makefile is correct and then just type make from this directory. A copy of the module will be left in driver-<version>/vsock-<version> (e.g.
driver-up-2.4.20/vsock-up-2.4.20) for 2.4 series kernels and in ../vsock.o for 2.6 series kernels. If you have any problems or questions, send mail to support@vmware.com vsock-only/Makefile0000444000000000000000000000727112025726715013334 0ustar rootroot#!/usr/bin/make -f ########################################################## # Copyright (C) 1998 VMware, Inc. All rights reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation version 2 and no later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # ########################################################## #### #### VMware kernel module Makefile to be distributed externally #### #### #### SRCROOT _must_ be a relative path. #### SRCROOT = . VM_UNAME = $(shell uname -r) # Header directory for the running kernel HEADER_DIR = /lib/modules/$(VM_UNAME)/build/include BUILD_DIR = $(HEADER_DIR)/.. DRIVER := vsock PRODUCT := tools-for-linux # Grep program GREP = /bin/grep vm_check_gcc = $(shell if $(CC) $(1) -S -o /dev/null -xc /dev/null \ > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi) vm_check_file = $(shell if test -f $(1); then echo "yes"; else echo "no"; fi) ifndef VM_KBUILD VM_KBUILD := no ifeq ($(call vm_check_file,$(BUILD_DIR)/Makefile), yes) ifneq ($(call vm_check_file,$(BUILD_DIR)/Rules.make), yes) VM_KBUILD := 26 endif endif export VM_KBUILD endif ifndef VM_KBUILD_SHOWN ifeq ($(VM_KBUILD), no) VM_DUMMY := $(shell echo >&2 "Using standalone build system.") else ifeq ($(VM_KBUILD), 24) VM_DUMMY := $(shell echo >&2 "Using 2.4.x kernel build system.") else VM_DUMMY := $(shell echo >&2 "Using 2.6.x kernel build system.") endif endif VM_KBUILD_SHOWN := yes export VM_KBUILD_SHOWN endif ifneq ($(VM_KBUILD), no) VMCCVER := $(shell $(CC) -dumpversion) # If there is no version defined, we are in toplevel pass, not yet in kernel makefiles... ifeq ($(VERSION),) ifeq ($(VM_KBUILD), 24) DRIVER_KO := $(DRIVER).o else DRIVER_KO := $(DRIVER).ko endif .PHONY: $(DRIVER_KO) auto-build: $(DRIVER_KO) cp -f $< $(SRCROOT)/../$(DRIVER).o # $(DRIVER_KO) is a phony target, so compare file times explicitly $(DRIVER): $(DRIVER_KO) if [ $< -nt $@ ] || [ ! -e $@ ] ; then cp -f $< $@; fi # Pass gcc version down the chain, so we can detect if kernel attempts to use unapproved compiler VM_CCVER := $(VMCCVER) export VM_CCVER VM_CC := $(CC) export VM_CC MAKEOVERRIDES := $(filter-out CC=%,$(MAKEOVERRIDES)) # # Define a setup target that gets built before the actual driver. 
# This target may not be used at all, but if it is then it will be defined # in Makefile.kernel # prebuild:: ; postbuild:: ; $(DRIVER_KO): prebuild make -C $(BUILD_DIR) SUBDIRS=$$PWD SRCROOT=$$PWD/$(SRCROOT) modules make -C $$PWD SRCROOT=$$PWD/$(SRCROOT) postbuild endif vm_check_build = $(shell if $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) \ $(CPPFLAGS) $(CFLAGS) $(CFLAGS_KERNEL) $(LINUXINCLUDE) \ $(EXTRA_CFLAGS) -Iinclude2/asm/mach-default \ -DKBUILD_BASENAME=\"$(DRIVER)\" \ -Werror -S -o /dev/null -xc $(1) \ > /dev/null 2>&1; then echo "$(2)"; else echo "$(3)"; fi) CC_WARNINGS := -Wall -Wstrict-prototypes CC_OPTS := $(GLOBAL_DEFS) $(CC_WARNINGS) -DVMW_USING_KBUILD ifdef VMX86_DEVEL CC_OPTS += -DVMX86_DEVEL endif ifdef VMX86_DEBUG CC_OPTS += -DVMX86_DEBUG endif include $(SRCROOT)/Makefile.kernel ifdef TOPDIR ifeq ($(VM_KBUILD), 24) O_TARGET := $(DRIVER).o obj-y := $($(DRIVER)-y) include $(TOPDIR)/Rules.make endif endif else include $(SRCROOT)/Makefile.normal endif #.SILENT: vsock-only/COPYING0000444000000000000000000004310312025726714012720 0ustar rootroot GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. 
We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. 
If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.

Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program.

In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.

3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following:

  a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,

  b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or,

  c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.)

The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.

If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code.

4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.

5.
You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it.

6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License.

7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program.

If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances.

It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.

This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.

8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.

9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

Each version is given a distinguishing version number.
If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation.

10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.

                            NO WARRANTY

11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names:

    Yoyodyne, Inc., hereby disclaims all copyright interest in the program
    `Gnomovision' (which makes passes at compilers) written by James Hacker.

    <signature of Ty Coon>, 1 April 1989
    Ty Coon, President of Vice

This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License.
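A note on the vm_check_build macro defined in the Makefile above, before the COPYING text: it compiles a small test source file with the kernel's full compile flags plus -Werror, and expands to its second argument when the compile succeeds or its third argument when it fails. That makes it usable as a compile-time feature probe against the installed kernel headers. The snippet below is a minimal usage sketch, not part of the shipped Makefile; the test file autoconf/epollwait.c and the VMW_HAVE_EPOLL define are hypothetical names chosen for illustration.

    # Hypothetical sketch only -- not in the original Makefile.
    # autoconf/epollwait.c is assumed to be a tiny translation unit that
    # compiles only on kernels providing the feature under test.  When the
    # probe compiles cleanly, vm_check_build echoes the second argument
    # (the define); the empty third argument means nothing is added on
    # failure.
    CC_OPTS += $(call vm_check_build, $(SRCROOT)/autoconf/epollwait.c, \
                 -DVMW_HAVE_EPOLL, )

Code guarded by #ifdef VMW_HAVE_EPOLL can then rely on the probed feature without breaking the build on older kernels, which appears to be the intent of probing at build time rather than hardcoding LINUX_VERSION_CODE checks everywhere.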