#include <linux/eventpoll.h>
#include <linux/mount.h>
#include <linux/bitops.h>
+#include <linux/mutex.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
* LOCKING:
* There are three level of locking required by epoll :
*
- * 1) epsem (semaphore)
+ * 1) epmutex (mutex)
* 2) ep->sem (rw_semaphore)
* 3) ep->lock (rw_lock)
*
* if a file has been pushed inside an epoll set and it is then
* close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
* It is possible to drop the "ep->sem" and to use the global
- * semaphore "epsem" (together with "ep->lock") to have it working,
+ * semaphore "epmutex" (together with "ep->lock") to have it working,
* but having "ep->sem" will make the interface more scalable.
- * Events that require holding "epsem" are very rare, while for
+ * Events that require holding "epmutex" are very rare, while for
* normal operations the epoll private "ep->sem" will guarantee
* a greater scalability.
*/
/* Maximum number of poll wake up nests we are allowing */
#define EP_MAX_POLLWAKE_NESTS 4
+/* Maximum msec timeout value storeable in a long int */
+#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
+
+
struct epoll_filefd {
struct file *file;
int fd;
static void ep_poll_safewake_init(struct poll_safewake *psw);
static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
-static int ep_file_init(struct file *file);
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+ struct eventpoll *ep);
+static int ep_alloc(struct eventpoll **pep);
static void ep_free(struct eventpoll *ep);
static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
static void ep_use_epitem(struct epitem *epi);
/*
* This semaphore is used to serialize ep_free() and eventpoll_release_file().
*/
-static struct semaphore epsem;
+static struct mutex epmutex;
/* Safe wake up implementation */
static struct poll_safewake psw;
}
-/* Used to initialize the epoll bits inside the "struct file" */
-void eventpoll_init_file(struct file *file)
-{
-
- INIT_LIST_HEAD(&file->f_ep_links);
- spin_lock_init(&file->f_ep_lock);
-}
-
-
/*
* This is called from eventpoll_release() to unlink files from the eventpoll
* interface. We need to have this facility to cleanup correctly files that are
* cleanup path, and this means that noone is using this file anymore.
* The only hit might come from ep_free() but by holding the semaphore
* will correctly serialize the operation. We do need to acquire
- * "ep->sem" after "epsem" because ep_remove() requires it when called
+ * "ep->sem" after "epmutex" because ep_remove() requires it when called
* from anywhere but ep_free().
*/
- down(&epsem);
+ mutex_lock(&epmutex);
while (!list_empty(lsthead)) {
epi = list_entry(lsthead->next, struct epitem, fllink);
up_write(&ep->sem);
}
- up(&epsem);
+ mutex_unlock(&epmutex);
}
asmlinkage long sys_epoll_create(int size)
{
int error, fd;
+ struct eventpoll *ep;
struct inode *inode;
struct file *file;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
current, size));
- /* Sanity check on the size parameter */
+ /*
+ * Sanity check on the size parameter, and create the internal data
+ * structure ( "struct eventpoll" ).
+ */
error = -EINVAL;
- if (size <= 0)
+ if (size <= 0 || (error = ep_alloc(&ep)) != 0)
goto eexit_1;
/*
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure, and inode and a free file descriptor.
*/
- error = ep_getfd(&fd, &inode, &file);
- if (error)
- goto eexit_1;
-
- /* Setup the file internal data structure ( "struct eventpoll" ) */
- error = ep_file_init(file);
+ error = ep_getfd(&fd, &inode, &file, ep);
if (error)
goto eexit_2;
-
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, fd));
return fd;
eexit_2:
- sys_close(fd);
+ ep_free(ep);
+ kfree(ep);
eexit_1:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, error));
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
- epds.events |= POLLERR | POLLHUP;
+ epds.events |= POLLERR | POLLHUP | POLLRDHUP;
error = ep_insert(ep, &epds, tfile, fd);
} else
break;
case EPOLL_CTL_MOD:
if (epi) {
- epds.events |= POLLERR | POLLHUP;
+ epds.events |= POLLERR | POLLHUP | POLLRDHUP;
error = ep_modify(ep, epi, &epds);
} else
error = -ENOENT;
/*
* Creates the file descriptor to be used by the epoll interface.
*/
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+ struct eventpoll *ep)
{
struct qstr this;
char name[32];
file->f_op = &eventpoll_fops;
file->f_mode = FMODE_READ;
file->f_version = 0;
- file->private_data = NULL;
+ file->private_data = ep;
/* Install the new setup file into the allocated fd. */
fd_install(fd, file);
}
-static int ep_file_init(struct file *file)
+static int ep_alloc(struct eventpoll **pep)
{
- struct eventpoll *ep;
+ struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
- if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
+ if (!ep)
return -ENOMEM;
- memset(ep, 0, sizeof(*ep));
rwlock_init(&ep->lock);
init_rwsem(&ep->sem);
init_waitqueue_head(&ep->wq);
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;
- file->private_data = ep;
+ *pep = ep;
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
+ DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
current, ep));
return 0;
}
* We do not need to hold "ep->sem" here because the epoll file
* is on the way to be removed and no one has references to it
* anymore. The only hit might come from eventpoll_release_file() but
- * holding "epsem" is sufficent here.
+ * holding "epmutex" is sufficent here.
*/
- down(&epsem);
+ mutex_lock(&epmutex);
/*
* Walks through the whole tree by unregistering poll callbacks.
ep_remove(ep, epi);
}
- up(&epsem);
+ mutex_unlock(&epmutex);
}
* and the overflow condition. The passed timeout is in milliseconds,
* that why (t * HZ) / 1000.
*/
- jtimeout = timeout == -1 || timeout > (MAX_SCHEDULE_TIMEOUT - 1000) / HZ ?
- MAX_SCHEDULE_TIMEOUT: (timeout * HZ + 999) / 1000;
+ jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
+ MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
retry:
write_lock_irqsave(&ep->lock, flags);
{
int error;
- init_MUTEX(&epsem);
+ mutex_init(&epmutex);
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_poll_safewake_init(&psw);