2 * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
9 #include <linux/miscdevice.h>
10 #include <linux/init.h>
11 #include <linux/wait.h>
12 #include <linux/module.h>
13 #include <linux/file.h>
15 #include <linux/poll.h>
16 #include <linux/signal.h>
17 #include <linux/spinlock.h>
18 #include <linux/dlm.h>
19 #include <linux/dlm_device.h>
21 #include "dlm_internal.h"
22 #include "lockspace.h"
24 #include "lvb_table.h"
/* Prefix for per-lockspace misc device names; create_misc_device() formats
   the device name as "<name_prefix>_<lockspace name>". */
27 static const char *name_prefix="dlm";
/* The control device; set up and registered in dlm_user_init(). */
28 static struct miscdevice ctl_device;
/* Forward declaration of the file operations assigned to each lockspace
   device in create_misc_device(). */
29 static const struct file_operations device_fops;
/* Lock parameters in the layout read by compat_input(), which widens the
   pointer-like members to native pointers while filling
   struct dlm_lock_params.
   NOTE(review): most member declarations are not visible in this listing. */
33 struct dlm_lock_params32 {
46 char lvb[DLM_USER_LVB_LEN];
/* Write request in the layout that device_write() passes to compat_input()
   when the request's is64bit field is zero.  compat_input() reads the three
   members below as i.lock, i.lspace and i.purge.
   NOTE(review): the header fields and the enclosing union syntax are not
   visible in this listing. */
50 struct dlm_write_request32 {
57 struct dlm_lock_params32 lock;
58 struct dlm_lspace_params lspace;
59 struct dlm_purge_params purge;
/* Lock result in the layout filled by compat_output() and copied to the
   reader by copy_result_to_user().
   NOTE(review): only the lksb member is visible in this listing; the
   remaining members are used by compat_output() but not declared here. */
70 struct dlm_lock_result32 {
75 struct dlm_lksb32 lksb;
78 /* Offsets may be zero if no data is present */
/* Translate a write request from the 32-bit layout (kb32) into the native
   struct dlm_write_request (kb).  The version triple and is64bit are copied
   as-is; the command-specific payload is then copied according to kb->cmd:
   lockspace parameters for create/remove, purge parameters for purge, and
   lock parameters otherwise.
   NOTE(review): kb->cmd is tested below, but no assignment of kb->cmd from
   kb32 is visible in this listing (the embedded line numbers skip 88-89);
   confirm it is set before the test. */
82 static void compat_input(struct dlm_write_request *kb,
83 struct dlm_write_request32 *kb32)
85 kb->version[0] = kb32->version[0];
86 kb->version[1] = kb32->version[1];
87 kb->version[2] = kb32->version[2];
90 kb->is64bit = kb32->is64bit;
91 if (kb->cmd == DLM_USER_CREATE_LOCKSPACE ||
92 kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
93 kb->i.lspace.flags = kb32->i.lspace.flags;
94 kb->i.lspace.minor = kb32->i.lspace.minor;
/* NOTE(review): strcpy() is unbounded and depends on the name being
   NUL-terminated inside the buffer device_write() copied from userspace;
   confirm something guarantees that. */
95 strcpy(kb->i.lspace.name, kb32->i.lspace.name);
96 } else if (kb->cmd == DLM_USER_PURGE) {
97 kb->i.purge.nodeid = kb32->i.purge.nodeid;
98 kb->i.purge.pid = kb32->i.purge.pid;
100 kb->i.lock.mode = kb32->i.lock.mode;
101 kb->i.lock.namelen = kb32->i.lock.namelen;
102 kb->i.lock.flags = kb32->i.lock.flags;
103 kb->i.lock.lkid = kb32->i.lock.lkid;
104 kb->i.lock.parent = kb32->i.lock.parent;
/* Pointer-like fields are widened to native pointers by way of long. */
105 kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
106 kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
107 kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
108 kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
109 kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
110 memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
/* NOTE(review): namelen is taken from the request itself and no check
   against the size of either buffer is visible here. */
111 memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
/* Fill the 32-bit result layout (res32) from the native result (res).
   Pointer-valued fields are narrowed to __u32 by way of long; the lksb
   members are copied one by one.
   NOTE(review): res32->length is assigned twice.  The first assignment
   subtracts the size difference between the two result structs; the later
   plain copy of res->length overwrites it.  Confirm which value is
   intended. */
115 static void compat_output(struct dlm_lock_result *res,
116 struct dlm_lock_result32 *res32)
118 res32->length = res->length - (sizeof(struct dlm_lock_result) -
119 sizeof(struct dlm_lock_result32));
120 res32->user_astaddr = (__u32)(long)res->user_astaddr;
121 res32->user_astparam = (__u32)(long)res->user_astparam;
122 res32->user_lksb = (__u32)(long)res->user_lksb;
123 res32->bast_mode = res->bast_mode;
125 res32->lvb_offset = res->lvb_offset;
126 res32->length = res->length;
128 res32->lksb.sb_status = res->lksb.sb_status;
129 res32->lksb.sb_flags = res->lksb.sb_flags;
130 res32->lksb.sb_lkid = res->lksb.sb_lkid;
131 res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
135 /* we could possibly check if the cancel of an orphan has resulted in the lkb
136 being removed and then remove that lkb from the orphans list and free it */
/* Record an ast of the given type (AST_COMP or AST_BAST) on lkb for the
   owning user process.  Under ls->ls_clear_proc_locks and proc->asts_spin
   the type is OR-ed into lkb->lkb_ast_type; the visible code also takes a
   reference on the lkb, appends it to proc->asts and wakes proc->wait.
   The function then marks end-of-life locks with DLM_IFL_ENDOFLIFE, decides
   whether the lvb should be copied to the user when the completion ast is
   read, and unlinks the lkb from its owner queue under proc->locks_spin.
   NOTE(review): the embedded line numbers skip in several places (for
   example 162-163, 172, 195, 199-200, 219-220), so the assignment of proc,
   the statements taken on early exit and the conditions guarding some of
   the statements below are not visible in this listing. */
138 void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
141 struct dlm_user_args *ua;
142 struct dlm_user_proc *proc;
143 int eol = 0, ast_type;
/* Unlocked early check of the ORPHAN/DEAD flags; repeated below once the
   mutex is held. */
145 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
148 ls = lkb->lkb_resource->res_ls;
149 mutex_lock(&ls->ls_clear_proc_locks);
151 /* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
152 can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
153 lkb->ua so we can't try to use it. This second check is necessary
154 for cases where a completion ast is received for an operation that
155 began before clear_proc_locks did its cancel/unlock. */
157 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
160 DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
161 ua = (struct dlm_user_args *)lkb->lkb_astparam;
/* A blocking ast is only of interest when the user registered a bast
   address. */
164 if (type == AST_BAST && ua->bastaddr == NULL)
167 spin_lock(&proc->asts_spin);
/* Remember the ast types that were already pending before adding this
   one; the overlap check below uses the old value. */
169 ast_type = lkb->lkb_ast_type;
170 lkb->lkb_ast_type |= type;
173 kref_get(&lkb->lkb_ref);
174 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
175 wake_up_interruptible(&proc->wait);
/* A completion ast arriving while an earlier one is still pending is
   only logged. */
177 if (type == AST_COMP && (ast_type & AST_COMP))
178 log_debug(ls, "ast overlap %x status %x %x",
179 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
181 /* Figure out if this lock is at the end of its life and no longer
182 available for the application to use. The lkb still exists until
183 the final ast is read. A lock becomes EOL in three situations:
184 1. a noqueue request fails with EAGAIN
185 2. an unlock completes with EUNLOCK
186 3. a cancel of a waiting request completes with ECANCEL
187 An EOL lock needs to be removed from the process's list of locks.
188 And we can't allow any new operation on an EOL lock. This is
189 not related to the lifetime of the lkb struct which is managed
190 entirely by refcount. */
192 if (type == AST_COMP &&
193 lkb->lkb_grmode == DLM_LOCK_IV &&
194 ua->lksb.sb_status == -EAGAIN)
196 else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
197 (ua->lksb.sb_status == -DLM_ECANCEL &&
198 lkb->lkb_grmode == DLM_LOCK_IV))
/* Clears any pending blocking ast and flags the lock as end-of-life.
   NOTE(review): presumably guarded by eol; the guard is not visible. */
201 lkb->lkb_ast_type &= ~AST_BAST;
202 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
205 /* We want to copy the lvb to userspace when the completion
206 ast is read if the status is 0, the lock has an lvb and
207 lvb_ops says we should. We could probably have set_lvb_lock()
208 set update_user_lvb instead and not need old_mode */
/* The +1 offsets index dlm_lvb_operations by mode; DLM_LOCK_IV is
   presumably -1 (confirm against the mode definitions). */
210 if ((lkb->lkb_ast_type & AST_COMP) &&
211 (lkb->lkb_lksb->sb_status == 0) &&
212 lkb->lkb_lksb->sb_lvbptr &&
213 dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1])
214 ua->update_user_lvb = 1;
216 ua->update_user_lvb = 0;
218 spin_unlock(&proc->asts_spin);
/* Unlink the lkb from the owner's lock list if it is still on it. */
221 spin_lock(&ua->proc->locks_spin);
222 if (!list_empty(&lkb->lkb_ownqueue)) {
223 list_del_init(&lkb->lkb_ownqueue);
226 spin_unlock(&ua->proc->locks_spin);
229 mutex_unlock(&ls->ls_clear_proc_locks);
/* Handle a lock request written to a lockspace device.  Looks up the
   lockspace from proc->lockspace, validates that a completion ast address
   and an lksb were supplied, allocates a zeroed dlm_user_args populated from
   params, and calls dlm_user_convert() when DLM_LKF_CONVERT is set in
   params->flags; the other visible call is dlm_user_request().  The
   lockspace reference is dropped with dlm_put_lockspace().
   NOTE(review): the error paths, the remaining dlm_user_request() arguments
   and the return statement are not visible in this listing. */
232 static int device_user_lock(struct dlm_user_proc *proc,
233 struct dlm_lock_params *params)
236 struct dlm_user_args *ua;
239 ls = dlm_find_lockspace_local(proc->lockspace);
243 if (!params->castaddr || !params->lksb) {
248 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
252 ua->user_lksb = params->lksb;
253 ua->castparam = params->castparam;
254 ua->castaddr = params->castaddr;
255 ua->bastparam = params->bastparam;
256 ua->bastaddr = params->bastaddr;
258 if (params->flags & DLM_LKF_CONVERT)
259 error = dlm_user_convert(ls, ua,
260 params->mode, params->flags,
261 params->lkid, params->lvb);
263 error = dlm_user_request(ls, ua,
264 params->mode, params->flags,
265 params->name, params->namelen,
/* The lock id from the lksb becomes the result.
   NOTE(review): the condition guarding this assignment is not visible. */
268 error = ua->lksb.sb_lkid;
271 dlm_put_lockspace(ls);
/* Handle an unlock request written to a lockspace device.  Looks up the
   lockspace from proc->lockspace, allocates a zeroed dlm_user_args carrying
   the user's lksb and completion ast address/parameter, and calls
   dlm_user_cancel() when DLM_LKF_CANCEL is set in params->flags; the other
   visible call is dlm_user_unlock().  The lockspace reference is dropped
   with dlm_put_lockspace().
   NOTE(review): the error paths, the trailing dlm_user_unlock() arguments
   and the return statement are not visible in this listing. */
275 static int device_user_unlock(struct dlm_user_proc *proc,
276 struct dlm_lock_params *params)
279 struct dlm_user_args *ua;
282 ls = dlm_find_lockspace_local(proc->lockspace);
286 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
290 ua->user_lksb = params->lksb;
291 ua->castparam = params->castparam;
292 ua->castaddr = params->castaddr;
294 if (params->flags & DLM_LKF_CANCEL)
295 error = dlm_user_cancel(ls, ua, params->flags, params->lkid);
297 error = dlm_user_unlock(ls, ua, params->flags, params->lkid,
300 dlm_put_lockspace(ls);
/* Register a misc device for lockspace ls.  The device name is built as
   "<name_prefix>_<name>" in a kzalloc'd buffer, the file operations are set
   to device_fops and a dynamic minor is requested before misc_register().
   NOTE(review): the kfree() of the name is presumably the misc_register()
   failure path; its guard and the return statements are not visible in this
   listing. */
304 static int create_misc_device(struct dlm_ls *ls, char *name)
/* +2: one byte for the '_' separator and one for the terminating NUL. */
309 len = strlen(name) + strlen(name_prefix) + 2;
310 ls->ls_device.name = kzalloc(len, GFP_KERNEL);
311 if (!ls->ls_device.name)
314 snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
316 ls->ls_device.fops = &device_fops;
317 ls->ls_device.minor = MISC_DYNAMIC_MINOR;
319 error = misc_register(&ls->ls_device);
321 kfree(ls->ls_device.name);
/* Handle a purge request: look up the lockspace from proc->lockspace, call
   dlm_user_purge() with the nodeid and pid from params, then drop the
   lockspace reference.
   NOTE(review): the lookup-failure path and the return statement are not
   visible in this listing. */
327 static int device_user_purge(struct dlm_user_proc *proc,
328 struct dlm_purge_params *params)
333 ls = dlm_find_lockspace_local(proc->lockspace);
337 error = dlm_user_purge(ls, proc, params->nodeid, params->pid);
339 dlm_put_lockspace(ls);
/* Handle a create-lockspace request from the control device.  Requires
   CAP_SYS_ADMIN.  Creates the lockspace named params->name with an lvb
   length of DLM_USER_LVB_LEN, looks it up, registers its misc device via
   create_misc_device() and drops the lookup reference.  The visible code
   also releases the lockspace and assigns the device minor to error.
   NOTE(review): the conditions selecting between dlm_release_lockspace()
   and returning the minor are not visible in this listing; presumably the
   release is the create_misc_device() failure path. */
343 static int device_create_lockspace(struct dlm_lspace_params *params)
345 dlm_lockspace_t *lockspace;
349 if (!capable(CAP_SYS_ADMIN))
352 error = dlm_new_lockspace(params->name, strlen(params->name),
353 &lockspace, 0, DLM_USER_LVB_LEN);
357 ls = dlm_find_lockspace_local(lockspace);
361 error = create_misc_device(ls, params->name);
362 dlm_put_lockspace(ls);
365 dlm_release_lockspace(lockspace, 0);
367 error = ls->ls_device.minor;
/* Handle a remove-lockspace request from the control device.  Requires
   CAP_SYS_ADMIN.  Finds the lockspace by params->minor, deregisters its misc
   device and frees the device name, then drops the lookup reference and
   calls dlm_release_lockspace() with the local handle.  The visible code
   also calls create_misc_device() again with ls->ls_name.
   NOTE(review): the comment beginning "Deregister the misc device first"
   has no visible terminator in this listing, and the guards around the
   error paths, the assignment to force and the create_misc_device() call
   are not visible; presumably the device is recreated only when the release
   fails, as that comment describes.  ls is used after dlm_put_lockspace()
   in that call - confirm it is still valid there. */
372 static int device_remove_lockspace(struct dlm_lspace_params *params)
374 dlm_lockspace_t *lockspace;
376 int error, force = 0;
378 if (!capable(CAP_SYS_ADMIN))
381 ls = dlm_find_lockspace_device(params->minor);
385 /* Deregister the misc device first, so we don't have
386 * a device that's not attached to a lockspace. If
387 * dlm_release_lockspace fails then we can recreate it
389 error = misc_deregister(&ls->ls_device);
391 dlm_put_lockspace(ls);
394 kfree(ls->ls_device.name);
396 if (params->flags & DLM_USER_LSFLG_FORCEFREE)
399 lockspace = ls->ls_local_handle;
401 /* dlm_release_lockspace waits for references to go to zero,
402 so all processes will need to close their device for the ls
403 before the release will proceed */
405 dlm_put_lockspace(ls);
406 error = dlm_release_lockspace(lockspace, force);
408 create_misc_device(ls, ls->ls_name);
413 /* Check the user's version matches ours */
/* Compare the version triple in a write request with the kernel's
   DLM_DEVICE_VERSION_*.  A mismatch is detected when the major numbers
   differ, or when they match and the request's minor is greater than the
   kernel's; the mismatch is reported with printk at KERN_DEBUG.
   The second major comparison is redundant: that operand is only evaluated
   when the majors are already known to be equal.
   NOTE(review): the printk arguments for the process and user version, and
   the return statements, are not visible in this listing. */
414 static int check_version(struct dlm_write_request *req)
416 if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
417 (req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
418 req->version[1] > DLM_DEVICE_VERSION_MINOR)) {
420 printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
421 "user (%d.%d.%d) kernel (%d.%d.%d)\n",
427 DLM_DEVICE_VERSION_MAJOR,
428 DLM_DEVICE_VERSION_MINOR,
429 DLM_DEVICE_VERSION_PATCH);
439 * dlm_user_request -> request_lock
440 * dlm_user_convert -> convert_lock
443 * dlm_user_unlock -> unlock_lock
444 * dlm_user_cancel -> cancel_lock
446 * device_create_lockspace
449 * device_remove_lockspace
450 * dlm_release_lockspace
453 /* a write to a lockspace device is a lock or unlock request, a write
454 to the control device is to create/remove a lockspace */
/* write() handler shared by the lockspace devices and the control device.
   Copies the request from userspace into a kmalloc'd buffer, checks its
   version, converts it with compat_input() when is64bit is zero, and then
   dispatches on kbuf->cmd to the lock, unlock, create-lockspace,
   remove-lockspace or purge handler.  All signals are blocked around the
   dispatch and the previous mask is restored afterwards.
   NOTE(review): the preprocessor conditionals around the two size checks,
   the switch/case skeleton, the proc NULL tests that select the log
   messages, the cleanup code and the return statements are not visible in
   this listing. */
456 static ssize_t device_write(struct file *file, const char __user *buf,
457 size_t count, loff_t *ppos)
459 struct dlm_user_proc *proc = file->private_data;
460 struct dlm_write_request *kbuf;
461 sigset_t tmpsig, allsigs;
/* Reject writes shorter than the request header (two alternative
   minimum sizes are visible). */
465 if (count < sizeof(struct dlm_write_request32))
467 if (count < sizeof(struct dlm_write_request))
471 kbuf = kmalloc(count, GFP_KERNEL);
475 if (copy_from_user(kbuf, buf, count)) {
480 if (check_version(kbuf)) {
/* Compat path: keep the raw buffer as k32buf and allocate a larger kbuf
   to hold the converted request.
   NOTE(review): no kfree() of k32buf and no NULL check of the second
   kmalloc() are visible in this listing - confirm both exist. */
486 if (!kbuf->is64bit) {
487 struct dlm_write_request32 *k32buf;
488 k32buf = (struct dlm_write_request32 *)kbuf;
489 kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
490 sizeof(struct dlm_write_request32)), GFP_KERNEL);
/* Remember that this process uses the compat layout; device_read()
   tests this flag when formatting results. */
495 set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
496 compat_input(kbuf, k32buf);
501 /* do we really need this? can a write happen after a close? */
502 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
503 test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
506 sigfillset(&allsigs);
507 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
515 log_print("no locking on control device");
518 error = device_user_lock(proc, &kbuf->i.lock);
521 case DLM_USER_UNLOCK:
523 log_print("no locking on control device");
526 error = device_user_unlock(proc, &kbuf->i.lock);
529 case DLM_USER_CREATE_LOCKSPACE:
531 log_print("create/remove only on control device");
534 error = device_create_lockspace(&kbuf->i.lspace);
537 case DLM_USER_REMOVE_LOCKSPACE:
539 log_print("create/remove only on control device");
542 error = device_remove_lockspace(&kbuf->i.lspace);
547 log_print("no locking on control device");
550 error = device_user_purge(proc, &kbuf->i.purge);
554 log_print("Unknown command passed to DLM device : %d\n",
559 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
566 /* Every process that opens the lockspace device has its own "proc" structure
567 hanging off the open file that's used to keep track of locks owned by the
568 process and asts that need to be delivered to the process. */
/* open() handler for a lockspace device.  Finds the lockspace by the
   inode's minor number, allocates a zeroed dlm_user_proc, initialises its
   lists, spinlocks and wait queue, and stores it in file->private_data.
   The lockspace reference taken by the lookup is kept on success and
   dropped in device_close().
   NOTE(review): the dlm_put_lockspace() below is presumably the kzalloc()
   failure path; its guard and the return statements are not visible in this
   listing. */
570 static int device_open(struct inode *inode, struct file *file)
572 struct dlm_user_proc *proc;
575 ls = dlm_find_lockspace_device(iminor(inode));
579 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
581 dlm_put_lockspace(ls);
585 proc->lockspace = ls->ls_local_handle;
586 INIT_LIST_HEAD(&proc->asts);
587 INIT_LIST_HEAD(&proc->locks);
588 INIT_LIST_HEAD(&proc->unlocking);
589 spin_lock_init(&proc->asts_spin);
590 spin_lock_init(&proc->locks_spin);
591 init_waitqueue_head(&proc->wait);
592 file->private_data = proc;
/* release() handler for a lockspace device.  With all signals blocked, it
   sets DLM_PROC_FLAGS_CLOSING on the proc, clears the process's locks with
   dlm_clear_proc_locks(), detaches the proc from the file, and drops two
   lockspace references: the one taken by the lookup here and the one held
   since device_open().  The previous signal mask is then restored.
   NOTE(review): the lookup-failure path, any freeing of proc and the return
   statement are not visible in this listing. */
597 static int device_close(struct inode *inode, struct file *file)
599 struct dlm_user_proc *proc = file->private_data;
601 sigset_t tmpsig, allsigs;
603 ls = dlm_find_lockspace_local(proc->lockspace);
607 sigfillset(&allsigs);
608 sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
/* Set before clearing locks; device_read() and device_write() test this
   flag. */
610 set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);
612 dlm_clear_proc_locks(ls, proc);
614 /* at this point no more lkb's should exist for this lockspace,
615 so there's no chance of dlm_user_add_ast() being called and
616 looking for lkb->ua->proc */
619 file->private_data = NULL;
621 dlm_put_lockspace(ls);
622 dlm_put_lockspace(ls); /* for the find in device_open() */
624 /* FIXME: AUTOFREE: if this ls is no longer used do
625 device_remove_lockspace() */
627 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
/* Build a dlm_lock_result describing one ast from ua and copy it to the
   user buffer buf of size count.  The lksb and user_lksb are taken from ua;
   for a blocking ast (type == AST_BAST) the bast address/parameter and bmode
   are reported, and the other visible assignments use the completion ast
   address/parameter.  If the lvb is flagged for update, is present and fits,
   it is copied to the user buffer directly after the result struct and
   lvb_offset records where.  The result is converted with compat_output()
   before the final copy_to_user().
   NOTE(review): the guards that select the 32-bit size and the
   compat_output() call (presumably the compat argument), the assignments to
   struct_len and result.length, the error paths and the return statement
   are not visible in this listing. */
633 static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
634 int bmode, char __user *buf, size_t count)
637 struct dlm_lock_result32 result32;
639 struct dlm_lock_result result;
645 memset(&result, 0, sizeof(struct dlm_lock_result));
646 memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
647 result.user_lksb = ua->user_lksb;
649 /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
650 in a conversion unless the conversion is successful. See code
651 in dlm_user_convert() for updating ua from ua_tmp. OpenVMS, though,
652 notes that a new blocking AST address and parameter are set even if
653 the conversion fails, so maybe we should just do that. */
655 if (type == AST_BAST) {
656 result.user_astaddr = ua->bastaddr;
657 result.user_astparam = ua->bastparam;
658 result.bast_mode = bmode;
660 result.user_astaddr = ua->castaddr;
661 result.user_astparam = ua->castparam;
/* len is the size of the result struct in whichever layout is in use; it
   is also the offset at which the lvb is placed. */
666 len = sizeof(struct dlm_lock_result32);
669 len = sizeof(struct dlm_lock_result);
672 /* copy lvb to userspace if there is one, it's been updated, and
673 the user buffer has space for it */
675 if (ua->update_user_lvb && ua->lksb.sb_lvbptr &&
676 count >= len + DLM_USER_LVB_LEN) {
677 if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
683 result.lvb_offset = len;
684 len += DLM_USER_LVB_LEN;
691 compat_output(&result, &result32);
692 resultptr = &result32;
696 if (copy_to_user(buf, resultptr, struct_len))
704 /* a read returns a single ast described in a struct dlm_lock_result */
/* read() handler for a lockspace device; each read delivers one ast.
   After checking the buffer size and the CLOSING flag, it takes
   proc->asts_spin.  If the ast list is empty, a non-blocking file gets an
   early exit; otherwise the caller sleeps interruptibly on proc->wait until
   an ast is queued or a signal is pending.  The first lkb on proc->asts is
   then examined: a pending completion ast is taken in preference to a
   blocking ast, the lkb is unlinked once no ast types remain, and the result
   is written with copy_result_to_user() using the proc's compat flag.
   NOTE(review): the last parameter, the preprocessor conditionals around
   the size checks, the loop and schedule() call of the wait, the
   assignments to type and removed, the final reference drop and all return
   statements are not visible in this listing. */
706 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
709 struct dlm_user_proc *proc = file->private_data;
711 struct dlm_user_args *ua;
712 DECLARE_WAITQUEUE(wait, current);
713 int error, type=0, bmode=0, removed = 0;
716 if (count < sizeof(struct dlm_lock_result32))
718 if (count < sizeof(struct dlm_lock_result))
722 /* do we really need this? can a read happen after a close? */
723 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
726 spin_lock(&proc->asts_spin);
727 if (list_empty(&proc->asts)) {
728 if (file->f_flags & O_NONBLOCK) {
729 spin_unlock(&proc->asts_spin);
733 add_wait_queue(&proc->wait, &wait);
/* The spinlock is released around the sleep and retaken afterwards. */
736 set_current_state(TASK_INTERRUPTIBLE);
737 if (list_empty(&proc->asts) && !signal_pending(current)) {
738 spin_unlock(&proc->asts_spin);
740 spin_lock(&proc->asts_spin);
743 set_current_state(TASK_RUNNING);
744 remove_wait_queue(&proc->wait, &wait);
746 if (signal_pending(current)) {
747 spin_unlock(&proc->asts_spin);
752 if (list_empty(&proc->asts)) {
753 spin_unlock(&proc->asts_spin);
757 /* there may be both completion and blocking asts to return for
758 the lkb, don't remove lkb from asts list unless no asts remain */
760 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
762 if (lkb->lkb_ast_type & AST_COMP) {
763 lkb->lkb_ast_type &= ~AST_COMP;
765 } else if (lkb->lkb_ast_type & AST_BAST) {
766 lkb->lkb_ast_type &= ~AST_BAST;
768 bmode = lkb->lkb_bastmode;
771 if (!lkb->lkb_ast_type) {
772 list_del(&lkb->lkb_astqueue);
775 spin_unlock(&proc->asts_spin);
777 ua = (struct dlm_user_args *)lkb->lkb_astparam;
778 error = copy_result_to_user(ua,
779 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
780 type, bmode, buf, count);
782 /* removes reference for the proc->asts lists added by
783 dlm_user_add_ast() and may result in the lkb being freed */
/* poll() handler for a lockspace device.  Registers the caller on
   proc->wait and reports the file as readable (POLLIN | POLLRDNORM) when
   proc->asts is non-empty, checked under proc->asts_spin.
   NOTE(review): the return value for the empty case is not visible in this
   listing. */
790 static unsigned int device_poll(struct file *file, poll_table *wait)
792 struct dlm_user_proc *proc = file->private_data;
794 poll_wait(file, &proc->wait, wait);
796 spin_lock(&proc->asts_spin);
797 if (!list_empty(&proc->asts)) {
798 spin_unlock(&proc->asts_spin);
799 return POLLIN | POLLRDNORM;
801 spin_unlock(&proc->asts_spin);
/* open() handler for the control device.  The control device has no
   per-open proc structure, so private_data is cleared; device_write()
   reads file->private_data as its proc pointer.
   NOTE(review): the return statement is not visible in this listing. */
805 static int ctl_device_open(struct inode *inode, struct file *file)
807 file->private_data = NULL;
/* release() handler for the control device.
   NOTE(review): the body is not visible in this listing. */
811 static int ctl_device_close(struct inode *inode, struct file *file)
/* File operations for per-lockspace devices, installed by
   create_misc_device().
   NOTE(review): only the release, write and owner entries are visible in
   this listing; the embedded line numbers skip 817, 819 and 821. */
816 static const struct file_operations device_fops = {
818 .release = device_close,
820 .write = device_write,
822 .owner = THIS_MODULE,
/* File operations for the control device, installed by dlm_user_init().
   It shares device_write() with the lockspace devices and has no read or
   poll entry among the visible lines. */
825 static const struct file_operations ctl_device_fops = {
826 .open = ctl_device_open,
827 .release = ctl_device_close,
828 .write = device_write,
829 .owner = THIS_MODULE,
/* Set up and register the "dlm-control" misc device with a dynamic minor
   and ctl_device_fops; a registration failure is logged.
   NOTE(review): the guard on the log message and the return statement are
   not visible in this listing. */
832 int dlm_user_init(void)
836 ctl_device.name = "dlm-control";
837 ctl_device.fops = &ctl_device_fops;
838 ctl_device.minor = MISC_DYNAMIC_MINOR;
840 error = misc_register(&ctl_device);
842 log_print("misc_register failed for control device");
/* Deregister the control device registered by dlm_user_init(). */
847 void dlm_user_exit(void)
849 misc_deregister(&ctl_device);