1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
6 * debug functionality for the dlm
8 * Copyright (C) 2004 Oracle. All rights reserved.
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
27 #include <linux/types.h>
28 #include <linux/slab.h>
29 #include <linux/highmem.h>
30 #include <linux/utsname.h>
31 #include <linux/sysctl.h>
32 #include <linux/spinlock.h>
33 #include <linux/debugfs.h>
35 #include "cluster/heartbeat.h"
36 #include "cluster/nodemanager.h"
37 #include "cluster/tcp.h"
40 #include "dlmcommon.h"
41 #include "dlmdomain.h"
44 #define MLOG_MASK_PREFIX ML_DLM
45 #include "cluster/masklog.h"
47 void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
49 mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
50 res->lockname.len, res->lockname.name,
51 res->owner, res->state);
52 spin_lock(&res->spinlock);
53 __dlm_print_one_lock_resource(res);
54 spin_unlock(&res->spinlock);
/*
 * Log the refmap of a lock resource as a single
 * "refmap nodes: [ ... ], inflight=N" line.
 * The caller must already hold res->spinlock.
 * NOTE(review): the loop header and the per-bit print are not visible in
 * this listing; presumably each set bit below O2NM_MAX_NODES is printed
 * between the brackets -- confirm.
 */
57 static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
/* Enforce the locking contract stated above. */
60 assert_spin_locked(&res->spinlock);
62 mlog(ML_NOTICE, " refmap nodes: [ ");
/* Find the next set bit at or after 'bit'; a result at or past
 * O2NM_MAX_NODES means there are no more set bits. */
65 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
66 if (bit >= O2NM_MAX_NODES)
/* Close the bracket opened by the mlog() above and append the inflight
 * lock count; a bare printk() is used rather than mlog(). */
71 printk("], inflight=%u\n", res->inflight_locks);
74 void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
76 struct list_head *iter2;
77 struct dlm_lock *lock;
79 assert_spin_locked(&res->spinlock);
81 mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n",
82 res->lockname.len, res->lockname.name,
83 res->owner, res->state);
84 mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n",
85 res->last_used, list_empty(&res->purge) ? "no" : "yes");
86 dlm_print_lockres_refmap(res);
87 mlog(ML_NOTICE, " granted queue: \n");
88 list_for_each(iter2, &res->granted) {
89 lock = list_entry(iter2, struct dlm_lock, list);
90 spin_lock(&lock->spinlock);
91 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
92 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
93 lock->ml.type, lock->ml.convert_type, lock->ml.node,
94 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
95 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
96 list_empty(&lock->ast_list) ? 'y' : 'n',
97 lock->ast_pending ? 'y' : 'n',
98 list_empty(&lock->bast_list) ? 'y' : 'n',
99 lock->bast_pending ? 'y' : 'n');
100 spin_unlock(&lock->spinlock);
102 mlog(ML_NOTICE, " converting queue: \n");
103 list_for_each(iter2, &res->converting) {
104 lock = list_entry(iter2, struct dlm_lock, list);
105 spin_lock(&lock->spinlock);
106 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
107 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
108 lock->ml.type, lock->ml.convert_type, lock->ml.node,
109 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
110 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
111 list_empty(&lock->ast_list) ? 'y' : 'n',
112 lock->ast_pending ? 'y' : 'n',
113 list_empty(&lock->bast_list) ? 'y' : 'n',
114 lock->bast_pending ? 'y' : 'n');
115 spin_unlock(&lock->spinlock);
117 mlog(ML_NOTICE, " blocked queue: \n");
118 list_for_each(iter2, &res->blocked) {
119 lock = list_entry(iter2, struct dlm_lock, list);
120 spin_lock(&lock->spinlock);
121 mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
122 "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
123 lock->ml.type, lock->ml.convert_type, lock->ml.node,
124 dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
125 dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
126 list_empty(&lock->ast_list) ? 'y' : 'n',
127 lock->ast_pending ? 'y' : 'n',
128 list_empty(&lock->bast_list) ? 'y' : 'n',
129 lock->bast_pending ? 'y' : 'n');
130 spin_unlock(&lock->spinlock);
134 void dlm_print_one_lock(struct dlm_lock *lockid)
136 dlm_print_one_lock_resource(lockid->lockres);
138 EXPORT_SYMBOL_GPL(dlm_print_one_lock);
141 void dlm_dump_lock_resources(struct dlm_ctxt *dlm)
143 struct dlm_lock_resource *res;
144 struct hlist_node *iter;
145 struct hlist_head *bucket;
148 mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n",
149 dlm->name, dlm->node_num, dlm->key);
150 if (!dlm || !dlm->name) {
151 mlog(ML_ERROR, "dlm=%p\n", dlm);
155 spin_lock(&dlm->spinlock);
156 for (i=0; i<DLM_HASH_BUCKETS; i++) {
157 bucket = dlm_lockres_hash(dlm, i);
158 hlist_for_each_entry(res, iter, bucket, hash_node)
159 dlm_print_one_lock_resource(res);
161 spin_unlock(&dlm->spinlock);
165 static const char *dlm_errnames[] = {
166 [DLM_NORMAL] = "DLM_NORMAL",
167 [DLM_GRANTED] = "DLM_GRANTED",
168 [DLM_DENIED] = "DLM_DENIED",
169 [DLM_DENIED_NOLOCKS] = "DLM_DENIED_NOLOCKS",
170 [DLM_WORKING] = "DLM_WORKING",
171 [DLM_BLOCKED] = "DLM_BLOCKED",
172 [DLM_BLOCKED_ORPHAN] = "DLM_BLOCKED_ORPHAN",
173 [DLM_DENIED_GRACE_PERIOD] = "DLM_DENIED_GRACE_PERIOD",
174 [DLM_SYSERR] = "DLM_SYSERR",
175 [DLM_NOSUPPORT] = "DLM_NOSUPPORT",
176 [DLM_CANCELGRANT] = "DLM_CANCELGRANT",
177 [DLM_IVLOCKID] = "DLM_IVLOCKID",
178 [DLM_SYNC] = "DLM_SYNC",
179 [DLM_BADTYPE] = "DLM_BADTYPE",
180 [DLM_BADRESOURCE] = "DLM_BADRESOURCE",
181 [DLM_MAXHANDLES] = "DLM_MAXHANDLES",
182 [DLM_NOCLINFO] = "DLM_NOCLINFO",
183 [DLM_NOLOCKMGR] = "DLM_NOLOCKMGR",
184 [DLM_NOPURGED] = "DLM_NOPURGED",
185 [DLM_BADARGS] = "DLM_BADARGS",
186 [DLM_VOID] = "DLM_VOID",
187 [DLM_NOTQUEUED] = "DLM_NOTQUEUED",
188 [DLM_IVBUFLEN] = "DLM_IVBUFLEN",
189 [DLM_CVTUNGRANT] = "DLM_CVTUNGRANT",
190 [DLM_BADPARAM] = "DLM_BADPARAM",
191 [DLM_VALNOTVALID] = "DLM_VALNOTVALID",
192 [DLM_REJECTED] = "DLM_REJECTED",
193 [DLM_ABORT] = "DLM_ABORT",
194 [DLM_CANCEL] = "DLM_CANCEL",
195 [DLM_IVRESHANDLE] = "DLM_IVRESHANDLE",
196 [DLM_DEADLOCK] = "DLM_DEADLOCK",
197 [DLM_DENIED_NOASTS] = "DLM_DENIED_NOASTS",
198 [DLM_FORWARD] = "DLM_FORWARD",
199 [DLM_TIMEOUT] = "DLM_TIMEOUT",
200 [DLM_IVGROUPID] = "DLM_IVGROUPID",
201 [DLM_VERS_CONFLICT] = "DLM_VERS_CONFLICT",
202 [DLM_BAD_DEVICE_PATH] = "DLM_BAD_DEVICE_PATH",
203 [DLM_NO_DEVICE_PERMISSION] = "DLM_NO_DEVICE_PERMISSION",
204 [DLM_NO_CONTROL_DEVICE ] = "DLM_NO_CONTROL_DEVICE ",
205 [DLM_RECOVERING] = "DLM_RECOVERING",
206 [DLM_MIGRATING] = "DLM_MIGRATING",
207 [DLM_MAXSTATS] = "DLM_MAXSTATS",
210 static const char *dlm_errmsgs[] = {
211 [DLM_NORMAL] = "request in progress",
212 [DLM_GRANTED] = "request granted",
213 [DLM_DENIED] = "request denied",
214 [DLM_DENIED_NOLOCKS] = "request denied, out of system resources",
215 [DLM_WORKING] = "async request in progress",
216 [DLM_BLOCKED] = "lock request blocked",
217 [DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock",
218 [DLM_DENIED_GRACE_PERIOD] = "topological change in progress",
219 [DLM_SYSERR] = "system error",
220 [DLM_NOSUPPORT] = "unsupported",
221 [DLM_CANCELGRANT] = "can't cancel convert: already granted",
222 [DLM_IVLOCKID] = "bad lockid",
223 [DLM_SYNC] = "synchronous request granted",
224 [DLM_BADTYPE] = "bad resource type",
225 [DLM_BADRESOURCE] = "bad resource handle",
226 [DLM_MAXHANDLES] = "no more resource handles",
227 [DLM_NOCLINFO] = "can't contact cluster manager",
228 [DLM_NOLOCKMGR] = "can't contact lock manager",
229 [DLM_NOPURGED] = "can't contact purge daemon",
230 [DLM_BADARGS] = "bad api args",
231 [DLM_VOID] = "no status",
232 [DLM_NOTQUEUED] = "NOQUEUE was specified and request failed",
233 [DLM_IVBUFLEN] = "invalid resource name length",
234 [DLM_CVTUNGRANT] = "attempted to convert ungranted lock",
235 [DLM_BADPARAM] = "invalid lock mode specified",
236 [DLM_VALNOTVALID] = "value block has been invalidated",
237 [DLM_REJECTED] = "request rejected, unrecognized client",
238 [DLM_ABORT] = "blocked lock request cancelled",
239 [DLM_CANCEL] = "conversion request cancelled",
240 [DLM_IVRESHANDLE] = "invalid resource handle",
241 [DLM_DEADLOCK] = "deadlock recovery refused this request",
242 [DLM_DENIED_NOASTS] = "failed to allocate AST",
243 [DLM_FORWARD] = "request must wait for primary's response",
244 [DLM_TIMEOUT] = "timeout value for lock has expired",
245 [DLM_IVGROUPID] = "invalid group specification",
246 [DLM_VERS_CONFLICT] = "version conflicts prevent request handling",
247 [DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong",
248 [DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device",
249 [DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ",
250 [DLM_RECOVERING] = "lock resource being recovered",
251 [DLM_MIGRATING] = "lock resource being migrated",
252 [DLM_MAXSTATS] = "invalid error number",
255 const char *dlm_errmsg(enum dlm_status err)
257 if (err >= DLM_MAXSTATS || err < 0)
258 return dlm_errmsgs[DLM_MAXSTATS];
259 return dlm_errmsgs[err];
261 EXPORT_SYMBOL_GPL(dlm_errmsg);
263 const char *dlm_errname(enum dlm_status err)
265 if (err >= DLM_MAXSTATS || err < 0)
266 return dlm_errnames[DLM_MAXSTATS];
267 return dlm_errnames[err];
269 EXPORT_SYMBOL_GPL(dlm_errname);
272 #ifdef CONFIG_DEBUG_FS
/* Top-level debugfs directory for the dlm; assigned in
 * dlm_create_debugfs_root() and removed in dlm_destroy_debugfs_root(). */
274 static struct dentry *dlm_debugfs_root = NULL;
/* Name of the top-level debugfs directory. */
276 #define DLM_DEBUGFS_DIR "o2dlm"
/* Name of the state file created by dlm_debug_init() in a domain's subroot. */
277 #define DLM_DEBUGFS_DLM_STATE "dlm_state"
279 /* begin - utils funcs */
/*
 * kref release callback for a struct dlm_debug_ctxt; dlm_debug_put() passes
 * it to kref_put().
 * NOTE(review): the rest of the body is not visible in this listing;
 * presumably it frees dc -- confirm.
 */
280 static void dlm_debug_free(struct kref *kref)
282 struct dlm_debug_ctxt *dc;
/* Recover the enclosing context from its embedded debug_refcnt. */
284 dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt);
/*
 * Drop one reference on a debug context; dlm_debug_free() runs when the
 * last reference goes away.
 * NOTE(review): any guard in front of the kref_put() (e.g. a NULL check on
 * dc) is not visible in this listing -- confirm before passing NULL.
 */
289 void dlm_debug_put(struct dlm_debug_ctxt *dc)
292 kref_put(&dc->debug_refcnt, dlm_debug_free);
295 static void dlm_debug_get(struct dlm_debug_ctxt *dc)
297 kref_get(&dc->debug_refcnt);
/*
 * Append the index of every set bit in 'nodemap' (below 'maxnodes') to
 * 'buf' as "%d " tokens.
 * NOTE(review): the tail of the parameter list, the local declarations and
 * the return statement are not visible in this listing; callers add the
 * result to their running offset, so it presumably returns 'out' -- confirm.
 * NOTE(review): snprintf() reports the length it would have written, so
 * 'out' can move past 'len' once the buffer fills; scnprintf() would cap
 * it -- confirm buffer sizing.
 */
300 static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
/* Search resumes one past the previous hit; a result >= maxnodes ends it. */
306 while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes)
307 out += snprintf(buf + out, len - out, "%d ", i);
/*
 * Allocate a struct debug_buffer (zeroed) together with its db->buf backing
 * store of db->len bytes, both with GFP_KERNEL.
 * NOTE(review): the assignment of db->len, the allocation-failure handling
 * and the return statements are not visible in this listing -- confirm.
 */
312 static struct debug_buffer *debug_buffer_allocate(void)
314 struct debug_buffer *db = NULL;
316 db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL);
/* db->buf is not zeroed (kmalloc, not kzalloc). */
321 db->buf = kmalloc(db->len, GFP_KERNEL);
331 static ssize_t debug_buffer_read(struct file *file, char __user *buf,
332 size_t nbytes, loff_t *ppos)
334 struct debug_buffer *db = file->private_data;
336 return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len);
/*
 * llseek() handler for files backed by a debug_buffer.
 * NOTE(review): the declaration of 'new', the dispatch on 'whence' and the
 * error return are not visible in this listing; only the relative-seek
 * computation and the final bounds check can be seen -- confirm the rest.
 */
339 static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence)
341 struct debug_buffer *db = file->private_data;
/* Offset relative to the current file position. */
349 new = file->f_pos + off;
/* Reject positions before the start or beyond the end of the buffer;
 * seeking exactly to db->len is allowed. */
353 if (new < 0 || new > db->len)
/* Commit the new position and return it. */
356 return (file->f_pos = new);
/*
 * release() handler for files whose private_data is a debug_buffer.
 * NOTE(review): the body beyond fetching db is not visible in this listing;
 * presumably it frees the buffer and returns 0 -- confirm.
 */
359 static int debug_buffer_release(struct inode *inode, struct file *file)
361 struct debug_buffer *db = (struct debug_buffer *)file->private_data;
369 /* end - util funcs */
371 /* begin - debug state funcs */
/*
 * Format a text snapshot of a dlm domain into db->buf.
 *
 * Sections, in order: domain name and key; dlm thread pid, node number and
 * domain state; join count and joining node; domain map; live map; resource
 * counts; emptiness of the dirty/purge/pending-AST/pending-BAST/master
 * lists; purge count and reference count; dead node; recovery thread pid,
 * new master and recovery state; recovery map; per-node recovery state.
 *
 * The resource counters are sampled with atomic_read() before dlm->spinlock
 * is taken; everything else is read with the spinlock held.
 *
 * NOTE(review): several lines (local declarations, some case labels, the
 * return statement) are not visible in this listing.  debug_state_open()
 * stores the result in db->len, so it presumably returns 'out' -- confirm.
 * NOTE(review): snprintf() reports the length it would have written, so
 * 'out' can move past db->len if the output outgrows the buffer;
 * scnprintf() would cap it -- confirm buffer sizing.
 */
372 static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
375 struct dlm_reco_node_data *node;
377 int lres, rres, ures, tres;
/* Sample the resource counters; the total is derived from the samples. */
379 lres = atomic_read(&dlm->local_resources);
380 rres = atomic_read(&dlm->remote_resources);
381 ures = atomic_read(&dlm->unknown_resources);
382 tres = lres + rres + ures;
384 spin_lock(&dlm->spinlock);
/* Map the domain state to a printable name. */
386 switch (dlm->dlm_state) {
388 state = "NEW"; break;
389 case DLM_CTXT_JOINED:
390 state = "JOINED"; break;
391 case DLM_CTXT_IN_SHUTDOWN:
392 state = "SHUTDOWN"; break;
393 case DLM_CTXT_LEAVING:
394 state = "LEAVING"; break;
396 state = "UNKNOWN"; break;
399 /* Domain: xxxxxxxxxx Key: 0xdfbac769 */
400 out += snprintf(db->buf + out, db->len - out,
401 "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key);
403 /* Thread Pid: xxx Node: xxx State: xxxxx */
404 out += snprintf(db->buf + out, db->len - out,
405 "Thread Pid: %d Node: %d State: %s\n",
406 dlm->dlm_thread_task->pid, dlm->node_num, state);
408 /* Number of Joins: xxx Joining Node: xxx */
409 out += snprintf(db->buf + out, db->len - out,
410 "Number of Joins: %d Joining Node: %d\n",
411 dlm->num_joins, dlm->joining_node);
413 /* Domain Map: xx xx xx */
414 out += snprintf(db->buf + out, db->len - out, "Domain Map: ");
415 out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
416 db->buf + out, db->len - out);
417 out += snprintf(db->buf + out, db->len - out, "\n");
419 /* Live Map: xx xx xx */
420 out += snprintf(db->buf + out, db->len - out, "Live Map: ");
421 out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
422 db->buf + out, db->len - out);
423 out += snprintf(db->buf + out, db->len - out, "\n");
425 /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */
426 out += snprintf(db->buf + out, db->len - out,
427 "Mastered Resources Total: %d Locally: %d "
428 "Remotely: %d Unknown: %d\n",
429 tres, lres, rres, ures);
431 /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
432 out += snprintf(db->buf + out, db->len - out,
433 "Lists: Dirty=%s Purge=%s PendingASTs=%s "
434 "PendingBASTs=%s Master=%s\n",
435 (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
436 (list_empty(&dlm->purge_list) ? "Empty" : "InUse"),
437 (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"),
438 (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"),
439 (list_empty(&dlm->master_list) ? "Empty" : "InUse"));
441 /* Purge Count: xxx Refs: xxx */
442 out += snprintf(db->buf + out, db->len - out,
443 "Purge Count: %d Refs: %d\n", dlm->purge_count,
444 atomic_read(&dlm->dlm_refs.refcount));
447 out += snprintf(db->buf + out, db->len - out,
448 "Dead Node: %d\n", dlm->reco.dead_node);
/* From here on 'state' is reused for the recovery state name. */
450 /* What about DLM_RECO_STATE_FINALIZE? */
451 if (dlm->reco.state == DLM_RECO_STATE_ACTIVE)
456 /* Recovery Pid: xxxx Master: xxx State: xxxx */
457 out += snprintf(db->buf + out, db->len - out,
458 "Recovery Pid: %d Master: %d State: %s\n",
459 dlm->dlm_reco_thread_task->pid,
460 dlm->reco.new_master, state);
462 /* Recovery Map: xx xx */
463 out += snprintf(db->buf + out, db->len - out, "Recovery Map: ");
464 out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
465 db->buf + out, db->len - out);
466 out += snprintf(db->buf + out, db->len - out, "\n");
468 /* Recovery Node State: */
469 out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n");
/* One "<node_num> - <state>" line per entry on dlm->reco.node_data. */
470 list_for_each_entry(node, &dlm->reco.node_data, list) {
471 switch (node->state) {
472 case DLM_RECO_NODE_DATA_INIT:
475 case DLM_RECO_NODE_DATA_REQUESTING:
476 state = "REQUESTING";
478 case DLM_RECO_NODE_DATA_DEAD:
481 case DLM_RECO_NODE_DATA_RECEIVING:
484 case DLM_RECO_NODE_DATA_REQUESTED:
487 case DLM_RECO_NODE_DATA_DONE:
490 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
491 state = "FINALIZE-SENT";
497 out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n",
498 node->node_num, state);
501 spin_unlock(&dlm->spinlock);
/*
 * open() handler for the dlm_state file: allocate a debug_buffer, fill it
 * from the dlm_ctxt stored in inode->i_private, and attach it to the file
 * for the read/llseek/release handlers.
 * NOTE(review): the allocation-failure path and the return statements are
 * not visible in this listing -- confirm.
 */
506 static int debug_state_open(struct inode *inode, struct file *file)
508 struct dlm_ctxt *dlm = inode->i_private;
509 struct debug_buffer *db = NULL;
511 db = debug_buffer_allocate();
/* db->len is overwritten with the value debug_state_print() returns. */
515 db->len = debug_state_print(dlm, db);
517 file->private_data = db;
524 static struct file_operations debug_state_fops = {
525 .open = debug_state_open,
526 .release = debug_buffer_release,
527 .read = debug_buffer_read,
528 .llseek = debug_buffer_llseek,
530 /* end - debug state funcs */
532 /* files in subroot */
/*
 * Create the debugfs files for one dlm domain inside its subroot directory
 * (currently just the dlm_state file, backed by debug_state_fops with the
 * dlm_ctxt as private data).
 * NOTE(review): the file mode argument, the error handling inside the
 * failure branch and the return statements are not visible in this
 * listing; dlm_debug_shutdown() appears to be the cleanup on failure --
 * confirm.
 */
533 int dlm_debug_init(struct dlm_ctxt *dlm)
535 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
537 /* for dumping dlm_ctxt */
538 dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE,
540 dlm->dlm_debugfs_subroot,
541 dlm, &debug_state_fops);
542 if (!dc->debug_state_dentry) {
551 dlm_debug_shutdown(dlm);
/*
 * Tear down the debugfs files of one dlm domain: remove the dlm_state
 * dentry if it was created.
 * NOTE(review): lines around the removal are not visible in this listing
 * (possibly a guard on dc and a reference drop) -- confirm.
 */
555 void dlm_debug_shutdown(struct dlm_ctxt *dlm)
557 struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
560 if (dc->debug_state_dentry)
561 debugfs_remove(dc->debug_state_dentry);
566 /* subroot - domain dir */
/*
 * Create the per-domain debugfs directory, named after dlm->name, and
 * allocate the domain's zeroed dlm_debug_ctxt with its reference count
 * initialized.
 * NOTE(review): the parent directory argument, the allocation flags, the
 * bodies of both failure branches and the return statements are not
 * visible in this listing; dlm_destroy_debugfs_subroot() appears to be the
 * cleanup on failure -- confirm.
 */
567 int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
569 dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
571 if (!dlm->dlm_debugfs_subroot) {
576 dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
578 if (!dlm->dlm_debug_ctxt) {
/* The context starts out holding a single reference. */
582 kref_init(&dlm->dlm_debug_ctxt->debug_refcnt);
586 dlm_destroy_debugfs_subroot(dlm);
590 void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
592 if (dlm->dlm_debugfs_subroot)
593 debugfs_remove(dlm->dlm_debugfs_subroot);
/*
 * Create the top-level "o2dlm" debugfs directory (DLM_DEBUGFS_DIR) at the
 * debugfs root and remember it in dlm_debugfs_root.
 * NOTE(review): the failure branch body and the return statements are not
 * visible in this listing -- confirm the returned error code.
 */
597 int dlm_create_debugfs_root(void)
/* A NULL parent places the directory at the top of debugfs. */
599 dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
600 if (!dlm_debugfs_root) {
607 void dlm_destroy_debugfs_root(void)
609 if (dlm_debugfs_root)
610 debugfs_remove(dlm_debugfs_root);
612 #endif /* CONFIG_DEBUG_FS */