X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=fs%2Fdlm%2Fmember.c;h=26133f05ae3a7690911815358ad1f350343564b6;hb=5ec7f8c7d14a3ea6bf920b3350f5c5d3527cb837;hp=d09977528f692c7e133a9d7702cae88411536e75;hpb=ef1c5339d9413ab57c9b2aa768f3c67485d11acb;p=linux-2.6-omap-h63xx.git diff --git a/fs/dlm/member.c b/fs/dlm/member.c index d09977528f6..26133f05ae3 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -18,10 +18,6 @@ #include "rcom.h" #include "config.h" -/* - * Following called by dlm_recoverd thread - */ - static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { struct dlm_member *memb = NULL; @@ -74,7 +70,7 @@ static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb) ls->ls_num_nodes--; } -static int dlm_is_member(struct dlm_ls *ls, int nodeid) +int dlm_is_member(struct dlm_ls *ls, int nodeid) { struct dlm_member *memb; @@ -214,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) } } + /* Add an entry to ls_nodes_gone for members that were removed and + then added again, so that previous state for these nodes will be + cleared during recovery. */ + + for (i = 0; i < rv->new_count; i++) { + if (!dlm_is_member(ls, rv->new[i])) + continue; + log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); + + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + if (!memb) + return -ENOMEM; + memb->nodeid = rv->new[i]; + list_add_tail(&memb->list, &ls->ls_nodes_gone); + neg++; + } + /* add new members to ls_nodes */ for (i = 0; i < rv->node_count; i++) { @@ -250,18 +263,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) return error; } -/* - * Following called from lockspace.c - */ +/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before + dlm_ls_start() is called on any of them to start the new recovery. */ int dlm_ls_stop(struct dlm_ls *ls) { int new; /* - * A stop cancels any recovery that's in progress (see RECOVERY_STOP, - * dlm_recovery_stopped()) and prevents any new locks from being - * processed (see RUNNING, dlm_locking_stopped()). + * Prevent dlm_recv from being in the middle of something when we do + * the stop. This includes ensuring dlm_recv isn't processing a + * recovery message (rcom), while dlm_recoverd is aborting and + * resetting things from an in-progress recovery. i.e. we want + * dlm_recoverd to abort its recovery without worrying about dlm_recv + * processing an rcom at the same time. Stopping dlm_recv also makes + * it easy for dlm_receive_message() to check locking stopped and add a + * message to the requestqueue without races. + */ + + down_write(&ls->ls_recv_active); + + /* + * Abort any recovery that's in progress (see RECOVERY_STOP, + * dlm_recovery_stopped()) and tell any other threads running in the + * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). */ spin_lock(&ls->ls_recover_lock); @@ -270,9 +295,15 @@ int dlm_ls_stop(struct dlm_ls *ls) ls->ls_recover_seq++; spin_unlock(&ls->ls_recover_lock); + /* + * Let dlm_recv run again, now any normal messages will be saved on the + * requestqueue for later. + */ + + up_write(&ls->ls_recv_active); + /* * This in_recovery lock does two things: - * * 1) Keeps this function from returning until all threads are out * of locking routines and locking is truely stopped. * 2) Keeps any new requests from being processed until it's unlocked @@ -284,9 +315,8 @@ int dlm_ls_stop(struct dlm_ls *ls) /* * The recoverd suspend/resume makes sure that dlm_recoverd (if - * running) has noticed the clearing of RUNNING above and quit - * processing the previous recovery. This will be true for all nodes - * before any nodes start the new recovery. + * running) has noticed RECOVERY_STOP above and quit processing the + * previous recovery. */ dlm_recoverd_suspend(ls); @@ -301,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL; - int error, count; + int *ids = NULL, *new = NULL; + int error, ids_count = 0, new_count = 0; rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - error = count = dlm_nodeid_list(ls->ls_name, &ids); - if (error <= 0) + error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, + &new, &new_count); + if (error < 0) goto fail; spin_lock(&ls->ls_recover_lock); @@ -324,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) } rv->nodeids = ids; - rv->node_count = count; + rv->node_count = ids_count; + rv->new = new; + rv->new_count = new_count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; spin_unlock(&ls->ls_recover_lock); if (rv_old) { + log_error(ls, "unused recovery %llx %d", + (unsigned long long)rv_old->seq, rv_old->node_count); kfree(rv_old->nodeids); + kfree(rv_old->new); kfree(rv_old); } @@ -341,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); kfree(ids); + kfree(new); return error; }