]> pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/misc/sgi-xp/xpc_main.c
Merge branch 'hotfixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...
[linux-2.6-omap-h63xx.git] / drivers / misc / sgi-xp / xpc_main.c
index 579b01ff82d4ed73580365334f1790b29e70805d..46325fc848114cb5c87658f3095dff0371b43370 100644 (file)
  *
  *     Caveats:
  *
- *       . We currently have no way to determine which nasid an IPI came
- *         from. Thus, xpc_IPI_send() does a remote AMO write followed by
- *         an IPI. The AMO indicates where data is to be pulled from, so
- *         after the IPI arrives, the remote partition checks the AMO word.
- *         The IPI can actually arrive before the AMO however, so other code
- *         must periodically check for this case. Also, remote AMO operations
- *         do not reliably time out. Thus we do a remote PIO read solely to
- *         know whether the remote partition is down and whether we should
- *         stop sending IPIs to it. This remote PIO read operation is set up
- *         in a special nofault region so SAL knows to ignore (and cleanup)
- *         any errors due to the remote AMO write, PIO read, and/or PIO
- *         write operations.
+ *       . Currently on sn2, we have no way to determine which nasid an IRQ
+ *         came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
+ *         followed by an IPI. The amo indicates where data is to be pulled
+ *         from, so after the IPI arrives, the remote partition checks the amo
+ *         word. The IPI can actually arrive before the amo however, so other
+ *         code must periodically check for this case. Also, remote amo
+ *         operations do not reliably time out. Thus we do a remote PIO read
+ *         solely to know whether the remote partition is down and whether we
+ *         should stop sending IPIs to it. This remote PIO read operation is
+ *         set up in a special nofault region so SAL knows to ignore (and
+ *         cleanup) any errors due to the remote amo write, PIO read, and/or
+ *         PIO write operations.
  *
  *         If/when new hardware solves this IPI problem, we should abandon
  *         the current approach.
  *
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
+#include <linux/sysctl.h>
+#include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/reboot.h>
-#include <linux/completion.h>
 #include <linux/kdebug.h>
 #include <linux/kthread.h>
-#include <linux/uaccess.h>
-#include <asm/sn/intr.h>
-#include <asm/sn/sn_sal.h>
 #include "xpc.h"
 
 /* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -89,9 +83,9 @@ static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
 static int xpc_hb_check_min_interval = 10;
 static int xpc_hb_check_max_interval = 120;
 
-int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
-static int xpc_disengage_request_min_timelimit;        /* = 0 */
-static int xpc_disengage_request_max_timelimit = 120;
+int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
+static int xpc_disengage_min_timelimit;        /* = 0 */
+static int xpc_disengage_max_timelimit = 120;
 
 static ctl_table xpc_sys_xpc_hb_dir[] = {
        {
@@ -124,14 +118,14 @@ static ctl_table xpc_sys_xpc_dir[] = {
         .child = xpc_sys_xpc_hb_dir},
        {
         .ctl_name = CTL_UNNUMBERED,
-        .procname = "disengage_request_timelimit",
-        .data = &xpc_disengage_request_timelimit,
+        .procname = "disengage_timelimit",
+        .data = &xpc_disengage_timelimit,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = &proc_dointvec_minmax,
         .strategy = &sysctl_intvec,
-        .extra1 = &xpc_disengage_request_min_timelimit,
-        .extra2 = &xpc_disengage_request_max_timelimit},
+        .extra1 = &xpc_disengage_min_timelimit,
+        .extra2 = &xpc_disengage_max_timelimit},
        {}
 };
 static ctl_table xpc_sys_dir[] = {
@@ -144,16 +138,19 @@ static ctl_table xpc_sys_dir[] = {
 };
 static struct ctl_table_header *xpc_sysctl;
 
-/* non-zero if any remote partition disengage request was timed out */
-int xpc_disengage_request_timedout;
+/* non-zero if any remote partition disengage was timed out */
+int xpc_disengage_timedout;
 
-/* #of IRQs received */
-static atomic_t xpc_act_IRQ_rcvd;
+/* #of activate IRQs received and not yet processed */
+int xpc_activate_IRQ_rcvd;
+DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
 
 /* IRQ handler notifies this wait queue on receipt of an IRQ */
-static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
+DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
+static struct timer_list xpc_hb_timer;
+void *xpc_heartbeating_to_mask;
 
 /* notification that the xpc_hb_checker thread has exited */
 static DECLARE_COMPLETION(xpc_hb_checker_exited);
@@ -161,8 +158,6 @@ static DECLARE_COMPLETION(xpc_hb_checker_exited);
 /* notification that the xpc_discovery thread has exited */
 static DECLARE_COMPLETION(xpc_discovery_exited);
 
-static struct timer_list xpc_hb_timer;
-
 static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 
 static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
@@ -175,31 +170,76 @@ static struct notifier_block xpc_die_notifier = {
        .notifier_call = xpc_system_die,
 };
 
+int (*xpc_setup_partitions_sn) (void);
+enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
+                                                 unsigned long *rp_pa,
+                                                 size_t *len);
+int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
+void (*xpc_heartbeat_init) (void);
+void (*xpc_heartbeat_exit) (void);
+void (*xpc_increment_heartbeat) (void);
+void (*xpc_offline_heartbeat) (void);
+void (*xpc_online_heartbeat) (void);
+enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
+
+enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
+void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
+u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
+enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
+void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
+void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
+int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
+void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
+
+void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
+                                         unsigned long remote_rp_pa,
+                                         int nasid);
+void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
+void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
+void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
+
+void (*xpc_process_activate_IRQ_rcvd) (void);
+enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
+void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
+
+void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
+int (*xpc_partition_engaged) (short partid);
+int (*xpc_any_partition_engaged) (void);
+void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
+void (*xpc_assume_partition_disengaged) (short partid);
+
+void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
+                                    unsigned long *irq_flags);
+void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
+                                  unsigned long *irq_flags);
+void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
+                                   unsigned long *irq_flags);
+void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
+                                 unsigned long *irq_flags);
+
+void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
+                                    unsigned long msgqueue_pa);
+
+enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
+                                   void *payload, u16 payload_size,
+                                   u8 notify_type, xpc_notify_func func,
+                                   void *key);
+void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
+
 /*
- * Timer function to enforce the timelimit on the partition disengage request.
+ * Timer function to enforce the timelimit on the partition disengage.
  */
 static void
-xpc_timeout_partition_disengage_request(unsigned long data)
+xpc_timeout_partition_disengage(unsigned long data)
 {
        struct xpc_partition *part = (struct xpc_partition *)data;
 
-       DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
+       DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
 
        (void)xpc_partition_disengaged(part);
 
-       DBUG_ON(part->disengage_request_timeout != 0);
-       DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
-}
-
-/*
- * Notify the heartbeat check thread that an IRQ has been received.
- */
-static irqreturn_t
-xpc_act_IRQ_handler(int irq, void *dev_id)
-{
-       atomic_inc(&xpc_act_IRQ_rcvd);
-       wake_up_interruptible(&xpc_act_IRQ_wq);
-       return IRQ_HANDLED;
+       DBUG_ON(part->disengage_timeout != 0);
+       DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
 }
 
 /*
@@ -210,15 +250,63 @@ xpc_act_IRQ_handler(int irq, void *dev_id)
 static void
 xpc_hb_beater(unsigned long dummy)
 {
-       xpc_vars->heartbeat++;
+       xpc_increment_heartbeat();
 
-       if (time_after_eq(jiffies, xpc_hb_check_timeout))
-               wake_up_interruptible(&xpc_act_IRQ_wq);
+       if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
+               wake_up_interruptible(&xpc_activate_IRQ_wq);
 
        xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
        add_timer(&xpc_hb_timer);
 }
 
+static void
+xpc_start_hb_beater(void)
+{
+       xpc_heartbeat_init();
+       init_timer(&xpc_hb_timer);
+       xpc_hb_timer.function = xpc_hb_beater;
+       xpc_hb_beater(0);
+}
+
+static void
+xpc_stop_hb_beater(void)
+{
+       del_timer_sync(&xpc_hb_timer);
+       xpc_heartbeat_exit();
+}
+
+/*
+ * At periodic intervals, scan through all active partitions and ensure
+ * their heartbeat is still active.  If not, the partition is deactivated.
+ */
+static void
+xpc_check_remote_hb(void)
+{
+       struct xpc_partition *part;
+       short partid;
+       enum xp_retval ret;
+
+       for (partid = 0; partid < xp_max_npartitions; partid++) {
+
+               if (xpc_exiting)
+                       break;
+
+               if (partid == xp_partition_id)
+                       continue;
+
+               part = &xpc_partitions[partid];
+
+               if (part->act_state == XPC_P_AS_INACTIVE ||
+                   part->act_state == XPC_P_AS_DEACTIVATING) {
+                       continue;
+               }
+
+               ret = xpc_get_remote_heartbeat(part);
+               if (ret != xpSuccess)
+                       XPC_DEACTIVATE_PARTITION(part, ret);
+       }
+}
+
 /*
  * This thread is responsible for nearly all of the partition
  * activation/deactivation.
@@ -226,67 +314,57 @@ xpc_hb_beater(unsigned long dummy)
 static int
 xpc_hb_checker(void *ignore)
 {
-       int last_IRQ_count = 0;
-       int new_IRQ_count;
        int force_IRQ = 0;
-       cpumask_of_cpu_ptr(cpumask, XPC_HB_CHECK_CPU);
 
        /* this thread was marked active by xpc_hb_init() */
 
-       set_cpus_allowed_ptr(current, cpumask);
+       set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
 
        /* set our heartbeating to other partitions into motion */
        xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
-       xpc_hb_beater(0);
+       xpc_start_hb_beater();
 
        while (!xpc_exiting) {
 
                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
                        "been received\n",
                        (int)(xpc_hb_check_timeout - jiffies),
-                       atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
+                       xpc_activate_IRQ_rcvd);
 
                /* checking of remote heartbeats is skewed by IRQ handling */
-               if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
+               if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
+                       xpc_hb_check_timeout = jiffies +
+                           (xpc_hb_check_interval * HZ);
+
                        dev_dbg(xpc_part, "checking remote heartbeats\n");
                        xpc_check_remote_hb();
 
                        /*
-                        * We need to periodically recheck to ensure no
-                        * IPI/AMO pairs have been missed.  That check
-                        * must always reset xpc_hb_check_timeout.
+                        * On sn2 we need to periodically recheck to ensure no
+                        * IRQ/amo pairs have been missed.
                         */
-                       force_IRQ = 1;
+                       if (is_shub())
+                               force_IRQ = 1;
                }
 
                /* check for outstanding IRQs */
-               new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
-               if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
+               if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
                        force_IRQ = 0;
-
-                       dev_dbg(xpc_part, "found an IRQ to process; will be "
-                               "resetting xpc_hb_check_timeout\n");
-
-                       last_IRQ_count += xpc_identify_act_IRQ_sender();
-                       if (last_IRQ_count < new_IRQ_count) {
-                               /* retry once to help avoid missing AMO */
-                               (void)xpc_identify_act_IRQ_sender();
-                       }
-                       last_IRQ_count = new_IRQ_count;
-
-                       xpc_hb_check_timeout = jiffies +
-                           (xpc_hb_check_interval * HZ);
+                       dev_dbg(xpc_part, "processing activate IRQs "
+                               "received\n");
+                       xpc_process_activate_IRQ_rcvd();
                }
 
                /* wait for IRQ or timeout */
-               (void)wait_event_interruptible(xpc_act_IRQ_wq,
-                                              (last_IRQ_count <
-                                               atomic_read(&xpc_act_IRQ_rcvd)
-                                               || time_after_eq(jiffies,
-                                                       xpc_hb_check_timeout) ||
+               (void)wait_event_interruptible(xpc_activate_IRQ_wq,
+                                              (time_is_before_eq_jiffies(
+                                               xpc_hb_check_timeout) ||
+                                               xpc_activate_IRQ_rcvd > 0 ||
                                                xpc_exiting));
        }
 
+       xpc_stop_hb_beater();
+
        dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 
        /* mark this thread as having exited */
@@ -311,38 +389,9 @@ xpc_initiate_discovery(void *ignore)
        return 0;
 }
 
-/*
- * Establish first contact with the remote partititon. This involves pulling
- * the XPC per partition variables from the remote partition and waiting for
- * the remote partition to pull ours.
- */
-static enum xp_retval
-xpc_make_first_contact(struct xpc_partition *part)
-{
-       enum xp_retval ret;
-
-       while ((ret = xpc_pull_remote_vars_part(part)) != xpSuccess) {
-               if (ret != xpRetry) {
-                       XPC_DEACTIVATE_PARTITION(part, ret);
-                       return ret;
-               }
-
-               dev_dbg(xpc_chan, "waiting to make first contact with "
-                       "partition %d\n", XPC_PARTID(part));
-
-               /* wait a 1/4 of a second or so */
-               (void)msleep_interruptible(250);
-
-               if (part->act_state == XPC_P_DEACTIVATING)
-                       return part->reason;
-       }
-
-       return xpc_mark_partition_active(part);
-}
-
 /*
  * The first kthread assigned to a newly activated partition is the one
- * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
+ * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
  * that kthread until the partition is brought down, at which time that kthread
  * returns back to XPC HB. (The return of that kthread will signify to XPC HB
  * that XPC has dismantled all communication infrastructure for the associated
@@ -355,11 +404,11 @@ xpc_make_first_contact(struct xpc_partition *part)
 static void
 xpc_channel_mgr(struct xpc_partition *part)
 {
-       while (part->act_state != XPC_P_DEACTIVATING ||
+       while (part->act_state != XPC_P_AS_DEACTIVATING ||
               atomic_read(&part->nchannels_active) > 0 ||
               !xpc_partition_disengaged(part)) {
 
-               xpc_process_channel_activity(part);
+               xpc_process_sent_chctl_flags(part);
 
                /*
                 * Wait until we've been requested to activate kthreads or
@@ -377,8 +426,8 @@ xpc_channel_mgr(struct xpc_partition *part)
                atomic_dec(&part->channel_mgr_requests);
                (void)wait_event_interruptible(part->channel_mgr_wq,
                                (atomic_read(&part->channel_mgr_requests) > 0 ||
-                                part->local_IPI_amo != 0 ||
-                                (part->act_state == XPC_P_DEACTIVATING &&
+                                part->chctl.all_flags != 0 ||
+                                (part->act_state == XPC_P_AS_DEACTIVATING &&
                                 atomic_read(&part->nchannels_active) == 0 &&
                                 xpc_partition_disengaged(part))));
                atomic_set(&part->channel_mgr_requests, 1);
@@ -386,47 +435,163 @@ xpc_channel_mgr(struct xpc_partition *part)
 }
 
 /*
- * When XPC HB determines that a partition has come up, it will create a new
- * kthread and that kthread will call this function to attempt to set up the
- * basic infrastructure used for Cross Partition Communication with the newly
- * upped partition.
- *
- * The kthread that was created by XPC HB and which setup the XPC
- * infrastructure will remain assigned to the partition until the partition
- * goes down. At which time the kthread will teardown the XPC infrastructure
- * and then exit.
- *
- * XPC HB will put the remote partition's XPC per partition specific variables
- * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
- * calling xpc_partition_up().
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
  */
-static void
-xpc_partition_up(struct xpc_partition *part)
+void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+       /* see if kzalloc will give us cacheline aligned memory by default */
+       *base = kzalloc(size, flags);
+       if (*base == NULL)
+               return NULL;
+
+       if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
+               return *base;
+
+       kfree(*base);
+
+       /* nope, we'll have to do it ourselves */
+       *base = kzalloc(size + L1_CACHE_BYTES, flags);
+       if (*base == NULL)
+               return NULL;
+
+       return (void *)L1_CACHE_ALIGN((u64)*base);
+}
+
+/*
+ * Setup the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static enum xp_retval
+xpc_setup_ch_structures(struct xpc_partition *part)
 {
+       enum xp_retval ret;
+       int ch_number;
+       struct xpc_channel *ch;
+       short partid = XPC_PARTID(part);
+
+       /*
+        * Allocate all of the channel structures as a contiguous chunk of
+        * memory.
+        */
        DBUG_ON(part->channels != NULL);
+       part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
+                                GFP_KERNEL);
+       if (part->channels == NULL) {
+               dev_err(xpc_chan, "can't get memory for channels\n");
+               return xpNoMemory;
+       }
 
-       dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
+       /* allocate the remote open and close args */
 
-       if (xpc_setup_infrastructure(part) != xpSuccess)
-               return;
+       part->remote_openclose_args =
+           xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
+                                         GFP_KERNEL, &part->
+                                         remote_openclose_args_base);
+       if (part->remote_openclose_args == NULL) {
+               dev_err(xpc_chan, "can't get memory for remote connect args\n");
+               ret = xpNoMemory;
+               goto out_1;
+       }
+
+       part->chctl.all_flags = 0;
+       spin_lock_init(&part->chctl_lock);
+
+       atomic_set(&part->channel_mgr_requests, 1);
+       init_waitqueue_head(&part->channel_mgr_wq);
+
+       part->nchannels = XPC_MAX_NCHANNELS;
+
+       atomic_set(&part->nchannels_active, 0);
+       atomic_set(&part->nchannels_engaged, 0);
+
+       for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
+               ch = &part->channels[ch_number];
+
+               ch->partid = partid;
+               ch->number = ch_number;
+               ch->flags = XPC_C_DISCONNECTED;
+
+               atomic_set(&ch->kthreads_assigned, 0);
+               atomic_set(&ch->kthreads_idle, 0);
+               atomic_set(&ch->kthreads_active, 0);
+
+               atomic_set(&ch->references, 0);
+               atomic_set(&ch->n_to_notify, 0);
+
+               spin_lock_init(&ch->lock);
+               init_completion(&ch->wdisconnect_wait);
+
+               atomic_set(&ch->n_on_msg_allocate_wq, 0);
+               init_waitqueue_head(&ch->msg_allocate_wq);
+               init_waitqueue_head(&ch->idle_wq);
+       }
+
+       ret = xpc_setup_ch_structures_sn(part);
+       if (ret != xpSuccess)
+               goto out_2;
+
+       /*
+        * With the setting of the partition setup_state to XPC_P_SS_SETUP,
+        * we're declaring that this partition is ready to go.
+        */
+       part->setup_state = XPC_P_SS_SETUP;
+
+       return xpSuccess;
+
+       /* setup of ch structures failed */
+out_2:
+       kfree(part->remote_openclose_args_base);
+       part->remote_openclose_args = NULL;
+out_1:
+       kfree(part->channels);
+       part->channels = NULL;
+       return ret;
+}
+
+/*
+ * Teardown the channel structures necessary to support XPartition Communication
+ * between the specified remote partition and the local one.
+ */
+static void
+xpc_teardown_ch_structures(struct xpc_partition *part)
+{
+       DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
+       DBUG_ON(atomic_read(&part->nchannels_active) != 0);
 
        /*
-        * The kthread that XPC HB called us with will become the
-        * channel manager for this partition. It will not return
-        * back to XPC HB until the partition's XPC infrastructure
-        * has been dismantled.
+        * Make this partition inaccessible to local processes by marking it
+        * as no longer setup. Then wait before proceeding with the teardown
+        * until all existing references cease.
         */
+       DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
+       part->setup_state = XPC_P_SS_WTEARDOWN;
 
-       (void)xpc_part_ref(part);       /* this will always succeed */
+       wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
 
-       if (xpc_make_first_contact(part) == xpSuccess)
-               xpc_channel_mgr(part);
+       /* now we can begin tearing down the infrastructure */
 
-       xpc_part_deref(part);
+       xpc_teardown_ch_structures_sn(part);
 
-       xpc_teardown_infrastructure(part);
+       kfree(part->remote_openclose_args_base);
+       part->remote_openclose_args = NULL;
+       kfree(part->channels);
+       part->channels = NULL;
+
+       part->setup_state = XPC_P_SS_TORNDOWN;
 }
 
+/*
+ * When XPC HB determines that a partition has come up, it will create a new
+ * kthread and that kthread will call this function to attempt to set up the
+ * basic infrastructure used for Cross Partition Communication with the newly
+ * upped partition.
+ *
+ * The kthread that was created by XPC HB and which set up the XPC
+ * infrastructure will remain assigned to the partition, becoming the channel
+ * manager for that partition until the partition is deactivating, at which
+ * time the kthread will tear down the XPC infrastructure and then exit.
+ */
 static int
 xpc_activating(void *__partid)
 {
@@ -434,64 +599,47 @@ xpc_activating(void *__partid)
        struct xpc_partition *part = &xpc_partitions[partid];
        unsigned long irq_flags;
 
-       DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
+       DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 
        spin_lock_irqsave(&part->act_lock, irq_flags);
 
-       if (part->act_state == XPC_P_DEACTIVATING) {
-               part->act_state = XPC_P_INACTIVE;
+       if (part->act_state == XPC_P_AS_DEACTIVATING) {
+               part->act_state = XPC_P_AS_INACTIVE;
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
                part->remote_rp_pa = 0;
                return 0;
        }
 
        /* indicate the thread is activating */
-       DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
-       part->act_state = XPC_P_ACTIVATING;
+       DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
+       part->act_state = XPC_P_AS_ACTIVATING;
 
        XPC_SET_REASON(part, 0, 0);
        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 
-       dev_dbg(xpc_part, "bringing partition %d up\n", partid);
+       dev_dbg(xpc_part, "activating partition %d\n", partid);
 
-       /*
-        * Register the remote partition's AMOs with SAL so it can handle
-        * and cleanup errors within that address range should the remote
-        * partition go down. We don't unregister this range because it is
-        * difficult to tell when outstanding writes to the remote partition
-        * are finished and thus when it is safe to unregister. This should
-        * not result in wasted space in the SAL xp_addr_region table because
-        * we should get the same page for remote_amos_page_pa after module
-        * reloads and system reboots.
-        */
-       if (sn_register_xp_addr_region(part->remote_amos_page_pa,
-                                      PAGE_SIZE, 1) < 0) {
-               dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
-                        "xp_addr region\n", partid);
+       xpc_allow_hb(partid);
 
-               spin_lock_irqsave(&part->act_lock, irq_flags);
-               part->act_state = XPC_P_INACTIVE;
-               XPC_SET_REASON(part, xpPhysAddrRegFailed, __LINE__);
-               spin_unlock_irqrestore(&part->act_lock, irq_flags);
-               part->remote_rp_pa = 0;
-               return 0;
-       }
+       if (xpc_setup_ch_structures(part) == xpSuccess) {
+               (void)xpc_part_ref(part);       /* this will always succeed */
 
-       xpc_allow_hb(partid, xpc_vars);
-       xpc_IPI_send_activated(part);
+               if (xpc_make_first_contact(part) == xpSuccess) {
+                       xpc_mark_partition_active(part);
+                       xpc_channel_mgr(part);
+                       /* won't return until partition is deactivating */
+               }
 
-       /*
-        * xpc_partition_up() holds this thread and marks this partition as
-        * XPC_P_ACTIVE by calling xpc_hb_mark_active().
-        */
-       (void)xpc_partition_up(part);
+               xpc_part_deref(part);
+               xpc_teardown_ch_structures(part);
+       }
 
-       xpc_disallow_hb(partid, xpc_vars);
+       xpc_disallow_hb(partid);
        xpc_mark_partition_inactive(part);
 
        if (part->reason == xpReactivating) {
                /* interrupting ourselves results in activating partition */
-               xpc_IPI_send_reactivate(part);
+               xpc_request_partition_reactivation(part);
        }
 
        return 0;
@@ -506,9 +654,9 @@ xpc_activate_partition(struct xpc_partition *part)
 
        spin_lock_irqsave(&part->act_lock, irq_flags);
 
-       DBUG_ON(part->act_state != XPC_P_INACTIVE);
+       DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
 
-       part->act_state = XPC_P_ACTIVATION_REQ;
+       part->act_state = XPC_P_AS_ACTIVATION_REQ;
        XPC_SET_REASON(part, xpCloneKThread, __LINE__);
 
        spin_unlock_irqrestore(&part->act_lock, irq_flags);
@@ -517,62 +665,12 @@ xpc_activate_partition(struct xpc_partition *part)
                              partid);
        if (IS_ERR(kthread)) {
                spin_lock_irqsave(&part->act_lock, irq_flags);
-               part->act_state = XPC_P_INACTIVE;
+               part->act_state = XPC_P_AS_INACTIVE;
                XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
        }
 }
 
-/*
- * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
- * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
- * than one partition, we use an AMO_t structure per partition to indicate
- * whether a partition has sent an IPI or not.  If it has, then wake up the
- * associated kthread to handle it.
- *
- * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
- * running on other partitions.
- *
- * Noteworthy Arguments:
- *
- *     irq - Interrupt ReQuest number. NOT USED.
- *
- *     dev_id - partid of IPI's potential sender.
- */
-irqreturn_t
-xpc_notify_IRQ_handler(int irq, void *dev_id)
-{
-       short partid = (short)(u64)dev_id;
-       struct xpc_partition *part = &xpc_partitions[partid];
-
-       DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
-
-       if (xpc_part_ref(part)) {
-               xpc_check_for_channel_activity(part);
-
-               xpc_part_deref(part);
-       }
-       return IRQ_HANDLED;
-}
-
-/*
- * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
- * because the write to their associated IPI amo completed after the IRQ/IPI
- * was received.
- */
-void
-xpc_dropped_IPI_check(struct xpc_partition *part)
-{
-       if (xpc_part_ref(part)) {
-               xpc_check_for_channel_activity(part);
-
-               part->dropped_IPI_timer.expires = jiffies +
-                   XPC_P_DROPPED_IPI_WAIT;
-               add_timer(&part->dropped_IPI_timer);
-               xpc_part_deref(part);
-       }
-}
-
 void
 xpc_activate_kthreads(struct xpc_channel *ch, int needed)
 {
@@ -617,9 +715,9 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
        do {
                /* deliver messages to their intended recipients */
 
-               while (ch->w_local_GP.get < ch->w_remote_GP.put &&
+               while (xpc_n_of_deliverable_payloads(ch) > 0 &&
                       !(ch->flags & XPC_C_DISCONNECTING)) {
-                       xpc_deliver_msg(ch);
+                       xpc_deliver_payload(ch);
                }
 
                if (atomic_inc_return(&ch->kthreads_idle) >
@@ -633,7 +731,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
                        "wait_event_interruptible_exclusive()\n");
 
                (void)wait_event_interruptible_exclusive(ch->idle_wq,
-                               (ch->w_local_GP.get < ch->w_remote_GP.put ||
+                               (xpc_n_of_deliverable_payloads(ch) > 0 ||
                                 (ch->flags & XPC_C_DISCONNECTING)));
 
                atomic_dec(&ch->kthreads_idle);
@@ -678,7 +776,7 @@ xpc_kthread_start(void *args)
                         * additional kthreads to help deliver them. We only
                         * need one less than total #of messages to deliver.
                         */
-                       n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
+                       n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
                        if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
                                xpc_activate_kthreads(ch, n_needed);
 
@@ -704,11 +802,9 @@ xpc_kthread_start(void *args)
        }
        spin_unlock_irqrestore(&ch->lock, irq_flags);
 
-       if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-               if (atomic_dec_return(&part->nchannels_engaged) == 0) {
-                       xpc_mark_partition_disengaged(part);
-                       xpc_IPI_send_disengage(part);
-               }
+       if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+           atomic_dec_return(&part->nchannels_engaged) == 0) {
+               xpc_indicate_partition_disengaged(part);
        }
 
        xpc_msgqueue_deref(ch);
@@ -759,9 +855,9 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
                } else if (ch->flags & XPC_C_DISCONNECTING) {
                        break;
 
-               } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
-                       if (atomic_inc_return(&part->nchannels_engaged) == 1)
-                               xpc_mark_partition_engaged(part);
+               } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+                          atomic_inc_return(&part->nchannels_engaged) == 1) {
+                               xpc_indicate_partition_engaged(part);
                }
                (void)xpc_part_ref(part);
                xpc_msgqueue_ref(ch);
@@ -783,8 +879,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed,
 
                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
                            atomic_dec_return(&part->nchannels_engaged) == 0) {
-                               xpc_mark_partition_disengaged(part);
-                               xpc_IPI_send_disengage(part);
+                               xpc_indicate_partition_disengaged(part);
                        }
                        xpc_msgqueue_deref(ch);
                        xpc_part_deref(part);
@@ -816,7 +911,7 @@ xpc_disconnect_wait(int ch_number)
        int wakeup_channel_mgr;
 
        /* now wait for all callouts to the caller's function to cease */
-       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+       for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];
 
                if (!xpc_part_ref(part))
@@ -835,16 +930,15 @@ xpc_disconnect_wait(int ch_number)
                DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
                wakeup_channel_mgr = 0;
 
-               if (ch->delayed_IPI_flags) {
-                       if (part->act_state != XPC_P_DEACTIVATING) {
-                               spin_lock(&part->IPI_lock);
-                               XPC_SET_IPI_FLAGS(part->local_IPI_amo,
-                                                 ch->number,
-                                                 ch->delayed_IPI_flags);
-                               spin_unlock(&part->IPI_lock);
+               if (ch->delayed_chctl_flags) {
+                       if (part->act_state != XPC_P_AS_DEACTIVATING) {
+                               spin_lock(&part->chctl_lock);
+                               part->chctl.flags[ch->number] |=
+                                   ch->delayed_chctl_flags;
+                               spin_unlock(&part->chctl_lock);
                                wakeup_channel_mgr = 1;
                        }
-                       ch->delayed_IPI_flags = 0;
+                       ch->delayed_chctl_flags = 0;
                }
 
                ch->flags &= ~XPC_C_WDISCONNECT;
@@ -857,13 +951,63 @@ xpc_disconnect_wait(int ch_number)
        }
 }
 
+/*
+ * Allocate the xpc_partitions[] array (xp_max_npartitions entries),
+ * initialize the fields of each entry that must be valid before any remote
+ * partition is activated, and then run the architecture-specific setup via
+ * xpc_setup_partitions_sn().
+ *
+ * Returns 0 on success, -ENOMEM if the array can't be allocated, or the
+ * non-zero value returned by xpc_setup_partitions_sn(). On any failure the
+ * array has been released and xpc_partitions is NULL, so the caller has
+ * nothing to tear down.
+ */
+static int
+xpc_setup_partitions(void)
+{
+       short partid;
+       struct xpc_partition *part;
+       int ret;
+
+       xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
+                                xp_max_npartitions, GFP_KERNEL);
+       if (xpc_partitions == NULL) {
+               dev_err(xpc_part, "can't get memory for partition structure\n");
+               return -ENOMEM;
+       }
+
+       /*
+        * The first few fields of each entry of xpc_partitions[] need to
+        * be initialized now so that calls to xpc_connect() and
+        * xpc_disconnect() can be made prior to the activation of any remote
+        * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
+        * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
+        * PARTITION HAS BEEN ACTIVATED.
+        */
+       for (partid = 0; partid < xp_max_npartitions; partid++) {
+               part = &xpc_partitions[partid];
+
+               DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
+
+               part->activate_IRQ_rcvd = 0;
+               spin_lock_init(&part->act_lock);
+               part->act_state = XPC_P_AS_INACTIVE;
+               XPC_SET_REASON(part, 0, 0);
+
+               init_timer(&part->disengage_timer);
+               part->disengage_timer.function =
+                   xpc_timeout_partition_disengage;
+               part->disengage_timer.data = (unsigned long)part;
+
+               part->setup_state = XPC_P_SS_UNSET;
+               init_waitqueue_head(&part->teardown_wq);
+               atomic_set(&part->references, 0);
+       }
+
+       ret = xpc_setup_partitions_sn();
+       if (ret != 0) {
+               /*
+                * xpc_init() bails out past its xpc_teardown_partitions()
+                * call when we fail, so release the array here rather than
+                * leak it.
+                */
+               kfree(xpc_partitions);
+               xpc_partitions = NULL;
+       }
+
+       return ret;
+}
+
+/*
+ * Release the xpc_partitions[] array allocated by xpc_setup_partitions().
+ */
+static void
+xpc_teardown_partitions(void)
+{
+       kfree(xpc_partitions);
+       /* the pointer is file-scope; don't leave it dangling after the free */
+       xpc_partitions = NULL;
+}
+
 static void
 xpc_do_exit(enum xp_retval reason)
 {
        short partid;
        int active_part_count, printed_waiting_msg = 0;
        struct xpc_partition *part;
-       unsigned long printmsg_time, disengage_request_timeout = 0;
+       unsigned long printmsg_time, disengage_timeout = 0;
 
        /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
        DBUG_ON(xpc_exiting == 1);
@@ -874,10 +1018,7 @@ xpc_do_exit(enum xp_retval reason)
         * the heartbeat checker thread in case it's sleeping.
         */
        xpc_exiting = 1;
-       wake_up_interruptible(&xpc_act_IRQ_wq);
-
-       /* ignore all incoming interrupts */
-       free_irq(SGI_XPC_ACTIVATE, NULL);
+       wake_up_interruptible(&xpc_activate_IRQ_wq);
 
        /* wait for the discovery thread to exit */
        wait_for_completion(&xpc_discovery_exited);
@@ -890,17 +1031,17 @@ xpc_do_exit(enum xp_retval reason)
 
        /* wait for all partitions to become inactive */
 
-       printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
-       xpc_disengage_request_timedout = 0;
+       printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
+       xpc_disengage_timedout = 0;
 
        do {
                active_part_count = 0;
 
-               for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+               for (partid = 0; partid < xp_max_npartitions; partid++) {
                        part = &xpc_partitions[partid];
 
                        if (xpc_partition_disengaged(part) &&
-                           part->act_state == XPC_P_INACTIVE) {
+                           part->act_state == XPC_P_AS_INACTIVE) {
                                continue;
                        }
 
@@ -908,36 +1049,32 @@ xpc_do_exit(enum xp_retval reason)
 
                        XPC_DEACTIVATE_PARTITION(part, reason);
 
-                       if (part->disengage_request_timeout >
-                           disengage_request_timeout) {
-                               disengage_request_timeout =
-                                   part->disengage_request_timeout;
-                       }
+                       if (part->disengage_timeout > disengage_timeout)
+                               disengage_timeout = part->disengage_timeout;
                }
 
-               if (xpc_partition_engaged(-1UL)) {
-                       if (time_after(jiffies, printmsg_time)) {
+               if (xpc_any_partition_engaged()) {
+                       if (time_is_before_jiffies(printmsg_time)) {
                                dev_info(xpc_part, "waiting for remote "
-                                        "partitions to disengage, timeout in "
-                                        "%ld seconds\n",
-                                        (disengage_request_timeout - jiffies)
-                                        / HZ);
+                                        "partitions to deactivate, timeout in "
+                                        "%ld seconds\n", (disengage_timeout -
+                                        jiffies) / HZ);
                                printmsg_time = jiffies +
-                                   (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+                                   (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
                                printed_waiting_msg = 1;
                        }
 
                } else if (active_part_count > 0) {
                        if (printed_waiting_msg) {
                                dev_info(xpc_part, "waiting for local partition"
-                                        " to disengage\n");
+                                        " to deactivate\n");
                                printed_waiting_msg = 0;
                        }
 
                } else {
-                       if (!xpc_disengage_request_timedout) {
+                       if (!xpc_disengage_timedout) {
                                dev_info(xpc_part, "all partitions have "
-                                        "disengaged\n");
+                                        "deactivated\n");
                        }
                        break;
                }
@@ -947,33 +1084,28 @@ xpc_do_exit(enum xp_retval reason)
 
        } while (1);
 
-       DBUG_ON(xpc_partition_engaged(-1UL));
+       DBUG_ON(xpc_any_partition_engaged());
+       DBUG_ON(xpc_any_hbs_allowed() != 0);
 
-       /* indicate to others that our reserved page is uninitialized */
-       xpc_rsvd_page->vars_pa = 0;
-
-       /* now it's time to eliminate our heartbeat */
-       del_timer_sync(&xpc_hb_timer);
-       DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
+       xpc_teardown_rsvd_page();
 
        if (reason == xpUnloading) {
-               /* take ourselves off of the reboot_notifier_list */
-               (void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
-               /* take ourselves off of the die_notifier list */
                (void)unregister_die_notifier(&xpc_die_notifier);
+               (void)unregister_reboot_notifier(&xpc_reboot_notifier);
        }
 
-       /* close down protections for IPI operations */
-       xpc_restrict_IPI_ops();
-
        /* clear the interface to XPC's functions */
        xpc_clear_interface();
 
        if (xpc_sysctl)
                unregister_sysctl_table(xpc_sysctl);
 
-       kfree(xpc_remote_copy_buffer_base);
+       xpc_teardown_partitions();
+
+       if (is_shub())
+               xpc_exit_sn2();
+       else
+               xpc_exit_uv();
 }
 
 /*
@@ -1003,60 +1135,57 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
 }
 
 /*
- * Notify other partitions to disengage from all references to our memory.
+ * Notify other partitions to deactivate from us by first disengaging from all
+ * references to our memory.
  */
 static void
-xpc_die_disengage(void)
+xpc_die_deactivate(void)
 {
        struct xpc_partition *part;
        short partid;
-       unsigned long engaged;
-       long time, printmsg_time, disengage_request_timeout;
+       int any_engaged;
+       long keep_waiting;
+       long wait_to_print;
 
        /* keep xpc_hb_checker thread from doing anything (just in case) */
        xpc_exiting = 1;
 
-       xpc_vars->heartbeating_to_mask = 0;     /* indicate we're deactivated */
+       xpc_disallow_all_hbs(); /*indicate we're deactivated */
 
-       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+       for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];
 
-               if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
-                   remote_vars_version)) {
-
-                       /* just in case it was left set by an earlier XPC */
-                       xpc_clear_partition_engaged(1UL << partid);
-                       continue;
-               }
-
-               if (xpc_partition_engaged(1UL << partid) ||
-                   part->act_state != XPC_P_INACTIVE) {
-                       xpc_request_partition_disengage(part);
-                       xpc_mark_partition_disengaged(part);
-                       xpc_IPI_send_disengage(part);
+               if (xpc_partition_engaged(partid) ||
+                   part->act_state != XPC_P_AS_INACTIVE) {
+                       xpc_request_partition_deactivation(part);
+                       xpc_indicate_partition_disengaged(part);
                }
        }
 
-       time = rtc_time();
-       printmsg_time = time +
-           (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
-       disengage_request_timeout = time +
-           (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
-
-       /* wait for all other partitions to disengage from us */
+       /*
+        * Though we requested that all other partitions deactivate from us,
+        * we only wait until they've all disengaged or we've reached the
+        * defined timelimit.
+        *
+        * Given that one iteration through the following while-loop takes
+        * approximately 200 microseconds, calculate the #of loops to take
+        * before bailing and the #of loops before printing a waiting message.
+        */
+       keep_waiting = xpc_disengage_timelimit * 1000 * 5;
+       wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
 
        while (1) {
-               engaged = xpc_partition_engaged(-1UL);
-               if (!engaged) {
-                       dev_info(xpc_part, "all partitions have disengaged\n");
+               any_engaged = xpc_any_partition_engaged();
+               if (!any_engaged) {
+                       dev_info(xpc_part, "all partitions have deactivated\n");
                        break;
                }
 
-               time = rtc_time();
-               if (time >= disengage_request_timeout) {
-                       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-                               if (engaged & (1UL << partid)) {
-                                       dev_info(xpc_part, "disengage from "
+               if (!keep_waiting--) {
+                       for (partid = 0; partid < xp_max_npartitions;
+                            partid++) {
+                               if (xpc_partition_engaged(partid)) {
+                                       dev_info(xpc_part, "deactivate from "
                                                 "remote partition %d timed "
                                                 "out\n", partid);
                                }
@@ -1064,15 +1193,15 @@ xpc_die_disengage(void)
                        break;
                }
 
-               if (time >= printmsg_time) {
+               if (!wait_to_print--) {
                        dev_info(xpc_part, "waiting for remote partitions to "
-                                "disengage, timeout in %ld seconds\n",
-                                (disengage_request_timeout - time) /
-                                sn_rtc_cycles_per_second);
-                       printmsg_time = time +
-                           (XPC_DISENGAGE_PRINTMSG_INTERVAL *
-                            sn_rtc_cycles_per_second);
+                                "deactivate, timeout in %ld seconds\n",
+                                keep_waiting / (1000 * 5));
+                       wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
+                           1000 * 5;
                }
+
+               udelay(200);
        }
 }
 
@@ -1087,10 +1216,11 @@ xpc_die_disengage(void)
 static int
 xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 {
+#ifdef CONFIG_IA64             /* !!! temporary kludge */
        switch (event) {
        case DIE_MACHINE_RESTART:
        case DIE_MACHINE_HALT:
-               xpc_die_disengage();
+               xpc_die_deactivate();
                break;
 
        case DIE_KDEBUG_ENTER:
@@ -1101,8 +1231,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
                /* fall through */
        case DIE_MCA_MONARCH_ENTER:
        case DIE_INIT_MONARCH_ENTER:
-               xpc_vars->heartbeat++;
-               xpc_vars->heartbeat_offline = 1;
+               xpc_offline_heartbeat();
                break;
 
        case DIE_KDEBUG_LEAVE:
@@ -1113,10 +1242,12 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
                /* fall through */
        case DIE_MCA_MONARCH_LEAVE:
        case DIE_INIT_MONARCH_LEAVE:
-               xpc_vars->heartbeat++;
-               xpc_vars->heartbeat_offline = 0;
+               xpc_online_heartbeat();
                break;
        }
+#else
+       xpc_die_deactivate();
+#endif
 
        return NOTIFY_DONE;
 }
@@ -1125,105 +1256,52 @@ int __init
 xpc_init(void)
 {
        int ret;
-       short partid;
-       struct xpc_partition *part;
        struct task_struct *kthread;
-       size_t buf_size;
-
-       if (!ia64_platform_is("sn2"))
-               return -ENODEV;
-
-       buf_size = max(XPC_RP_VARS_SIZE,
-                      XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
-       xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
-                                                              GFP_KERNEL,
-                                                 &xpc_remote_copy_buffer_base);
-       if (xpc_remote_copy_buffer == NULL)
-               return -ENOMEM;
 
        snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
        snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
 
-       xpc_sysctl = register_sysctl_table(xpc_sys_dir);
-
-       /*
-        * The first few fields of each entry of xpc_partitions[] need to
-        * be initialized now so that calls to xpc_connect() and
-        * xpc_disconnect() can be made prior to the activation of any remote
-        * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
-        * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
-        * PARTITION HAS BEEN ACTIVATED.
-        */
-       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
-               part = &xpc_partitions[partid];
-
-               DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
-
-               part->act_IRQ_rcvd = 0;
-               spin_lock_init(&part->act_lock);
-               part->act_state = XPC_P_INACTIVE;
-               XPC_SET_REASON(part, 0, 0);
+       if (is_shub()) {
+               /*
+                * The ia64-sn2 architecture supports at most 64 partitions.
+                * And the inability to unregister remote amos restricts us
+                * further to only support exactly 64 partitions on this
+                * architecture, no less.
+                */
+               if (xp_max_npartitions != 64) {
+                       dev_err(xpc_part, "max #of partitions not set to 64\n");
+                       ret = -EINVAL;
+               } else {
+                       ret = xpc_init_sn2();
+               }
 
-               init_timer(&part->disengage_request_timer);
-               part->disengage_request_timer.function =
-                   xpc_timeout_partition_disengage_request;
-               part->disengage_request_timer.data = (unsigned long)part;
+       } else if (is_uv()) {
+               ret = xpc_init_uv();
 
-               part->setup_state = XPC_P_UNSET;
-               init_waitqueue_head(&part->teardown_wq);
-               atomic_set(&part->references, 0);
+       } else {
+               ret = -ENODEV;
        }
 
-       /*
-        * Open up protections for IPI operations (and AMO operations on
-        * Shub 1.1 systems).
-        */
-       xpc_allow_IPI_ops();
-
-       /*
-        * Interrupts being processed will increment this atomic variable and
-        * awaken the heartbeat thread which will process the interrupts.
-        */
-       atomic_set(&xpc_act_IRQ_rcvd, 0);
+       if (ret != 0)
+               return ret;
 
-       /*
-        * This is safe to do before the xpc_hb_checker thread has started
-        * because the handler releases a wait queue.  If an interrupt is
-        * received before the thread is waiting, it will not go to sleep,
-        * but rather immediately process the interrupt.
-        */
-       ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
-                         "xpc hb", NULL);
+       ret = xpc_setup_partitions();
        if (ret != 0) {
-               dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
-                       "errno=%d\n", -ret);
-
-               xpc_restrict_IPI_ops();
-
-               if (xpc_sysctl)
-                       unregister_sysctl_table(xpc_sysctl);
-
-               kfree(xpc_remote_copy_buffer_base);
-               return -EBUSY;
+               dev_err(xpc_part, "can't get memory for partition structure\n");
+               goto out_1;
        }
 
+       xpc_sysctl = register_sysctl_table(xpc_sys_dir);
+
        /*
         * Fill the partition reserved page with the information needed by
         * other partitions to discover we are alive and establish initial
         * communications.
         */
-       xpc_rsvd_page = xpc_rsvd_page_init();
-       if (xpc_rsvd_page == NULL) {
-               dev_err(xpc_part, "could not setup our reserved page\n");
-
-               free_irq(SGI_XPC_ACTIVATE, NULL);
-               xpc_restrict_IPI_ops();
-
-               if (xpc_sysctl)
-                       unregister_sysctl_table(xpc_sysctl);
-
-               kfree(xpc_remote_copy_buffer_base);
-               return -EBUSY;
+       ret = xpc_setup_rsvd_page();
+       if (ret != 0) {
+               dev_err(xpc_part, "can't setup our reserved page\n");
+               goto out_2;
        }
 
        /* add ourselves to the reboot_notifier_list */
@@ -1236,9 +1314,6 @@ xpc_init(void)
        if (ret != 0)
                dev_warn(xpc_part, "can't register die notifier\n");
 
-       init_timer(&xpc_hb_timer);
-       xpc_hb_timer.function = xpc_hb_beater;
-
        /*
         * The real work-horse behind xpc.  This processes incoming
         * interrupts and monitors remote heartbeats.
@@ -1246,25 +1321,8 @@ xpc_init(void)
        kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
        if (IS_ERR(kthread)) {
                dev_err(xpc_part, "failed while forking hb check thread\n");
-
-               /* indicate to others that our reserved page is uninitialized */
-               xpc_rsvd_page->vars_pa = 0;
-
-               /* take ourselves off of the reboot_notifier_list */
-               (void)unregister_reboot_notifier(&xpc_reboot_notifier);
-
-               /* take ourselves off of the die_notifier list */
-               (void)unregister_die_notifier(&xpc_die_notifier);
-
-               del_timer_sync(&xpc_hb_timer);
-               free_irq(SGI_XPC_ACTIVATE, NULL);
-               xpc_restrict_IPI_ops();
-
-               if (xpc_sysctl)
-                       unregister_sysctl_table(xpc_sysctl);
-
-               kfree(xpc_remote_copy_buffer_base);
-               return -EBUSY;
+               ret = -EBUSY;
+               goto out_3;
        }
 
        /*
@@ -1286,11 +1344,28 @@ xpc_init(void)
 
        /* set the interface to point at XPC's functions */
        xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
-                         xpc_initiate_allocate, xpc_initiate_send,
-                         xpc_initiate_send_notify, xpc_initiate_received,
-                         xpc_initiate_partid_to_nasids);
+                         xpc_initiate_send, xpc_initiate_send_notify,
+                         xpc_initiate_received, xpc_initiate_partid_to_nasids);
 
        return 0;
+
+       /* initialization was not successful */
+out_3:
+       xpc_teardown_rsvd_page();
+
+       (void)unregister_die_notifier(&xpc_die_notifier);
+       (void)unregister_reboot_notifier(&xpc_reboot_notifier);
+out_2:
+       if (xpc_sysctl)
+               unregister_sysctl_table(xpc_sysctl);
+
+       xpc_teardown_partitions();
+out_1:
+       if (is_shub())
+               xpc_exit_sn2();
+       else
+               xpc_exit_uv();
+       return ret;
 }
 
 module_init(xpc_init);
@@ -1315,9 +1390,9 @@ module_param(xpc_hb_check_interval, int, 0);
 MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
                 "heartbeat checks.");
 
-module_param(xpc_disengage_request_timelimit, int, 0);
-MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
-                "for disengage request to complete.");
+module_param(xpc_disengage_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
+                "for disengage to complete.");
 
 module_param(xpc_kdebug_ignore, int, 0);
 MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "