+#ifndef MODULE
+#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
+#define PER_CPU_ATTRIBUTES
+#else
+/*
+ * To calculate addresses of locally defined variables, GCC uses 32-bit
+ * displacement from the GP. Which doesn't work for per cpu variables in
+ * modules, as an offset to the kernel per cpu area is way above 4G.
+ *
+ * This forces allocation of a GOT entry for per cpu variable using
+ * ldq instruction with a 'literal' relocation.
+ */
+#define SHIFT_PERCPU_PTR(var, offset) ({ \
+ extern int simple_identifier_##var(void); \
+ unsigned long __ptr, tmp_gp; \
+ asm ( "br %1, 1f \n\
+ 1: ldgp %1, 0(%1) \n\
+ ldq %0, per_cpu__" #var"(%1)\t!literal" \
+ : "=&r"(__ptr), "=&r"(tmp_gp)); \
+ (typeof(&per_cpu_var(var)))(__ptr + (offset)); })
+
+#define PER_CPU_ATTRIBUTES __used
+
+#endif /* MODULE */
+
+/*
+ * A percpu variable may point to a discarded regions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+ (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(var, my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
+
+#else /* ! SMP */
+
+#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var) per_cpu_var(var)
+#define __raw_get_cpu_var(var) per_cpu_var(var)
+
+#define PER_CPU_ATTRIBUTES
+
+#endif /* SMP */