return GRU_HANDLE_BYTES;
}
-/* rewrite in assembly & use lots of prefetch */
-static void gru_load_context_data(void *save, void *grubase, int ctxnum,
- unsigned long cbrmap, unsigned long dsrmap)
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
+ unsigned long length)
{
- void *gseg, *cb, *cbe;
- unsigned long length;
int i, scr;
- gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
- length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
GRU_CACHE_LINE_BYTES);
- cb = gseg + GRU_CB_BASE;
- cbe = grubase + GRU_CBE_BASE;
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
GRU_CACHE_LINE_BYTES);
cb += GRU_HANDLE_STRIDE;
}
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+ unsigned long cbrmap, unsigned long dsrmap)
+{
+ void *gseg, *cb, *cbe;
+ unsigned long length;
+ int i, scr;
+ gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
cb = gseg + GRU_CB_BASE;
+ cbe = grubase + GRU_CBE_BASE;
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+ gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
save += gru_copy_handle(cb, save);
save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
int i, scr;
gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
-
cb = gseg + GRU_CB_BASE;
cbe = grubase + GRU_CBE_BASE;
+ length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+ gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
save += gru_copy_handle(save, cb);
save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
cb += GRU_HANDLE_STRIDE;
}
- length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
memcpy(save, gseg + GRU_DS_BASE, length);
}