X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fmca_drv.c;h=fab1d21a4f2c1cfe4443aa8812518d4a6bfeaeb6;hb=99c3563e64cf887cca0b181149c0f85c39569276;hp=a45009d2bc90149e5199a97c9eff86a8d552f5b0;hpb=ebdea46fecae40c4d7effcd33f40918a37a1df4b;p=linux-2.6-omap-h63xx.git diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index a45009d2bc9..fab1d21a4f2 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -3,7 +3,7 @@ * Purpose: Generic MCA handling layer * * Copyright (C) 2004 FUJITSU LIMITED - * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) + * Copyright (C) 2004 Hidetoshi Seto * Copyright (C) 2005 Silicon Graphics, Inc * Copyright (C) 2005 Keith Owens * Copyright (C) 2006 Russ Anderson @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -434,6 +433,50 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci, return MCA_IS_GLOBAL; } +/** + * get_target_identifier - Get the valid Cache or Bus check target identifier. + * @peidx: pointer of index of processor error section + * + * Return value: + * target address on Success / 0 on Failure + */ +static u64 +get_target_identifier(peidx_table_t *peidx) +{ + u64 target_address = 0; + sal_log_mod_error_info_t *smei; + pal_cache_check_info_t *pcci; + int i, level = 9; + + /* + * Look through the cache checks for a valid target identifier + * If more than one valid target identifier, return the one + * with the lowest cache level. + */ + for (i = 0; i < peidx_cache_check_num(peidx); i++) { + smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i); + if (smei->valid.target_identifier && smei->target_identifier) { + pcci = (pal_cache_check_info_t *)&(smei->check_info); + if (!target_address || (pcci->level < level)) { + target_address = smei->target_identifier; + level = pcci->level; + continue; + } + } + } + if (target_address) + return target_address; + + /* + * Look at the bus check for a valid target identifier + */ + smei = peidx_bus_check(peidx, 0); + if (smei && smei->valid.target_identifier) + return smei->target_identifier; + + return 0; +} + /** * recover_from_read_error - Try to recover the errors which type are "read"s. * @slidx: pointer of index of SAL error record @@ -450,13 +493,14 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci, struct ia64_sal_os_state *sos) { - sal_log_mod_error_info_t *smei; + u64 target_identifier; pal_min_state_area_t *pmsa; struct ia64_psr *psr1, *psr2; ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; /* Is target address valid? */ - if (!pbci->tv) + target_identifier = get_target_identifier(peidx); + if (!target_identifier) return fatal_mca("target address not valid"); /* @@ -487,32 +531,28 @@ recover_from_read_error(slidx_table_t *slidx, pmsa = sos->pal_min_state; if (psr1->cpl != 0 || ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { - smei = peidx_bus_check(peidx, 0); - if (smei->valid.target_identifier) { - /* - * setup for resume to bottom half of MCA, - * "mca_handler_bhhook" - */ - /* pass to bhhook as argument (gr8, ...) */ - pmsa->pmsa_gr[8-1] = smei->target_identifier; - pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; - pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; - /* set interrupted return address (but no use) */ - pmsa->pmsa_br0 = pmsa->pmsa_iip; - /* change resume address to bottom half */ - pmsa->pmsa_iip = mca_hdlr_bh->fp; - pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; - /* set cpl with kernel mode */ - psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; - psr2->cpl = 0; - psr2->ri = 0; - psr2->bn = 1; - psr2->i = 0; - - return mca_recovered("user memory corruption. " + /* + * setup for resume to bottom half of MCA, + * "mca_handler_bhhook" + */ + /* pass to bhhook as argument (gr8, ...) */ + pmsa->pmsa_gr[8-1] = target_identifier; + pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; + pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; + /* set interrupted return address (but no use) */ + pmsa->pmsa_br0 = pmsa->pmsa_iip; + /* change resume address to bottom half */ + pmsa->pmsa_iip = mca_hdlr_bh->fp; + pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; + /* set cpl with kernel mode */ + psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; + psr2->cpl = 0; + psr2->ri = 0; + psr2->bn = 1; + psr2->i = 0; + + return mca_recovered("user memory corruption. " "kill affected process - recovered."); - } - } return fatal_mca("kernel context not recovered, iip 0x%lx\n", @@ -561,11 +601,40 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, default: break; } + } else if (psp->cc && !psp->bc) { /* Cache error */ + status = recover_from_read_error(slidx, peidx, pbci, sos); } return status; } +/* + * recover_from_tlb_check + * @peidx: pointer of index of processor error section + * + * Return value: + * 1 on Success / 0 on Failure + */ +static int +recover_from_tlb_check(peidx_table_t *peidx) +{ + sal_log_mod_error_info_t *smei; + pal_tlb_check_info_t *ptci; + + smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0); + ptci = (pal_tlb_check_info_t *)&(smei->check_info); + + /* + * Look for signature of a duplicate TLB DTC entry, which is + * a SW bug and always fatal. + */ + if (ptci->op == PAL_TLB_CHECK_OP_PURGE + && !(ptci->itr || ptci->dtc || ptci->itc)) + return fatal_mca("Duplicate TLB entry"); + + return mca_recovered("TLB check recovered"); +} + /** * recover_from_processor_error * @platform: whether there are some platform error section or not @@ -577,13 +646,6 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, * Return value: * 1 on Success / 0 on Failure */ -/* - * Later we try to recover when below all conditions are satisfied. - * 1. Only one processor error section is exist. - * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) - * 3. The entry of BUS_CHECK_INFO is 1. - * 4. "External bus error" flag is set and the others are not set. - */ static int recover_from_processor_error(int platform, slidx_table_t *slidx, @@ -610,39 +672,40 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, if (psp->us || psp->ci == 0) return fatal_mca("error not contained"); + /* + * Look for recoverable TLB check + */ + if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) + return recover_from_tlb_check(peidx); + /* * The cache check and bus check bits have four possible states * cc bc - * 0 0 Weird record, not recovered - * 1 0 Cache error, not recovered - * 0 1 I/O error, attempt recovery * 1 1 Memory error, attempt recovery + * 1 0 Cache error, attempt recovery + * 0 1 I/O error, attempt recovery + * 0 0 Other error type, not recovered */ - if (psp->bc == 0 || pbci == NULL) - return fatal_mca("No bus check"); + if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) + return fatal_mca("No cache or bus check"); /* - * Sorry, we cannot handle so many. + * Cannot handle more than one bus check. */ if (peidx_bus_check_num(peidx) > 1) return fatal_mca("Too many bus checks"); - /* - * Well, here is only one bus error. - */ + if (pbci->ib) return fatal_mca("Internal Bus error"); - if (pbci->cc) - return fatal_mca("Cache-cache error"); if (pbci->eb && pbci->bsi > 0) return fatal_mca("External bus check fatal status"); /* - * This is a local MCA and estimated as recoverble external bus error. - * (e.g. a load from poisoned memory) - * This means "there are some platform errors". + * This is a local MCA and estimated as a recoverable error. */ if (platform) return recover_from_platform_error(slidx, peidx, pbci, sos); + /* * On account of strange SAL error record, we cannot recover. */