# quick whole-sheet checks for duplicate cells, order, merged payloads import zipfile,re,xml.etree.ElementTree as ET,collections from pathlib import Path p=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_EXCEL_REPAIR_SAFE_20260513-094726.xlsx') S='长期资产循环_全量穿透校验'; NS={'m':'http://schemas.openxmlformats.org/spreadsheetml/2006/main','r':'http://schemas.openxmlformats.org/officeDocument/2006/relationships'} def locate(z): wb=ET.fromstring(z.read('xl/workbook.xml')); rels=ET.fromstring(z.read('xl/_rels/workbook.xml.rels')) ridmap={r.attrib['Id']:r.attrib['Target'] for r in rels} for sh in wb.find('m:sheets',NS): if sh.attrib.get('name')==S: return 'xl/'+ridmap[sh.attrib['{'+NS['r']+'}id']].lstrip('/') def colnum(col): n=0 for ch in col: n=n*26+ord(ch)-64 return n def split(ref): m=re.match(r'([A-Z]+)(\d+)',ref); return colnum(m.group(1)),int(m.group(2)),m.group(1) def expand(rng): a,b=rng.split(':') if ':' in rng else (rng,rng); c1,r1,_=split(a); c2,r2,_=split(b); return c1,r1,c2,r2,a with zipfile.ZipFile(p) as z: xml=z.read(locate(z)).decode('utf-8') refs=re.findall(r']*\br="([A-Z]+\d+)"',xml); cnt=collections.Counter(refs) print('dups',[(r,n) for r,n in cnt.items() if n>1][:20], 'count',sum(1 for n in cnt.values() if n>1)) # merged payloads whole sheet viol=[] merged=re.findall(r']*ref="([^"]+)"', xml) cell_pat = r']*\br="([A-Z]+\d+)")[^>]*/>|]*\br="([A-Z]+\d+)")[^>]*>.*?' for cm in re.finditer(cell_pat, xml, re.S): ref=cm.group(1) or cm.group(2); c,r,_=split(ref); cell=cm.group(0) if not any(x in cell for x in ['