# quick whole-sheet checks for duplicate cells, order, merged payloads
import zipfile,re,xml.etree.ElementTree as ET,collections
from pathlib import Path
p=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_EXCEL_REPAIR_SAFE_20260513-094726.xlsx')
S='长期资产循环_全量穿透校验'; NS={'m':'http://schemas.openxmlformats.org/spreadsheetml/2006/main','r':'http://schemas.openxmlformats.org/officeDocument/2006/relationships'}
def locate(z):
    wb=ET.fromstring(z.read('xl/workbook.xml')); rels=ET.fromstring(z.read('xl/_rels/workbook.xml.rels'))
    ridmap={r.attrib['Id']:r.attrib['Target'] for r in rels}
    for sh in wb.find('m:sheets',NS):
        if sh.attrib.get('name')==S: return 'xl/'+ridmap[sh.attrib['{'+NS['r']+'}id']].lstrip('/')
def colnum(col):
    n=0
    for ch in col: n=n*26+ord(ch)-64
    return n
def split(ref):
    m=re.match(r'([A-Z]+)(\d+)',ref); return colnum(m.group(1)),int(m.group(2)),m.group(1)
def expand(rng):
    a,b=rng.split(':') if ':' in rng else (rng,rng); c1,r1,_=split(a); c2,r2,_=split(b); return c1,r1,c2,r2,a
with zipfile.ZipFile(p) as z:
    xml=z.read(locate(z)).decode('utf-8')
refs=re.findall(r'<c\b[^>]*\br="([A-Z]+\d+)"',xml); cnt=collections.Counter(refs)
print('dups',[(r,n) for r,n in cnt.items() if n>1][:20], 'count',sum(1 for n in cnt.values() if n>1))
# merged payloads whole sheet
viol=[]
merged=re.findall(r'<mergeCell\b[^>]*ref="([^"]+)"', xml)
cell_pat = r'<c\b(?=[^>]*\br="([A-Z]+\d+)")[^>]*/>|<c\b(?=[^>]*\br="([A-Z]+\d+)")[^>]*>.*?</c>'
for cm in re.finditer(cell_pat, xml, re.S):
    ref=cm.group(1) or cm.group(2); c,r,_=split(ref); cell=cm.group(0)
    if not any(x in cell for x in ['<f','<v','<is']): continue
    for rng in merged:
        c1,r1,c2,r2,top=expand(rng); tc,tr,_=split(top)
        if c1<=c<=c2 and r1<=r<=r2 and not(c==tc and r==tr):
            viol.append((ref,rng,cell[:120])); break
print('merged_payload_violations_whole_sheet',len(viol))
for v in viol[:50]: print(v)
