import zipfile,re,xml.etree.ElementTree as ET,json
from pathlib import Path
bad=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_SAFE_20260513-092323.xlsx')
base=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R58_asset_class_atomic_SAFE_20260513-085849.xlsx')
SHEET='长期资产循环_全量穿透校验'
NS={'m':'http://schemas.openxmlformats.org/spreadsheetml/2006/main','r':'http://schemas.openxmlformats.org/officeDocument/2006/relationships'}
def locate(z):
    wb=ET.fromstring(z.read('xl/workbook.xml')); rels=ET.fromstring(z.read('xl/_rels/workbook.xml.rels'))
    ridmap={r.attrib['Id']:r.attrib['Target'] for r in rels}
    for sh in wb.find('m:sheets',NS):
        if sh.attrib.get('name')==SHEET: return 'xl/'+ridmap[sh.attrib['{'+NS['r']+'}id']].lstrip('/')

def colnum(col):
    n=0
    for ch in col: n=n*26+ord(ch)-64
    return n
def split(ref):
    m=re.match(r'([A-Z]+)(\d+)',ref); return colnum(m.group(1)),int(m.group(2)),m.group(1)
def expand_range(rng):
    a,b=rng.split(':') if ':' in rng else (rng,rng)
    c1,r1,_=split(a); c2,r2,_=split(b)
    return c1,r1,c2,r2
def cells(xml):
    return {m.group(1):m.group(0) for m in re.finditer(r'<c\b(?=[^>]*\br="([A-Z]+\d+)")[^>]*(?:/>|>.*?</c>)', xml, re.S)}
with zipfile.ZipFile(base) as z: base_xml=z.read(locate(z)).decode('utf-8')
with zipfile.ZipFile(bad) as z: bad_xml=z.read(locate(z)).decode('utf-8')
# changed cell refs
bc=cells(base_xml); dc=cells(bad_xml)
changed=[r for r in sorted(set(bc)|set(dc), key=lambda x:(split(x)[1],split(x)[0])) if bc.get(r)!=dc.get(r)]
# merged ranges
merged=re.findall(r'<mergeCell\b[^>]*ref="([^"]+)"', bad_xml)
viol=[]
for rng in merged:
    c1,r1,c2,r2=expand_range(rng)
    if c1==c2 and r1==r2: continue
    top=(c1,r1)
    for ref in changed:
        c,r,_=split(ref)
        if c1<=c<=c2 and r1<=r<=r2 and (c,r)!=top:
            viol.append((ref,rng,dc.get(ref,'')[:240],bc.get(ref,'')[:160]))
print('changed count',len(changed))
print('merged ranges count',len(merged))
print('changed non-top-left merged violations',len(viol))
for v in viol[:200]: print('\nVIOL',v[0], 'in', v[1], '\n bad=',v[2], '\n base=',v[3])
print('\nmerged ranges 64-180:')
for rng in merged:
    c1,r1,c2,r2=expand_range(rng)
    if r2>=64 and r1<=180:
        print(rng)
