import zipfile,re,xml.etree.ElementTree as ET,hashlib,json,shutil,subprocess
from pathlib import Path
import openpyxl
BASE=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R58_asset_class_atomic_SAFE_20260513-085849.xlsx')
INP=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_EXCEL_REPAIR_SAFE_20260513-094726.xlsx')
ORIG=Path('/Users/neo/.hermes/cache/documents/doc_259cbd37e734_万传安徽_V9_v2.1_R66拆3sub+12sub行全拆开_20260512.xlsx')
OUTDIR=Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049')
STAMP='20260513-102004'
OUT=OUTDIR/f'wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_EXCEL_STRICT_SAFE_{STAMP}.xlsx'
VERIFY=OUTDIR/f'phase12_excel_repair_strict_merged_followers_{STAMP}.md'
S='长期资产循环_全量穿透校验'
NS={'m':'http://schemas.openxmlformats.org/spreadsheetml/2006/main','r':'http://schemas.openxmlformats.org/officeDocument/2006/relationships'}

def sha(p):
    h=hashlib.sha256();
    with open(p,'rb') as f:
        for b in iter(lambda:f.read(1024*1024),b''): h.update(b)
    return h.hexdigest()
def locate(z):
    wb=ET.fromstring(z.read('xl/workbook.xml')); rels=ET.fromstring(z.read('xl/_rels/workbook.xml.rels'))
    ridmap={r.attrib['Id']:r.attrib['Target'] for r in rels}
    for sh in wb.find('m:sheets',NS):
        if sh.attrib.get('name')==S: return 'xl/'+ridmap[sh.attrib['{'+NS['r']+'}id']].lstrip('/')
    raise KeyError(S)
def colnum(col):
    n=0
    for ch in col: n=n*26+ord(ch)-64
    return n
def split(ref):
    m=re.match(r'([A-Z]+)(\d+)$',ref); return colnum(m.group(1)),int(m.group(2)),m.group(1)
def expand(rng):
    a,b=rng.split(':') if ':' in rng else (rng,rng); c1,r1,_=split(a); c2,r2,_=split(b); return c1,r1,c2,r2,a
def cell_pat():
    return r'<c\b(?=[^>]*\br="([A-Z]+\d+)")[^>]*/>|<c\b(?=[^>]*\br="([A-Z]+\d+)")[^>]*>.*?</c>'
def cells(xml):
    d={}
    for m in re.finditer(cell_pat(),xml,re.S): d[m.group(1) or m.group(2)]=m.group(0)
    return d
def get_xml(p):
    with zipfile.ZipFile(p) as z: return z.read(locate(z)).decode('utf-8')
def changed_merged_followers(xml, base_xml, min_row=64, max_row=180):
    cc=cells(xml); bc=cells(base_xml)
    merged=re.findall(r'<mergeCell\b[^>]*ref="([^"]+)"', base_xml)
    out=[]
    for rng in merged:
        c1,r1,c2,r2,top=expand(rng); tc,tr,_=split(top)
        if r2<min_row or r1>max_row: continue
        refs=set()
        for d in (cc,bc):
            for ref in d:
                c,r,_=split(ref)
                if c1<=c<=c2 and r1<=r<=r2 and not(c==tc and r==tr): refs.add(ref)
        for ref in sorted(refs,key=lambda x:split(x)[:2][::-1]):
            if cc.get(ref)!=bc.get(ref): out.append((ref,rng,bc.get(ref,''),cc.get(ref,'')))
    return out
def whole_merged_payload_violations(xml):
    merged=re.findall(r'<mergeCell\b[^>]*ref="([^"]+)"', xml)
    out=[]
    for m in re.finditer(cell_pat(),xml,re.S):
        ref=m.group(1) or m.group(2); cell=m.group(0); c,r,_=split(ref)
        if not any(x in cell for x in ['<f','<v','<is']): continue
        for rng in merged:
            c1,r1,c2,r2,top=expand(rng); tc,tr,_=split(top)
            if c1<=c<=c2 and r1<=r<=r2 and not(c==tc and r==tr):
                out.append((ref,rng,cell[:180])); break
    return out

def replace_or_insert_cell(xml, ref, base_cell):
    pat=cell_pat()
    found=False
    def repl(m):
        nonlocal found
        r=m.group(1) or m.group(2)
        if r==ref:
            found=True; return base_cell if base_cell is not None else ''
        return m.group(0)
    xml2=re.sub(pat,repl,xml,flags=re.S)
    if found or base_cell is None:
        return xml2
    # insert base cell in row order if missing in input but present in base
    col,row_letters=None,None
    cnum,row,col_letters=split(ref)
    row_pat=re.compile(r'(<row\b(?=[^>]*\br="'+str(row)+r'")[^>]*>)(.*?)(</row>)',re.S)
    mm=row_pat.search(xml2)
    if not mm: raise KeyError('row '+str(row))
    prefix,body,suffix=mm.groups(); insert_pos=len(body)
    for cm in re.finditer(cell_pat(),body,re.S):
        r=cm.group(1) or cm.group(2); cc,rr,_=split(r)
        if rr==row and cc>cnum:
            insert_pos=cm.start(); break
    new_body=body[:insert_pos]+base_cell+body[insert_pos:]
    return xml2[:mm.start()]+prefix+new_body+suffix+xml2[mm.end():]

base_xml=get_xml(BASE)
part=None
orig_before=sha(ORIG); base_before=sha(BASE); inp_before=sha(INP)
with zipfile.ZipFile(INP,'r') as zin:
    part=locate(zin)
    xml=zin.read(part).decode('utf-8')
    base_cells=cells(base_xml)
    before=changed_merged_followers(xml,base_xml)
    xml2=xml
    # exact revert all non-top-left merged followers in 64:180 to BASE cell fragment (including style s=2133) or absence.
    for ref,rng,base_cell,input_cell in before:
        xml2=replace_or_insert_cell(xml2,ref,base_cell if base_cell else None)
    after=changed_merged_followers(xml2,base_xml)
    payloads=whole_merged_payload_violations(xml2)
    ET.fromstring(xml2.encode('utf-8'))
    tmp=OUT.with_suffix('.tmp.xlsx')
    with zipfile.ZipFile(tmp,'w',compression=zipfile.ZIP_DEFLATED) as zout:
        for name in zin.namelist():
            data=zin.read(name)
            if name==part: data=xml2.encode('utf-8')
            zout.writestr(zin.getinfo(name),data)
    if OUT.exists(): OUT.unlink()
    tmp.replace(OUT)

with zipfile.ZipFile(OUT) as z:
    zip_test=z.testzip(); out_xml=z.read(part).decode('utf-8')
    calc_present='xl/calcChain.xml' in z.namelist()
    prefixes=all(x in out_xml for x in ['mc:Ignorable="x14ac xr xr2 xr3"','xmlns:x14ac=','xmlns:xr=','xmlns:xr2=','xmlns:xr3='])
    refs=re.findall(r'<c\b[^>]*\br="([A-Z]+\d+)"',out_xml)
    dup_count=sum(1 for k,v in __import__('collections').Counter(refs).items() if v>1)
    shared=[]
    for m in re.finditer(r'<c\b(?=[^>]*\br="([A-Z]+)(\d+)")[^>]*>.*?<f\b[^>]*\bt="shared"[^>]*>.*?</c>',out_xml,re.S):
        row=int(m.group(2))
        if 64<=row<=180: shared.append(m.group(1)+m.group(2))
    payloads_after=whole_merged_payload_violations(out_xml)
    changed_followers_after=changed_merged_followers(out_xml,base_xml)
changed_parts=[]
with zipfile.ZipFile(INP) as z1, zipfile.ZipFile(OUT) as z2:
    for n in sorted(set(z1.namelist())|set(z2.namelist())):
        d1=z1.read(n) if n in z1.namelist() else None
        d2=z2.read(n) if n in z2.namelist() else None
        if d1!=d2: changed_parts.append(n)
rows={}; load_formula_ok=load_value_ok=False
try:
    wbf=openpyxl.load_workbook(OUT,data_only=False,read_only=False,keep_links=True); load_formula_ok=True
except Exception as e: formula_err=repr(e)
try:
    wbv=openpyxl.load_workbook(OUT,data_only=True,read_only=False,keep_links=True); load_value_ok=True
    ws=wbv[S]; wsf=wbf[S] if load_formula_ok else None
    for r in [66,71,105,112,137,169,171,172,32,33,45,49,50,56,57,58]:
        rows[r]={'K':ws[f'K{r}'].value,'L':ws[f'L{r}'].value,'M':ws[f'M{r}'].value,'N':ws[f'N{r}'].value,'K_formula':wsf[f'K{r}'].value if wsf else None}
except Exception as e: value_err=repr(e)
lo_status='SKIP'
try:
    lo_dir=OUTDIR/f'lo_phase12_{STAMP}'
    if lo_dir.exists(): shutil.rmtree(lo_dir)
    lo_dir.mkdir(parents=True,exist_ok=True)
    soffice=shutil.which('soffice') or shutil.which('libreoffice') or '/Applications/LibreOffice.app/Contents/MacOS/soffice'
    if soffice and Path(soffice).exists():
        cp=subprocess.run([soffice,'--headless','--convert-to','xlsx','--outdir',str(lo_dir),str(OUT)],stdout=subprocess.PIPE,stderr=subprocess.PIPE,text=True,timeout=120)
        lo_status=f'exit={cp.returncode}; stdout={cp.stdout.strip()}; stderr={cp.stderr.strip()}'
        cands=list(lo_dir.glob('*.xlsx'))
        if cands:
            with zipfile.ZipFile(cands[0]) as z: lo_status += f'; roundtrip_zip_test={z.testzip()}'
except Exception as e: lo_status='ERROR '+repr(e)
main_rows_pass=all(rows.get(r,{}).get('N')=='✓' and abs(float(rows.get(r,{}).get('M') or 0))<0.001 for r in [66,71,105,112,137,169,171,172])
verify={
 'input':str(INP),'output':str(OUT),'verify':str(VERIFY),'worksheet_part':part,
 'root_cause_refined':'Phase11 removed payload but left merged follower style changes (M/N in A92:N92 etc changed from base s=2133 to s=13). This strict build restores every non-top-left merged follower in R64:R180 exactly to the base workbook fragment, including style IDs.',
 'changed_merged_followers_vs_base_before_count':len(before),'changed_merged_followers_vs_base_before':before,
 'changed_merged_followers_vs_base_after_count':len(changed_followers_after),'changed_merged_followers_vs_base_after':changed_followers_after,
 'merged_payload_violations_whole_sheet_after_count':len(payloads_after),'merged_payload_violations_whole_sheet_after':payloads_after[:20],
 'duplicate_cell_refs_sheet26':dup_count,'zip_testzip':zip_test,'calcChain_present':calc_present,'prefixes_preserved':prefixes,'shared_formula_cells_R64_R180':shared,
 'changed_parts':changed_parts,'orig_sha_before_after_same':orig_before==sha(ORIG),'base_sha_before_after_same':base_before==sha(BASE),'input_sha_before_after_same':inp_before==sha(INP),
 'openpyxl_formula_ok':load_formula_ok,'openpyxl_value_ok':load_value_ok,'main_rows_pass':main_rows_pass,'rows':rows,'libreoffice_smoke':lo_status
}
lines=[]
lines.append(f'# Phase 12 strict Excel repair attempt ({STAMP})')
lines.append('')
lines.append('## Refined root cause')
lines.append('上一版只删除了 merged follower 的 `inlineStr` payload，但仍把部分合并区非左上角 follower 的 style 从基准文件的 `s="2133"` 改成了 `s="13"`。Excel 仍然报 `Removed Records: Cell information`，说明在这类复杂 sheet 中，**merged follower 不仅不能有 payload，连 cell fragment/style 也应尽量保持原 workbook 原样**。')
lines.append('')
lines.append('本版把 `R64:R180` 内所有合并区非左上角 cell 与 Phase9 基准逐个比较，并把差异全部恢复成基准文件原始 fragment。')
lines.append('')
lines.append('## Output')
lines.append('- `'+str(OUT)+'`')
lines.append('')
lines.append('## Verification')
for k in ['changed_merged_followers_vs_base_before_count','changed_merged_followers_vs_base_after_count','merged_payload_violations_whole_sheet_after_count','duplicate_cell_refs_sheet26','zip_testzip','calcChain_present','prefixes_preserved','shared_formula_cells_R64_R180','changed_parts','orig_sha_before_after_same','base_sha_before_after_same','input_sha_before_after_same','openpyxl_formula_ok','openpyxl_value_ok','main_rows_pass','libreoffice_smoke']:
    lines.append(f'- `{k}`: `{verify[k]}`')
lines.append('')
lines.append('## Main row closure')
lines.append('| Row | K | L | M | N |')
lines.append('|---:|---:|---:|---:|:--:|')
for r in [66,71,105,112,137,169,171,172,32,33,45,49,50,56,57,58]:
    rc=rows.get(r,{})
    lines.append(f"| {r} | {rc.get('K')} | {rc.get('L')} | {rc.get('M')} | {rc.get('N')} |")
lines.append('')
lines.append('## JSON')
lines.append('```json')
lines.append(json.dumps(verify,ensure_ascii=False,indent=2,default=str))
lines.append('```')
VERIFY.write_text('\n'.join(lines),encoding='utf-8')
print(json.dumps(verify,ensure_ascii=False,indent=2,default=str))
