from __future__ import annotations

from pathlib import Path
import zipfile, re, html, shutil, hashlib, json, math
from collections import defaultdict
import openpyxl
from openpyxl.utils import get_column_letter, column_index_from_string

SRC = Path('/Users/neo/.hermes/cache/documents/doc_1d136388d7ce_wanchuan_anhui_v9_v2_1_long_asset_R29_R169_asset_schedule_atomic_EXCEL_REPAIR_SAFE_20260513-094726v1.xlsx')
TRACE = Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049/cashflow_D38_D47_full_trace_workpaper_20260513-111246.xlsx')
OUTDIR = Path('/Users/neo/.hermes/audits/cashflow-asset-hook-20260512-083049')
STAMP = '20260513-121715'
OUT = OUTDIR / f'wanchuan_anhui_v9_v2_1_long_asset_CF_bottom_wide_schemeB_{STAMP}.xlsx'
REPORT_JSON = OUTDIR / f'wide_schemeB_generation_summary_{STAMP}.json'

TARGET_SHEET = '长期资产循环_全量穿透校验'
WIDE_SHEET = '长期资产循环_CF底层宽表'
MAP_SHEET = 'CF底层公式映射明细'

# Existing style ids observed in the uploaded Excel-saved workbook. They are reused only for readability;
# no merged ranges are added in the new sheets.
STYLE_TITLE = 2115
STYLE_HEADER = 367
STYLE_NUM = 267
STYLE_TEXT = 5
STYLE_CHECK = 367
STYLE_DEFAULT = 0

# Load values/formulas from source and trace.
wbf = openpyxl.load_workbook(SRC, data_only=False, read_only=True)
wbv = openpyxl.load_workbook(SRC, data_only=True, read_only=True)
wsf = wbf[TARGET_SHEET]
wsv = wbv[TARGET_SHEET]

# Trace workbook used for helper sheet only.
trace_wb = openpyxl.load_workbook(TRACE, data_only=True, read_only=True)
trace_ws = trace_wb['02_全量明细_长表']

# New wide columns: each original asset category has 账面 / CF底层 / 差异.
CATS = [
    ('fixed', '固定资产', 'C', 'C', 'D', 'E'),
    ('cip', '在建工程', 'D', 'F', 'G', 'H'),
    ('intang', '无形资产', 'E', 'I', 'J', 'K'),
    ('deferred', '长期待摊费用', 'F', 'L', 'M', 'N'),
    ('iprop', '投资性房地产', 'G', 'O', 'P', 'Q'),
    ('rou', '使用权资产', 'H', 'R', 'S', 'T'),
    ('dev', '开发支出', 'I', 'U', 'V', 'W'),
    ('other', '其他/抵消', 'J', 'X', 'Y', 'Z'),
]
CAT_BY_KEY = {k: (name, orig, book, cf, diff) for k, name, orig, book, cf, diff in CATS}
BOOK_COLS = [book for _, _, _, book, _, _ in CATS]
CF_COLS = [cf for _, _, _, _, cf, _ in CATS]
DIFF_COLS = [diff for _, _, _, _, _, diff in CATS]

MAIN_BENCH = {
    66: 'D39',
    71: 'D40',
    104: 'D41',
    111: 'D42',
    136: 'D43',
    169: 'D44',
    171: 'D46',
    172: 'D47',
}
SECTION_ROWS = {61, 62, 63, 65, 70, 72, 91, 103, 110, 112, 125, 135, 137, 147, 159, 168, 170, 173, 174, 175, 176, 177, 178, 180}
DISPLAY_ROWS = set(range(93, 103)) | set(range(105, 109)) | {113,114} | set(range(116,135)) | {139} | set(range(149,159)) | set(range(161,168))


def qsheet(name: str) -> str:
    return "'" + name.replace("'", "''") + "'"


def fref(sheet: str, cell: str) -> str:
    return f"{qsheet(sheet)}!{cell}"


def nref(sheet: str, cell: str) -> str:
    return f"N({fref(sheet, cell)})"


def quote_formula(formula: str | None) -> str | None:
    if formula is None:
        return None
    formula = str(formula)
    if not formula:
        return None
    return formula[1:] if formula.startswith('=') else formula


def get_cell_value(sheet: str, cell: str):
    try:
        return wbv[sheet][cell].value
    except Exception:
        return None


def to_num(v) -> float:
    if v is None:
        return 0.0
    if isinstance(v, bool):
        return 1.0 if v else 0.0
    if isinstance(v, (int, float)) and not isinstance(v, bool):
        if math.isnan(v) if isinstance(v, float) else False:
            return 0.0
        return float(v)
    try:
        if isinstance(v, str):
            s = v.strip().replace(',', '')
            if s in ('', '—', '-', '✓', '✗', 'DISPLAY'):
                return 0.0
            return float(s)
    except Exception:
        return 0.0
    return 0.0

# Formulas in CF底层 columns by row/category. Formulas are stored without leading '='.
cf_formula = defaultdict(dict)
cf_note = {}

def setcf(row: int, cat: str, formula: str):
    cf_formula[row][cat] = quote_formula(formula)

# D39 年初数: bottom-ish cash-flow chain by asset class.
setcf(67, 'fixed', f"={fref('数据源_CF小合并','F49')}")
setcf(67, 'cip', f"={fref('在建工程情况表','B24')}")
setcf(67, 'intang', f"={fref('数据源_CF小合并','F60')}")
setcf(67, 'deferred', f"={fref('数据源_CF小合并','F68')}")
setcf(67, 'iprop', f"={fref('试算表','B38')}+{fref('试算表','B39')}")
setcf(67, 'rou', f"={fref('数据源_CF小合并','F57')}")
setcf(67, 'dev', f"={fref('数据源_CF小合并','F64')}")
setcf(67, 'other', f"={fref('附注','F364')}")
setcf(68, 'fixed', f"=-{fref('数据源_CF小合并','F50')}")
setcf(68, 'intang', f"=-{fref('数据源_CF小合并','F61')}")
setcf(68, 'iprop', f"=-{fref('试算表','B40')}")
setcf(68, 'rou', f"=-{fref('数据源_CF小合并','F58')}")
setcf(69, 'fixed', f"=-{fref('数据源_CF小合并','F51')}")
setcf(69, 'cip', f"=-{fref('数据源_CF小合并','F55')}")
setcf(69, 'intang', f"=-{fref('试算表','B69')}")
setcf(69, 'iprop', f"=-{fref('试算表','B41')}")
setcf(69, 'rou', f"=-{fref('试算表','B65')}")
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(66, cat, f"={cfcol}67+{cfcol}68+{cfcol}69")
cf_note[66] = 'D39年初数：CF底层为试算表/数据源/附注拆到资产类别层。'
cf_note[67] = 'D39原值层：固定/在建/无形/长摊/投房/使权/开发/其他按CF底层来源列示。'
cf_note[68] = 'D39累计折旧/摊销层：按CF贡献方向为负。'
cf_note[69] = 'D39减值准备层：按CF贡献方向为负。'

# D40 本年增加: natural cash-flow split.
setcf(73, 'fixed', f"={fref('固定资产','H7')}")
setcf(73, 'cip', f"={fref('在建工程情况表','D24')}")
setcf(73, 'intang', f"={fref('无形资产变动表','I7')}")
setcf(73, 'deferred', f"={fref('长期待摊费用变动明细表','F17')}")
setcf(73, 'iprop', f"={fref('投资性房地产','D8')}")
setcf(73, 'rou', "=0")
setcf(73, 'other', f"=-{fref('在建工程情况表','Q24')}")
setcf(74, 'dev', f"={fref('附注','C1205')}")
setcf(75, 'other', f"=-{fref('附注','G1205')}")
setcf(76, 'other', f"={nref('现金流量表底稿','C240')}-{nref('附注','C691')}")
setcf(77, 'cip', f"=-{fref('在建工程情况表','E24')}")
setcf(77, 'other', f"={fref('在建工程情况表','E24')}")
setcf(78, 'fixed', f"={fref('附注','E1224')}")
setcf(78, 'intang', f"=-N({fref('附注','E1224')})")
setcf(79, 'fixed', f"=-{fref('附注','F1224')}")
setcf(79, 'deferred', f"=N({fref('附注','F1224')})")
setcf(80, 'fixed', f"={fref('附注','C1239')}")
setcf(80, 'intang', f"=-N({fref('附注','C1239')})")
setcf(81, 'fixed', f"=-{fref('附注','B1239')}")
setcf(81, 'deferred', f"=N({fref('附注','B1239')})")
setcf(82, 'other', f"=N({fref('附注','E364')})")
setcf(83, 'other', f"=-N({fref('附注','F364')})")
setcf(84, 'fixed', f"={fref('附注','D756')}")
setcf(85, 'fixed', f"=-{fref('附注','C756')}")
setcf(86, 'rou', f"=-{fref('长期应付款明细','G23')}")
setcf(87, 'rou', f"={fref('长期应付款明细','H23')}")
setcf(88, 'other', f"=-{fref('附注','H1244')}")
setcf(89, 'other', f"={fref('附注','I1244')}")
setcf(90, 'rou', f"={fref('现金流量表底稿','C248')}")
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(71, cat, f"=SUM({cfcol}73:{cfcol}90)")
for r in range(73, 91):
    cf_note[r] = 'D40现金流自然拆；CF底层列按现金流量表D40 -> D28/C251/C238:C250链条列示。'
cf_note[71] = 'D40主行：CF底层合计只汇总R73:R90自然现金流拆分；R93:R102为资产展示段，不重复进CF底层。'

# D41: cash-flow D41 is zero/blank in source.
for cat in CAT_BY_KEY:
    setcf(104, cat, '=0')
    setcf(109, cat, '=0')
cf_note[104] = 'D41源现金流为0；资产展示行不直接进入CF底层。'
cf_note[109] = '综合调整：CF底层按0列示。'

# D42: current-period depreciation/amortization only, signed as cash-flow contribution.
setcf(115, 'fixed', f"=-{fref('固定资产','H30')}")
setcf(115, 'intang', f"=-{fref('无形资产变动表','I27')}")
setcf(115, 'deferred', f"=-{fref('长期待摊费用变动明细表','L17')}")
setcf(115, 'iprop', f"=-{fref('投资性房地产','D29')}")
setcf(115, 'rou', f"=-{fref('使用权资产 ','G21')}")
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(111, cat, f"={cfcol}115")
cf_note[111] = 'D42主行：CF底层为本期计提折旧/摊销的负向贡献。'
cf_note[115] = 'D42本期计提：固定H30/无形I27/长摊L17/投房D29/使用权G21，按现金流D42方向为负。'

# D43: disposal/decrease chain, signed as D43 formula contribution.
setcf(138, 'fixed', f"=-{fref('固定资产','H15')}")
setcf(138, 'intang', f"=-{fref('无形资产变动表','I15')}")
setcf(138, 'deferred', f"=-{nref('现金流量表底稿','C193')}")
setcf(138, 'iprop', f"=-{fref('投资性房地产','D15')}")
setcf(138, 'rou', f"=-{fref('使用权资产 ','G12')}")
setcf(138, 'other', f"=-{nref('现金流量表底稿','C203')}")
setcf(140, 'fixed', f"={fref('固定资产','H37')}")
setcf(140, 'intang', f"={fref('无形资产变动表','I33')}")
setcf(140, 'iprop', f"={fref('投资性房地产','D35')}")
setcf(140, 'rou', f"={fref('使用权资产 ','G26')}")
setcf(141, 'fixed', f"={fref('固定资产','H54')}")
setcf(141, 'intang', f"={fref('无形资产变动表','I47')}")
setcf(141, 'iprop', f"={fref('投资性房地产','D51')}")
# Disposal gain/loss and VAT chains are netted to zero here; full leaves are in helper sheet.
for cat in CAT_BY_KEY:
    setcf(142, cat, '=0')
    setcf(144, cat, '=0')
    setcf(145, cat, '=0')
# Manual/non-cash bottom leaves C201/C202 are true residual contributors to D43.
setcf(143, 'other', f"=-{nref('现金流量表底稿','C201')}-{nref('现金流量表底稿','C202')}")
setcf(146, 'fixed', f"=-{nref('资产减值明细表','C23')}")
setcf(146, 'cip', f"=-{nref('资产减值明细表','C24')}")
setcf(146, 'intang', f"=-{nref('资产减值明细表','C25')}")
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(136, cat, f"=SUM({cfcol}138:{cfcol}146)")
for r in [138, 140, 141, 142, 143, 144, 145, 146]:
    cf_note[r] = 'D43现金流自然拆；按D43公式方向列示原值减少、累折/减值回转、抵消链和减值项。'
cf_note[136] = 'D43主行：CF底层合计汇总R138:R146；R149:R167为资产展示/减值展示段，不重复进CF底层。'

# D44/D46/D47.
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(169, cat, f"={cfcol}66+{cfcol}71+{cfcol}104+{cfcol}111+{cfcol}136")
setcf(171, 'fixed', f"={fref('数据源_CF小合并','E49')}-{fref('数据源_CF小合并','E50')}-{fref('数据源_CF小合并','E51')}")
setcf(171, 'cip', f"={fref('试算表','H57')}")
setcf(171, 'intang', f"={fref('试算表','H70')}")
setcf(171, 'deferred', f"={fref('试算表','H75')}")
setcf(171, 'iprop', f"={fref('试算表','H42')}")
setcf(171, 'rou', f"={fref('试算表','H66')}")
setcf(171, 'dev', f"={fref('试算表','H71')}")
setcf(171, 'other', f"=N({fref('附注','E364')})")
for cat, (_, _, _, cfcol, _) in CAT_BY_KEY.items():
    setcf(172, cat, f"={cfcol}169-{cfcol}171")
cf_note[169] = 'D44：CF底层按D39+D40+D41+D42+D43滚动推算。'
cf_note[171] = 'D46：CF底层按现金流D46公式下钻到期末账面/试算表/数据源。'
cf_note[172] = 'D47：CF底层按D44-D46计算。'

# Notes for display-only rows.
for r in DISPLAY_ROWS:
    cf_note.setdefault(r, 'DISPLAY_ONLY：资产sheet展示/内部转换/明细段，不直接进入现金流D列底层公式；CF底层列留空，避免重复计算。')
for r in SECTION_ROWS:
    cf_note.setdefault(r, '分隔/说明行。')

# XML cell model.
cells = defaultdict(dict)  # row -> col -> {kind, value/formula/cache/style}
wide_values = {}  # (col,row) -> cached value
cell_formulas = {}  # (col,row) -> formula without =


def set_text(row, col, text, style=STYLE_DEFAULT):
    if text is None or text == '':
        return
    cells[row][col] = {'type': 'text', 'value': str(text), 'style': style}
    wide_values[(col, row)] = text


def set_number(row, col, value, style=STYLE_NUM):
    if value is None:
        return
    cells[row][col] = {'type': 'number', 'value': value, 'style': style}
    wide_values[(col, row)] = value


def set_formula(row, col, formula, cache=None, style=STYLE_NUM, str_result=False):
    if not formula:
        return
    f = quote_formula(formula)
    cells[row][col] = {'type': 'formula', 'formula': f, 'cache': cache, 'style': style, 'str_result': str_result}
    cell_formulas[(col, row)] = f
    if cache is not None:
        wide_values[(col, row)] = cache


def original_val(row, orig_col):
    return wsv[f'{orig_col}{row}'].value


def original_formula_or_val(row, orig_col):
    return wsf[f'{orig_col}{row}'].value

# Build rows.
# Title/notes rows.
set_text(61, 'A', '【★★ 方案B：长期资产循环 — 账面取数 vs 现金流D38:D47底层公式 宽表对照 ★★】', STYLE_TITLE)
set_text(62, 'A', '左侧每个资产类别“账面取数”引用原《长期资产循环_全量穿透校验》61-180已修公式；中间“CF底层”按现金流量表D38:D47递归底层链列示；差异=账面-CF。', STYLE_TITLE)
set_text(63, 'A', '注：DISPLAY_ONLY行是资产sheet展示/内部转换/明细段，不直接进入现金流D列底层公式，CF列留空，避免重复计算；完整CF叶子见《CF底层公式映射明细》。', STYLE_TITLE)

# Header row 64.
headers = {'A': '编号', 'B': '项目'}
for key, name, orig, book, cf, diff in CATS:
    headers[book] = f'{name}_账面取数'
    headers[cf] = f'{name}_CF底层'
    headers[diff] = f'{name}_差异'
headers.update({
    'AA': '账面合计', 'AB': 'CF底层合计', 'AC': '账面-CF差异', 'AD': '现金流D列benchmark', 'AE': 'CF-D列差异', 'AF': 'Check', 'AG': 'CF路径/说明'
})
for col, text in headers.items():
    set_text(64, col, text, STYLE_HEADER)

# Data rows 65-180.
for r in range(65, 181):
    a_val = wsv[f'A{r}'].value if wsv[f'A{r}'].value is not None else wsf[f'A{r}'].value
    b_val = wsv[f'B{r}'].value if wsv[f'B{r}'].value is not None else wsf[f'B{r}'].value
    style = STYLE_TITLE if r in SECTION_ROWS else STYLE_TEXT
    set_text(r, 'A', a_val, style)
    set_text(r, 'B', b_val, style)
    # Category account / CF / diff columns.
    any_cf = False
    for key, name, orig, book, cf, diff in CATS:
        ov = original_val(r, orig)
        of = original_formula_or_val(r, orig)
        # 账面取数: reference original sheet if numeric or formula exists; show dash/text as text.
        if ov is None and of is None:
            pass
        elif isinstance(ov, (int, float)) and not isinstance(ov, bool):
            set_formula(r, book, f"={fref(TARGET_SHEET, orig+str(r))}", cache=ov, style=STYLE_NUM)
        elif of and isinstance(of, str) and of.startswith('='):
            # formula cached to text or blank: preserve as a reference if value is non-empty, else keep blank/text.
            if ov is not None and ov not in ('', None):
                if isinstance(ov, str):
                    set_formula(r, book, f"={fref(TARGET_SHEET, orig+str(r))}", cache=ov, style=STYLE_TEXT, str_result=True)
                else:
                    set_formula(r, book, f"={fref(TARGET_SHEET, orig+str(r))}", cache=ov, style=STYLE_NUM)
        else:
            if ov is not None:
                set_text(r, book, ov, STYLE_TEXT)
        # CF底层 formula if mapped.
        f = cf_formula.get(r, {}).get(key)
        if f:
            set_formula(r, cf, f, cache=None, style=STYLE_NUM)
            any_cf = True
            # 差异 = 账面 - CF, even when the book side is blank; N() makes text/blank zero.
            set_formula(r, diff, f"=N({book}{r})-N({cf}{r})", cache=None, style=STYLE_NUM)
    # Row totals / benchmark / check.
    if r not in SECTION_ROWS:
        set_formula(r, 'AA', '=' + '+'.join(f'N({c}{r})' for c in BOOK_COLS), cache=None, style=STYLE_NUM)
        set_formula(r, 'AB', '=' + '+'.join(f'N({c}{r})' for c in CF_COLS), cache=None, style=STYLE_NUM)
        set_formula(r, 'AC', f'=AA{r}-AB{r}', cache=None, style=STYLE_NUM)
        if r in MAIN_BENCH:
            root = MAIN_BENCH[r]
            set_formula(r, 'AD', f"={fref('现金流量表', root)}", cache=get_cell_value('现金流量表', root), style=STYLE_NUM)
            set_formula(r, 'AE', f'=AB{r}-AD{r}', cache=None, style=STYLE_NUM)
            set_formula(r, 'AF', f'=IF(AND(ABS(AC{r})<0.001,ABS(AE{r})<0.001),"✓","✗")', cache=None, style=STYLE_CHECK, str_result=True)
        elif any_cf:
            set_formula(r, 'AF', f'=IF(ABS(AC{r})<0.001,"✓","✗")', cache=None, style=STYLE_CHECK, str_result=True)
        elif r in DISPLAY_ROWS:
            set_text(r, 'AF', 'DISPLAY', STYLE_CHECK)
    set_text(r, 'AG', cf_note.get(r), STYLE_TEXT)

# Formula evaluator for cached values.
cell_ref_re = re.compile(r"(?<![A-Za-z0-9_])([A-Z]{1,3})([0-9]{1,7})(?![A-Za-z0-9_])")
quoted_ref_re = re.compile(r"'((?:[^']|'')+)'!\$?([A-Z]{1,3})\$?([0-9]{1,7})")
sum_re = re.compile(r"SUM\(([^()]+)\)", re.I)
n_re = re.compile(r"N\(([^()]+)\)", re.I)


def cell_num(col: str, row: int) -> float:
    return to_num(wide_values.get((col, row)))


def eval_range_same_sheet(a: str, b: str) -> float:
    m1 = re.match(r'([A-Z]+)(\d+)$', a)
    m2 = re.match(r'([A-Z]+)(\d+)$', b)
    if not m1 or not m2:
        return 0.0
    c1, r1 = column_index_from_string(m1.group(1)), int(m1.group(2))
    c2, r2 = column_index_from_string(m2.group(1)), int(m2.group(2))
    total = 0.0
    for rr in range(min(r1, r2), max(r1, r2)+1):
        for cc in range(min(c1, c2), max(c1, c2)+1):
            total += cell_num(get_column_letter(cc), rr)
    return total


def eval_arg(arg: str) -> float:
    arg = arg.strip()
    if ':' in arg and '!' not in arg:
        a, b = [x.strip().replace('$','') for x in arg.split(':', 1)]
        return eval_range_same_sheet(a, b)
    return eval_formula_numeric(arg)


def eval_formula_numeric(formula: str | None) -> float:
    if formula is None:
        return 0.0
    expr = str(formula).strip()
    if expr.startswith('='):
        expr = expr[1:]
    if not expr:
        return 0.0
    # IF checks are handled elsewhere.
    if expr.upper().startswith('IF('):
        return 0.0
    # Evaluate SUM() calls first.
    while True:
        m = sum_re.search(expr)
        if not m:
            break
        args = [x.strip() for x in m.group(1).split(',')]
        val = sum(eval_arg(a) for a in args if a)
        expr = expr[:m.start()] + str(val) + expr[m.end():]
    # Evaluate N() calls.
    while True:
        m = n_re.search(expr)
        if not m:
            break
        val = eval_arg(m.group(1))
        expr = expr[:m.start()] + str(val) + expr[m.end():]
    # Replace quoted sheet refs.
    def repl_q(m):
        sheet = m.group(1).replace("''", "'")
        cell = f"{m.group(2)}{m.group(3)}"
        return str(to_num(get_cell_value(sheet, cell)))
    expr = quoted_ref_re.sub(repl_q, expr)
    # Replace same-sheet refs.
    def repl_cell(m):
        return str(cell_num(m.group(1), int(m.group(2))))
    expr = cell_ref_re.sub(repl_cell, expr)
    expr = expr.replace('^', '**')
    # Strip Excel string literals if any, not expected in numeric formulas.
    try:
        return float(eval(expr, {"__builtins__": {}}, {}))
    except Exception:
        return 0.0

# Iteratively compute cached numeric values for formula cells.
for _ in range(10):
    for (col, row), formula in list(cell_formulas.items()):
        cell = cells[row][col]
        if cell.get('str_result'):
            # Check formulas; set after numeric cells below.
            continue
        val = eval_formula_numeric(formula)
        cell['cache'] = val
        wide_values[(col, row)] = val

# Compute check formula caches.
for (col, row), formula in list(cell_formulas.items()):
    cell = cells[row][col]
    if not cell.get('str_result'):
        continue
    if formula.upper().startswith('IF('):
        ac = abs(cell_num('AC', row))
        ae = abs(cell_num('AE', row)) if ('AE', row) in wide_values else 0.0
        if row in MAIN_BENCH:
            cache = '✓' if ac < 0.001 and ae < 0.001 else '✗'
        else:
            cache = '✓' if ac < 0.001 else '✗'
        cell['cache'] = cache
        wide_values[(col, row)] = cache

# Helper sheet data copied from the full trace workbook.
helper_rows = []
trace_headers = [trace_ws.cell(4, c).value for c in range(1, 15)]
helper_keep = ['Path_ID','Root','Root项目','Depth','Source_Cell','Node_Type','Raw_Value','Formula_or_Raw','Label','Note','是否金额型','说明']
idx = {h:i for i,h in enumerate(trace_headers)}
helper_rows.append(helper_keep)
for vals in trace_ws.iter_rows(min_row=5, values_only=True):
    row = []
    for h in helper_keep:
        row.append(vals[idx[h]] if h in idx and idx[h] < len(vals) else None)
    helper_rows.append(row)

# XML escaping/building.
def esc(s) -> str:
    return html.escape(str(s), quote=True)


def is_num(v) -> bool:
    return isinstance(v, (int, float)) and not isinstance(v, bool) and (not (isinstance(v, float) and math.isnan(v)))


def cell_xml(col: str, row: int, cell: dict) -> str:
    ref = f'{col}{row}'
    sattr = f' s="{cell.get("style", STYLE_DEFAULT)}"' if cell.get('style', STYLE_DEFAULT) is not None else ''
    typ = cell['type']
    if typ == 'text':
        txt = str(cell.get('value',''))
        space = ' xml:space="preserve"' if txt != txt.strip() or '\n' in txt else ''
        return f'<c r="{ref}"{sattr} t="inlineStr"><is><t{space}>{esc(txt)}</t></is></c>'
    if typ == 'number':
        return f'<c r="{ref}"{sattr}><v>{cell["value"]}</v></c>'
    if typ == 'formula':
        f = esc(cell['formula'])
        cache = cell.get('cache')
        if cell.get('str_result'):
            cache_text = '' if cache is None else esc(cache)
            return f'<c r="{ref}"{sattr} t="str"><f>{f}</f><v>{cache_text}</v></c>'
        if cache is None:
            return f'<c r="{ref}"{sattr}><f>{f}</f></c>'
        if is_num(cache):
            # Avoid -0.0 noise.
            if abs(float(cache)) < 1e-12:
                cache = 0
            return f'<c r="{ref}"{sattr}><f>{f}</f><v>{cache:.15g}</v></c>'
        # String cache for a formula.
        return f'<c r="{ref}"{sattr} t="str"><f>{f}</f><v>{esc(cache)}</v></c>'
    raise ValueError(cell)


def build_wide_xml() -> bytes:
    col_widths = {
        'A': 12, 'B': 36,
        'C': 16, 'D': 16, 'E': 14,
        'F': 16, 'G': 16, 'H': 14,
        'I': 16, 'J': 16, 'K': 14,
        'L': 16, 'M': 16, 'N': 14,
        'O': 16, 'P': 16, 'Q': 14,
        'R': 16, 'S': 16, 'T': 14,
        'U': 16, 'V': 16, 'W': 14,
        'X': 16, 'Y': 16, 'Z': 14,
        'AA': 16, 'AB': 16, 'AC': 16, 'AD': 16, 'AE': 16, 'AF': 10, 'AG': 60,
    }
    cols_xml = ['<cols>']
    for col, width in col_widths.items():
        i = column_index_from_string(col)
        cols_xml.append(f'<col min="{i}" max="{i}" width="{width}" customWidth="1"/>')
    cols_xml.append('</cols>')
    rows_xml = []
    for r in range(61, 181):
        if not cells.get(r):
            continue
        height = 30 if r in (61,62,63,64) or r in SECTION_ROWS else 18
        row_xml = [f'<row r="{r}" ht="{height}" customHeight="1">']
        for cidx in range(1, column_index_from_string('AG')+1):
            col = get_column_letter(cidx)
            if col in cells[r]:
                row_xml.append(cell_xml(col, r, cells[r][col]))
        row_xml.append('</row>')
        rows_xml.append(''.join(row_xml))
    xml = f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<dimension ref="A61:AG180"/>
<sheetViews><sheetView tabSelected="0" workbookViewId="0"><pane xSplit="2" ySplit="64" topLeftCell="C65" activePane="bottomRight" state="frozen"/><selection pane="bottomRight" activeCell="C65" sqref="C65"/></sheetView></sheetViews>
<sheetFormatPr defaultRowHeight="15"/>
{''.join(cols_xml)}
<sheetData>{''.join(rows_xml)}</sheetData>
<autoFilter ref="A64:AG180"/>
<pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/>
</worksheet>'''
    return xml.encode('utf-8')


def build_helper_xml() -> bytes:
    max_row = len(helper_rows)
    max_col = len(helper_rows[0])
    cols_xml = '<cols>' + ''.join(f'<col min="{i}" max="{i}" width="{w}" customWidth="1"/>' for i, w in enumerate([16,10,18,8,28,18,14,36,28,40,12,50], start=1)) + '</cols>'
    rows_xml = []
    for r, row in enumerate(helper_rows, start=1):
        row_xml = [f'<row r="{r}">']
        for c, v in enumerate(row, start=1):
            if v is None:
                continue
            col = get_column_letter(c)
            style = STYLE_HEADER if r == 1 else (STYLE_NUM if is_num(v) else STYLE_TEXT)
            if is_num(v):
                row_xml.append(cell_xml(col, r, {'type':'number','value':v,'style':style}))
            else:
                row_xml.append(cell_xml(col, r, {'type':'text','value':str(v),'style':style}))
        row_xml.append('</row>')
        rows_xml.append(''.join(row_xml))
    xml = f'''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<dimension ref="A1:{get_column_letter(max_col)}{max_row}"/>
<sheetViews><sheetView workbookViewId="0"><pane ySplit="1" topLeftCell="A2" activePane="bottomLeft" state="frozen"/><selection pane="bottomLeft" activeCell="A2" sqref="A2"/></sheetView></sheetViews>
<sheetFormatPr defaultRowHeight="15"/>
{cols_xml}
<sheetData>{''.join(rows_xml)}</sheetData>
<autoFilter ref="A1:{get_column_letter(max_col)}{max_row}"/>
<pageMargins left="0.7" right="0.7" top="0.75" bottom="0.75" header="0.3" footer="0.3"/>
</worksheet>'''
    return xml.encode('utf-8')

wide_xml = build_wide_xml()
helper_xml = build_helper_xml()

# Add sheet parts surgically to the uploaded v1 workbook.
def sha256(p: Path) -> str:
    return hashlib.sha256(p.read_bytes()).hexdigest()

with zipfile.ZipFile(SRC, 'r') as zin:
    names = zin.namelist()
    worksheet_nums = []
    for n in names:
        m = re.match(r'xl/worksheets/sheet(\d+)\.xml$', n)
        if m:
            worksheet_nums.append(int(m.group(1)))
    next_sheet_num = max(worksheet_nums) + 1
    wide_part = f'xl/worksheets/sheet{next_sheet_num}.xml'
    helper_part = f'xl/worksheets/sheet{next_sheet_num+1}.xml'
    wb_xml = zin.read('xl/workbook.xml').decode('utf-8')
    rels_xml = zin.read('xl/_rels/workbook.xml.rels').decode('utf-8')
    ct_xml = zin.read('[Content_Types].xml').decode('utf-8')
    # Compute new sheetIds and rIds.
    sheet_ids = [int(x) for x in re.findall(r'sheetId="(\d+)"', wb_xml)]
    next_sheet_id = max(sheet_ids) + 1
    rids = [int(x) for x in re.findall(r'Id="rId(\d+)"', rels_xml)]
    next_rid = max(rids) + 1
    wide_rid = f'rId{next_rid}'
    helper_rid = f'rId{next_rid+1}'

    # Remove stale calcChain relationship/content type and part, then add new sheet relationships.
    rels_xml = re.sub(r'<Relationship\b[^>]*Type="http://schemas\.openxmlformats\.org/officeDocument/2006/relationships/calcChain"[^>]*/>', '', rels_xml)
    rels_insert = (
        f'<Relationship Id="{wide_rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet{next_sheet_num}.xml"/>'
        f'<Relationship Id="{helper_rid}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet{next_sheet_num+1}.xml"/>'
    )
    rels_xml = rels_xml.replace('</Relationships>', rels_insert + '</Relationships>')

    # Add sheets to workbook.xml and make Excel recalc.
    sheet_insert = (
        f'<sheet name="{esc(WIDE_SHEET)}" sheetId="{next_sheet_id}" r:id="{wide_rid}"/>'
        f'<sheet name="{esc(MAP_SHEET)}" sheetId="{next_sheet_id+1}" r:id="{helper_rid}"/>'
    )
    wb_xml = wb_xml.replace('</sheets>', sheet_insert + '</sheets>')
    if re.search(r'<calcPr\b[^>]*/>', wb_xml):
        wb_xml = re.sub(r'<calcPr\b[^>]*/>', '<calcPr calcMode="auto" fullCalcOnLoad="1" forceFullCalc="1"/>', wb_xml)
    elif '</workbook>' in wb_xml:
        wb_xml = wb_xml.replace('</workbook>', '<calcPr calcMode="auto" fullCalcOnLoad="1" forceFullCalc="1"/></workbook>')

    ct_xml = re.sub(r'<Override PartName="/xl/calcChain\.xml"[^>]*/>', '', ct_xml)
    ct_insert = (
        f'<Override PartName="/{wide_part}" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>'
        f'<Override PartName="/{helper_part}" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>'
    )
    ct_xml = ct_xml.replace('</Types>', ct_insert + '</Types>')

    # Optional docProps/app update: increment worksheet count and append titles. Keep simple/robust.
    app_xml = zin.read('docProps/app.xml').decode('utf-8') if 'docProps/app.xml' in names else None
    if app_xml:
        app_xml = re.sub(r'(<vt:lpstr>Worksheets</vt:lpstr></vt:variant><vt:variant><vt:i4>)(\d+)(</vt:i4>)', lambda m: f"{m.group(1)}{int(m.group(2))+2}{m.group(3)}", app_xml, count=1)
        # Increase TitlesOfParts vector size and append two sheet names before </vt:vector> of TitlesOfParts.
        # This is cosmetic; if regex misses, workbook still opens.
        m = re.search(r'(<TitlesOfParts><vt:vector size=")(\d+)(" baseType="lpstr">)', app_xml)
        if m:
            app_xml = app_xml[:m.start()] + f"{m.group(1)}{int(m.group(2))+2}{m.group(3)}" + app_xml[m.end():]
            # append at the last </vt:vector></TitlesOfParts>
            app_xml = app_xml.replace('</vt:vector></TitlesOfParts>', f'<vt:lpstr>{esc(WIDE_SHEET)}</vt:lpstr><vt:lpstr>{esc(MAP_SHEET)}</vt:lpstr></vt:vector></TitlesOfParts>')

    with zipfile.ZipFile(OUT, 'w', compression=zipfile.ZIP_DEFLATED) as zout:
        for n in names:
            if n == 'xl/calcChain.xml':
                continue
            if n == 'xl/workbook.xml':
                zout.writestr(n, wb_xml.encode('utf-8'))
            elif n == 'xl/_rels/workbook.xml.rels':
                zout.writestr(n, rels_xml.encode('utf-8'))
            elif n == '[Content_Types].xml':
                zout.writestr(n, ct_xml.encode('utf-8'))
            elif n == 'docProps/app.xml' and app_xml is not None:
                zout.writestr(n, app_xml.encode('utf-8'))
            else:
                zout.writestr(n, zin.read(n))
        zout.writestr(wide_part, wide_xml)
        zout.writestr(helper_part, helper_xml)

summary = {
    'source': str(SRC),
    'source_sha256': sha256(SRC),
    'output': str(OUT),
    'output_sha256': sha256(OUT),
    'wide_sheet': WIDE_SHEET,
    'helper_sheet': MAP_SHEET,
    'new_parts': [wide_part, helper_part],
    'calcChain_removed': True,
    'wide_rows': '61:180',
    'wide_columns': 'A:AG',
    'helper_rows': len(helper_rows),
    'mapped_cf_formula_cells': sum(1 for (col,row), f in cell_formulas.items() if col in CF_COLS),
    'mapped_diff_formula_cells': sum(1 for (col,row), f in cell_formulas.items() if col in DIFF_COLS),
    'main_benchmark_rows': MAIN_BENCH,
    'checks_cached': {str(r): wide_values.get(('AF', r)) for r in MAIN_BENCH},
    'main_values': {str(r): {'AA_book': wide_values.get(('AA', r)), 'AB_cf': wide_values.get(('AB', r)), 'AC_diff': wide_values.get(('AC', r)), 'AD_cfD': wide_values.get(('AD', r)), 'AE_cf_minus_D': wide_values.get(('AE', r)), 'AF': wide_values.get(('AF', r))} for r in MAIN_BENCH},
}
REPORT_JSON.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding='utf-8')
print(json.dumps(summary, ensure_ascii=False, indent=2))
