import pandas as pd
import json, re, math, warnings, requests
from datetime import datetime
from collections import Counter, defaultdict
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=UserWarning)
import xml.etree.ElementTree as ET
from keybert import KeyBERT
import os
import sys
from urllib.parse import parse_qs
from collections import defaultdict


query = sys.argv[1]  # everything after main.py
params = parse_qs(query.lstrip("?"))

username = params.get("username", [""])[0]
password = params.get("password", [""])[0]
form_id  = params.get("form_id",  [""])[0]
path = params.get("path", [""])[0]
Dict_path = params.get("Dict_path", [""])[0]


if not username or not password or not form_id:
    raise ValueError(f"Missing query params. Got: username={bool(username)}, password={bool(password)}, form_id={bool(form_id)}")


# =========================================
#  PIVOT EXCLUDE LIST (ADD HERE)
# =========================================
EXCLUDE_LIST = {
    "interview conducting survery",
    "union council",
    "cnic no",
    "phone no",
    "gps coordinates",
    "lot",
    "respondant name",
    "gender of beneficiary",
    "relation with beneficiary",
    "date of visit"
    "Other please specify"
}
# =========================
# 1) LOAD EXCEL
# =========================

df = pd.read_excel(Dict_path, sheet_name="Monitoring")

# =========================
# 2) REMOVE URDU FROM DESCRIPTION (COL 3)
# =========================
def remove_urdu(x):
    return re.sub(r'[\u0600-\u06FF]+', '', x).strip() if isinstance(x, str) else x

df.iloc[:, 2] = df.iloc[:, 2].map(remove_urdu)

# =========================
# 3) DROP EXTRA COLUMN IF EXISTS
# =========================
if "Unnamed: 3" in df.columns:
    df = df.drop(columns=["Unnamed: 3"])

# =========================
# 4) REMOVE IF/CONTINUE, IF/SELECTED, LOOP/FROM ROWS (FAST)
# =========================
row_text = df.fillna("").astype(str).agg(" ".join, axis=1)

df = df.loc[
    ~row_text.str.contains(r"If.*Continue|If.*Selected|Loop.*From", case=False, regex=True)
].copy()

# Remove empty rows
df = df.replace(r"^\s*$", pd.NA, regex=True).dropna(how="all").reset_index(drop=True)

# =========================
# 5) SHIFT HEADING ROWS FROM UID -> DESCRIPTION
# =========================
col1 = df.columns[0]  # UID
col3 = df.columns[2]  # Description

pattern = r".*(?:[A-Za-z]{2}|[A-Za-z]{2}(?:\(\)|\[\]|\{\})|\([A-Za-z\s\-]+\))$"

uid_str = df[col1].fillna("").astype(str).str.strip()

mask = uid_str.str.contains(pattern, case=False, na=False)
safe_mask = mask & df[col3].isna()

shifted_col3_indexes = df.index[safe_mask].tolist()

df.loc[safe_mask, col3] = uid_str[safe_mask] + " HEADING"
df.loc[safe_mask, col1] = ""

print("Total shifted rows:", safe_mask.sum())

# =========================
# 6) NORMALIZE UID + CLEAN DESCRIPTION
# =========================
df[col1] = (
    df[col1]
    .fillna("")
    .astype(str)
    .str.strip()
    .str.replace(".", "_", regex=False)
)

df[col3] = df[col3].replace(r"[()_/:]+", "", regex=True)

kw_model = KeyBERT(model="all-MiniLM-L6-v2")

def heading_style(text, max_words=3):
    if not isinstance(text, str) or not text.strip():
        return text

    text = text.strip()

    # detect HEADING row
    is_heading_row = text.endswith("HEADING")
    if is_heading_row:
        text = text.replace("HEADING", "").strip()

    #  SKIP KeyBERT if description is Other/Others
    if text.lower() == "other" or text.lower().startswith("other"):
        phrase = text  # keep original
    else:
        keywords = kw_model.extract_keywords(
            text,
            keyphrase_ngram_range=(1, 3),
            stop_words="english",
            top_n=1
        )
        phrase = keywords[0][0] if keywords else text

    # heading style formatting
    phrase = " ".join(phrase.split()[:max_words])
    phrase = re.sub(r"[^A-Za-z0-9\s\-]", "", phrase).strip()
    phrase = phrase.title()

    if is_heading_row:
        return phrase + " HEADING"

    return phrase

df[col3] = df[col3].apply(lambda x: heading_style(x, 3))



# Save cleaned excel
cleaned_path = r"Dictionary_Cleaned.xlsx"
df.to_excel(cleaned_path, index=False)
print("Cleaned Excel saved:", cleaned_path)

# =========================
# 7) BUILD JSON
# =========================

df = pd.read_excel(cleaned_path)

def to_bool(val):
    if pd.isna(val):
        return False
    if isinstance(val, bool):
        return val
    if isinstance(val, (int, float)):
        return val == 1
    if isinstance(val, str):
        return val.strip().lower() in ["1", "true", "yes"]
    return False

# =========================
# 🔥 FULLY DYNAMIC COLUMN HANDLING
# =========================
cols = list(df.columns)

UID_COL  = cols[0]
QNO_COL  = cols[1]
DESC_COL = cols[2]

# Detect Multiple column dynamically
MULT_COL = next((c for c in cols if "multiple" in c.lower()), None)

# If Multiple column missing → create it
if MULT_COL is None:
    df["Multiple"] = False
    MULT_COL = "Multiple"

# Standardize column names (internal use)
df = df.rename(columns={
    UID_COL: "UID",
    QNO_COL: "Q_No",
    DESC_COL: "Description",
    MULT_COL: "Multiple"
})


parents_json = {}
current_parent = None
current_uid = None
current_question = None

shifted_col3_indexes = set(shifted_col3_indexes)  # faster lookup

for idx, row in df.iterrows():
    uid = str(row["UID"]).strip() if pd.notna(row["UID"]) else ""
    qno = str(row["Q_No"]).strip() if pd.notna(row["Q_No"]) else ""
    desc = str(row["Description"]).strip() if pd.notna(row["Description"]) else ""

    # Heading row
    if idx in shifted_col3_indexes:
        current_parent = desc
        parents_json.setdefault(current_parent, {})
        current_uid = None
        current_question = None
        continue

    # New UID starts
    if uid:
        current_uid = uid
        current_question = None

        if current_parent is None:
            current_parent = "UNASSIGNED_HEADING"
            parents_json.setdefault(current_parent, {})

        multiple = to_bool(row["Multiple"])

        parents_json[current_parent][current_uid] = {
            "multiple": multiple,
            "questions": []
        }

    # Question & options
    if current_uid and desc:

        if ("?" in desc) or (qno and current_question is None):
            current_question = {
                "question_no": qno,
                "description": desc,
                "options": []
            }
            parents_json[current_parent][current_uid]["questions"].append(current_question)

        elif current_question is not None:
            current_question["options"].append({
                "value": qno,
                "text": desc
            })

final_json = {"Dictionary": parents_json}

output_json = r"Dictionary.json"
with open(output_json, "w", encoding="utf-8") as f:
    json.dump(final_json, f, indent=4, ensure_ascii=False)

print("Final structured JSON created successfully:")
print(output_json)

CFG = {
    "dictionary_file": "Dictionary.json",

    "auth": (username, password),

    "base_url": "https://server.algogtech.com/v1/projects/1/forms",
    "form_id": form_id,  #  CHANGE ONLY THIS

    # outputs
    "out_json": {
        "glossary": "glossary.json",
        "submission_table": path,
        "family": "family_combined.json",
        "dim_mat": "dim_mat_value_level.json"
    },
    "out_excel": {
        "submissions_flat": "Submissions_Flat.xlsx",
        "male_info": "Male_Info.xlsx",
        "female_info": "Female_Info.xlsx"
    },

    # rules
    "summary_min_count": 5,
    "max_int_len": 10,
    "string_length": 255,
    "int_length": 11
}


def discover_tables_odatalike(CFG):
    base = CFG["base_url"].rstrip("/")
    fid = CFG["form_id"].strip()

    service_root = f"{base}/{fid}"

    # 1) Always include main Submissions
    discovered = {
        "submissions": f"{service_root}/Submissions"
    }

    # 2) Try service document (some OData servers return entity sets here)
    try:
        r = requests.get(service_root, auth=CFG["auth"], timeout=30)
        if r.ok:
            j = r.json()
            # Many OData servers return {"value":[{"name":"Submissions"}, {"name":"Submissions.male_info"}, ...]}
            for item in j.get("value", []):
                name = item.get("name")
                if isinstance(name, str) and name.startswith("Submissions."):
                    key = name.split("Submissions.", 1)[1]
                    discovered[key] = f"{service_root}/{name}"
            if len(discovered) > 1:
                return discovered
    except Exception:
        pass

    # 3) Fallback: read $metadata (most reliable for OData)
    metadata_urls = [
        f"{service_root}/$metadata",
        f"{service_root}/$metadata?$format=xml",
    ]

    xml_text = None
    for mu in metadata_urls:
        try:
            r = requests.get(mu, auth=CFG["auth"], timeout=30)
            if r.ok and ("xml" in r.headers.get("Content-Type", "").lower() or r.text.lstrip().startswith("<")):
                xml_text = r.text
                break
        except Exception:
            continue

    if not xml_text:
        # Could not discover; return only Submissions
        return discovered

    # Parse XML metadata
    root = ET.fromstring(xml_text)

    # EntitySets live under: Edmx -> DataServices -> Schema -> EntityContainer -> EntitySet
    # Namespaces vary, so we match by local-name()
    def local(tag):
        return tag.split("}", 1)[-1]

    entity_set_names = []
    for el in root.iter():
        if local(el.tag) == "EntitySet":
            name = el.attrib.get("Name")
            if name:
                entity_set_names.append(name)

    for name in entity_set_names:
        if name.startswith("Submissions."):
            key = name.split("Submissions.", 1)[1]
            discovered[key] = f"{service_root}/{name}"

    return discovered


URLS = discover_tables_odatalike(CFG)

submissions_url = URLS["submissions"]

child_tables = {k: v for k, v in URLS.items() if k != "submissions"}

for table_key, table_url in child_tables.items():
    print(f"Fetching {table_key} -> {table_url}")
    # fetch/export each table


# ==========================================================
# ✅ HELPERS (keep yours; only new/updated ones below)
# ==========================================================


def is_file_value(v):
    if not isinstance(v, str):
        return False

    v = v.lower().strip()
    file_exts = (".jpg", ".jpeg", ".png", ".gif", ".pdf", ".doc", ".docx", ".xls", ".xlsx")

    return v.endswith(file_exts)


def is_file_value(v):
    if not isinstance(v, str):
        return False

    v = v.lower()
    return v.endswith((".jpg", ".jpeg", ".png", ".pdf", ".doc", ".docx"))





def profile_field(values):
    clean = [v for v in values if not is_nullish(v)]
    unique_vals = set(clean)

    return {
        "count": len(clean),
        "unique_count": len(unique_vals),
        "unique_ratio": (len(unique_vals) / len(clean)) if clean else 0
    }



def map_schema_dtype(dtype):
    dtype = str(dtype).lower().strip()

    if dtype in ["int", "integer"]:
        return "int"

    if dtype in ["datetime", "date", "time", "timestamp"]:
        return "timestamp"

    return "varchar"


def api_get(url, auth):
    r = requests.get(url, auth=auth)
    if r.status_code != 200:
        raise Exception(f"API Error {r.status_code}: {r.text[:300]}")
    return r.json()


def get_records(payload):
    if isinstance(payload, dict):
        return payload.get("value") or payload.get("results") or [payload]
    return payload


def flatten_record(obj, out=None):
    out = out or {}
    if isinstance(obj, dict):
        for k, v in obj.items():
            if isinstance(v, dict):
                flatten_record(v, out)
            else:
                out[k] = v
    return out


def is_nullish(v):
    if v is None:
        return True
    if isinstance(v, float) and math.isnan(v):
        return True
    if isinstance(v, str) and v.strip() == "":
        return True
    return False


def make_hashable(v):
    if is_nullish(v):
        return None
    if isinstance(v, (list, tuple, set)):
        return "|".join(str(x) for x in v if not is_nullish(x))
    if isinstance(v, dict):
        return json.dumps(v, ensure_ascii=False, sort_keys=True)
    return v


def detect_type(value, MAX_INT_LEN=10):
    if is_nullish(value):
        return "string"

    if isinstance(value, bool):
        return "int"

    if isinstance(value, int):
        return "int" if len(str(abs(value))) <= MAX_INT_LEN else "string"

    if isinstance(value, float):
        return "int" if value.is_integer() else "string"

    if isinstance(value, str):
        v = value.strip()

        # numeric string
        if v.lstrip("-").isdigit():
            return "int" if len(v.lstrip("-")) <= MAX_INT_LEN else "string"

        # datetime formats
        try:
            datetime.fromisoformat(v)
            return "datetime"
        except:
            return "string"

    return "string"


def clean_name(text):
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = re.sub(r"\(.*?\)", "", text)
    text = re.sub(r"[^a-z0-9\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def to_snake(text):
    if not isinstance(text, str):
        return ""
    text = re.sub(r"\s+", "_", text.lower())
    text = re.sub(r"_+", "_", text).strip("_")
    return text


def clean_label(text):
    if not isinstance(text, str):
        return ""
    text = re.sub(r"[^A-Za-z0-9 ]+", " ", text.strip())
    text = re.sub(r"\s+", " ", text).strip()
    return text.title()


def unique_name(name, used):
    base = name or "field"
    n = base
    i = 1
    while n in used:
        i += 1
        n = f"{base}_{i}"
    used.add(n)
    return n


def contains_yes_and_no(options):
    texts = {opt.get("text", "").strip().lower() for opt in options if isinstance(opt, dict)}
    return "yes" in texts and "no" in texts


# ==========================================================
# ✅ Dynamic discovery output filenames (NEW)
# ==========================================================

def ensure_dir(path):
    d = os.path.dirname(path)
    if d:
        os.makedirs(d, exist_ok=True)


def excel_path_for_table(cfg, table_key):
    """
    Dynamic excel naming:
      - submissions -> cfg["out_excel"]["submissions_flat"] (kept)
      - child tables -> <table_key>.xlsx in same folder as submissions_flat (or current dir)
    """
    submissions_xlsx = cfg["out_excel"].get("submissions_flat", "Submissions_Flat.xlsx")
    base_dir = os.path.dirname(submissions_xlsx)  # "" if no dir

    filename = f"{table_key}.xlsx"
    return os.path.join(base_dir, filename) if base_dir else filename


# ==========================================================
# ✅ Dictionary Parsing (same)
# ==========================================================
def parse_dictionary(dictionary_path):
    with open(dictionary_path, "r", encoding="utf-8") as f:
        dictionary_json = json.load(f)

    dictionary = dictionary_json.get("Dictionary", {})

    id_to_desc = {}
    id_to_options = {}
    id_to_multiple = {}   # ✅ NEW

    for heading, fields in dictionary.items():
        if not isinstance(fields, dict):
            continue

        for field_id, field_content in fields.items():
            questions = field_content.get("questions", [])
            if not questions:
                continue

            q = questions[0]

            id_to_desc[field_id] = q.get("description", "") or ""
            id_to_options[field_id] = q.get("options", []) or []

            # ✅ READ MULTIPLE FROM DICTIONARY
            id_to_multiple[field_id] = bool(field_content.get("multiple", False))

    return id_to_desc, id_to_options, id_to_multiple



# ==========================================================
# ✅ Build Glossary JSON (same)
# ==========================================================
def build_glossary(submissions_records, id_to_desc, cfg):
    flat = [flatten_record(r) for r in submissions_records]
    all_cols = sorted({k for row in flat for k in row.keys()})

    glossary = {}
    for col in all_cols:
        values = [row.get(col) for row in flat]
        sample = next((v for v in values if not is_nullish(v)), None)

        glossary[col] = {
            "description": id_to_desc.get(col, ""),
            "Type": detect_type(sample, cfg["max_int_len"]),
            "values": values
        }
    return glossary


# ==========================================================
# ✅ Build Summary + dim_mat (same)
# ==========================================================
def build_summary_cleaned(glossary, min_count=5):
    summary = {}

    for fid, fdata in glossary.items():
        values = fdata.get("values", [])
        total = len(values)

        safe_values = [make_hashable(v) for v in values]
        safe_values = [v for v in safe_values if v not in [None, ""]]

        if not safe_values:
            continue

        counts = Counter(safe_values)

        values_map = {
            str(k): {
                "count": int(v),
                "average": round((v / total) * 100, 2) if total else 0
            }
            for k, v in counts.items()
            if int(v) >= min_count
        }

        if not values_map:
            continue

        summary[fid] = {
            "description": fdata.get("description", ""),
            "Type": fdata.get("Type", "string"),
            "values": values_map
        }

    return summary


from collections import Counter

def build_dim_mat_dynamic(glossary, table_name="submissions"):
    dimensions = []
    metrics = []

    for fid, info in glossary.items():
        desc = (info.get("description") or "").strip()
        if not desc:
            continue

        label = clean_label(desc)
        dtype = str(info.get("Type", "string")).lower()
        raw_values = [
            make_hashable(v)
            for v in info.get("values", [])
            if not is_nullish(v) and not is_file_value(v)
        ]

        if not raw_values:
            continue

        # 🔥 value-level counts
        counts = Counter(raw_values)

        # ============================
        # ✅ DIMENSION WITH COUNTS
        # ============================
        dimensions.append({
            "source_id": fid,
            "name": label,
            "label": label,
            "values": {
                str(k): {"count": int(v)}
                for k, v in counts.items()
            },
            "type": dtype,
            "table": table_name
        })

        # ============================
        # (Optional) Metrics
        # ============================
        # agar chaaho to sirf total count rakho
        metrics.append({
            "source_id": fid,
            "name": label,
            "label": f"{label} Total Count",
            "aggregation": "count",
            "table": table_name,
            "value": sum(counts.values())
        })

    return {
        "dimensions": dimensions,
        "metrics": metrics
    }



# ==========================================================
# ✅ Submission schema + lookups (same, but cfg passed)
# ==========================================================
def build_submissions_schema(id_to_desc, id_to_options, id_to_multiple, submissions_records, cfg):
    flat = [flatten_record(r) for r in submissions_records]
    all_cols = {k for row in flat for k in row.keys()}

    col_values = {c: [] for c in all_cols}
    for row in flat:
        for c in all_cols:
            col_values[c].append(row.get(c))

    used_names = set()
    submissions_rows = []

    for field_id, desc in id_to_desc.items():
        options = id_to_options.get(field_id, [])

        values = col_values.get(field_id, [])
        sample = next((v for v in values if not is_nullish(v)), None)
        nullable = any(is_nullish(v) for v in values) if values else True

        # Check dictionary first
        multiple = bool(id_to_multiple.get(field_id, False))
        
        # ✅ Also infer from data: if any non-null string has a space between digits, it's a select_multiple!
        non_null_vals = [str(v).strip() for v in values if v is not None and str(v).strip() != ""]
        if any(" " in v for v in non_null_vals):
            multiple = True

        is_lookup = (
            isinstance(options, list)
            and len(options) >= 2
            and not contains_yes_and_no(options)
        )

        raw_dtype = detect_type(sample, cfg["max_int_len"])
        dtype = map_schema_dtype(raw_dtype)

        # ✅ MULTIPLE ALWAYS VARCHAR/TEXT
        if multiple:
            dtype = "varchar"

        length = 255 if dtype == "varchar" else ""

        name = unique_name(to_snake(clean_name(desc)), used_names)

        submissions_rows.append({
            "id": field_id,
            "name": name,
            "multiple_options": multiple,   # ✅ DICTIONARY DRIVEN
            "datatype": dtype,
            "nullable": bool(nullable),
            "predefined": "",
            "length": length,
            "is_lookup": bool(is_lookup),
            "lookup_reference": (name + "_Details") if is_lookup else ""
        })

    return submissions_rows


def build_lookup_tables(submissions_rows, id_to_options):
    lookup_tables = []
    id_to_row = {r["id"]: r for r in submissions_rows if isinstance(r, dict)}

    for field_id, row in id_to_row.items():
        if not row.get("is_lookup"):
            continue

        options = id_to_options.get(field_id, [])
        if not isinstance(options, list) or not options:
            continue

        option_map = {
            str(opt.get("value")): str(opt.get("text", "")).strip()
            for opt in options
            if isinstance(opt, dict) and opt.get("value") is not None
        }

        # ✅ Find the highest integer key and add 'Other' with the next number
        numeric_keys = [int(k) for k in option_map.keys() if str(k).isdigit()]
        next_id = max(numeric_keys) + 1 if numeric_keys else 1
        option_map[str(next_id)] = "Other"

        # ✅ Carry multiple flag so TableGeneratorService can create junction table
        multiple = bool(row.get("multiple_options", False))

        lookup_tables.append({
            "table_name": row.get("lookup_reference"),
            "Name": {"options": option_map},
            "dtype": "varchar",
            "multiple": multiple
        })

    return lookup_tables


# ==========================================================
# ✅ Dynamic child schema (NEW) – no A4/A5, no male/female assumptions
# ==========================================================
META_KEYS = {"__id", "__Submissions-id"}


def extract_question_ids_from_child_records(child_records, id_to_desc):
    """
    Collect question IDs from:
      - top-level keys that are actual question ids (present in dictionary)
      - nested blocks (dict values) => take their keys too
    """
    qids = set()

    for rec in child_records:
        if not isinstance(rec, dict):
            continue

        for k, v in rec.items():
            if k in META_KEYS:
                continue

            # Top-level question id
            if k in id_to_desc:
                qids.add(k)

            # Nested block of questions
            if isinstance(v, dict):
                for kk in v.keys():
                    if kk in id_to_desc:
                        qids.add(kk)

    return qids


def build_child_table_schema_dynamic(child_tables_records, id_to_desc, submissions_rows, cfg):
    """
    child_tables_records: dict(table_key -> list_of_records)
    returns: dict(table_key -> schema_rows)
    """
    sub_map = {r["id"]: r for r in submissions_rows if isinstance(r, dict)}
    child_schema = {}

    for table_key, records in child_tables_records.items():
        qids = extract_question_ids_from_child_records(records, id_to_desc)

        used_names = set()
        rows = []

        for qid in sorted(qids):
            template = sub_map.get(qid)
            if template:
                item = template.copy()
                item["name"] = unique_name(to_snake(clean_name(id_to_desc.get(qid, qid))), used_names)
                rows.append(item)
            else:
                name = unique_name(to_snake(clean_name(id_to_desc.get(qid, qid))), used_names)
                rows.append({
                    "id": qid,
                    "name": name,
                    "multiple_options": False,
                    "datatype": "varchar",
                    "nullable": True,
                    "predefined": "",
                    "length": cfg["string_length"],
                    "is_lookup": False,
                    "lookup_reference": ""
                })

        # meta link row (kept, but now NOT hardcoded to int)
        rows.append({
            "id": "uuid",
            "name": "Submission-id",
            "multiple_options": False,
            "datatype": "varchar",  # safer across systems
            "nullable": False,
            "predefined": False,
            "length": cfg["string_length"],
            "is_lookup": False,
            "lookup_reference": ""
        })

        child_schema[table_key] = rows

    return child_schema


# ==========================================================
# ✅ Dynamic "family" combined (NEW) – merges ALL child tables
# ==========================================================
def chunk_by_submission_id(records):
    grouped = defaultdict(list)
    for r in records:
        if not isinstance(r, dict):
            continue
        sid = r.get("__Submissions-id")
        if sid:
            grouped[sid].append(r)
    return grouped


def merge_children_by_submission_id(child_tables_records):
    """
    Returns:
      merged[sid] = { table_key: [...records...] , ... }
    """
    chunks = {k: chunk_by_submission_id(v) for k, v in child_tables_records.items()}
    all_ids = set()
    for m in chunks.values():
        all_ids |= set(m.keys())

    merged = {}
    for sid in all_ids:
        merged[sid] = {k: chunks[k].get(sid, []) for k in chunks.keys()}
    return merged


# ==========================================================
# ✅ MAIN RUN (dynamic end-to-end)
# ==========================================================
def run_dynamic(CFG, URLS):
    print(" Running Full Pipeline (Dynamic tables)")
    print(" FORM_ID:", CFG["form_id"])

    # Dictionary
    id_to_desc, id_to_options, id_to_multiple = parse_dictionary(CFG["dictionary_file"])
    # your function unchanged

    # --- Fetch submissions
    submissions_payload = api_get(URLS["submissions"], CFG["auth"])
    submissions_records = get_records(submissions_payload)

    # --- Discover child tables from URLS (everything except submissions)
    child_urls = {k: v for k, v in URLS.items() if k != "submissions"}

    # --- Fetch child tables dynamically
    child_tables_records = {}
    for table_key, table_url in child_urls.items():
        print(f"Fetching {table_key} -> {table_url}")
        payload = api_get(table_url, CFG["auth"])
        child_tables_records[table_key] = get_records(payload)

    # ======================================================
    # ✅ Excel outputs (dynamic)
    # ======================================================
    # submissions flat (kept from cfg)
    subs_xlsx = CFG["out_excel"]["submissions_flat"]
    ensure_dir(subs_xlsx)
    pd.json_normalize([flatten_record(x) for x in submissions_records]).to_excel(subs_xlsx, index=False)

    # child xlsx (dynamic)
    child_excel_outputs = {}  # store actual paths for logging
    for table_key, records in child_tables_records.items():
        out_path = excel_path_for_table(CFG, table_key)
        ensure_dir(out_path)
        pd.json_normalize(records).to_excel(out_path, index=False)
        child_excel_outputs[table_key] = out_path

    # ======================================================
    # ✅ Glossary / Summary / dim_mat (same)
    # ======================================================
    glossary = build_glossary(submissions_records, id_to_desc, CFG)
    summary_cleaned = build_summary_cleaned(glossary, CFG["summary_min_count"])
    dim_mat = build_dim_mat_dynamic(glossary, table_name="submissions")


    # ======================================================
    # ✅ Submission_Table JSON (dynamic child_table)
    # ======================================================
    submissions_rows = build_submissions_schema(
        id_to_desc,
        id_to_options,
        id_to_multiple,
        submissions_records,
        CFG
    )

    lookup_tables = build_lookup_tables(submissions_rows, id_to_options)
    child_table_schema = build_child_table_schema_dynamic(child_tables_records, id_to_desc, submissions_rows, CFG)

    submission_table = {
        "Schema": {
            "Tables": {
                "submissions": submissions_rows
            },
            "Lookup_Tables": lookup_tables,
            "Child_Table": child_table_schema
        }
    }
    # ✅ ADD submission_id row at start of EVERY child table (ONLY ONCE)
    default_submission_id_row = {
        "id": "submission_id",
        "name": "submission_id",
        "multiple_options": False,
        "datatype": "varchar",
        "nullable": False,
        "predefined": "",
        "length": 255,
        "is_lookup": False,
        "lookup_reference": ""
    }

    child_tables = submission_table["Schema"].get("Child_Table", {})

    for child_name, rows in child_tables.items():
        if not isinstance(rows, list):
            continue

        # ✅ remove old if exists (avoid duplicates)
        rows = [r for r in rows if isinstance(r, dict) and r.get("id") != "submission_id"]

        # ✅ add at start
        rows.insert(0, default_submission_id_row)

        child_tables[child_name] = rows

    submission_table["Schema"]["Child_Table"] = child_tables

    # ✅ ADD __id/uuid row at start of submissions (ONLY ONCE)
    default_uuid_row = {
        "id": "__id",
        "name": "uuid",
        "multiple_options": False,
        "datatype": "varchar",
        "nullable": False,
        "predefined": "",
        "length": 255,
        "is_lookup": False,
        "lookup_reference": ""
    }

    subs = submission_table["Schema"]["Tables"]["submissions"]

    # ✅ Remove if already exists (avoid duplicates)
    subs = [r for r in subs if isinstance(r, dict) and r.get("id") != "__id"]

    # ✅ Insert at start
    subs.insert(0, default_uuid_row)

    submission_table["Schema"]["Tables"]["submissions"] = subs

    # ======================================================
    # ✅ Family combined JSON (dynamic)
    # ======================================================
    family = merge_children_by_submission_id(child_tables_records)



    # ======================================================
    # ✅ Save JSON outputs (same)
    # ======================================================
    for _, out_path in CFG["out_json"].items():
        ensure_dir(out_path)

    with open(CFG["out_json"]["glossary"], "w", encoding="utf-8") as f:
        json.dump(glossary, f, ensure_ascii=False, indent=2)

    with open(CFG["out_json"]["submission_table"], "w", encoding="utf-8") as f:
        json.dump(submission_table, f, ensure_ascii=False, indent=2)

    with open(CFG["out_json"]["family"], "w", encoding="utf-8") as f:
        json.dump(family, f, ensure_ascii=False, indent=2)

    with open(CFG["out_json"]["dim_mat"], "w", encoding="utf-8") as f:
        json.dump(dim_mat, f, ensure_ascii=False, indent=2)



    # ======================================================
    # ✅ Logs
    # ======================================================
    print("\n DONE Outputs Created:")
    print("XLSX:", subs_xlsx)
    for k, p in child_excel_outputs.items():
        print("XLSX:", p)

    for k, v in CFG["out_json"].items():
        print("JSON:", v)




    test_id = list(id_to_desc.keys())[0]


# Call it
run_dynamic(CFG, URLS)
