import requests
import json
import re
import argparse

# ─────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────
# Default API endpoint; used as the default value of the --url option.
AUTH_URL = ""
# Default login e-mail; used as the default value of the --email option.
EMAIL = ""
# Default login password; used as the default value of the --password option.
PASSWORD = ""
# Default path of the generated schema file (default of the --output option).
OUTPUT_FILE = "schema_output.json"


# ─────────────────────────────────────────────


def fetch_data(auth_url, email, password, auth_mode="credentials", token="", request_method="POST", timeout_s=60):
    """Fetch records from the API using credentials or bearer-token auth.

    Args:
        auth_url: URL of the endpoint to call.
        email: Login e-mail; sent in the JSON body in credentials mode.
        password: Login password; sent in the JSON body in credentials mode.
        auth_mode: "token" sends an ``Authorization: Bearer`` header (when a
            token is given); any other value is treated as "credentials".
        token: Bearer token used in token mode.
        request_method: "GET" issues a GET request; anything else issues a
            POST. Falsy values default to POST.
        timeout_s: Timeout in seconds handed to ``requests``.

    Returns:
        The list of records. Accepted response shapes are a JSON list, or a
        JSON object with a ``data`` list (``data: null`` counts as empty).
        Any other shape is reported with a warning and yields ``[]``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        Other ``requests`` exceptions propagate unchanged.
    """
    print("[1/2] Fetching data...")
    method = (request_method or "POST").upper()
    auth_mode = (auth_mode or "credentials").lower()

    headers = {}
    payload = None

    if auth_mode == "token":
        if token:
            headers["Authorization"] = f"Bearer {token}"
    else:
        payload = {"email": email, "password": password}

    if method == "GET":
        if payload is not None:
            # A GET carries no JSON body here, so make the dropped
            # credentials visible instead of failing silently.
            print("[WARN] GET request: email/password are not sent with GET.")
        res = requests.get(auth_url, headers=headers, timeout=timeout_s)
    else:
        res = requests.post(auth_url, json=payload, headers=headers, timeout=timeout_s)

    res.raise_for_status()
    raw = res.json()

    if isinstance(raw, dict) and "data" in raw:
        records = raw["data"]
        if records is None:
            # {"data": null} means "no records", not a malformed response.
            records = []
        if isinstance(records, list):
            print(f"   [OK] {raw.get('records', len(records))} total record(s) reported.")
            print(f"   [OK] {len(records)} record(s) in this response.")
            return records
        # A non-list "data" falls through to the warning below.
    elif isinstance(raw, list):
        print(f"   [OK] {len(raw)} record(s) fetched.")
        return raw

    print("[WARN] Unexpected response shape:")
    print(json.dumps(raw, indent=2)[:500])
    return []


def infer_datatype(value):
    """Map a sample value to a schema datatype label.

    ``None`` and unrecognised strings become "varchar"; native JSON types map
    to "boolean", "int", "decimal", "array" or "json"; any string that starts
    like a date, time or datetime is reported as "timestamp".
    """
    if value is None:
        return "varchar"

    # Order matters: bool is a subclass of int, so it must be tested first.
    native_types = (
        (bool, "boolean"),
        (int, "int"),
        (float, "decimal"),
        (list, "array"),
        (dict, "json"),
    )
    for py_type, label in native_types:
        if isinstance(value, py_type):
            return label

    text = str(value).strip()

    # Date, time and datetime layouts that are all normalised to "timestamp".
    temporal_patterns = (
        r"^\d{4}-\d{2}-\d{2}$",  # 2024-01-01
        r"^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}",  # 2024-01-01 12:00:00 / 2024-01-01T12:00:00
        r"^\d{4}/\d{2}/\d{2}",  # 2024/01/01
        r"^\d{2}-\d{2}-\d{4}",  # 01-12-2024 or 12-01-2024
        r"^\d{2}/\d{2}/\d{4}",  # 01/12/2024 or 12/01/2024
        r"^\d{1,2}-\d{1,2}-\d{4}",  # 1-1-2024
        r"^\d{1,2}/\d{1,2}/\d{4}",  # 1/1/2024
        r"^\d{2}:\d{2}:\d{2}",  # 12:00:00
        r"^\d{1,2}:\d{2}(:\d{2})?(\s?[AP]M)?$",  # 12:00, 12:00:00, 12:00 PM
        r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z?$",  # ISO 8601 with milliseconds
        r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}$",  # ISO 8601 with timezone
    )

    looks_temporal = any(
        re.match(regex, text, re.IGNORECASE) for regex in temporal_patterns
    )
    return "timestamp" if looks_temporal else "varchar"


def is_lookup_field(key, seen_values):
    """Report whether every non-empty value is a one- or two-digit string.

    ``None`` and "" are ignored; a column with no remaining values is not a
    lookup. ``key`` is accepted for interface symmetry and is not inspected.
    """
    found_code = False
    for candidate in seen_values:
        if candidate is None or candidate == "":
            continue  # missing values do not count either way
        if not isinstance(candidate, str):
            return False
        if re.match(r"^\d{1,2}$", candidate) is None:
            return False
        found_code = True
    return found_code


def make_field(field_id, key, seen_values):
    """Build the schema descriptor of one column from its observed values.

    Args:
        field_id: Value stored under "id"; also the prefix of the lookup
            reference name.
        key: Value stored under "name".
        seen_values: Every value observed for this column.

    Returns:
        A dict with the keys "id", "name", "multiple_options", "datatype",
        "nullable", "predefined", "length", "is_lookup" and
        "lookup_reference".
    """
    # Infer the type from the first *real* value. Empty strings count as
    # missing for nullability and lookup detection, so they are skipped here
    # as well rather than forcing "varchar".
    sample = next((v for v in seen_values if v is not None and v != ""), None)
    nullable = any(v is None or v == "" for v in seen_values)
    datatype = infer_datatype(sample)
    lookup = is_lookup_field(key, seen_values)

    # Multi-select answers arrive as space-separated strings (e.g. "1 2 3").
    # Only genuine strings are inspected: the text form of a list or dict
    # always contains spaces and would be misreported as multi-option.
    text_values = [v.strip() for v in seen_values if isinstance(v, str) and v.strip()]
    # A space inside a date/time string ("2024-01-01 12:00:00", "12:00 PM")
    # is part of the timestamp, not an option separator.
    is_multiple = any(
        " " in text and infer_datatype(text) != "timestamp"
        for text in text_values
    )

    if is_multiple:
        datatype = "varchar"  # force text/varchar for multiple options

    return {
        "id": field_id,
        "name": key,
        "multiple_options": is_multiple,
        "datatype": datatype,
        "nullable": nullable,
        "predefined": "",
        "length": 255 if datatype == "varchar" else None,
        "is_lookup": lookup,
        "lookup_reference": f"{field_id}_lookup" if lookup else ""
    }


def _is_child_rows(value):
    """Return True when *value* is a non-empty list whose first item is a dict."""
    return isinstance(value, list) and bool(value) and isinstance(value[0], dict)


def _register_lookup(field, lookup_tables, seen_names):
    """Append the lookup-table stub for *field* unless it is already listed."""
    if not field["is_lookup"]:
        return
    table_name = field["lookup_reference"]
    if table_name in seen_names:
        return
    seen_names.add(table_name)
    lookup_tables.append({
        "table_name": table_name,
        "Name": {"options": {}},
        "dtype": "varchar"
    })


def build_schema(records):
    """Infer the schema document from a list of fetched records.

    Keys whose value is a non-empty list of dicts become child tables (one
    column per key found in the rows); every other key becomes a column of
    the ``submissions`` table, except the identifier keys "__id", "uuid" and
    "_id". Columns detected as lookups additionally get one stub entry in
    ``lookup_tables``.

    Args:
        records: Iterable of record dicts. Entries that are not dicts are
            ignored.

    Returns:
        ``{"Schema": {"tables": {"submissions": [...]},
        "lookup_tables": [...], "child_tables": {...}}}``.
    """
    print("[2/2] Building schema...")

    dict_records = [record for record in records if isinstance(record, dict)]

    # Pass 1: learn which keys hold child rows in at least one record, so an
    # empty occurrence of the same key elsewhere is not mistaken for a
    # flat "array" column.
    child_keys = set()
    for record in dict_records:
        for key, value in record.items():
            if _is_child_rows(value):
                child_keys.add(key)

    # Pass 2: collect the observed values per column.
    flat_values = {}
    child_values = {}

    for record in dict_records:
        for key, value in record.items():
            if _is_child_rows(value):
                columns = child_values.setdefault(key, {})
                for row in value:
                    if not isinstance(row, dict):
                        continue  # only the first row was type-checked
                    for ck, cv in row.items():
                        columns.setdefault(ck, []).append(cv)
            elif key in child_keys and (value is None or value == []):
                # Empty placeholder of a child table: contributes no rows.
                continue
            else:
                flat_values.setdefault(key, []).append(value)

    lookup_tables = []
    seen_lookup_names = set()

    # ── Submissions table ──
    submissions = []

    for key, values in flat_values.items():
        if key in ("__id", "uuid", "_id"):
            continue

        field = make_field(key, key, values)
        submissions.append(field)
        _register_lookup(field, lookup_tables, seen_lookup_names)

    # ── Child tables ──
    child_tables = {}

    for tname, fields_map in child_values.items():
        rows = [
            {"id": "__id", "name": "uuid", "multiple_options": False,
             "datatype": "varchar", "nullable": False, "predefined": "",
             "length": 255, "is_lookup": False, "lookup_reference": ""},

            {"id": "__Submissions-id", "name": "submission_id",
             "multiple_options": False, "datatype": "varchar", "nullable": False,
             "predefined": "", "length": 255, "is_lookup": False, "lookup_reference": ""}
        ]
        # Row keys equal to a predefined key column must not add a second
        # column with the same id.
        reserved_ids = {column["id"] for column in rows}

        for ck, cvalues in fields_map.items():
            if ck in reserved_ids:
                continue

            field = make_field(ck, ck, cvalues)
            rows.append(field)
            _register_lookup(field, lookup_tables, seen_lookup_names)

        child_tables[tname] = rows

    return {
        "Schema": {
            "tables": {"submissions": submissions},
            "lookup_tables": lookup_tables,
            "child_tables": child_tables
        }
    }


def normalize_schema_datatypes(schema):
    """Return a copy of *schema* with legacy date labels set to "timestamp".

    Walks nested dicts and lists; wherever a dict holds the key "datatype"
    with the value "date" or "datetime", the value is replaced. Anything
    that is neither a dict nor a list is returned as is.
    """
    if isinstance(schema, list):
        return [normalize_schema_datatypes(entry) for entry in schema]

    if not isinstance(schema, dict):
        return schema

    converted = {}
    for name, node in schema.items():
        is_legacy_label = name == "datatype" and node in ("date", "datetime")
        if is_legacy_label:
            converted[name] = "timestamp"
        else:
            converted[name] = normalize_schema_datatypes(node)
    return converted


def parse_args():
    """Parse the command-line options and return the argparse namespace.

    Connection defaults (--url, --email, --password, --output) come from the
    module-level configuration constants.
    """
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ("--url", {"default": AUTH_URL, "help": "API URL"}),
        ("--email", {"default": EMAIL}),
        ("--password", {"default": PASSWORD}),
        ("--auth_mode", {"default": "credentials", "choices": ["credentials", "token"]}),
        ("--token", {"default": ""}),
        ("--request_method", {"default": "POST", "choices": ["GET", "POST"]}),
        ("--timeout_s", {"default": 60, "type": int}),
        ("--output", {"default": OUTPUT_FILE}),
    )

    cli = argparse.ArgumentParser(description="Build schema JSON from API response")
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()


def main():
    """Run the CLI: validate options, fetch records, build and save the schema.

    Validation problems and request failures are reported on stdout and end
    the run early; nothing is raised to the caller for ordinary exceptions.
    """
    try:
        args = parse_args()

        if not args.url.strip():
            print("\n[ERROR] API URL is required.")
            return

        if args.auth_mode == "credentials":
            if not args.email.strip() or not args.password.strip():
                print("\n[ERROR] Email and password required.")
                return

        if args.auth_mode == "token":
            if not args.token.strip():
                print("\n[ERROR] Token required.")
                return

        records = fetch_data(
            auth_url=args.url,
            email=args.email,
            password=args.password,
            auth_mode=args.auth_mode,
            token=args.token,
            request_method=args.request_method,
            timeout_s=args.timeout_s,
        )

        if not records:
            print("\n[ERROR] No data received.")
            return

        schema = build_schema(records)
        schema = normalize_schema_datatypes(schema)

        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(schema, f, indent=2, ensure_ascii=False)

        print(f"\n[OK] Schema saved to: {args.output}")
        print(f"  • Submission fields : {len(schema['Schema']['tables']['submissions'])}")
        print(f"  • Child tables      : {len(schema['Schema']['child_tables'])}")
        print(f"  • Lookup tables     : {len(schema['Schema']['lookup_tables'])}")

    except requests.exceptions.HTTPError as e:
        response = e.response
        if response is not None:
            print(f"\n[ERROR] HTTP Error: {response.status_code} - {response.text}")
        else:
            # HTTPError can be constructed without a response attached.
            print(f"\n[ERROR] HTTP Error: {e}")
    except requests.exceptions.ConnectionError:
        print("\n[ERROR] Connection error.")
    except requests.exceptions.Timeout:
        # Read timeouts are not ConnectionErrors; report them plainly instead
        # of as an "unexpected" failure with a traceback. Kept after the
        # ConnectionError handler so connect timeouts keep their old message.
        print("\n[ERROR] Request timed out.")
    except Exception as e:
        import traceback
        print(f"\n[ERROR] Unexpected error: {e}")
        traceback.print_exc()

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
