import re, os

base = r"C:\Users\dohertj2\Desktop\gr"
ddl_dir = os.path.join(base, "ddl", "tables")
os.makedirs(ddl_dir, exist_ok=True)

with open(os.path.join(base, "table_dump_raw.txt"), "r", encoding="utf-8") as f:
    content = f.read()

tables = re.findall(r'---TABLE_START:(.+?)---(.+?)---TABLE_END:\1---', content, re.DOTALL)

schema_lines = ["# Schema Reference\n\nGenerated from the ZB (Galaxy Repository) database.\n\n## Tables\n"]

for tbl_name, tbl_content in tables:
    tbl_name = tbl_name.strip()
    
    # Parse columns
    sections = tbl_content.strip().split("COLUMN_NAME")
    cols = []
    pk_cols = []
    fks = []
    
    # Parse column definitions (first section after header)
    if len(sections) >= 2:
        col_section = sections[1]
        lines = [l.strip() for l in col_section.split("\n") if l.strip() and not l.strip().startswith("---")]
        for line in lines:
            parts = [p.strip() for p in line.split("|")]
            if len(parts) >= 6 and parts[0] not in ("", "COLUMN_NAME") and not all(c in "-|" for c in parts[0]):
                col_name = parts[0]
                data_type = parts[1]
                char_len = parts[2] if parts[2] != "NULL" else None
                num_prec = parts[3] if parts[3] != "NULL" else None
                num_scale = parts[4] if parts[4] != "NULL" else None
                nullable = parts[5]
                default_val = parts[6] if len(parts) > 6 and parts[6] != "NULL" else None
                
                # Build type string
                if char_len:
                    type_str = f"{data_type}({char_len})"
                elif num_prec and data_type not in ("int", "smallint", "bigint", "tinyint", "bit"):
                    type_str = f"{data_type}({num_prec},{num_scale})"
                else:
                    type_str = data_type
                
                cols.append({
                    "name": col_name,
                    "type": type_str,
                    "nullable": nullable,
                    "default": default_val
                })
    
    # Parse PK columns (second COLUMN_NAME section)
    if len(sections) >= 3:
        pk_section = sections[2]
        lines = [l.strip() for l in pk_section.split("\n") if l.strip() and not l.strip().startswith("---") and not l.strip().startswith("fk_")]
        for line in lines:
            parts = [p.strip() for p in line.split("|")]
            pk_candidate = parts[0]
            if pk_candidate and pk_candidate not in ("", "COLUMN_NAME") and not all(c in "-|" for c in pk_candidate):
                # Stop if we hit the FK header
                if "ref_table" in line or "fk_column" in line:
                    break
                pk_cols.append(pk_candidate)
    
    # Parse FKs
    fk_matches = re.findall(r'fk_column\|ref_table\|ref_column\|fk_name\n-+\|.+\n(.*?)(?=---TABLE_END|$)', tbl_content, re.DOTALL)
    if fk_matches:
        for fk_block in fk_matches:
            for line in fk_block.strip().split("\n"):
                line = line.strip()
                if line and "|" in line:
                    parts = [p.strip() for p in line.split("|")]
                    if len(parts) >= 3 and parts[0] and not all(c in "-" for c in parts[0]):
                        fks.append({"col": parts[0], "ref_table": parts[1], "ref_col": parts[2]})
    
    # Write DDL file
    ddl_lines = [f"-- Table: {tbl_name}\n"]
    ddl_lines.append(f"CREATE TABLE [{tbl_name}] (")
    col_defs = []
    for c in cols:
        line = f"    [{c['name']}] {c['type']}"
        if c['nullable'] == 'NO':
            line += " NOT NULL"
        else:
            line += " NULL"
        if c['default']:
            line += f" DEFAULT {c['default']}"
        col_defs.append(line)
    if pk_cols:
        col_defs.append(f"    CONSTRAINT [PK_{tbl_name}] PRIMARY KEY ({', '.join('[' + c + ']' for c in pk_cols)})")
    ddl_lines.append(",\n".join(col_defs))
    ddl_lines.append(");\nGO\n")
    
    if fks:
        for fk in fks:
            ddl_lines.append(f"ALTER TABLE [{tbl_name}] ADD FOREIGN KEY ([{fk['col']}]) REFERENCES [{fk['ref_table']}] ([{fk['ref_col']}]);")
        ddl_lines.append("GO\n")
    
    with open(os.path.join(ddl_dir, f"{tbl_name}.sql"), "w", encoding="utf-8") as f:
        f.write("\n".join(ddl_lines))
    
    # Build schema.md entry
    schema_lines.append(f"### {tbl_name}\n")
    schema_lines.append("| Column | Type | Nullable | Notes |")
    schema_lines.append("|--------|------|----------|-------|")
    for c in cols:
        notes = []
        if c['name'] in pk_cols:
            notes.append("PK")
        for fk in fks:
            if fk['col'] == c['name']:
                notes.append(f"FK -> {fk['ref_table']}.{fk['ref_col']}")
        if c['default']:
            notes.append(f"Default: {c['default']}")
        schema_lines.append(f"| {c['name']} | {c['type']} | {c['nullable']} | {' '.join(notes)} |")
    schema_lines.append("")

print(f"Processed {len(tables)} tables")

with open(os.path.join(base, "schema_tables.md"), "w", encoding="utf-8") as f:
    f.write("\n".join(schema_lines))