"""
csv_to_json.py
Converts an Artemis II CSV file (downloaded from JPL Horizons or exported
from the tracker) into artemis2_data.json for the HTML tracker.

Usage:
    python3 csv_to_json.py artemis2_2026-04-02.csv

The CSV should have these columns (the tracker's Export CSV produces this format;
only time_utc and the six position/velocity columns are required, the rest are optional):
    time_utc, x_km, y_km, z_km, vx_kms, vy_kms, vz_kms,
    range_km, altitude_km, speed_kms,
    perigee_alt_km, apogee_alt_km, eccentricity,
    semi_major_axis_km, inclination_deg

If you have a raw Horizons CSV (from the web app or API with CSV_FORMAT=YES)
the script will also handle that automatically.
"""

import csv
import json
import math
import os
import sys
from datetime import datetime, timezone

# Earth radius in km; added to / subtracted from geocentric distances to
# convert between distance-from-centre and altitude above the surface.
EARTH_RADIUS_KM = 6371.0
# Name of the JSON file written by convert(), relative to the working directory.
OUTPUT_FILE     = 'artemis2_data.json'


def safe_float(val, default=None):
    """Convert *val* to a finite float, falling back to *default*.

    Args:
        val: Anything accepted by ``float()`` (typically a CSV cell string).
        default: Value returned when *val* cannot be converted or converts
            to NaN / +-infinity.

    Returns:
        The parsed finite float, or *default*.
    """
    try:
        number = float(val)
    except (ValueError, TypeError):
        # Unparseable text (ValueError) or a non-numeric object such as None
        # (TypeError).
        return default

    if math.isfinite(number):
        return number
    # NaN and infinities are treated the same as a missing value.
    return default


def parse_tracker_csv(rows):
    """Parse CSV rows that were exported from the tracker (our own format).

    Args:
        rows: Iterable of dicts as produced by ``csv.DictReader``. Header
            names are matched after stripping whitespace / BOM and
            lower-casing, the same normalisation ``detect_format`` applies.

    Returns:
        A ``(vectors, elements)`` tuple of lists of dicts.

        * ``vectors`` has one entry per row with a non-blank ``time_utc`` and
          six finite state-vector components. ``range_km`` / ``speed_kms``
          are taken from the row when present, otherwise computed from the
          position / velocity components.
        * ``elements`` has one entry per such row that also carries an
          ``eccentricity`` value. Fields that are absent and cannot be
          reconstructed are ``None``; apogee fields are always ``None`` for
          hyperbolic (eccentricity >= 1) records.
    """
    def _rounded(value, digits):
        # Explicit None check: a legitimate 0.0 (e.g. zero inclination) must
        # be kept rather than silently turned into null.
        return None if value is None else round(value, digits)

    vectors  = []
    elements = []

    for raw_row in rows:
        # Normalise header names. DictReader stores surplus cells under a
        # None key, so non-string keys are dropped here.
        row = {
            key.strip().lstrip('\ufeff').lower(): value
            for key, value in raw_row.items()
            if isinstance(key, str)
        }

        # Skip blank or malformed rows. A short row yields None (not '') for
        # missing cells, so guard the type before calling .strip().
        time_utc = row.get('time_utc')
        if not isinstance(time_utc, str) or not time_utc.strip():
            continue
        time_utc = time_utc.strip()

        state = [
            safe_float(row.get(column))
            for column in ('x_km', 'y_km', 'z_km', 'vx_kms', 'vy_kms', 'vz_kms')
        ]
        if None in state:
            continue
        x, y, z, vx, vy, vz = state

        # A missing (or zero) exported magnitude falls back to the value
        # computed from the components.
        rng   = safe_float(row.get('range_km'))  or math.sqrt(x*x + y*y + z*z)
        speed = safe_float(row.get('speed_kms')) or math.sqrt(vx*vx + vy*vy + vz*vz)

        vectors.append({
            'time':      time_utc,
            'x_km':      round(x,  3),
            'y_km':      round(y,  3),
            'z_km':      round(z,  3),
            'vx_kms':    round(vx, 6),
            'vy_kms':    round(vy, 6),
            'vz_kms':    round(vz, 6),
            'range_km':  round(rng,   3),
            'speed_kms': round(speed, 6),
        })

        # An elements record is only built when eccentricity is present.
        ec = safe_float(row.get('eccentricity'))
        if ec is None:
            continue

        qr  = safe_float(row.get('perigee_dist_km'))
        pa  = safe_float(row.get('perigee_alt_km'))
        aa  = safe_float(row.get('apogee_alt_km'))
        sma = safe_float(row.get('semi_major_axis_km'))
        inc = safe_float(row.get('inclination_deg'))

        hyperbolic = ec >= 1.0

        # Reconstruct missing fields where possible.
        if qr is None and pa is not None:
            qr = pa + EARTH_RADIUS_KM

        if hyperbolic:
            # An open orbit has no apogee: suppress both apogee fields so the
            # record is self-consistent.
            aa = None
            ad = None
        elif aa is not None:
            ad = aa + EARTH_RADIUS_KM
        elif sma is not None:
            # Apoapsis distance of an ellipse: a * (1 + e).
            ad = sma * (1 + ec)
        else:
            ad = None

        elements.append({
            'time':               time_utc,
            'eccentricity':       round(ec, 8),
            'semi_major_axis_km': _rounded(sma, 3),
            'perigee_dist_km':    _rounded(qr,  3),
            'apogee_dist_km':     _rounded(ad,  3),
            'perigee_alt_km':     _rounded(pa,  3),
            'apogee_alt_km':      _rounded(aa,  3),
            'inclination_deg':    _rounded(inc, 6),
            'hyperbolic':         hyperbolic,
        })

    return vectors, elements


def parse_horizons_csv(rows):
    """Parse a raw Horizons vector table exported with CSV_FORMAT=YES.

    Columns are read by position, not by name:
    JDTDB, Cal_Date, X, Y, Z, VX, VY, VZ, [LT, RG, RR]

    Args:
        rows: Iterable of dicts as produced by ``csv.DictReader``; the
            insertion order of each dict's keys defines the column order.

    Returns:
        A ``(vectors, elements)`` tuple. ``elements`` is always an empty list
        because a raw Horizons vector table carries no orbital elements.
        Rows are skipped (never raise) when they have fewer than eight
        columns, a non-numeric first column, a blank time, or a missing /
        non-numeric / non-finite state-vector component.
    """
    vectors = []
    for row in rows:
        keys = list(row)
        if len(keys) < 8:
            continue

        # Column 2 is the calendar date/time. A short row yields None here,
        # so check the type before stripping.
        time_val = row[keys[1]]
        if not isinstance(time_val, str) or not time_val.strip():
            continue

        try:
            # Data rows have a numeric first column (Julian date); this
            # filters out repeated headers and other non-data lines.
            float(row[keys[0]])
            state = [float(row[key]) for key in keys[2:8]]
        except (ValueError, TypeError):
            # TypeError covers None cells from rows shorter than the header.
            continue

        # NaN / inf would be serialised as invalid JSON downstream.
        if not all(math.isfinite(component) for component in state):
            continue
        x, y, z, vx, vy, vz = state

        vectors.append({
            'time':      time_val.strip(),
            'x_km':      round(x,  3),
            'y_km':      round(y,  3),
            'z_km':      round(z,  3),
            'vx_kms':    round(vx, 6),
            'vy_kms':    round(vy, 6),
            'vz_kms':    round(vz, 6),
            'range_km':  round(math.sqrt(x*x + y*y + z*z), 3),
            'speed_kms': round(math.sqrt(vx*vx + vy*vy + vz*vz), 6),
        })

    return vectors, []   # raw Horizons CSV has no elements columns


def detect_format(fieldnames):
    """Classify a CSV header as tracker export or raw Horizons output.

    Args:
        fieldnames: Iterable of header strings.

    Returns:
        ``'tracker'`` when the header contains ``time_utc`` or ``x_km``
        (compared after trimming whitespace, dropping a leading BOM and
        lower-casing); ``'horizons'`` otherwise.
    """
    tracker_markers = {'time_utc', 'x_km'}
    normalised = {
        header.strip().lstrip('\ufeff').lower()
        for header in fieldnames
    }
    if normalised & tracker_markers:
        return 'tracker'
    return 'horizons'


def find_current_idx(vectors):
    """Return the index of the vector whose time is closest to the present.

    Args:
        vectors: Sequence of dicts, each with a ``'time'`` string.

    Returns:
        Index of the record with the smallest absolute difference between its
        parsed time and the current UTC time. Records whose time cannot be
        parsed are ignored; ties resolve to the earliest index. Returns 0
        when no record has a parseable time (including an empty input).
    """
    reference = datetime.now(timezone.utc).timestamp()

    stamped = ((position, parse_time(record['time']))
               for position, record in enumerate(vectors))
    # (gap, index) tuples: min() picks the smallest gap and, for equal gaps,
    # the smallest index.
    gaps = [(abs(stamp - reference), position)
            for position, stamp in stamped
            if stamp is not None]

    if not gaps:
        return 0
    return min(gaps)[1]


def parse_time(time_str):
    """Parse any of the time formats that might appear in the CSV.

    Handled formats (a trailing " TDB", " UTC", " UT" or "Z" marker and a
    leading "A.D. " are ignored):
      - "A.D. 2026-Apr-02 18:00:00.0000 TDB"   (raw Horizons)
      - "2026-Apr-02 18:00:00.0000 TDB"
      - "2026-04-02T18:00:00"                   (ISO, from tracker export)
      - "2026-04-02T18:00:00.000Z"
      - "2026-04-02 18:00:00"
    Seconds (and fractional seconds) are optional in the space-separated
    forms.

    Args:
        time_str: The time text. Non-string values (e.g. ``None`` from a
            short CSV row) are treated as unparseable.

    Returns:
        A Unix timestamp float, or ``None`` on failure. The clock reading is
        interpreted as UTC whatever marker it carried; no TDB-to-UTC
        correction is applied.
    """
    if not isinstance(time_str, str):
        return None

    s = time_str.strip().replace('A.D. ', '')

    # Remove a single trailing time-scale marker. ' UTC' must be tested
    # before ' UT', otherwise "... UTC" would be left with a stray 'C'.
    for suffix in (' TDB', ' UTC', ' UT', 'Z'):
        if s.endswith(suffix):
            s = s[:-len(suffix)]
            break
    s = s.strip()

    formats = (
        # Horizons style: "2026-Apr-02 18:00:00.0000"
        '%Y-%b-%d %H:%M:%S.%f', '%Y-%b-%d %H:%M:%S', '%Y-%b-%d %H:%M',
        # ISO style, with 'T' or space separator
        '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
    )
    for fmt in formats:
        try:
            parsed = datetime.strptime(s, fmt)
        except ValueError:
            continue
        return parsed.replace(tzinfo=timezone.utc).timestamp()

    return None


def convert(csv_path):
    """Convert one CSV file into the tracker's JSON data file.

    The file is read once, classified as raw Horizons output (contains a
    ``$$SOE`` marker), a tracker export, or a header-bearing Horizons CSV,
    parsed with the matching parser, and written to ``OUTPUT_FILE`` via a
    temporary file followed by ``os.replace``. Progress and errors are
    printed to stdout.

    Args:
        csv_path: Path of the CSV file to convert.

    Returns:
        ``True`` when the JSON file was written, ``False`` when the CSV
        contained no valid vector records.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Local import: at module level ``io`` is only imported under __main__.
    import io

    print(f"Reading {csv_path}...")

    with open(csv_path, newline='', encoding='utf-8-sig') as f:
        raw = f.read()

    # Strip Horizons header (everything before $$SOE) if present
    if '$$SOE' in raw:
        print("  Detected raw Horizons output - stripping header...")
        data_block = raw.split('$$SOE')[1].split('$$EOE')[0].strip()
        # The block between the markers has no header line. Supply column
        # names explicitly, otherwise DictReader would consume the first data
        # record as the header and that record would be lost.
        horizons_columns = ['JDTDB', 'Cal_Date', 'X', 'Y', 'Z',
                            'VX', 'VY', 'VZ', 'LT', 'RG', 'RR']
        reader = csv.DictReader(io.StringIO(data_block, newline=''),
                                fieldnames=horizons_columns)
        rows = list(reader)
        fmt  = 'horizons_raw'
    else:
        # Parse the text already in memory instead of re-opening the file
        # (the second handle was never closed).
        reader = csv.DictReader(io.StringIO(raw, newline=''))
        rows = list(reader)
        fmt  = detect_format(reader.fieldnames or [])

    print(f"  Format detected: {fmt}")
    print(f"  Rows found: {len(rows)}")

    if fmt == 'tracker':
        vectors, elements = parse_tracker_csv(rows)
    else:
        vectors, elements = parse_horizons_csv(rows)

    if not vectors:
        print("ERROR: No valid vector records found in CSV.")
        print("  Check that the file has x_km/y_km/z_km columns (tracker export)")
        print("  or is a raw Horizons CSV_FORMAT=YES output.")
        return False

    current_idx = find_current_idx(vectors)
    current     = vectors[current_idx]
    alt         = current['range_km'] - EARTH_RADIUS_KM

    # One clock reading for the split and both timestamp fields, so they
    # always agree with each other.
    now    = datetime.now(timezone.utc)
    now_ts = now.timestamp()

    # Split into past / future relative to now (what the HTML expects).
    # Records with an unparseable time count as timestamp 0, i.e. past.
    past, future = [], []
    for v in vectors:
        bucket = past if (parse_time(v['time']) or 0) <= now_ts else future
        bucket.append(v)

    # Find nearest elements record to current position
    current_elements = None
    if elements:
        cur_ts = parse_time(current['time']) or now_ts
        current_elements = min(
            elements,
            key=lambda e: abs((parse_time(e['time']) or 0) - cur_ts),
        )

    output = {
        'timestamp':        now.isoformat(),
        'fetched_at':       now.isoformat(),
        'source':           os.path.basename(csv_path),
        'current':          current,
        'current_elements': current_elements,
        'past':             past,
        'future':           future,
        'vectors':          vectors,   # kept for CSV export in HTML
        'elements':         elements,  # kept for CSV export in HTML
        'mission_info': {
            'name':        'Artemis II',
            'spacecraft':  'Orion / Integrity',
            'horizons_id': '-1024',
            'launched':    '2026-04-01T22:24:00Z',
        }
    }

    # Write to a sibling temp file, then rename over the target, so a reader
    # of OUTPUT_FILE never sees a partially written file.
    tmp = OUTPUT_FILE + '.tmp'
    with open(tmp, 'w', encoding='utf-8') as f:
        json.dump(output, f, separators=(',', ':'))
    os.replace(tmp, OUTPUT_FILE)

    print(f"\nSuccess!")
    print(f"  Vectors:  {len(vectors)}")
    print(f"  Elements: {len(elements)}")
    print(f"  Current:  range={current['range_km']:,.0f} km  alt={alt:,.0f} km  speed={current['speed_kms']:.3f} km/s")
    print(f"  Written:  {OUTPUT_FILE}")
    return True


if __name__ == '__main__':
    import io

    if len(sys.argv) >= 2:
        # Explicit path given on the command line.
        csv_path = sys.argv[1]
    else:
        # No argument: pick a CSV from the current directory, preferring
        # files with "artemis" in the name and, among those, the most
        # recently modified one.
        csv_names  = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
        candidates = [entry for entry in csv_names if 'artemis' in entry.lower()] or csv_names
        if not candidates:
            print("Usage: python3 csv_to_json.py <your_artemis2_data.csv>")
            print("No CSV files found in current directory.")
            sys.exit(1)
        candidates.sort(key=os.path.getmtime, reverse=True)
        csv_path = candidates[0]
        print(f"Auto-selected: {csv_path}")

    if not os.path.exists(csv_path):
        print(f"File not found: {csv_path}")
        sys.exit(1)

    # Exit status 0 on success, 1 when convert() reports failure.
    ok = convert(csv_path)
    sys.exit(0 if ok else 1)