#!/usr/bin/env node
/**
 * Scrape Ghana Presidential data from Peace FM Online (www.peacefmonline.com).
 * Supports: 2000, 2000a (runoff), 2004, 2008, 2008a (runoff), 2016, 2020, 2024.
 *
 * Usage:
 *   npx playwright install chromium   # first time only
 *   node scripts/scrape-peacefm.mjs 2000
 *   node scripts/scrape-peacefm.mjs 2020
 *   node scripts/scrape-peacefm.mjs 2024
 *   node scripts/scrape-peacefm.mjs all   # scrape every year in sequence
 *
 * Output: storage/app/peacefm_{slug}_presidential.json
 */

import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __dirname; derive the script's directory from its URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Parent of the script directory; scrapeSlug writes its JSON under BASE/storage/app.
const BASE = join(__dirname, '..');
// Site origin prepended to every page URL built by getPagesForSlug.
const BASE_URL = 'https://www.peacefmonline.com';

// Slugs accepted on the command line; passing `all` scrapes them in this order.
const SLUGS = ['2000', '2000a', '2004', '2008', '2008a', '2016', '2020', '2024'];

/**
 * List the pages to scrape for one election slug.
 *
 * The 2024 election is served under `/elections/2024/president` with its own
 * set of analysis pages; every other slug uses the `/pages/{slug}` layout.
 *
 * @param {string} slug - Election identifier, e.g. `'2020'` or `'2008a'`.
 * @returns {{ key: string, url: string }[]} Pages in the order they are scraped.
 */
function getPagesForSlug(slug) {
  // Turn [key, pathSuffix] pairs into { key, url } records under a common root.
  const toPages = (root, entries) =>
    entries.map(([key, suffix]) => ({ key, url: `${root}${suffix}` }));

  if (slug === '2024') {
    return toPages(`${BASE_URL}/elections/2024/president`, [
      ['president', ''],
      ['swing', '/analyses/swing-constituencies'],
      ['marginal', '/analyses/marginal'],
      ['regional_analysis', '/analyses/region-results'],
      ['region_turnout', '/analyses/region-turnout'],
      ['rejected_ballots', '/analyses/region-rejected-ballots'],
    ]);
  }

  return toPages(`${BASE_URL}/pages/${slug}`, [
    ['president', '/president'],
    ['swing', '/president/swing'],
    ['marginal', '/president/marginal'],
    ['regional_analysis', '/analysis/president'],
    ['constituency_analysis', '/analysis/president/constituency'],
  ]);
}

/**
 * Pull every non-empty table and the main body text out of the loaded page.
 *
 * Each table becomes `{ index, rows }`, where `index` is the table's position
 * among all `<table>` elements and `rows` holds the trimmed text of each
 * `td`/`th`. Rows whose cells are all empty are dropped, and tables left with
 * no rows are omitted. The text comes from `<main>`, else `<article>`, else
 * `<body>`.
 *
 * @param {object} page - Object exposing `evaluate(fn)` (a Playwright page in this script).
 * @returns {Promise<{ tables: { index: number, rows: string[][] }[], mainText: string }>}
 */
async function extractPageData(page) {
  const tables = await page.evaluate(() => {
    const collected = [];
    const tableEls = Array.from(document.querySelectorAll('table'));
    for (const [index, tableEl] of tableEls.entries()) {
      const rows = Array.from(tableEl.querySelectorAll('tr'))
        .map((tr) => Array.from(tr.querySelectorAll('td, th'), (cell) => cell.innerText.trim()))
        .filter((cells) => cells.some(Boolean));
      if (rows.length > 0) collected.push({ index, rows });
    }
    return collected;
  });

  const mainText = await page.evaluate(() => {
    const root = document.querySelector('main') || document.querySelector('article') || document.body;
    if (!root) return '';
    return root.innerText;
  });

  return { tables, mainText };
}

/**
 * Extract national NPP/NDC vote totals and percentages from page text.
 *
 * Two layouts are tried in order:
 *  1. Two percentages followed by two vote counts
 *     (`"51.3% 47.4% 6,730,413 votes 6,213,182 votes"`); the first of each
 *     pair is taken as NPP and the second as NDC.
 *  2. A party label (`NPP` / `NDC`, case-insensitive) followed somewhere later
 *     by `"<count> votes <pct>"`.
 *
 * Vote counts may be written with or without thousands separators.
 *
 * @param {string} mainText - Visible text of the page; nullish is treated as empty.
 * @returns {{ npp: ?{ votes: number, percentage: number }, ndc: ?{ votes: number, percentage: number } }}
 *   Either party is `null` when no figures were found for it.
 */
function parseNationalFromText(mainText) {
  const text = String(mainText ?? '');
  const toVotes = (raw) => parseInt(String(raw).replace(/,/g, ''), 10);
  const national = { npp: null, ndc: null };

  const pctMatch = text.match(/([\d.]+)\s*%\s*([\d.]+)\s*%\s*([\d,]+)\s*votes?\s*([\d,]+)\s*votes?/);
  if (pctMatch) {
    national.npp = { votes: toVotes(pctMatch[3]), percentage: parseFloat(pctMatch[1]) };
    national.ndc = { votes: toVotes(pctMatch[4]), percentage: parseFloat(pctMatch[2]) };
    return national;
  }

  // The count is matched as `\d[\d,]*` rather than `\d{1,3}(,\d{3})*`: the
  // stricter form cannot consume an unseparated number such as "6730413", so the
  // lazy prefix would slide into the number and capture only its last digits.
  const nppRe = /NPP[\s\S]*?(\d[\d,]*)\s*votes?\s*\(?([\d.]+)\s*%?\)?/i;
  const ndcRe = /NDC[\s\S]*?(\d[\d,]*)\s*votes?\s*\(?([\d.]+)\s*%?\)?/i;

  const nppMatch = nppRe.exec(text);
  if (nppMatch) national.npp = { votes: toVotes(nppMatch[1]), percentage: parseFloat(nppMatch[2]) };
  const ndcMatch = ndcRe.exec(text);
  if (ndcMatch) national.ndc = { votes: toVotes(ndcMatch[1]), percentage: parseFloat(ndcMatch[2]) };
  return national;
}

/**
 * Read national NPP/NDC totals from the president page's tables.
 *
 * For each table the first row is skipped, column 0 is matched against known
 * candidate names, votes are read from column 2 and the percentage from
 * column 3. Rows with no positive vote count are ignored. Scanning stops at
 * the first table that yields a result for either party.
 *
 * @param {?{ rows: string[][] }[]} tables - Tables as produced by extractPageData.
 * @returns {{ npp: ?{ votes: number, percentage: number }, ndc: ?{ votes: number, percentage: number } }}
 */
function parseNationalFromTable(tables) {
  const VOTES_COL = 2;
  const PCT_COL = 3;
  const NPP_KEYWORDS = ['bawumia', 'mahamudu', 'akufo-addo'];

  const isNdcCandidate = (name) => name.includes('mahama') || name.includes('john dramani');
  const isNppCandidate = (name) =>
    NPP_KEYWORDS.some((kw) => name.includes(kw)) || (name.includes('nana') && name.includes('akufo'));

  const result = { npp: null, ndc: null };
  for (const table of tables || []) {
    const dataRows = (table.rows || []).slice(1);
    if (dataRows.length === 0) continue;

    for (const row of dataRows) {
      const votes = parseInt(String(row[VOTES_COL] || '').replace(/,/g, ''), 10) || 0;
      if (votes <= 0) continue;
      const percentage = parseFloat(String(row[PCT_COL] || '').replace('%', '')) || 0;
      const name = (row[0] || '').toLowerCase();

      // NDC is tested first, exactly as the name checks were originally ordered.
      if (isNdcCandidate(name)) {
        result.ndc = { votes, percentage };
      } else if (isNppCandidate(name)) {
        result.npp = { votes, percentage };
      }
    }

    if (result.npp || result.ndc) return result;
  }
  return result;
}

// Sixteen region names in alphabetical order. parseRegionalVotesFromTables
// assigns a name to each table by its position in this list.
const REGION_ORDER = [
  'Ahafo',
  'Ashanti',
  'Bono',
  'Bono East',
  'Central',
  'Eastern',
  'Greater Accra',
  'North East',
  'Northern',
  'Oti',
  'Savannah',
  'Upper East',
  'Upper West',
  'Volta',
  'Western',
  'Western North',
];

/**
 * Build one NPP/NDC vote summary per table on the president page.
 *
 * The region name is taken from REGION_ORDER by table position, falling back
 * to `Region_<index>`.
 * NOTE(review): this assumes the page lists one table per region in the same
 * order as REGION_ORDER — verify against the scraped pages.
 *
 * Two row layouts are supported:
 *  - `[candidate, votes, pct, ...]`
 *  - `[candidate, party, votes, pct, ...]`
 * The layout is detected per row: when column 1 is not numeric (a party label
 * or empty), votes are read from column 2 and the percentage from column 3.
 *
 * @param {?{ rows: string[][] }[]} tables - Tables as produced by extractPageData.
 * @returns {{ region: string, npp_votes: number, npp_pct: number, ndc_votes: number, ndc_pct: number }[]}
 *   One entry per table in which either party has a positive vote count.
 */
function parseRegionalVotesFromTables(tables) {
  // True for cells such as "145,584", "55.04" or "55.04%".
  const looksNumeric = (cell) => /^\s*\d[\d,]*(?:\.\d+)?\s*%?\s*$/.test(String(cell ?? ''));

  const summaries = [];
  (tables || []).forEach((t, tableIndex) => {
    const regionName = REGION_ORDER[tableIndex] || `Region_${tableIndex}`;
    let nppVotes = 0;
    let nppPct = 0;
    let ndcVotes = 0;
    let ndcPct = 0;

    const tableRows = t.rows || [];
    for (let i = 1; i < tableRows.length; i++) {
      const row = tableRows[i];
      const label = (row[0] || '').toLowerCase();
      const party = String(row[1] || '');

      // A non-numeric column 1 (e.g. "NPP") must not be parsed as the vote
      // count; in that layout the figures start one column to the right.
      const votesCol = looksNumeric(row[1]) ? 1 : 2;
      const votes = parseInt(String(row[votesCol] || '').replace(/,/g, ''), 10) || 0;
      const pct = parseFloat(String(row[votesCol + 1] || row[votesCol + 2] || '').replace('%', '')) || 0;

      // 'bawumia' / 'mahamudu' are included so the NPP names match those
      // recognised by parseNationalFromTable.
      const isNpp = label.includes('nana') || label.includes('akufo') || label.includes('bawumia')
        || label.includes('mahamudu') || label.includes('npp') || party.includes('NPP');
      const isNdc = label.includes('mahama') || label.includes('john') || label.includes('ndc')
        || party.includes('NDC');

      if (isNpp) {
        nppVotes = votes;
        nppPct = pct;
      } else if (isNdc) {
        ndcVotes = votes;
        ndcPct = pct;
      }
    }

    if (nppVotes > 0 || ndcVotes > 0) {
      summaries.push({ region: regionName, npp_votes: nppVotes, npp_pct: nppPct, ndc_votes: ndcVotes, ndc_pct: ndcPct });
    }
  });
  return summaries;
}

/**
 * Flatten the swing-constituency tables into one numbered list.
 *
 * The constituency and region columns are located by header text
 * ("constituency" / "region", case-insensitive); when a header is missing the
 * constituency falls back to column 0 and the region to column 2. Rows with an
 * empty constituency, a repeated "Constituency" header, or "---" are skipped.
 * `party_*` and `pct_*` are both derived from the same cell (column 3 or 4 for
 * the previous election, column 5 or 6 for the current one).
 *
 * @param {?{ rows: string[][] }[]} tables - Tables as produced by extractPageData.
 * @returns {{ no: number, constituency: string, region: string, party_prev: string,
 *   pct_prev: number, party_curr: string, pct_curr: number }[]}
 */
function parseSwingListFromTables(tables) {
  const SKIPPED = new Set(['Constituency', '---']);
  const toPct = (cell) => parseFloat(String(cell).replace('%', '')) || 0;

  const entries = [];
  for (const table of tables || []) {
    const headerCells = (table.rows[0] || []).map((cell) => cell.toLowerCase());
    const foundConstituency = headerCells.findIndex((h) => h.includes('constituency'));
    const foundRegion = headerCells.findIndex((h) => h.includes('region'));
    const constituencyCol = foundConstituency >= 0 ? foundConstituency : 0;
    const regionCol = foundRegion >= 0 ? foundRegion : 2;

    for (const row of table.rows.slice(1)) {
      const constituency = (row[constituencyCol] || '').trim();
      const region = (row[regionCol] || '').trim();
      if (!constituency || SKIPPED.has(constituency)) continue;

      const prevCell = row[3] || row[4] || '';
      const currCell = row[5] || row[6] || '';
      entries.push({
        no: entries.length + 1,
        constituency,
        region,
        party_prev: prevCell,
        pct_prev: toPct(prevCell),
        party_curr: currCell,
        pct_curr: toPct(currCell),
      });
    }
  }
  return entries;
}

/**
 * Collect constituency / vote-difference pairs from the marginal-seat tables.
 *
 * Only tables whose first row mentions "constituency" or "vote"
 * (case-insensitive) are used. The constituency is column 0 and the vote
 * difference is column 1, or column 2 when column 1 is empty. Rows with an
 * empty or "---" constituency are skipped.
 *
 * @param {?{ rows: string[][] }[]} tables - Tables as produced by extractPageData.
 * @returns {{ constituency: string, vote_difference: string }[]}
 */
function parseMarginalFromTables(tables) {
  const hasRelevantHeader = (table) => {
    const headerText = (table.rows[0] || []).map((cell) => cell.toLowerCase()).join(' ');
    return headerText.includes('constituency') || headerText.includes('vote');
  };

  return (tables || [])
    .filter(hasRelevantHeader)
    .flatMap((table) => table.rows.slice(1))
    .map((row) => ({
      constituency: (row[0] || '').trim(),
      vote_difference: (row[1] || row[2] || '').trim(),
    }))
    .filter(({ constituency }) => constituency !== '' && constituency !== '---');
}

// Set form of REGION_ORDER, used by parseRegionalComparisonFromTables to keep
// only rows whose first cell is exactly a known region name.
const REGION_NAMES = new Set(REGION_ORDER);

/**
 * Extract per-region comparison figures from the regional-analysis tables.
 *
 * A row is used when its trimmed first cell is in REGION_NAMES and at least
 * six further cells follow. Those six cells are parsed as numbers (one "%"
 * and one "+" stripped, unparseable values become 0) and stored as
 * `ndc_1, ndc_2, ndc_gl, npp_1, npp_2, npp_gl` in that order.
 *
 * @param {?{ rows: string[][] }[]} tables - Tables as produced by extractPageData.
 * @returns {{ region: string, ndc_1: number, ndc_2: number, ndc_gl: number,
 *   npp_1: number, npp_2: number, npp_gl: number }[]}
 */
function parseRegionalComparisonFromTables(tables) {
  const toNumber = (cell) => parseFloat(String(cell).replace('%', '').replace('+', '')) || 0;

  const comparison = [];
  for (const table of tables || []) {
    for (const cells of table.rows || []) {
      const region = (cells[0] || '').trim();
      if (!REGION_NAMES.has(region)) continue;

      const figures = cells.slice(1).map(toNumber);
      if (figures.length < 6) continue;

      const [ndc_1, ndc_2, ndc_gl, npp_1, npp_2, npp_gl] = figures;
      comparison.push({ region, ndc_1, ndc_2, ndc_gl, npp_1, npp_2, npp_gl });
    }
  }
  return comparison;
}

/**
 * Scrape every page for one election slug and write the combined result to
 * `storage/app/peacefm_{slug}_presidential.json` under BASE.
 *
 * One headless Chromium instance is used for all pages of the slug. Each page
 * is loaded, given a fixed pause (4 s for 2024, 3 s otherwise) before its
 * tables and text are read, and then passed to the parser matching its key.
 * A failure on one page is logged and recorded under `pages[key].error`; the
 * remaining pages are still attempted. The browser is closed even when an
 * error escapes.
 *
 * @param {string} slug - Election identifier, e.g. `'2020'`.
 * @returns {Promise<object>} The object that was written to disk.
 */
async function scrapeSlug(slug) {
  const pageList = getPagesForSlug(slug);
  const outPath = join(BASE, 'storage', 'app', `peacefm_${slug}_presidential.json`);
  const settleMs = slug === '2024' ? 4000 : 3000;
  const output = {
    source: 'www.peacefmonline.com',
    scraped_at: new Date().toISOString(),
    slug,
    election_type: 'presidential',
    pages: {},
  };

  // Per-region swing totals come from the first table on the swing page.
  const swingByRegion = (tables) => {
    if (!tables[0]) return [];
    return tables[0].rows
      .slice(1)
      .map((r) => ({ region: r[0], total: r[1], constituencies: r[2] }))
      .filter((r) => r.region && r.region !== 'Region');
  };

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-GB,en;q=0.9' });
    await page.setViewportSize({ width: 1280, height: 800 });

    for (const { key, url } of pageList) {
      console.log('  Fetching:', url);
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 20000 });
        // Fixed pause after DOMContentLoaded before reading the page.
        await new Promise((resolve) => setTimeout(resolve, settleMs));
        const { tables, mainText } = await extractPageData(page);
        output.pages[key] = { url, tables, mainTextSnippet: mainText.slice(0, 2500) };

        switch (key) {
          case 'president': {
            // Prefer figures found in the page text; fall back to the tables
            // only when the text yielded nothing for either party.
            const fromText = parseNationalFromText(mainText);
            const textIsEmpty = !fromText.npp && !fromText.ndc;
            const nationalData = textIsEmpty && tables?.length > 0
              ? parseNationalFromTable(tables)
              : fromText;
            output.national = {
              national: nationalData,
              regional_votes: parseRegionalVotesFromTables(tables),
              raw_tables: tables,
            };
            break;
          }
          case 'swing':
            output.swing = {
              swing_list: parseSwingListFromTables(tables),
              by_region: swingByRegion(tables),
              raw_tables: tables,
            };
            break;
          case 'marginal':
            output.marginal = {
              marginal_list: parseMarginalFromTables(tables),
              raw_tables: tables,
            };
            break;
          case 'regional_analysis':
            output.regional_analysis = {
              regional_comparison: parseRegionalComparisonFromTables(tables),
              raw_tables: tables,
            };
            break;
          default:
            // Other keys are kept only as raw page data in output.pages.
            break;
        }
      } catch (err) {
        console.warn('  Error on', url, err.message);
        output.pages[key] = { url, error: err.message };
      }
    }
  } finally {
    if (browser) await browser.close();
  }

  mkdirSync(dirname(outPath), { recursive: true });
  writeFileSync(outPath, JSON.stringify(output, null, 2), 'utf8');
  console.log('  Wrote:', outPath);
  return output;
}

/**
 * Command-line entry point.
 *
 * Reads the first CLI argument: `all` scrapes every slug in SLUGS, a single
 * slug scrapes just that election, and no argument defaults to `2020`. An
 * unrecognised slug prints usage and exits with status 1. Slugs are scraped
 * one after another.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const [, , arg] = process.argv;

  let slugsToRun;
  if (arg === 'all') {
    slugsToRun = SLUGS;
  } else if (arg) {
    slugsToRun = [arg];
  } else {
    slugsToRun = ['2020'];
  }

  const [firstSlug] = slugsToRun;
  const isAccepted = SLUGS.includes(firstSlug) || firstSlug === 'all';
  if (!isAccepted) {
    console.log('Usage: node scripts/scrape-peacefm.mjs <slug|all>');
    console.log('Slugs:', SLUGS.join(', '));
    process.exit(1);
  }

  for (const slug of slugsToRun) {
    console.log('\nScraping Peace FM:', slug);
    await scrapeSlug(slug);
  }
  console.log('\nDone.');
}

// Run the CLI; any error that escapes main() is printed and the process exits
// with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
