#!/usr/bin/env node
/**
 * Scrape 2024 Ghana Presidential data from Peace FM Online (www.peacefmonline.com).
 * Same Playwright technique as 2020 scraper.
 *
 * Usage: node scripts/scrape-peacefm-2024.mjs
 * Output: storage/app/peacefm_2024_presidential.json
 */

import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// Paths are resolved from this script's own location, not the current
// working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASE = join(__dirname, '..');
const OUT_PATH = join(BASE, 'storage', 'app', 'peacefm_2024_presidential.json');

const BASE_URL = 'https://www.peacefmonline.com';

// Pages to scrape, written as [output key, path below the 2024 presidential
// landing page] and expanded into { key, url } records.
const PAGES_2024 = [
  ['president', ''],
  ['region_turnout', '/analyses/region-turnout'],
  ['region_results', '/analyses/region-results'],
  ['rejected_ballots', '/analyses/region-rejected-ballots'],
  ['marginal', '/analyses/marginal'],
  ['swing', '/analyses/swing-constituencies'],
  ['map', '/analyses/map'],
].map(([key, path]) => ({ key, url: `${BASE_URL}/elections/2024/president${path}` }));

// Known region names. parseRegionalVotesFromTables labels the n-th table with
// the n-th entry, and parseRegionalComparisonFromTables uses the list as a
// whitelist of row labels.
const REGION_ORDER = [
  'Ahafo',
  'Ashanti',
  'Bono',
  'Bono East',
  'Central',
  'Eastern',
  'Greater Accra',
  'North East',
  'Northern',
  'Oti',
  'Savannah',
  'Upper East',
  'Upper West',
  'Volta',
  'Western',
  'Western North',
];

/**
 * Pull every non-empty table plus the visible text out of the loaded page.
 *
 * Both callbacks run inside the page via `page.evaluate`, so they may only
 * touch the page's own `document`.
 *
 * @param {{ evaluate: Function }} page - Page-like object exposing `evaluate`.
 * @returns {Promise<{ tables: Array<{ index: number, rows: string[][] }>, mainText: string }>}
 *   `tables` keeps each table's position among all `<table>` elements in
 *   `index`; tables without any non-blank row are omitted. `mainText` is the
 *   `innerText` of `<main>`, else `<article>`, else `<body>`, else ''.
 */
async function extractPageData(page) {
  const tables = await page.evaluate(() => {
    const collected = [];
    Array.from(document.querySelectorAll('table')).forEach((tableEl, index) => {
      const rows = Array.from(tableEl.querySelectorAll('tr'))
        .map((rowEl) => Array.from(rowEl.querySelectorAll('td, th'), (cellEl) => cellEl.innerText.trim()))
        // Drop rows in which every cell is blank.
        .filter((cells) => cells.some(Boolean));
      if (rows.length > 0) {
        collected.push({ index, rows });
      }
    });
    return collected;
  });

  const mainText = await page.evaluate(() => {
    // Prefer the most specific content container that exists.
    for (const selector of ['main', 'article']) {
      const container = document.querySelector(selector);
      if (container) return container.innerText;
    }
    return document.body ? document.body.innerText : '';
  });

  return { tables, mainText };
}

/**
 * Read the national NPP/NDC totals from the page's visible text.
 *
 * Matches the first run shaped like
 * "<pct> % <pct> % <count> vote(s) <count> vote(s)". The first percentage and
 * first count are stored as NPP, the second of each as NDC.
 * NOTE(review): that NPP-then-NDC ordering is an assumption baked into the
 * code; confirm it against the live page layout.
 *
 * @param {string} mainText - Visible text of the president page.
 * @returns {{ npp: ?{ votes: number, percentage: number }, ndc: ?{ votes: number, percentage: number } }}
 *   Both entries are null when nothing matches or neither count is positive.
 */
function parseNationalFromText(mainText) {
  const summaryPattern = /([\d.]+)\s*%\s*([\d.]+)\s*%\s*([\d,]+)\s*votes?\s*([\d,]+)\s*votes?/;
  const toCount = (digits) => parseInt(String(digits).replace(/,/g, ''), 10);

  const found = mainText.match(summaryPattern);
  if (!found) {
    return { npp: null, ndc: null };
  }

  const [, firstPct, secondPct, firstVotes, secondVotes] = found;
  const nppVotes = toCount(firstVotes);
  const ndcVotes = toCount(secondVotes);

  // A summary without any positive count is treated as "no data yet".
  if (!(nppVotes > 0 || ndcVotes > 0)) {
    return { npp: null, ndc: null };
  }

  return {
    npp: { votes: nppVotes, percentage: parseFloat(firstPct) },
    ndc: { votes: ndcVotes, percentage: parseFloat(secondPct) },
  };
}

/**
 * Read the national NPP/NDC totals from the president page's tables.
 *
 * For each table the first row is skipped as a header. In the remaining rows
 * the candidate name is taken from column 0, the vote count from column 2 and
 * the percentage from column 3. Names containing "mahama" or "john dramani"
 * are recorded as NDC; names containing "bawumia" or "mahamudu" as NPP. Rows
 * without a positive vote count are ignored. The first table that yields
 * either party ends the search.
 *
 * @param {Array<{ rows?: string[][] }>|null|undefined} tables
 * @returns {{ npp: ?{ votes: number, percentage: number }, ndc: ?{ votes: number, percentage: number } }}
 */
function parseNationalFromPresidentTable(tables) {
  const VOTES_COL = 2;
  const PCT_COL = 3;
  const NDC_FRAGMENTS = ['mahama', 'john dramani'];
  const NPP_FRAGMENTS = ['bawumia', 'mahamudu'];
  const mentionsAny = (text, fragments) => fragments.some((fragment) => text.includes(fragment));

  const result = { npp: null, ndc: null };

  for (const table of tables || []) {
    // Everything after the header row; empty for header-only tables.
    const dataRows = (table.rows || []).slice(1);

    for (const cells of dataRows) {
      const name = (cells[0] || '').toLowerCase();
      const votes = parseInt(String(cells[VOTES_COL] || '').replace(/,/g, ''), 10) || 0;
      const percentage = parseFloat(String(cells[PCT_COL] || '').replace('%', '')) || 0;
      if (votes <= 0) continue;

      if (mentionsAny(name, NDC_FRAGMENTS)) {
        result.ndc = { votes, percentage };
      } else if (mentionsAny(name, NPP_FRAGMENTS)) {
        result.npp = { votes, percentage };
      }
    }

    if (result.npp || result.ndc) {
      return result;
    }
  }

  return result;
}

/**
 * Build one NPP-vs-NDC summary per table.
 *
 * The n-th table is labelled with the n-th entry of REGION_ORDER (or
 * `Region_<n>` beyond the list).
 * NOTE(review): this assumes the tables arrive in REGION_ORDER order; confirm
 * against the live page.
 *
 * Within a table the first row is skipped as a header. Each remaining row is
 * classified from its label (column 0) or a party cell in column 1:
 *   - NPP: label contains "bawumia", "mahamudu", "akufo" or "npp", or column 1
 *     contains "NPP".
 *   - NDC: label contains "mahama", "john dramani" or "ndc", or column 1
 *     contains "NDC".
 * The 2024 name fragments mirror parseNationalFromPresidentTable. Bare "nana"
 * and "john" are deliberately not used: they can match unrelated candidates,
 * and a later matching row overwrites the tally.
 *
 * Votes are read from column 1 when that cell is numeric. Otherwise (party
 * label or blank) the votes and percentage columns are taken one position to
 * the right, so a "Candidate, Party, Votes, %" layout parses correctly.
 *
 * @param {Array<{ rows?: string[][] }>|null|undefined} tables
 * @returns {Array<{ region: string, npp_votes: number, npp_pct: number, ndc_votes: number, ndc_pct: number }>}
 *   Only tables in which at least one of the two parties has a positive vote
 *   count are included.
 */
function parseRegionalVotesFromTables(tables) {
  const NPP_FRAGMENTS = ['bawumia', 'mahamudu', 'akufo', 'npp'];
  const NDC_FRAGMENTS = ['mahama', 'john dramani', 'ndc'];
  const mentionsAny = (text, fragments) => fragments.some((fragment) => text.includes(fragment));
  const toCount = (cell) => parseInt(String(cell ?? '').replace(/,/g, ''), 10);
  const toPercent = (cell) => parseFloat(String(cell ?? '').replace('%', '')) || 0;

  const summaries = [];

  (tables || []).forEach((table, tableIndex) => {
    const region = REGION_ORDER[tableIndex] || `Region_${tableIndex}`;
    const tally = { npp_votes: 0, npp_pct: 0, ndc_votes: 0, ndc_pct: 0 };

    for (const row of (table.rows || []).slice(1)) {
      const label = (row[0] || '').toLowerCase();
      const partyCell = String(row[1] || '');

      // A truthy but non-numeric column 1 (e.g. "NPP") must not be parsed as
      // the vote count; shift to the next column in that case.
      const votesCol = Number.isNaN(toCount(row[1])) ? 2 : 1;
      const votes = toCount(row[votesCol]) || 0;
      const pct = toPercent(row[votesCol + 1] || row[votesCol + 2]);

      if (mentionsAny(label, NPP_FRAGMENTS) || partyCell.includes('NPP')) {
        tally.npp_votes = votes;
        tally.npp_pct = pct;
      } else if (mentionsAny(label, NDC_FRAGMENTS) || partyCell.includes('NDC')) {
        tally.ndc_votes = votes;
        tally.ndc_pct = pct;
      }
    }

    if (tally.npp_votes > 0 || tally.ndc_votes > 0) {
      summaries.push({ region, ...tally });
    }
  });

  return summaries;
}

/**
 * Flatten the swing-constituency tables into one entry per region row.
 *
 * The first row of every table is skipped as a header. A row is kept when its
 * trimmed first cell is non-empty and is neither "Region" nor "Total".
 *
 * @param {Array<{ rows: string[][] }>} tables
 * @returns {Array<{ region: string, total: string, constituencies: string }>}
 *   `total` and `constituencies` are the raw second and third cells ('' when
 *   the cell is missing).
 */
function parseSwingByRegion(tables) {
  const skippedLabels = new Set(['', 'Region', 'Total']);

  return tables.flatMap((table) =>
    table.rows
      .slice(1)
      .map((cells) => ({
        region: (cells[0] || '').trim(),
        total: cells[1] ?? '',
        constituencies: cells[2] ?? '',
      }))
      .filter(({ region }) => !skippedLabels.has(region)),
  );
}

/**
 * Collect marginal-constituency rows from the tables that look relevant.
 *
 * A table is considered only when its first row, lower-cased, mentions
 * "constituency" or "vote". Of its remaining rows, those whose trimmed first
 * cell is empty or "---" are dropped.
 *
 * @param {Array<{ rows: string[][] }>} tables
 * @returns {Array<{ constituency: string, vote_difference: string }>}
 *   `vote_difference` is the trimmed second cell, falling back to the third
 *   cell and then to ''.
 */
function parseMarginalFromTables(tables) {
  const hasRelevantHeader = (headerCells) => {
    const headerText = headerCells.map((cell) => cell.toLowerCase()).join(' ');
    return headerText.includes('constituency') || headerText.includes('vote');
  };

  return tables
    .filter((table) => hasRelevantHeader(table.rows[0] || []))
    .flatMap((table) => table.rows.slice(1))
    .map((cells) => ({
      constituency: (cells[0] || '').trim(),
      vote_difference: (cells[1] || cells[2] || '').trim(),
    }))
    .filter(({ constituency }) => constituency !== '' && constituency !== '---');
}

/**
 * Extract per-region comparison figures from the region-results tables.
 *
 * Every row (headers included) whose trimmed first cell is a name in
 * REGION_ORDER is considered. Its remaining cells are parsed as numbers after
 * removing one "%" and one "+"; unparseable cells become 0. Rows with fewer
 * than four such cells are ignored.
 *
 * Six values are read per row. When a row supplies only four or five, the
 * missing trailing values are reported as `null`, so every entry has the same
 * set of keys and nothing silently disappears when the result is serialised
 * to JSON.
 *
 * NOTE(review): the key names carry 2016/2020 labels although this file
 * scrapes 2024 pages; they are left unchanged because consumers of the output
 * may depend on them. Confirm what the columns really contain.
 *
 * @param {Array<{ rows?: string[][] }>|null|undefined} tables
 * @returns {Array<{ region: string, ndc_2016: ?number, ndc_2020: ?number, ndc_gl: ?number, npp_2016: ?number, npp_2020: ?number, npp_gl: ?number }>}
 */
function parseRegionalComparisonFromTables(tables) {
  const VALUE_KEYS = ['ndc_2016', 'ndc_2020', 'ndc_gl', 'npp_2016', 'npp_2020', 'npp_gl'];
  const MIN_VALUE_CELLS = 4;
  const knownRegions = new Set(REGION_ORDER);
  const toNumber = (cell) => parseFloat(String(cell).replace('%', '').replace('+', '')) || 0;

  const comparisons = [];

  for (const table of tables || []) {
    for (const row of table.rows || []) {
      const region = (row[0] || '').trim();
      if (!knownRegions.has(region)) continue;

      const values = row.slice(1).map(toNumber);
      if (values.length < MIN_VALUE_CELLS) continue;

      const entry = { region };
      VALUE_KEYS.forEach((key, position) => {
        // Short rows get an explicit null instead of an undefined that
        // JSON.stringify would drop.
        entry[key] = values[position] ?? null;
      });
      comparisons.push(entry);
    }
  }

  return comparisons;
}

/**
 * Scrape every page listed in PAGES_2024 and write the combined result to
 * OUT_PATH as pretty-printed JSON.
 *
 * Each page's raw tables and a 2500-character text snippet are stored under
 * `output.pages[key]`. Selected pages also contribute a derived top-level
 * section. A failure on one page is logged, recorded as
 * `{ url, error }` for that page, and does not stop the remaining pages.
 * The browser is always closed, and the file is written afterwards.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const output = {
    source: 'www.peacefmonline.com',
    scraped_at: new Date().toISOString(),
    election_year: 2024,
    election_type: 'presidential',
    pages: {},
  };

  // Raw tables plus a shorter text snippet, used for pages without a parser.
  const rawSection = (tables, mainText) => ({ tables, mainTextSnippet: mainText.slice(0, 1500) });

  // Page key -> [top-level output property, builder for its value].
  // Keys missing here (e.g. "map") only appear under output.pages.
  const derivedSections = {
    president: ['national', (tables, mainText) => {
      const fromText = parseNationalFromText(mainText);
      const textHasData = fromText.npp || fromText.ndc;
      // Fall back to the candidate table only when the text gave nothing.
      const national = !textHasData && tables?.length > 0
        ? parseNationalFromPresidentTable(tables)
        : fromText;
      return {
        national,
        regional_votes: parseRegionalVotesFromTables(tables),
        raw_tables: tables,
      };
    }],
    swing: ['swing', (tables) => ({
      swing_list: [],
      by_region: parseSwingByRegion(tables),
      raw_tables: tables,
    })],
    marginal: ['marginal', (tables) => ({
      marginal_list: parseMarginalFromTables(tables),
      raw_tables: tables,
    })],
    region_results: ['regional_analysis', (tables) => ({
      regional_comparison: parseRegionalComparisonFromTables(tables),
      raw_tables: tables,
    })],
    region_turnout: ['region_turnout', rawSection],
    rejected_ballots: ['rejected_ballots', rawSection],
  };

  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-GB,en;q=0.9' });
    await page.setViewportSize({ width: 1280, height: 800 });

    for (const { key, url } of PAGES_2024) {
      console.log('Fetching:', url);
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 20000 });
        // Fixed pause after DOMContentLoaded before reading the DOM.
        await new Promise((resolve) => setTimeout(resolve, 4000));

        const { tables, mainText } = await extractPageData(page);
        output.pages[key] = { url, tables, mainTextSnippet: mainText.slice(0, 2500) };

        const derived = derivedSections[key];
        if (derived) {
          const [property, build] = derived;
          output[property] = build(tables, mainText);
        }
      } catch (e) {
        console.warn('Error on', url, e.message);
        output.pages[key] = { url, error: e.message };
      }
    }
  } finally {
    if (browser) await browser.close();
  }

  mkdirSync(dirname(OUT_PATH), { recursive: true });
  writeFileSync(OUT_PATH, JSON.stringify(output, null, 2), 'utf8');
  console.log('Wrote:', OUT_PATH);

  const summary = [
    ['National NPP:', output.national?.national?.npp ? 'yes' : 'no'],
    ['National NDC:', output.national?.national?.ndc ? 'yes' : 'no'],
    ['Regional votes:', output.national?.regional_votes?.length ?? 0],
    ['Swing by_region:', output.swing?.by_region?.length ?? 0],
    ['Marginal list:', output.marginal?.marginal_list?.length ?? 0],
    ['Regional comparison:', output.regional_analysis?.regional_comparison?.length ?? 0],
  ];
  for (const [label, value] of summary) {
    console.log(label, value);
  }
}

// Script entry point: run the scraper; on any rejection print the error and
// exit with status 1.
const exitWithFailure = (error) => {
  console.error(error);
  process.exit(1);
};

main().catch(exitWithFailure);
