<?php

namespace App\Services;

use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
use App\Models\Election\Election;

/**
 * Scrapes 2020 Ghana Presidential election data from Peace FM Online (ghanaelections.peacefmonline.com).
 * Source: https://ghanaelections.peacefmonline.com/pages/2020/president and subpages.
 *
 * Every scrape*Page() method fetches one page, parses it into an array payload and upserts that
 * payload into `election_scraped_data` (one row per election + data type). The president page
 * additionally syncs per-region NPP/NDC votes into `election_results_regional`.
 *
 * NOTE(review): apart from parseRegionalPercentagesTable()'s DOM pass, the parsers match
 * pipe-delimited, markdown-like text ("| cell | **12** |", "[label](url)"). Presumably they were
 * written against a markdown rendering of the pages; confirm they match what fetch() returns.
 */
class PeaceFm2020ScraperService
{
    private const BASE_URL = 'https://ghanaelections.peacefmonline.com';

    /** Default HTTP timeout, in seconds. */
    private const TIMEOUT = 25;

    /** HTTP timeout, in seconds, used for the marginal-wins page. */
    private const MARGINAL_TIMEOUT = 30;

    /** Region names accepted by the table parsers (kept in one place so the parsers agree). */
    private const REGIONS = [
        'Ahafo',
        'Ashanti',
        'Bono',
        'Bono East',
        'Central',
        'Eastern',
        'Greater Accra',
        'North East',
        'Northern',
        'Oti',
        'Savannah',
        'Upper East',
        'Upper West',
        'Volta',
        'Western',
        'Western North',
    ];

    /**
     * Runs all four page scrapers for the 2020 presidential election.
     *
     * @return array On a missing election: ['success' => false, 'error' => string]. Otherwise
     *               ['success' => true, 'election_id' => ..., 'results' => [...]], where each entry
     *               of `results` is the status array of one page scraper. Note that the top-level
     *               `success` only says the election row was found; individual pages may still
     *               have failed, so callers should inspect `results`.
     */
    public function scrapeAll(): array
    {
        $election = Election::where('year', 2020)->where('type', 'presidential')->first();
        if (!$election) {
            return ['success' => false, 'error' => '2020 presidential election not found. Run seeders first.'];
        }

        $results = [
            'national' => $this->scrapePresidentPage($election),
            'swing' => $this->scrapeSwingPage($election),
            'marginal' => $this->scrapeMarginalPage($election),
            'regional_analysis' => $this->scrapeRegionalAnalysisPage($election),
        ];

        return ['success' => true, 'election_id' => $election->id, 'results' => $results];
    }

    /**
     * Main president page: national totals, regional percentages, regional vote blocks.
     *
     * Stores the payload as data type `national_summary` and syncs the parsed regional votes
     * into the regional results table.
     *
     * @return array ['success' => true, 'url' => string] or ['success' => false, 'error' => string].
     */
    public function scrapePresidentPage(Election $election): array
    {
        $url = self::BASE_URL . '/pages/2020/president';
        $body = $this->fetch($url);
        if ($body === null || $body === '') {
            return ['success' => false, 'error' => 'Failed to fetch president page'];
        }

        $regionalVotes = $this->parseRegionalVotesBlocks($body);
        $payload = [
            'source_url' => $url,
            'scraped_at' => now()->toIso8601String(),
            'national' => $this->parseNationalSummary($body),
            'regional_percentages' => $this->parseRegionalPercentagesTable($body),
            'regional_votes' => $regionalVotes,
            'parliamentary_seats_table' => $this->parseParliamentarySeatsTable($body),
        ];

        $this->upsertScrapedData($election->id, 'national_summary', $payload, $url);
        $this->syncRegionalResultsFromScraped($election, $regionalVotes);

        return ['success' => true, 'url' => $url];
    }

    /**
     * Swing constituencies page; stored as data type `swing_constituencies`.
     *
     * @return array ['success' => true, 'url' => string] or ['success' => false, 'error' => string].
     */
    public function scrapeSwingPage(Election $election): array
    {
        $url = self::BASE_URL . '/pages/2020/president/swing';
        $body = $this->fetch($url);
        if ($body === null || $body === '') {
            return ['success' => false, 'error' => 'Failed to fetch swing page'];
        }

        $payload = [
            'source_url' => $url,
            'scraped_at' => now()->toIso8601String(),
            'by_region' => $this->parseSwingByRegion($body),
            'swing_list' => $this->parseSwingList($body),
        ];

        $this->upsertScrapedData($election->id, 'swing_constituencies', $payload, $url);

        return ['success' => true, 'url' => $url];
    }

    /**
     * Marginal wins page; stored as data type `marginal_wins`. Uses a longer HTTP timeout
     * than the other pages.
     *
     * @return array ['success' => true, 'url' => string] or ['success' => false, 'error' => string].
     */
    public function scrapeMarginalPage(Election $election): array
    {
        $url = self::BASE_URL . '/pages/2020/president/marginal';
        $body = $this->fetch($url, self::MARGINAL_TIMEOUT);
        if ($body === null || $body === '') {
            return ['success' => false, 'error' => 'Failed to fetch marginal page (timeout or empty)'];
        }

        $payload = [
            'source_url' => $url,
            'scraped_at' => now()->toIso8601String(),
            'marginal_list' => $this->parseMarginalTable($body),
        ];

        $this->upsertScrapedData($election->id, 'marginal_wins', $payload, $url);

        return ['success' => true, 'url' => $url];
    }

    /**
     * Regional analysis (NDC/NPP 2016 vs 2020 by region); stored as data type `regional_analysis`.
     *
     * @return array ['success' => true, 'url' => string] or ['success' => false, 'error' => string].
     */
    public function scrapeRegionalAnalysisPage(Election $election): array
    {
        $url = self::BASE_URL . '/pages/2020/analysis/president';
        $body = $this->fetch($url);
        if ($body === null || $body === '') {
            return ['success' => false, 'error' => 'Failed to fetch regional analysis page'];
        }

        $payload = [
            'source_url' => $url,
            'scraped_at' => now()->toIso8601String(),
            'regional_comparison' => $this->parseRegionalComparisonTable($body),
        ];

        $this->upsertScrapedData($election->id, 'regional_analysis', $payload, $url);

        return ['success' => true, 'url' => $url];
    }

    /**
     * GETs a URL and returns the response body.
     *
     * Best-effort by design: a non-successful status or any thrown error is logged as a
     * warning and reported to the caller as null instead of propagating.
     *
     * @param string $url     Absolute URL to fetch.
     * @param int    $timeout Request timeout in seconds.
     * @return string|null Response body, or null on failure.
     */
    private function fetch(string $url, int $timeout = self::TIMEOUT): ?string
    {
        try {
            $response = Http::timeout($timeout)
                ->withHeaders(['User-Agent' => 'MewsElectionBot/1.0'])
                ->get($url);
            if ($response->successful()) {
                return $response->body();
            }
            Log::warning('PeaceFm scraper non-200', ['url' => $url, 'status' => $response->status()]);
            return null;
        } catch (\Throwable $e) {
            Log::warning('PeaceFm scraper fetch error', ['url' => $url, 'message' => $e->getMessage()]);
            return null;
        }
    }

    /**
     * Extracts the national "2020: <votes> votes (<pct>%)" figures.
     *
     * The first occurrence is stored under `npp` and the second under `ndc`.
     * NOTE(review): this relies on the page listing NPP before NDC - confirm.
     *
     * @return array ['npp' => ['votes' => int, 'percentage' => float]|null, 'ndc' => same|null]
     */
    private function parseNationalSummary(string $html): array
    {
        $out = ['npp' => null, 'ndc' => null];
        $text = $this->normalizeText($html);
        // Match "2020: 6,653,737 votes (51.4%)" (markdown ** or HTML stripped)
        if (preg_match_all('/2020:\s*([\d,]+)\s*votes\s*\(([\d.]+)%\)/i', $text, $all)) {
            foreach (['npp', 'ndc'] as $index => $party) {
                if (isset($all[1][$index])) {
                    $out[$party] = [
                        'votes' => (int) str_replace(',', '', $all[1][$index]),
                        'percentage' => (float) $all[2][$index],
                    ];
                }
            }
        }
        return $out;
    }

    /**
     * Removes <script> elements and all tags, then collapses whitespace runs to single spaces.
     */
    private function normalizeText(string $html): string
    {
        // preg_replace() returns null on a PCRE error; fall back to the input rather than
        // passing null on.
        $withoutScripts = preg_replace('/<script[^>]*>[\s\S]*?<\/script>/i', '', $html) ?? $html;
        $stripped = strip_tags($withoutScripts);

        return preg_replace('/\s+/', ' ', $stripped) ?? $stripped;
    }

    /**
     * Collects per-region NPP/NDC percentages from two sources in the same document:
     * markdown-style rows ("| [Region](url) | **51.4%** | **47.0%** |") and real HTML tables
     * whose first cell is a known region name. Rows from both sources are appended, so a
     * region present in both forms appears twice.
     *
     * @return array List of ['region' => string, 'npp_pct' => float, 'ndc_pct' => float].
     */
    private function parseRegionalPercentagesTable(string $html): array
    {
        $rows = [];
        if (preg_match_all('/\|\s*\[([^\]]+)\]\([^)]+\)\s*\|\s*\*\*([\d.]+)%\*\*\s*\|\s*\*\*([\d.]+)%\*\*/', $html, $m, PREG_SET_ORDER)) {
            foreach ($m as $x) {
                $rows[] = ['region' => trim($x[1]), 'npp_pct' => (float) $x[2], 'ndc_pct' => (float) $x[3]];
            }
        }

        $regionPattern = '/^(?:' . $this->regionAlternation() . ')$/i';
        foreach ($this->extractHtmlTables($html) as $table) {
            foreach ($table as $cells) {
                if (count($cells) < 3 || !preg_match($regionPattern, trim($cells[0]))) {
                    continue;
                }
                $npp = $this->parsePct($cells[1]);
                $ndc = $this->parsePct($cells[2]);
                if ($npp !== null && $ndc !== null) {
                    $rows[] = ['region' => trim($cells[0]), 'npp_pct' => $npp, 'ndc_pct' => $ndc];
                }
            }
        }
        return $rows;
    }

    /**
     * Builds a regex alternation ("Ahafo|Ashanti|...") from the known region names.
     */
    private function regionAlternation(): string
    {
        $quoted = array_map(function (string $region): string {
            return preg_quote($region, '/');
        }, self::REGIONS);

        return implode('|', $quoted);
    }

    /**
     * Returns the first non-negative decimal number found in the string (an optional trailing
     * "%" is ignored), or null when the string contains no digit.
     */
    private function parsePct(string $s): ?float
    {
        // Require at least one digit so a lone "." is not read as 0.0.
        return preg_match('/\d+(?:\.\d+)?|\.\d+/', trim($s), $m) ? (float) $m[0] : null;
    }

    /**
     * Parses every <table> in the document into rows of trimmed <td> text.
     *
     * Rows without <td> cells (e.g. header rows made only of <th>) and tables without such
     * rows are omitted.
     *
     * @return array List of tables; each table is a list of rows; each row is a list of strings.
     */
    private function extractHtmlTables(string $html): array
    {
        $dom = new \DOMDocument();

        // Scraped markup is rarely valid; collect libxml errors internally instead of
        // emitting warnings, and restore the previous setting afterwards.
        $previous = libxml_use_internal_errors(true);
        try {
            // The XML declaration prefix makes libxml treat the input as UTF-8.
            $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_NOERROR | LIBXML_NOWARNING);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
        if (!$loaded) {
            return [];
        }

        $out = [];
        foreach ($dom->getElementsByTagName('table') as $table) {
            $rows = [];
            foreach ($table->getElementsByTagName('tr') as $tr) {
                $cells = [];
                foreach ($tr->getElementsByTagName('td') as $td) {
                    $cells[] = trim(preg_replace('/\s+/', ' ', $td->textContent) ?? '');
                }
                if (!empty($cells)) {
                    $rows[] = $cells;
                }
            }
            if (!empty($rows)) {
                $out[] = $rows;
            }
        }
        return $out;
    }

    /**
     * Parses the "Results History - X region" blocks into NPP/NDC votes and percentages.
     *
     * The document is cut at every region heading and each candidate row is looked up inside
     * its own region's segment only, so the two rows may appear in either order and a region
     * missing one of the rows can never borrow a row from the next region. Regions lacking
     * either row are skipped.
     *
     * @return array List of ['region', 'npp_votes', 'npp_pct', 'ndc_votes', 'ndc_pct'].
     */
    private function parseRegionalVotesBlocks(string $html): array
    {
        $found = preg_match_all(
            '/Results History - ([^|*\n]+?) region/i',
            $html,
            $headings,
            PREG_SET_ORDER | PREG_OFFSET_CAPTURE
        );
        if (!$found) {
            return [];
        }

        $regions = [];
        $headingCount = count($headings);
        foreach ($headings as $i => $heading) {
            // With PREG_OFFSET_CAPTURE each group is [text, byteOffset].
            $start = $heading[0][1] + strlen($heading[0][0]);
            $end = $i + 1 < $headingCount ? $headings[$i + 1][0][1] : strlen($html);
            $segment = substr($html, $start, $end - $start);

            $npp = $this->matchCandidateRow($segment, 'Nana Akufo-AddoNPP');
            $ndc = $this->matchCandidateRow($segment, 'John MahamaNDC');
            if ($npp === null || $ndc === null) {
                continue;
            }

            $regions[] = [
                'region' => trim(strip_tags($heading[1][0])),
                'npp_votes' => $npp['votes'],
                'npp_pct' => $npp['pct'],
                'ndc_votes' => $ndc['votes'],
                'ndc_pct' => $ndc['pct'],
            ];
        }
        return $regions;
    }

    /**
     * Finds the first "<label> | <votes> | <pct>%" row in a text segment.
     *
     * @return array|null ['votes' => int, 'pct' => float], or null when no such row exists.
     */
    private function matchCandidateRow(string $segment, string $label): ?array
    {
        $pattern = '/' . preg_quote($label, '/') . '\s*\|\s*([\d,]+)\s*\|\s*([\d.]+)%/i';
        if (!preg_match($pattern, $segment, $m)) {
            return null;
        }

        return ['votes' => (int) str_replace(',', '', $m[1]), 'pct' => (float) $m[2]];
    }

    /**
     * Parses rows of the form "| <candidate> | **<seats>** | <gains> | <loss> | <net> |" for the
     * two named candidates.
     *
     * @return array List of ['candidate', 'seats', 'gains', 'loss', 'net'].
     */
    private function parseParliamentarySeatsTable(string $html): array
    {
        $rows = [];
        if (preg_match_all('/\|\s*(Nana Akufo-Addo|John Mahama)\s*\|\s*\*\*(\d+)\*\*\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(-?\d+)\s*\|/', $html, $m, PREG_SET_ORDER)) {
            foreach ($m as $x) {
                $rows[] = [
                    'candidate' => trim($x[1]),
                    'seats' => (int) $x[2],
                    'gains' => (int) $x[3],
                    'loss' => (int) $x[4],
                    'net' => (int) $x[5],
                ];
            }
        }
        return $rows;
    }

    /**
     * Parses "| <region> | **<total>** | <constituencies> |" rows, skipping the "Region" header
     * row, the "Total" row and rows with an empty first cell. At most the first 20 rows are
     * returned.
     *
     * @return array List of ['region' => string, 'total' => int, 'constituencies' => string].
     */
    private function parseSwingByRegion(string $html): array
    {
        $rows = [];
        if (preg_match_all('/\|\s*([^|]+)\s*\|\s*\*\*(\d+)\*\*\s*\|\s*([^|]*)\s*\|/', $html, $m, PREG_SET_ORDER)) {
            foreach ($m as $x) {
                $region = trim($x[1]);
                if ($region === '' || $region === 'Region' || $region === 'Total') {
                    continue;
                }
                $rows[] = ['region' => $region, 'total' => (int) $x[2], 'constituencies' => trim($x[3])];
            }
        }
        return array_slice($rows, 0, 20);
    }

    /**
     * Parses the numbered swing-constituency rows.
     *
     * @return array List of ['no', 'constituency', 'region', '2016_majority_pct', '2016_party',
     *               '2020_party', '2020_majority_pct'].
     */
    private function parseSwingList(string $html): array
    {
        $list = [];
        // | 1 | [Ablekuma Central](...) \- (Greater Accra) | 52.38% | NPP | | NDC | 50.96% |
        // Capture groups: 1 = no, 2 = constituency, 3 = region, 4 = 2016 %, 5 = 2016 party,
        // 6 = 2020 party, 7 = 2020 % (the link URL and the empty middle cell are not captured).
        if (preg_match_all('/\|\s*(\d+)\s*\|\s*\[([^\]]+)\]\([^)]+\)\s*\\\\?\-?\s*\(([^)]+)\)\s*\|\s*([\d.]+)%\s*\|\s*(NPP|NDC)\s*\|[^|]*\|\s*(NPP|NDC)\s*\|\s*([\d.]+)%\s*\|/', $html, $m, PREG_SET_ORDER)) {
            foreach ($m as $x) {
                $list[] = [
                    'no' => (int) $x[1],
                    'constituency' => trim($x[2]),
                    'region' => trim($x[3]),
                    '2016_majority_pct' => (float) $x[4],
                    '2016_party' => trim($x[5]),
                    '2020_party' => trim($x[6]),
                    '2020_majority_pct' => (float) $x[7],
                ];
            }
        }
        return $list;
    }

    /**
     * Parses two-cell rows ("| <constituency> | <vote difference> |") that follow the header
     * row containing "Constituency" and "Vote". Rows before that header, rows with an empty
     * first cell and "---" separator rows are ignored.
     *
     * @return array List of ['constituency' => string, 'vote_difference' => string].
     */
    private function parseMarginalTable(string $html): array
    {
        $list = [];
        if (preg_match_all('/\|\s*([^|]+)\s*\|\s*([^|]+)\s*\|/', $html, $m, PREG_SET_ORDER)) {
            $headerSeen = false;
            foreach ($m as $x) {
                $const = trim($x[1]);
                $diff = trim($x[2]);
                if (stripos($const, 'Constituency') !== false && stripos($diff, 'Vote') !== false) {
                    $headerSeen = true;
                    continue;
                }
                if (!$headerSeen || $const === '' || $const === '---') {
                    continue;
                }
                $list[] = ['constituency' => $const, 'vote_difference' => $diff];
            }
        }
        return $list;
    }

    /**
     * Parses the 2016-vs-2020 comparison rows for the known regions.
     *
     * @return array List of ['region', 'ndc_2016', 'ndc_2020', 'ndc_gl', 'npp_2016', 'npp_2020',
     *               'npp_gl'] with float percentages.
     */
    private function parseRegionalComparisonTable(string $html): array
    {
        $rows = [];
        // | Ahafo | 44% | 44% | 0.2% | 55% | 55% | 0.0% |
        $pattern = '/\|\s*(' . $this->regionAlternation() . ')\s*\|\s*([\d.]+)%\s*\|\s*([\d.]+)%\s*\|\s*([-\d.]+)%\s*\|\s*([\d.]+)%\s*\|\s*([\d.]+)%\s*\|\s*([-\d.]+)%\s*\|/';
        if (preg_match_all($pattern, $html, $m, PREG_SET_ORDER)) {
            foreach ($m as $x) {
                $rows[] = [
                    'region' => trim($x[1]),
                    'ndc_2016' => (float) $x[2],
                    'ndc_2020' => (float) $x[3],
                    'ndc_gl' => (float) $x[4],
                    'npp_2016' => (float) $x[5],
                    'npp_2020' => (float) $x[6],
                    'npp_gl' => (float) $x[7],
                ];
            }
        }
        return $rows;
    }

    /**
     * Inserts or updates the `election_scraped_data` row identified by election + data type.
     *
     * If the payload cannot be JSON-encoded the failure is logged and nothing is written, so
     * an existing row is never overwritten with an unusable payload.
     */
    private function upsertScrapedData(int $electionId, string $dataType, array $payload, string $sourceUrl): void
    {
        // Scraped text may contain invalid UTF-8, which makes plain json_encode() return false;
        // substitute such bytes with U+FFFD instead.
        $encoded = json_encode($payload, JSON_INVALID_UTF8_SUBSTITUTE);
        if ($encoded === false) {
            Log::warning('PeaceFm scraper payload encode error', [
                'election_id' => $electionId,
                'data_type' => $dataType,
                'message' => json_last_error_msg(),
            ]);
            return;
        }

        $timestamp = now();
        DB::table('election_scraped_data')->updateOrInsert(
            ['election_id' => $electionId, 'data_type' => $dataType],
            [
                'payload' => $encoded,
                'source_url' => $sourceUrl,
                'scraped_at' => $timestamp,
                'updated_at' => $timestamp,
            ]
        );
    }

    /**
     * Upserts one NPP row and one NDC row per scraped region into `election_results_regional`.
     *
     * Region names are matched against the `regions` table ignoring case and whitespace (so
     * "Greateraccra" and "Greater Accra" resolve to the same region). Entries whose region is
     * unknown or whose combined votes are not positive are skipped. `total_votes_in_region`
     * is the NPP + NDC sum only, since that is all the scraped block provides; a tie is
     * recorded as an NPP win.
     *
     * @param array $regionalVotes List of ['region', 'npp_votes', 'npp_pct', 'ndc_votes', 'ndc_pct'].
     */
    private function syncRegionalResultsFromScraped(Election $election, array $regionalVotes): void
    {
        if (empty($regionalVotes)) {
            return;
        }

        $nppId = DB::table('parties')->where('short_name', 'NPP')->value('id');
        $ndcId = DB::table('parties')->where('short_name', 'NDC')->value('id');
        if ($nppId === null || $ndcId === null) {
            // Without both party ids the two upserts below would share a null key and
            // overwrite each other.
            Log::warning('PeaceFm scraper regional sync skipped: NPP/NDC party not found', [
                'election_id' => $election->id,
            ]);
            return;
        }

        $nppCandidateId = DB::table('candidates')->where('name', 'Nana Akufo-Addo')->value('id');
        $ndcCandidateId = DB::table('candidates')->where('name', 'John Dramani Mahama')->value('id');

        $regionIdByKey = [];
        foreach (DB::table('regions')->pluck('id', 'name')->toArray() as $regionName => $id) {
            $regionIdByKey[$this->regionKey((string) $regionName)] = $id;
        }

        $timestamp = now();
        foreach ($regionalVotes as $r) {
            $regionId = $regionIdByKey[$this->regionKey((string) ($r['region'] ?? ''))] ?? null;
            $nppV = (int) ($r['npp_votes'] ?? 0);
            $ndcV = (int) ($r['ndc_votes'] ?? 0);
            $total = $nppV + $ndcV;
            if (!$regionId || $total <= 0) {
                continue;
            }

            // Prefer the scraped percentage; otherwise derive it from the two-party total.
            $nppPct = (float) ($r['npp_pct'] ?? round(100 * $nppV / $total, 2));
            $ndcPct = (float) ($r['ndc_pct'] ?? round(100 * $ndcV / $total, 2));
            $nppWins = $nppV >= $ndcV;

            DB::table('election_results_regional')->updateOrInsert(
                ['election_id' => $election->id, 'region_id' => $regionId, 'party_id' => $nppId],
                ['candidate_id' => $nppCandidateId, 'votes' => $nppV, 'percentage' => $nppPct, 'total_votes_in_region' => $total, 'is_region_winner' => $nppWins, 'updated_at' => $timestamp]
            );
            DB::table('election_results_regional')->updateOrInsert(
                ['election_id' => $election->id, 'region_id' => $regionId, 'party_id' => $ndcId],
                ['candidate_id' => $ndcCandidateId, 'votes' => $ndcV, 'percentage' => $ndcPct, 'total_votes_in_region' => $total, 'is_region_winner' => !$nppWins, 'updated_at' => $timestamp]
            );
        }
    }

    /**
     * Normalizes a region name for lookup: lower-cased with all whitespace removed.
     */
    private function regionKey(string $name): string
    {
        return strtolower(preg_replace('/\s+/', '', $name) ?? $name);
    }
}
