#!/usr/bin/env python3
"""
Comprehensive script to update ALL text content from scraped data
Updates menus, headers, paragraphs, buttons, etc. while preserving HTML structure
"""

import json
from pathlib import Path
from bs4 import BeautifulSoup

class ComprehensiveContentUpdater:
    """Update local HTML pages with text content scraped from smart-const.com.

    Reads a JSON dump of scraped pages and rewrites headings, paragraphs and
    navigation labels in the matching local English/Arabic HTML files while
    preserving the surrounding markup structure.
    """

    def __init__(self):
        # All paths are resolved relative to the current working directory.
        self.base_dir = Path(".")
        self.scraped_data_file = (
            self.base_dir / "scraped_content" / "data" / "scraped_pages.json"
        )
        # Populated by load_scraped_data(): {source_url: page_data}.
        self.scraped_data = {}

        # Maps each scraped source URL to its local English/Arabic HTML files.
        self.page_mapping = {
            "https://smart-const.com/": {
                "en": "home.html",
                "ar": "home-ar.html",
            },
            "https://smart-const.com/who-we-are/": {
                "en": "about.html",
                "ar": "about-ar.html",
            },
            "https://smart-const.com/our-services/construction-services/": {
                "en": "ourService.html",
                "ar": "ourService-ar.html",
            },
            "https://smart-const.com/our-projects/": {
                "en": "project.html",
                "ar": "project-ar.html",
            },
            "https://smart-const.com/contact-us/": {
                "en": "contactUs.html",
                "ar": "contactUs-ar.html",
            },
        }

    @staticmethod
    def _headings_of_level(headings, level):
        """Return the scraped heading dicts whose 'level' equals *level*.

        Hoisted helper: the original recomputed this filter repeatedly
        (up to six times per page in update_home_page alone).
        """
        return [h for h in headings if h['level'] == level]

    def _find_page_data(self, url):
        """Return scraped data for *url*, tolerating a trailing-slash mismatch.

        Tries the URL as-is, without a trailing slash, and with one appended
        (the original only tried stripping). Returns None when no key matches.
        """
        for candidate in (url, url.rstrip('/'), url + '/'):
            if candidate in self.scraped_data:
                return self.scraped_data[candidate]
        return None

    def load_scraped_data(self):
        """Load the scraped-pages JSON dump into self.scraped_data."""
        with open(self.scraped_data_file, 'r', encoding='utf-8') as f:
            self.scraped_data = json.load(f)
        print(f"Loaded {len(self.scraped_data)} scraped pages")

    def update_home_page(self, soup, page_data):
        """Update home-page-specific sections (hero, services, projects, news).

        Args:
            soup: BeautifulSoup document for the home page.
            page_data: Scraped data dict for the corresponding URL.

        Returns:
            The (mutated in place) soup object.
        """
        text_data = page_data.get('text', {})
        headings = text_data.get('headings', [])
        paragraphs = text_data.get('paragraphs', [])
        # Compute the h2 list once instead of per target element.
        h2_headings = self._headings_of_level(headings, 'h2')

        # Hero block: the H1 and its subtitle both take the first scraped h2.
        hero_section = soup.find('div', class_='slide-2__content')
        if hero_section:
            h1 = hero_section.find('h1')
            if h1 and h2_headings:
                h1.string = h2_headings[0]['text']  # "Building Great Projects Since 2008"

            subtitle = hero_section.find('p', class_='slide-2__paragraph')
            if subtitle and h2_headings:
                subtitle.string = h2_headings[0]['text']

            main_desc = hero_section.find('p', class_='slide-2__paragraph--2')
            if main_desc and paragraphs:
                main_desc.string = paragraphs[0]

        # "Our Services": heading and teaser both take the second scraped h2.
        services_section = soup.find('div', class_='homeSlider--slide-3__ourProject')
        if services_section and len(h2_headings) > 1:
            h3 = services_section.find('h3')
            if h3:
                h3.string = h2_headings[1]['text']  # "Our Expertise"

            p_tag = services_section.find('p')
            if p_tag:
                p_tag.string = h2_headings[1]['text']

        # "Our Projects": header h3/h5 both take the third scraped h2.
        projects_section = soup.find('section', class_='aboutUsProjects')
        if projects_section:
            header = projects_section.find('div', class_='aboutUsProjects--header')
            if header and len(h2_headings) > 2:
                h3 = header.find('h3')
                if h3:
                    h3.string = h2_headings[2]['text']  # "Ongoing Projects"

                h5 = header.find('h5')
                if h5:
                    h5.string = h2_headings[2]['text']

        # "Latest News" keeps a fixed English label.
        news_section = soup.find('div', class_='slide-5__news')
        if news_section:
            h3 = news_section.find('h3')
            if h3:
                h3.string = "Latest News"  # Keep as is or update from scraped

        return soup

    def update_about_page(self, soup, page_data):
        """Update about-page headings, subheading and main content paragraph.

        Args:
            soup: BeautifulSoup document for the about page.
            page_data: Scraped data dict for the corresponding URL.

        Returns:
            The (mutated in place) soup object.
        """
        text_data = page_data.get('text', {})
        headings = text_data.get('headings', [])
        paragraphs = text_data.get('paragraphs', [])

        # Main heading: prefer the scraped h1, fall back to the first heading.
        main_h3 = soup.find('h3', class_='box-max-width__heading')
        if main_h3:
            h1_headings = self._headings_of_level(headings, 'h1')
            if h1_headings:
                main_h3.string = h1_headings[0]['text']
            elif headings:
                main_h3.string = headings[0]['text']

        # Subheading: second scraped heading when available, else the first.
        subheading = soup.find('p', class_='box-max-width__Sub-heading')
        if subheading and headings:
            source = headings[1] if len(headings) > 1 else headings[0]
            subheading.string = source['text']

        # First scraped paragraph becomes the main content block.
        content_p = soup.find('p', class_='box-max-width__content')
        if content_p and paragraphs:
            content_p.string = paragraphs[0]

        # Mission heading: match by the word "mission", else first h3 inside
        # the section; fill with the first scraped h3 when one exists.
        mission_section = soup.find('div', class_='mission-values-container__flex')
        if mission_section:
            mission_h3 = mission_section.find(
                'h3', string=lambda x: x and 'mission' in x.lower()
            )
            if not mission_h3:
                mission_h3 = mission_section.find('h3')
            if mission_h3 and headings:
                h3_headings = self._headings_of_level(headings, 'h3')
                if h3_headings:
                    mission_h3.string = h3_headings[0]['text']

        return soup

    def update_menu_navigation(self, soup, page_data):
        """Normalize navigation link labels based on each link's href.

        NOTE(review): setting link.string replaces any nested markup inside
        the anchor with plain text — preserved from the original behavior.
        """
        text_data = page_data.get('text', {})
        lists = text_data.get('lists', [])

        nav = soup.find('nav', id='cssmenu') or soup.find('nav', class_='head_btm_menu')
        if nav:
            for link in nav.find_all('a', href=True):
                href = link.get('href', '').lower()
                link_text = link.get_text(strip=True)

                if 'home' in href or link_text.lower() == 'home':
                    link.string = 'Home'
                elif 'about' in href:
                    # Only relabel when the scraped lists confirm the label.
                    for lst in lists:
                        if isinstance(lst, list) and 'Who We Are' in str(lst):
                            link.string = 'Who We Are'
                            break
                elif 'service' in href:
                    link.string = 'Services'
                elif 'project' in href:
                    link.string = 'Projects'
                elif 'contact' in href:
                    link.string = 'Contact Us'

        return soup

    def update_all_headings(self, soup, page_data):
        """Fill placeholder h1/h2/h3 tags with scraped headings, in order.

        A heading is treated as a placeholder when it is empty or very short
        (h1 additionally when it contains "hello"); real content is left alone.
        """
        headings = page_data.get('text', {}).get('headings', [])
        if not headings:
            return soup

        def is_short(text):
            return not text or len(text) < 5

        for level, is_placeholder in (
            ('h1', lambda t: is_short(t) or 'hello' in t.lower()),
            ('h2', is_short),
            ('h3', is_short),
        ):
            scraped = self._headings_of_level(headings, level)
            tags = soup.find_all(level)
            if level == 'h1':
                # Defensive: skip any h1 parsed inside <title>. (The original
                # also re-checked h.name == 'h1', which find_all guarantees.)
                tags = [t for t in tags if t.parent.name != 'title']
            # zip pairs each tag with its scraped counterpart; extras on
            # either side are ignored, matching the original index bound.
            for tag, source in zip(tags, scraped):
                if is_placeholder(tag.get_text(strip=True)):
                    tag.string = source['text']

        return soup

    def update_all_paragraphs(self, soup, page_data):
        """Replace placeholder <p> text in known content areas, in order.

        Only paragraphs that are empty, very short, or contain obvious
        placeholder text ("hello world", "lorem") are overwritten.
        """
        paragraphs = page_data.get('text', {}).get('paragraphs', [])
        if not paragraphs:
            return soup

        content_areas = (
            soup.find('div', class_='slide-2__content'),
            soup.find('section', id='aboutUsBoxText'),
            soup.find('div', class_='box-max-width'),
            soup.find('main'),
        )

        for area in content_areas:
            if not area:
                continue
            for p_tag, new_text in zip(area.find_all('p'), paragraphs):
                current = p_tag.get_text(strip=True)
                if (not current
                        or len(current) < 20
                        or 'hello world' in current.lower()
                        or 'lorem' in current.lower()):
                    p_tag.clear()
                    p_tag.string = new_text

        return soup

    def update_page(self, html_file, page_data, is_arabic=False):
        """Update one local HTML file with scraped data and write it back.

        Args:
            html_file: Filename relative to base_dir.
            page_data: Scraped data dict for the page's URL.
            is_arabic: Kept for API compatibility; currently unused because
                both language variants receive the same scraped text.

        Returns:
            True when the file was found and rewritten, False otherwise.
        """
        html_path = self.base_dir / html_file
        if not html_path.exists():
            print(f"File not found: {html_file}")
            return False

        print(f"Updating {html_file}...")

        soup = BeautifulSoup(html_path.read_text(encoding='utf-8'), 'html.parser')

        # Page-specific passes first, then the generic fallback passes.
        if 'home' in html_file:
            soup = self.update_home_page(soup, page_data)
        elif 'about' in html_file:
            soup = self.update_about_page(soup, page_data)

        soup = self.update_all_headings(soup, page_data)
        soup = self.update_all_paragraphs(soup, page_data)
        soup = self.update_menu_navigation(soup, page_data)

        html_path.write_text(str(soup), encoding='utf-8')

        print(f"✓ Updated {html_file}")
        return True

    def update_all(self):
        """Update every mapped page (both language variants) and report."""
        self.load_scraped_data()

        updated = 0
        for scraped_url, files in self.page_mapping.items():
            page_data = self._find_page_data(scraped_url)
            if page_data is None:
                print(f"No data for {scraped_url}")
                continue

            for lang in ('en', 'ar'):
                if lang in files and self.update_page(files[lang], page_data, lang == 'ar'):
                    updated += 1

        print(f"\n{'='*60}")
        print(f"Updated {updated} files")
        print(f"{'='*60}")

if __name__ == "__main__":
    updater = ComprehensiveContentUpdater()
    updater.update_all()
