#!/usr/bin/env python3
"""
Script to update HTML text content from scraped data
Only updates text content, preserves HTML structure and CSS
"""

import json
from pathlib import Path
from bs4 import BeautifulSoup

class TextContentUpdater:
    """Update text content of local HTML pages from scraped site data.

    Only text nodes are replaced; HTML structure, attributes, and CSS are
    preserved. Existing content is overwritten only when it looks like a
    placeholder (empty, very short, or containing "hello world" /
    "lorem ipsum"), so real content is never clobbered.
    """

    def __init__(self):
        # All paths are resolved relative to the current working directory.
        self.base_dir = Path(".")
        self.scraped_data_file = self.base_dir / "scraped_content" / "data" / "scraped_pages.json"
        self.scraped_data = {}

        # Maps each scraped source URL to the local English/Arabic HTML
        # files that should receive its text content.
        self.page_mapping = {
            "https://smart-const.com/": {
                "en": "home.html",
                "ar": "home-ar.html"
            },
            "https://smart-const.com/who-we-are/": {
                "en": "about.html",
                "ar": "about-ar.html"
            },
            "https://smart-const.com/our-services/construction-services/": {
                "en": "ourService.html",
                "ar": "ourService-ar.html"
            },
            "https://smart-const.com/our-projects/": {
                "en": "project.html",
                "ar": "project-ar.html"
            },
            "https://smart-const.com/contact-us/": {
                "en": "contactUs.html",
                "ar": "contactUs-ar.html"
            },
        }

    def load_scraped_data(self):
        """Load the scraped-pages JSON file into ``self.scraped_data``.

        Raises:
            FileNotFoundError: if ``self.scraped_data_file`` does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        with open(self.scraped_data_file, 'r', encoding='utf-8') as f:
            self.scraped_data = json.load(f)
        print(f"Loaded {len(self.scraped_data)} scraped pages")

    @staticmethod
    def _set_if_placeholder(tag, new_text, min_len):
        """Set *tag*'s text to *new_text* when the current text is a
        placeholder (missing or shorter than *min_len* characters).

        No-op when *tag* is None, so callers may pass ``soup.find(...)``
        results directly without checking them first.
        """
        if tag is None:
            return
        current = tag.get_text(strip=True)
        if not current or len(current) < min_len:
            tag.string = new_text

    def update_menu_items(self, soup, page_data):
        """Rename navigation links to match the scraped site's menu wording.

        Only the "About Us" link is ever renamed (to "Who We Are"); the
        remaining menu entries are assumed to already match the scraped site.
        """
        text_data = page_data.get('text', {})

        # Find the navigation list among the scraped lists: the first list
        # whose first item mentions a common menu word.
        menu_items = []
        menu_markers = ('home', 'about', 'services', 'projects', 'contact')
        for lst in text_data.get('lists') or []:
            if isinstance(lst, list) and lst:
                if any(marker in str(lst[0]).lower() for marker in menu_markers):
                    menu_items = lst
                    break

        menu_texts = ('Home', 'About Us', 'Services', 'Projects',
                      'Latest News', 'Join Us', 'Contact Us')

        for link in soup.find_all('a', href=True):
            link_text = link.get_text(strip=True)
            if link_text not in menu_texts:
                continue
            href = link.get('href', '').lower()
            # Preserve the original branch order: a link whose href mentions
            # "home" is never renamed, even if its text reads "About Us".
            if 'home' in href or link_text.lower() == 'home':
                continue
            if (('about' in href or link_text.lower() == 'about us')
                    and menu_items and 'Who We Are' in str(menu_items)):
                link.string = 'Who We Are'

        return soup

    def update_headings(self, soup, page_data):
        """Fill placeholder <h1>/<h2>/<h3> tags from the scraped headings.

        Headings are matched to tags positionally, level by level. A tag is
        only updated when its current text is empty or shorter than 10
        characters (or, for <h1> only, contains the "hello world"
        placeholder).
        """
        headings = page_data.get('text', {}).get('headings', [])
        if not headings:
            return soup

        for level in ('h1', 'h2', 'h3'):
            scraped = [h['text'] for h in headings if h['level'] == level]
            # zip() stops at the shorter sequence, which reproduces the
            # original "i < len(...)" bounds check.
            for tag, new_text in zip(soup.find_all(level), scraped):
                current = tag.get_text(strip=True)
                is_placeholder = (
                    not current
                    or len(current) < 10
                    or (level == 'h1' and 'hello world' in current.lower())
                )
                if is_placeholder:
                    tag.string = new_text

        return soup

    def update_paragraphs(self, soup, page_data):
        """Fill placeholder <p> tags inside the main content areas.

        Paragraphs are matched positionally within each candidate content
        section. A <p> is updated only when its current text is empty, very
        short, or a known placeholder ("hello world" / "lorem ipsum").
        """
        paragraphs = page_data.get('text', {}).get('paragraphs', [])
        if not paragraphs:
            return soup

        # Candidate content containers, most specific first. Sections may be
        # nested (e.g. ".container" can contain ".box-max-width"), so track
        # which <p> tags were already considered to avoid overwriting the
        # same paragraph twice with different scraped text.
        content_sections = [
            soup.find('div', class_='slide-2__content'),
            soup.find('section', id='aboutUsBoxText'),
            soup.find('div', class_='box-max-width'),
            soup.find('main'),
            soup.find('div', class_='container'),
        ]
        handled = set()

        for section in content_sections:
            if not section:
                continue
            for i, p_tag in enumerate(section.find_all('p')):
                if i >= len(paragraphs):
                    break
                if id(p_tag) in handled:
                    continue
                handled.add(id(p_tag))
                current = p_tag.get_text(strip=True)
                if (not current
                        or len(current) < 20
                        or 'hello world' in current.lower()
                        or 'lorem ipsum' in current.lower()):
                    # Drop any nested markup before inserting the plain text.
                    p_tag.clear()
                    p_tag.string = paragraphs[i]

        return soup

    def update_home_page_content(self, soup, page_data):
        """Update the home-page-specific hero and section elements."""
        text_data = page_data.get('text', {})
        headings = text_data.get('headings', [])
        paragraphs = text_data.get('paragraphs', [])
        h1_texts = [h['text'] for h in headings if h['level'] == 'h1']
        h3_texts = [h['text'] for h in headings if h['level'] == 'h3']

        # Main hero heading inside the second slide.
        hero = soup.find('div', class_='slide-2__content')
        if hero and h1_texts:
            self._set_if_placeholder(hero.find('h1'), h1_texts[0], 10)

        # Hero subtitle: uses the first scraped heading regardless of level.
        if headings:
            self._set_if_placeholder(
                soup.find('p', class_='slide-2__paragraph'),
                headings[0]['text'], 10)

        # Main hero paragraph.
        if paragraphs:
            self._set_if_placeholder(
                soup.find('p', class_='slide-2__paragraph--2'),
                paragraphs[0], 20)

        # "Our projects" section heading.
        project_section = soup.find('div', class_='homeSlider--slide-3__ourProject')
        if project_section and h3_texts:
            self._set_if_placeholder(project_section.find('h3'), h3_texts[0], 5)

        return soup

    def update_about_page_content(self, soup, page_data):
        """Update the about-page-specific heading and content elements."""
        text_data = page_data.get('text', {})
        headings = text_data.get('headings', [])
        paragraphs = text_data.get('paragraphs', [])
        h3_texts = [h['text'] for h in headings if h['level'] == 'h3']

        # Main page heading.
        if h3_texts:
            self._set_if_placeholder(
                soup.find('h3', class_='box-max-width__heading'),
                h3_texts[0], 10)

        # Subheading: second scraped heading when available, else the first.
        if headings:
            sub_text = headings[1]['text'] if len(headings) > 1 else headings[0]['text']
            self._set_if_placeholder(
                soup.find('p', class_='box-max-width__Sub-heading'),
                sub_text, 10)

        # Main content paragraph.
        if paragraphs:
            self._set_if_placeholder(
                soup.find('p', class_='box-max-width__content'),
                paragraphs[0], 20)

        return soup

    def update_page(self, html_file, page_data, is_arabic=False):
        """Update a single HTML file in place from *page_data*.

        Args:
            html_file: file name relative to ``self.base_dir``.
            page_data: scraped data dict for the corresponding URL.
            is_arabic: flags the Arabic variant of the page. Currently unused
                — the same scraped text is applied to both variants.

        Returns:
            True when the file was updated, False when it does not exist.
        """
        html_path = self.base_dir / html_file
        if not html_path.exists():
            print(f"File not found: {html_file}")
            return False

        print(f"\nUpdating {html_file}...")

        with open(html_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')

        # Page-specific sections first, then the generic passes.
        if 'home' in html_file:
            soup = self.update_home_page_content(soup, page_data)
        elif 'about' in html_file:
            soup = self.update_about_page_content(soup, page_data)

        soup = self.update_headings(soup, page_data)
        soup = self.update_paragraphs(soup, page_data)
        soup = self.update_menu_items(soup, page_data)

        # Write back without prettifying so structure diffs stay minimal.
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(str(soup))

        print(f"✓ Updated {html_file}")
        return True

    def update_all_pages(self):
        """Update every mapped HTML file that has scraped data available."""
        self.load_scraped_data()

        updated_count = 0
        for scraped_url, files in self.page_mapping.items():
            if scraped_url not in self.scraped_data:
                print(f"Skipping - no data for {scraped_url}")
                continue

            page_data = self.scraped_data[scraped_url]

            # English variant.
            if 'en' in files:
                if self.update_page(files['en'], page_data, is_arabic=False):
                    updated_count += 1

            # Arabic variant (receives the same scraped text for now).
            if 'ar' in files:
                if self.update_page(files['ar'], page_data, is_arabic=True):
                    updated_count += 1

        print(f"\n{'='*60}")
        print(f"Update complete! Updated {updated_count} files")
        print(f"{'='*60}")

# Script entry point: update all mapped HTML files from the scraped data.
if __name__ == "__main__":
    updater = TextContentUpdater()
    updater.update_all_pages()
