#!/usr/bin/env python3
"""
Script to update Arabic HTML pages with scraped content
"""

import json
import shutil
from pathlib import Path
from bs4 import BeautifulSoup

class ArabicPageUpdater:
    """Update Arabic HTML pages in the working directory with scraped content.

    Loads previously scraped page data (text + image metadata) from a JSON
    file and selectively fills placeholder/empty content in the mapped Arabic
    HTML files, rewriting <img> references to point at local copies.
    Existing non-placeholder Arabic text is never overwritten.
    """

    def __init__(self):
        self.base_dir = Path(".")
        self.scraped_data_file = self.base_dir / "scraped_content" / "data" / "scraped_pages.json"
        self.scraped_images_dir = self.base_dir / "scraped_content" / "images"
        self.assets_dir = self.base_dir / "assets"
        # scraped URL -> page data dict; populated by load_scraped_data()
        self.scraped_data = {}

        # Page mapping: scraped URL -> Arabic HTML file
        self.page_mapping = {
            "https://smart-const.com/": "home-ar.html",
            "https://smart-const.com/who-we-are/": "about-ar.html",
            "https://smart-const.com/our-services/construction-services/": "ourService-ar.html",
            "https://smart-const.com/our-projects/": "project-ar.html",
            "https://smart-const.com/contact-us/": "contactUs-ar.html",
        }

    def load_scraped_data(self):
        """Load scraped page data from the JSON file into self.scraped_data."""
        with open(self.scraped_data_file, 'r', encoding='utf-8') as f:
            self.scraped_data = json.load(f)
        print(f"Loaded {len(self.scraped_data)} scraped pages")

    @staticmethod
    def _update_h1(container, headings, min_len=5):
        """Set the first <h1> in *container* to the scraped h1 text.

        Only overwrites when the current heading text is empty or shorter
        than *min_len* characters (i.e. a placeholder). Only the first
        scraped heading with level 'h1' is considered.
        """
        h1 = container.find('h1')
        if not h1:
            return
        for heading in headings:
            if heading['level'] == 'h1':
                current_text = h1.get_text(strip=True)
                if not current_text or len(current_text) < min_len:
                    h1.string = heading['text']
                break

    @staticmethod
    def _fill_paragraphs(p_tags, new_texts, min_len=20, placeholders=()):
        """Replace <p> text with scraped paragraphs where appropriate.

        A paragraph is replaced only when its current text is empty, shorter
        than *min_len* characters, or contains one of the *placeholders*
        (matched case-insensitively). Pairs tags and texts positionally;
        extra tags or texts are ignored.
        """
        for p_tag, new_text in zip(p_tags, new_texts):
            current_text = p_tag.get_text(strip=True)
            if (not current_text or len(current_text) < min_len
                    or any(ph in current_text.lower() for ph in placeholders)):
                p_tag.string = new_text

    def update_home_page_ar(self, soup, page_data):
        """Update Arabic home page content (hero heading + about paragraphs)."""
        text_data = page_data.get('text', {})

        # Hero section: refresh the <h1> if it's still a placeholder.
        hero_section = soup.find('div', class_='hero-content') or soup.find('section', id='section1')
        if hero_section and text_data.get('headings'):
            self._update_h1(hero_section, text_data['headings'])

        # About section (section2): fill up to three placeholder paragraphs.
        about_section = soup.find('div', id='section2') or soup.find('section', class_='slide-2')
        if about_section and text_data.get('paragraphs'):
            self._fill_paragraphs(about_section.find_all('p')[:3],
                                  text_data['paragraphs'],
                                  placeholders=('hello world',))

        return soup

    def update_about_page_ar(self, soup, page_data):
        """Update Arabic about page content (main heading + body paragraphs)."""
        text_data = page_data.get('text', {})

        if text_data.get('headings'):
            self._update_h1(soup, text_data['headings'])

        content_section = soup.find('section', id='aboutUsBoxText') or soup.find('div', class_='box-max-width')
        if content_section and text_data.get('paragraphs'):
            self._fill_paragraphs(content_section.find_all('p'), text_data['paragraphs'])

        return soup

    def update_services_page_ar(self, soup, page_data):
        """Update an Arabic services-style page (heading only).

        Also reused for the projects and contact pages, which currently only
        get their main <h1> refreshed.
        """
        text_data = page_data.get('text', {})
        if text_data.get('headings'):
            self._update_h1(soup, text_data['headings'])
        return soup

    def update_images(self, soup, page_data):
        """Rewrite <img> tags to point at locally downloaded image copies.

        Matches each <img src> against the scraped image URLs (full-URL
        substring or basename match) and, on a hit, points src at
        ./assets/scraped_images/<filename> and fills in missing alt text
        from the matched image's scraped metadata.
        """
        images = page_data.get('images', [])

        # Original URL -> (local relative path, scraped image metadata).
        # Keep the metadata alongside the path so alt text comes from the
        # *matched* image, not from a stale loop variable.
        url_to_local = {}
        for img in images:
            if 'local_path' in img:
                filename = Path(img['local_path']).name
                url_to_local[img['url']] = (f"./assets/scraped_images/{filename}", img)

        for img_tag in soup.find_all('img'):
            src = img_tag.get('src', '')
            if not src:
                # Path('').name == '' would substring-match every URL below.
                continue
            for original_url, (local_path, img_info) in url_to_local.items():
                if original_url in src or Path(src).name in original_url:
                    img_tag['src'] = local_path
                    if not img_tag.get('alt') and 'alt' in img_info:
                        img_tag['alt'] = img_info.get('alt', '')
                    break

        return soup

    def update_arabic_pages(self):
        """Update every mapped Arabic HTML file with its scraped content."""
        self.load_scraped_data()

        # Process each mapped page
        for scraped_url, html_file in self.page_mapping.items():
            if scraped_url not in self.scraped_data:
                print(f"Skipping {html_file} - no scraped data for {scraped_url}")
                continue

            html_path = self.base_dir / html_file
            if not html_path.exists():
                print(f"Arabic HTML file not found: {html_file}")
                continue

            print(f"\nUpdating {html_file}...")

            soup = BeautifulSoup(html_path.read_text(encoding='utf-8'), 'html.parser')
            page_data = self.scraped_data[scraped_url]

            # Dispatch on the target filename; project and contact pages
            # reuse the services updater (heading-only update).
            if 'home' in html_file:
                soup = self.update_home_page_ar(soup, page_data)
            elif 'about' in html_file:
                soup = self.update_about_page_ar(soup, page_data)
            elif 'Service' in html_file or 'project' in html_file or 'contact' in html_file:
                soup = self.update_services_page_ar(soup, page_data)

            # Update images
            soup = self.update_images(soup, page_data)

            html_path.write_text(str(soup), encoding='utf-8')
            print(f"Updated {html_file}")

if __name__ == "__main__":
    updater = ArabicPageUpdater()
    updater.update_arabic_pages()
