#!/usr/bin/env python3
"""
Script to update the "Who We Are" page with content from scraped pages.
"""

import json
import re
from pathlib import Path

def load_scraped_data():
    """Read the scraped-pages JSON file and return its parsed content.

    The file is expected at ``scraped_content/data/scraped_pages.json``
    relative to the current working directory and is decoded as UTF-8.
    """
    raw_json = Path('scraped_content/data/scraped_pages.json').read_text(encoding='utf-8')
    return json.loads(raw_json)

def get_who_we_are_content(scraped_data):
    """Collect the pieces of the scraped who-we-are page used by the updater.

    Returns a dict with the page's ``headings``, ``paragraphs`` and
    ``images`` lists plus ``core_values``: the text of every heading whose
    level is ``h4``. Missing keys fall back to empty containers, so an
    absent page yields four empty lists.
    """
    page = scraped_data.get('https://smart-const.com/who-we-are/', {})
    text_section = page.get('text', {})
    all_headings = text_section.get('headings', [])

    return {
        'headings': all_headings,
        'paragraphs': text_section.get('paragraphs', []),
        # Core values are published on the page as h4 headings.
        'core_values': [
            entry.get('text', '')
            for entry in all_headings
            if entry.get('level') == 'h4'
        ],
        'images': page.get('images', []),
    }

# Arabic translations for core values.
# update_about_page() looks these keys up verbatim using the h4 heading text
# collected by get_who_we_are_content(), so the keys' exact spelling and
# spacing (e.g. 'CustomerSatisfaction') must not be normalized here.
# NOTE(review): the missing spaces presumably reflect how the scraper
# flattened the headings -- confirm against scraped_pages.json.
ARABIC_CORE_VALUES = {
    'CustomerSatisfaction': 'رضا العملاء',
    'On-TimeProject Delivery': 'التسليم في الوقت المحدد',
    'QualityDriven Mindset': 'عقلية مدفوعة بالجودة',
    'ProfessionalMethodologies': 'المنهجيات المهنية'
}

def _read_text_with_fallback(file_path):
    """Return the decoded text of *file_path*.

    UTF-8 is tried first, then cp1252. latin-1 is the last resort because it
    maps every possible byte and therefore never fails to decode; placing it
    earlier would make any later fallback unreachable.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    for encoding in ('utf-8', 'cp1252'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            continue
    with open(file_path, 'r', encoding='latin-1') as f:
        return f.read()


def _replace_between(pattern, text, content, count=0, flags=0):
    """Replace group 2 of each *pattern* match with *text*, inserted literally.

    A function replacement is used instead of a template string so that
    backslashes or leading digits in *text* are never interpreted as regex
    escapes or group references.
    """
    return re.sub(
        pattern,
        lambda m: m.group(1) + text + m.group(3),
        content,
        count=count,
        flags=flags,
    )


def _format_english_value(value):
    """Insert the spaces missing from a run-together core-value heading."""
    for word in ('Satisfaction', 'Delivery', 'Mindset', 'Methodologies'):
        value = value.replace(word, f' {word}')
    for word in ('Customer', 'On-Time', 'Quality', 'Professional'):
        value = value.replace(word, f'{word} ')
    # The two passes above can both add a space at the same boundary.
    return re.sub(r' {2,}', ' ', value).strip(' ')


def _build_accordion_item(i, value_display, accordion_body):
    """Render one accordion entry; *i* numbers the heading/collapse ids."""
    return f'''<div class="accordion-item">
<h6 class="accordion-header faqAccordionHeader" id="flush-heading{i}">
<button aria-controls="flush-collapse{i}" aria-expanded="false" class="accordion-button collapsed" data-bs-target="#flush-collapse{i}" data-bs-toggle="collapse" type="button">
                    {value_display}
                  </button>
</h6>
<div aria-labelledby="flush-heading{i}" class="accordion-collapse collapse" data-bs-parent="#accordionFlushExample" id="flush-collapse{i}">
<div class="accordion-body">
                    {accordion_body}
                  </div>
</div>
</div>'''


def update_about_page(file_path, content_data, is_arabic=False):
    """Update the about page at *file_path* with scraped content.

    Three regions of the HTML are rewritten when they are found: the ``src``
    of the first ``<img>`` that carries a ``class`` attribute after its
    ``src`` (hero image), every ``<p class="box-max-width__content">``
    paragraph, and the accordion inside the ``mission-core-values`` section.
    The file is always written back as UTF-8.

    Args:
        file_path: Path of the HTML file to rewrite in place.
        content_data: Dict with ``images``, ``paragraphs`` and
            ``core_values`` keys, as returned by get_who_we_are_content().
        is_arabic: When true, core values are shown via ARABIC_CORE_VALUES
            and a fixed Arabic description is used as the accordion body.

    Returns:
        True if the file was written, False if it could not be read or
        written (a message is printed in either failure case).
    """
    try:
        content = _read_text_with_fallback(file_path)
    except OSError as e:
        print(f"Could not read {file_path}: {e}")
        return False

    # Update hero image if available: first image whose path names one of
    # the two known hero files.
    hero_img_path = next(
        (
            path
            for path in (img.get('local_path', '') for img in content_data['images'])
            if '2-11' in path or '3-9' in path
        ),
        '',
    )
    if hero_img_path:
        # Convert to assets path if needed
        hero_img_path = hero_img_path.replace('scraped_content/images/', 'assets/scraped_images/')
        hero_pattern = r'(<img[^>]*src=["\'])([^"\']*)(["\'][^>]*class=["\'][^"\']*["\'][^>]*>)'
        content = _replace_between(hero_pattern, hero_img_path, content, count=1)

    # Update main description paragraph
    main_desc = content_data['paragraphs'][0] if content_data['paragraphs'] else ''
    if main_desc:
        desc_pattern = r'(<p class="box-max-width__content">)(.*?)(</p>)'
        content = _replace_between(desc_pattern, main_desc, content, flags=re.DOTALL)

    # Update core values - replace accordion items
    core_values = content_data['core_values']
    if core_values:
        closing = r'(</div>\s*</div>\s*</div>\s*</div>\s*</section>)'
        candidate_patterns = (
            # Doubled wrapper first, so both opening tags land in group 1.
            r'(<div class="mission-core-values">\s*<div class="mission-core-values">)(.*?)' + closing,
            r'(<div class="mission-core-values">)(.*?)' + closing,
        )
        values_section_pattern = next(
            (p for p in candidate_patterns if re.search(p, content, re.DOTALL)),
            None,
        )

        if values_section_pattern is not None:
            accordion_items = []
            for i, value in enumerate(core_values[:4], 1):  # Limit to 4 values
                if is_arabic:
                    value_display = ARABIC_CORE_VALUES.get(value, value)
                    accordion_body = "شركة المشاريع الذكية للإنشاءات هي شركة إنشاءات مدفوعة بالجودة تأسست من قبل مهندسين مؤهلين تأهيلاً عالياً وذوي خبرة واسعة في جميع التخصصات لتكون مثالاً على الممارسة الهندسية الجيدة."
                else:
                    value_display = _format_english_value(value)
                    accordion_body = main_desc if main_desc else 'Our commitment to excellence drives everything we do.'
                accordion_items.append(_build_accordion_item(i, value_display, accordion_body))

            # NOTE(review): the matched opening tag(s) are kept AND a fresh
            # wrapper div is emitted, so each run nests one more
            # mission-core-values div. Kept as-is because the target HTML is
            # not visible from here -- confirm the intended structure.
            new_values_section = '<div class="mission-core-values">\n' + '\n'.join(accordion_items) + '\n</div>'
            content = _replace_between(values_section_pattern, new_values_section, content, flags=re.DOTALL)

    # Write back
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, UnicodeError) as e:
        print(f"Error writing {file_path}: {e}")
        return False
    print(f"✓ Updated {file_path}")
    return True

def main():
    """Load the scraped data and apply it to the English and Arabic pages."""
    content_data = get_who_we_are_content(load_scraped_data())
    core_values = content_data['core_values']

    print(f"Found {len(core_values)} core values")
    print(f"Core values: {core_values}")

    # English page first, then its Arabic counterpart.
    for page_file, arabic in (('about.html', False), ('about-ar.html', True)):
        update_about_page(page_file, content_data, is_arabic=arabic)

    print("\nDone!")

# Run the update only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
