#!/usr/bin/env python3
"""
Script to update the "Join Us" page with content from scraped pages.
"""

import json
import re
from pathlib import Path

def load_scraped_data():
    """Return the parsed contents of the scraped-pages JSON dump.

    The dump is read as UTF-8 from a fixed path relative to the current
    working directory.
    """
    dump_path = Path('scraped_content/data/scraped_pages.json')
    raw_json = dump_path.read_text(encoding='utf-8')
    return json.loads(raw_json)

def get_join_content(scraped_data):
    """Build the content dictionary used to fill in the join page.

    The company description is the first paragraph scraped from the
    "who we are" page; when that page has no paragraphs, the first paragraph
    of the main page is used instead, and an empty string if neither has any.
    The company name and short name are fixed values.
    """
    home_text = scraped_data.get('https://smart-const.com', {}).get('text', {})
    about_text = scraped_data.get('https://smart-const.com/who-we-are/', {}).get('text', {})

    # Prefer the "who we are" paragraphs; fall back to the main page.
    paragraphs = about_text.get('paragraphs', []) or home_text.get('paragraphs', [])

    if paragraphs:
        description = paragraphs[0]
    else:
        description = ''

    result = {}
    result['company_description'] = description
    result['company_name'] = 'Smart Projects Construction Co.'
    result['company_short'] = 'SPCC'
    return result

def _read_with_fallback(file_path):
    """Return the text of *file_path*, or None if it cannot be opened or decoded."""
    # cp1252 has to come before latin-1: latin-1 maps every byte value and so
    # never raises UnicodeDecodeError, which would make any encoding listed
    # after it unreachable.
    for encoding in ('utf-8', 'cp1252', 'latin-1'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            continue
        except OSError:
            # Missing or unreadable file: trying another encoding will not help.
            return None
    return None


def _replace_section(content, pattern, new_block):
    """Replace every match of *pattern* in *content* with the literal *new_block*."""
    # A callable replacement keeps re.sub from interpreting backslashes or
    # group references that might appear in the replacement text.
    return re.sub(pattern, lambda _match: new_block, content, flags=re.DOTALL)


def update_join_page(file_path, content_data, is_arabic=False):
    """Rewrite the join page at *file_path* with the new company branding.

    The "Careers" and "Internship" sections (an ``<h3>`` heading followed by
    one ``<p>`` paragraph) are replaced wholesale, then any remaining
    references to the old company names are replaced, and, for the Arabic
    page, the two button labels are translated. The result is written back to
    the same path as UTF-8.

    Args:
        file_path: Path of the HTML file to update in place.
        content_data: Mapping with ``'company_name'`` and ``'company_short'``
            keys; only read when ``is_arabic`` is false.
        is_arabic: When true, use the built-in Arabic texts instead of the
            English ones derived from *content_data*.

    Returns:
        True if the file was read and written successfully, False otherwise.
    """
    content = _read_with_fallback(file_path)
    if content is None:
        print(f"Could not read {file_path}")
        return False

    if is_arabic:
        company_name = 'شركة المشاريع الذكية للإنشاءات'
        new_careers_text = '''<h3>الوظائف في شركة المشاريع الذكية للإنشاءات</h3>
<p>
              إذا كنت شغوفاً بقطاع البناء، وتريد إحداث فرق، وتتطلع للانضمام إلى فريق ديناميكي،
              نحن نود أن نسمع منك. انضم إلينا في شركة المشاريع الذكية للإنشاءات
              معاً، دعونا نشكل المستقبل.
            </p>'''
        new_internship_text = '''<h3>التدريب في شركة المشاريع الذكية للإنشاءات</h3>
<p>
              إذا كنت طالباً أو خريجاً جديداً وتريد اكتساب خبرة عملية في قطاع البناء،
              نحن نقدم برامج تدريبية شاملة. انضم إلينا في شركة المشاريع الذكية للإنشاءات
              وابدأ رحلتك المهنية معنا.
            </p>'''
    else:
        company_name = content_data['company_name']
        company_short = content_data['company_short']
        new_careers_text = f'''<h3>Careers at {company_short}</h3>
<p>
              If you're passionate about the construction industry, driven to
              make a difference, and eager to be part of a dynamic team, we
              would love to hear from you. Join us at {company_name}
              and together, let's shape the future.
            </p>'''
        new_internship_text = f'''<h3>Internship at {company_short}</h3>
<p>
              If you're a student or recent graduate looking to gain practical experience
              in the construction industry, we offer comprehensive internship programs.
              Join us at {company_name} and start your professional journey with us.
            </p>'''

    # The section patterns key on the ORIGINAL "El Seif" headings, so they
    # must run before the global name replacement below; otherwise the
    # headings are already rewritten and the patterns can never match.
    careers_pattern = r'(<h3>Careers at El Seif</h3>\s*<p>)(.*?)(</p>)'
    internship_pattern = r'(<h3>Internship at El Seif</h3>\s*<p>)(.*?)(</p>)'
    content = _replace_section(content, careers_pattern, new_careers_text)
    content = _replace_section(content, internship_pattern, new_internship_text)

    # Replace the remaining references to the old company names.
    content = content.replace('El Seif', company_name)
    content = content.replace('ESEC Engineering Contracting', company_name)

    if is_arabic:
        # Translate the button labels.
        content = content.replace('Current Vacancies', 'الوظائف المتاحة')
        content = content.replace('Internship Opportunities', 'فرص التدريب')

    # Write back, always as UTF-8 regardless of the encoding that was read.
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, UnicodeError) as e:
        print(f"Error writing {file_path}: {e}")
        return False

    print(f"✓ Updated {file_path}")
    return True

def main():
    """Load the scraped data, report what was found, and update both join pages."""
    content_data = get_join_content(load_scraped_data())

    company = content_data['company_name']
    description_preview = content_data['company_description'][:100]
    print(f"Company: {company}")
    print(f"Description: {description_preview}...")

    # English page first, then its Arabic counterpart.
    pages = (
        ('join.html', False),
        ('join-ar.html', True),
    )
    for page_path, arabic in pages:
        update_join_page(page_path, content_data, is_arabic=arabic)

    print("\nDone!")

# Run the update only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
