#!/usr/bin/env python3
"""
Script to update service detail pages with content and images from scraped pages.
"""

import json
import re
import os
from pathlib import Path

# Load scraped data
def load_scraped_data():
    """Return the parsed contents of the scraped-pages JSON file.

    The file is read from ``scraped_content/data/scraped_pages.json``
    relative to the current working directory.  Any failure (missing file,
    invalid JSON, ...) is reported on stdout and an empty dict is returned
    instead of raising.
    """
    source = 'scraped_content/data/scraped_pages.json'
    try:
        with open(source, encoding='utf-8') as handle:
            raw_text = handle.read()
        pages = json.loads(raw_text)
    except Exception as err:
        # Best-effort loader: callers always get a value back.
        print(f"Error loading scraped data: {err}")
        pages = {}
    return pages

# Get images from scraped content
def get_scraped_images():
    """Return the available scraped images as sorted, relative path strings.

    ``assets/scraped_images`` is used when it exists; otherwise
    ``scraped_content/images`` is used.  Both are resolved against the
    current working directory.  Only ``*.jpg``, ``*.jpeg`` and ``*.png``
    files directly inside the chosen directory are returned; matching is
    done by ``Path.glob``.

    Returns:
        list[str]: forward-slash paths in sorted order, or an empty list
        when neither directory exists.
    """
    candidates = (Path('assets/scraped_images'), Path('scraped_content/images'))
    images_dir = next((d for d in candidates if d.exists()), None)
    if images_dir is None:
        return []

    image_files = []
    for pattern in ('*.jpg', '*.jpeg', '*.png'):
        image_files.extend(images_dir.glob(pattern))

    # These paths are written into HTML ``src`` attributes, so always emit
    # forward slashes regardless of the host platform's separator.
    return [image.as_posix() for image in sorted(image_files)]

# Extract content from scraped pages
def extract_service_content(scraped_data):
    """Collect title, paragraphs, headings and image paths for the service pages.

    Only the two construction-services URLs listed below are consulted.

    Args:
        scraped_data: mapping of page URL to a page record.  A record is
            read as ``{'text': {'title', 'paragraphs', 'headings'},
            'images': [...]}`` where headings are dicts with a ``text`` key
            and images are dicts with a ``local_path`` key.

    Returns:
        dict with keys ``title`` (str), ``paragraphs`` (up to 10 strings
        longer than 50 characters), ``headings`` (up to 10 heading texts)
        and ``images`` (up to 20 local paths).  Lists are de-duplicated in
        first-seen order and never contain empty entries.
    """
    service_urls = [
        'https://smart-const.com/construction-services/',
        'https://smart-const.com/our-services/construction-services/'
    ]
    default_title = 'Construction Services'

    title = default_title
    paragraphs = []
    headings = []
    images = []

    for url in service_urls:
        if url not in scraped_data:
            continue
        page = scraped_data[url] or {}
        text_data = page.get('text') or {}

        # Keep looking for a real title until one is found; an empty or
        # missing scraped title must not wipe out the default.
        if title == default_title:
            title = text_data.get('title') or default_title

        # Very short paragraphs are skipped.
        paragraphs.extend(
            p for p in (text_data.get('paragraphs') or []) if len(p) > 50
        )
        headings.extend(
            h.get('text', '') for h in (text_data.get('headings') or [])
        )
        images.extend(
            img.get('local_path', '') for img in (page.get('images') or [])
        )

    def unique_non_empty(values, limit):
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(v for v in values if v))[:limit]

    return {
        'title': title,
        'paragraphs': unique_non_empty(paragraphs, 10),
        'headings': unique_non_empty(headings, 10),
        'images': unique_non_empty(images, 20),
    }

# Service content in English.
# Hard-coded copy written into the English page by update_service_detail_page:
#   main_title - hero <h1> text and breadcrumb label
#   intro      - body of the first <h4> element
#   paragraphs - [0] and [1] fill the "paragraph-1" / "paragraph-2" elements,
#                the last entry fills the "digital-engineering-text" paragraph,
#                and the entries are cycled through as service-box hover text
#   bullets    - each entry becomes a <p> inside "digital-engineering--bullets"
ENGLISH_SERVICE_CONTENT = {
    'main_title': 'Construction Services',
    'intro': 'We specialize in constructing robust and efficient industrial facilities tailored to meet the specific needs of various industries. Our expertise encompasses everything from manufacturing plants and warehouses to distribution centers and industrial complexes.',
    'paragraphs': [
        'Our commercial building projects focus on creating functional and aesthetically pleasing spaces that drive business success. We deliver a wide range of commercial structures, including office buildings, retail centers, shopping malls, and mixed-use developments.',
        'We are dedicated to building high-quality residential spaces that enhance people\'s lives. Our residential projects encompass a diverse range of housing options, from single-family homes and apartments to townhouses and residential complexes.',
        'SPCC is a quality-driven construction company founded by highly qualified and well-experienced engineers in all disciplines to stand up as an example of good engineering practice.',
        'With our comprehensive construction services, we bring together extensive expertise and capabilities to deliver exceptional results for our clients. Our commitment to quality, safety, and timely delivery sets us apart in the industry.'
    ],
    'bullets': [
        'Comprehensive project planning and management',
        'Expert engineering and design services',
        'Quality construction and execution',
        'Timely project delivery',
        'Commitment to safety and compliance'
    ]
}

# Service content in Arabic.
# Same keys as ENGLISH_SERVICE_CONTENT (main_title, intro, paragraphs, bullets);
# update_service_detail_page selects this dict when is_arabic is True.
ARABIC_SERVICE_CONTENT = {
    'main_title': 'خدمات البناء',
    'intro': 'نتخصص في بناء المرافق الصناعية القوية والفعالة المصممة خصيصاً لتلبية الاحتياجات المحددة لمختلف الصناعات. تشمل خبرتنا كل شيء من مصانع التصنيع والمستودعات إلى مراكز التوزيع والمجمعات الصناعية.',
    'paragraphs': [
        'تركز مشاريع المباني التجارية لدينا على إنشاء مساحات وظيفية وجذابة من الناحية الجمالية تحقق نجاح الأعمال. نقدم مجموعة واسعة من الهياكل التجارية، بما في ذلك المباني المكتبية ومراكز البيع بالتجزئة ومراكز التسوق والتطويرات متعددة الاستخدامات.',
        'نحن ملتزمون ببناء مساحات سكنية عالية الجودة تعزز حياة الناس. تشمل مشاريعنا السكنية مجموعة متنوعة من خيارات السكن، من المنازل الفردية والشقق إلى المنازل المتلاصقة والمجمعات السكنية.',
        'شركة المشاريع الذكية للإنشاءات هي شركة إنشاءات مدفوعة بالجودة تأسست من قبل مهندسين مؤهلين تأهيلاً عالياً وذوي خبرة واسعة في جميع التخصصات لتكون مثالاً على الممارسة الهندسية الجيدة.',
        'من خلال خدماتنا الشاملة للبناء، نجمع بين الخبرات الواسعة والقدرات لتقديم نتائج استثنائية لعملائنا. التزامنا بالجودة والسلامة والتسليم في الوقت المحدد يميزنا في الصناعة.'
    ],
    'bullets': [
        'التخطيط والإدارة الشاملة للمشاريع',
        'خدمات الهندسة والتصميم المتخصصة',
        'البناء والتنفيذ عالي الجودة',
        'تسليم المشاريع في الوقت المحدد',
        'الالتزام بالسلامة والامتثال'
    ]
}


# Image used for the service boxes when no scraped images are available.
_FALLBACK_IMAGE = 'assets/scraped_images/WhatsApp-Image-2022-10-22-at-5.20.12-PM-1-600x300.jpeg'


def _indented(inner, inner_indent, closing_indent):
    """Put ``inner`` on its own indented line, then indent the closing tag's line."""
    return '\n' + ' ' * inner_indent + inner + '\n' + ' ' * closing_indent


def _replace_inner(html, pattern, inner, count=1, flags=0):
    """Replace capture group 2 of ``pattern`` with ``inner``, keeping groups 1 and 3.

    A callable replacement is used instead of a template string so that
    ``inner`` is inserted literally (backslashes in it are not interpreted)
    and group 3 is always the matched closing markup.  ``count=0`` replaces
    every match.
    """
    def rebuild(match):
        return match.group(1) + inner + match.group(3)

    return re.sub(pattern, rebuild, html, count=count, flags=flags)


def _build_service_boxes(service_names, paragraphs, images, is_arabic):
    """Render one ``service-box`` HTML fragment per entry of ``service_names``."""
    detail_suffix = '-ar' if is_arabic else ''
    read_more = 'اقرأ المزيد' if is_arabic else 'Read more'

    boxes = []
    for i, service in enumerate(service_names):
        # Box i uses image i; once the images run out the last one is reused.
        img_src = images[min(i, len(images) - 1)] if images else _FALLBACK_IMAGE
        # Hover text cycles through the available paragraphs.
        hover_text = paragraphs[i % len(paragraphs)] if paragraphs else ''

        boxes.append(f'''<div class="service-box">
            <img src="{img_src}"/>
            <div class="service-box-content">
              <div class="service-box--img">
                <img src="./assets/icon/business-and-trade copy 3.png"/>
              </div>
              <p>{service}</p>
            </div>
            <div class="hoveredContent">
              <div class="d-flex align-items-center hoveredTitle">
                <div class="hoveredIcon">
                  <img src="./assets/icon/business-and-trade copy 3.png"/>
                </div>
                <p>{service}</p>
              </div>
              <div class="hoveredContent--bottom">
                <p>
                  {hover_text}
                </p>
                <a href="./ourServicedetail{detail_suffix}.html">
                  <button>
                    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="#4A90E2" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-arrow-right-circle">
                      <circle cx="12" cy="12" r="10" />
                      <path d="M8 12h8" />
                      <path d="m12 16 4-4-4-4" /></svg><span> {read_more} </span>
                  </button>
                </a>
              </div>
            </div>
            <div class="bg-overlay-black"></div>
          </div>''')
    return boxes


def update_service_detail_page(file_path, is_arabic=False):
    """Rewrite one service detail page in place with the construction-services copy.

    The HTML at ``file_path`` is read, the hero title/image, breadcrumb,
    intro ``<h4>``, the two numbered paragraphs, the image gallery, the
    bullets, the closing paragraph and the service boxes are replaced via
    regular expressions, and the result is written back to the same file.
    Text comes from ``ENGLISH_SERVICE_CONTENT`` / ``ARABIC_SERVICE_CONTENT``;
    images come from ``get_scraped_images()``.  Sections whose pattern does
    not match are left untouched.

    Args:
        file_path: path of the HTML file; it is overwritten on success.
        is_arabic: use the Arabic content, labels and detail-page link.

    Returns:
        bool: True when the file was written, False when reading or writing
        failed (the error is printed).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {file_path}: {e}")
        return False

    images = get_scraped_images()
    service_content = ARABIC_SERVICE_CONTENT if is_arabic else ENGLISH_SERVICE_CONTENT
    main_title = service_content['main_title']
    paragraphs = service_content['paragraphs']

    # Hero section: first <h1>, and the first <img> with empty class/alt.
    content = _replace_inner(content, r'(<h1>)(.*?)(</h1>)', main_title)
    if images:
        content = _replace_inner(
            content,
            r'(<img[^>]*src=")([^"]*)("[^>]*class=""[^>]*alt=""[^>]*>)',
            images[0],
        )

    # Breadcrumb: every occurrence of the template's placeholder label.
    content = _replace_inner(
        content,
        r'(<strong>)(Digital Engineering|الهندسة الرقمية)(</strong>)',
        main_title,
        count=0,
    )

    # Main description: body of the first <h4> (may span several lines).
    content = _replace_inner(
        content,
        r'(<h4>)([\s\S]*?)(</h4>)',
        _indented(service_content['intro'], 12, 10),
    )

    # First two content paragraphs, when available and non-empty.
    for css_class, text in zip(('paragraph-1', 'paragraph-2'), paragraphs):
        if text:
            content = _replace_inner(
                content,
                rf'(<p class="{css_class}">)([\s\S]*?)(</p>)',
                _indented(text, 12, 10),
            )

    # Image gallery: only touched when there are enough images for all four thumbnails.
    if len(images) >= 4:
        content = _replace_inner(
            content,
            r'(<div class="figure">\s*<img\s+src=")([^"]*)(")',
            images[0],
        )
        thumbnails_html = ('\n' + ' ' * 12).join(
            '<div><img class="{}" src="{}"/></div>'.format('active' if i == 0 else '', img)
            for i, img in enumerate(images[:4])
        )
        # NOTE(review): group 3 requires three consecutive closing divs. If the
        # template's thumbnails are direct <div> children, the first of those
        # closes the last old thumbnail and is kept - confirm against the HTML.
        content = _replace_inner(
            content,
            r'(<div class="thumbnails">)(.*?)(</div>\s*</div>\s*</div>)',
            _indented(thumbnails_html, 12, 10),
            flags=re.DOTALL,
        )

    # Bullets section.
    bullets_html = ('\n' + ' ' * 10).join(
        f'<p>{bullet}</p>' for bullet in service_content['bullets']
    )
    content = _replace_inner(
        content,
        r'(<div class="digital-engineering--bullets">)(.*?)(</div>)',
        _indented(bullets_html, 10, 8),
        flags=re.DOTALL,
    )

    # Closing paragraph uses the last content paragraph.
    final_text = paragraphs[-1] if paragraphs else ''
    if final_text:
        content = _replace_inner(
            content,
            r'(<p class="digital-engineering-text">)(.*?)(</p>)',
            _indented(final_text, 10, 8),
            flags=re.DOTALL,
        )

    # Service boxes: the first six services of the selected language.
    services = [
        "Design and Build", "Infrastructure Projects", "Steel Building Projects",
        "Civil Works", "Electrical Projects", "Mechanical Projects",
        "Process Engineering & Execution", "Plant Machinery Engineering & Installation",
        "Special Metal Fabrication", "Project Management"
    ]
    arabic_services = [
        "التصميم والبناء", "مشاريع البنية التحتية", "مشاريع المباني الفولاذية",
        "الأعمال المدنية", "المشاريع الكهربائية", "المشاريع الميكانيكية",
        "هندسة العمليات والتنفيذ", "هندسة وتثبيت آلات المصانع",
        "تصنيع المعادن الخاصة", "إدارة المشاريع"
    ]
    service_list = arabic_services if is_arabic else services
    service_boxes = _build_service_boxes(service_list[:6], paragraphs, images, is_arabic)
    services_html = ('\n\n' + ' ' * 10).join(service_boxes)
    content = _replace_inner(
        content,
        r'(<div class="container digital-engineering-ourServices">)(.*?)(</div>\s*</section>)',
        _indented(services_html, 10, 8),
        flags=re.DOTALL,
    )

    # Write back
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (OSError, UnicodeError) as e:
        print(f"Error writing {file_path}: {e}")
        return False

    print(f"✓ Updated {file_path}")
    return True


def main(base_dir=None):
    """Update the English and Arabic service detail pages.

    Args:
        base_dir: directory containing ``ourServicedetail.html`` and
            ``ourServicedetail-ar.html``.  When omitted, the original
            deployment path is used so existing invocations keep working.
    """
    if base_dir is None:
        base_dir = '/media/tpct/main/shiftcodes/smc01sc2025.shiftcodes.net'
    base_dir = Path(base_dir)

    # (file name, is_arabic) for each page to rewrite.
    pages = (
        ('ourServicedetail.html', False),
        ('ourServicedetail-ar.html', True),
    )

    failed = []
    for file_name, is_arabic in pages:
        page_path = base_dir / file_name
        if not update_service_detail_page(str(page_path), is_arabic=is_arabic):
            failed.append(file_name)

    # Do not claim success when a page could not be read or written.
    if failed:
        print(f"\nFinished with errors; not updated: {', '.join(failed)}")
    else:
        print("\nDone!")


# Run the page update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
