#!/usr/bin/env python3
"""
Script to find unused assets by scanning HTML, CSS, and JS files
"""

import os
import re
from pathlib import Path
from collections import defaultdict

class AssetFinder:
    """Report files under ``assets/`` that no HTML, CSS or JS file refers to.

    References are collected from the ``*.html`` files directly inside the
    base directory and from the ``*.css`` / ``*.js`` files directly inside its
    ``css/`` and ``js/`` sub-directories (no recursion).  Matching is done on
    lower-cased file names only, never on directories, so assets that share a
    name but live in different folders are treated alike.
    """

    # Compiled once at class creation.  Every pattern captures exactly one
    # group: the referenced path.
    _REFERENCE_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'src=["\']([^"\']+)["\']',
            r'href=["\']([^"\']+)["\']',
            # Quoted url(): the closing quote delimits the value.
            r'url\(\s*["\']([^"\']+)["\']\s*\)',
            # Unquoted url(): stop at the first ")" so that several url()
            # values in one stylesheet are not swallowed as a single match.
            r'url\(\s*([^"\')]+)\s*\)',
            r'@import\s+["\']([^"\']+)["\']',
        )
    )

    # References starting with one of these are external resources, in-page
    # anchors or non-file schemes and can never name a local asset.
    _SKIPPED_PREFIXES = (
        'http://', 'https://', '//', 'data:',
        '#', 'javascript:', 'mailto:', 'tel:',
    )

    def __init__(self, base_dir="."):
        """Locate the files to scan.

        Args:
            base_dir: Site root containing the HTML files and the ``assets``,
                ``css`` and ``js`` directories.  Defaults to the current
                working directory.
        """
        self.base_dir = Path(base_dir)
        self.assets_dir = self.base_dir / "assets"
        self.css_dir = self.base_dir / "css"
        self.js_dir = self.base_dir / "js"

        # Files to scan (top level of each directory only).
        self.html_files = list(self.base_dir.glob("*.html"))
        self.css_files = list(self.css_dir.glob("*.css")) if self.css_dir.exists() else []
        self.js_files = list(self.js_dir.glob("*.js")) if self.js_dir.exists() else []

        # Lower-cased file names and stems seen in scanned files.
        self.referenced_assets = set()
        # Relative paths, lower-cased names and stems of files under assets/.
        self.all_assets = set()

    def find_asset_references_in_content(self, content, file_path=None):
        """Record every local file reference found in ``content``.

        For each ``src=``, ``href=``, ``url(...)`` or ``@import`` target that
        is not external, the lower-cased file name and its extension-less stem
        are added to ``self.referenced_assets``.  The stem is stored so that a
        reference to ``hero.png`` also covers ``hero.webp``.

        Args:
            content: Text of the HTML, CSS or JS file.
            file_path: Origin of ``content``; accepted for caller
                compatibility but not used.
        """
        for pattern in self._REFERENCE_PATTERNS:
            for match in pattern.findall(content):
                asset_path = match.strip()

                # Skip external URLs, anchors and non-file schemes.
                if asset_path.lower().startswith(self._SKIPPED_PREFIXES):
                    continue

                # "style.css?v=2" and "sprite.svg#icon" both name a plain file.
                asset_path = re.split(r'[?#]', asset_path, maxsplit=1)[0]

                filename = os.path.basename(asset_path).lower()
                if not filename:
                    continue
                self.referenced_assets.add(filename)
                self.referenced_assets.add(os.path.splitext(filename)[0])

    def scan_files(self):
        """Read every HTML, CSS and JS file and collect its asset references.

        Files that cannot be read are reported on stdout and skipped.
        """
        print("Scanning files for asset references...")

        for file_path in self.html_files + self.css_files + self.js_files:
            try:
                # Undecodable bytes are dropped rather than aborting the scan.
                content = file_path.read_text(encoding='utf-8', errors='ignore')
            except OSError as e:
                print(f"Error reading {file_path}: {e}")
                continue
            self.find_asset_references_in_content(content, file_path)

        print(f"Found {len(self.referenced_assets)} referenced asset names")

    def _asset_files(self):
        """Return every file below the assets directory, sorted for stable output."""
        if not self.assets_dir.exists():
            return []
        return sorted(
            Path(root) / name
            for root, _dirs, names in os.walk(self.assets_dir)
            for name in names
        )

    def find_all_assets(self):
        """Fill ``self.all_assets`` with the names of all files under assets/.

        Each file contributes its path relative to the assets directory, its
        lower-cased file name and its lower-cased stem.
        """
        print("Finding all asset files...")

        if not self.assets_dir.exists():
            print("Assets directory not found")
            return

        asset_files = self._asset_files()
        for file_path in asset_files:
            self.all_assets.add(str(file_path.relative_to(self.assets_dir)))
            name = file_path.name.lower()
            self.all_assets.add(name)
            self.all_assets.add(os.path.splitext(name)[0])

        # Count files, not set entries: each file adds up to three entries.
        print(f"Found {len(asset_files)} total asset files")

    def find_unused_assets(self):
        """Split the asset files into unreferenced and referenced ones.

        An asset counts as referenced when its lower-cased file name or stem
        equals a recorded reference.  Only exact matches are accepted; a
        substring test would let a reference such as ``app.js`` mark
        ``wallpaper.jpg`` as used.

        Returns:
            A ``(unused, used)`` tuple of lists of ``Path`` objects.
        """
        self.scan_files()
        self.find_all_assets()

        unused = []
        used = []
        for file_path in self._asset_files():
            filename = file_path.name.lower()
            stem = os.path.splitext(filename)[0]
            if filename in self.referenced_assets or stem in self.referenced_assets:
                used.append(file_path)
            else:
                unused.append(file_path)

        return unused, used

    def generate_report(self):
        """Print a usage summary and write ``unused_assets.txt``.

        The console output lists at most ten unused files per directory; the
        report file in the base directory lists all of them, one relative
        path per line.

        Returns:
            The list of unused asset paths.
        """
        unused, used = self.find_unused_assets()

        print(f"\n{'='*60}")
        print("Asset Usage Report")
        print(f"{'='*60}")
        print(f"Total assets: {len(unused) + len(used)}")
        print(f"Used assets: {len(used)}")
        print(f"Unused assets: {len(unused)}")
        print("\nUnused assets:")

        # Group by directory relative to assets/.
        unused_by_dir = defaultdict(list)
        for asset in unused:
            rel_path = asset.relative_to(self.assets_dir)
            dir_name = str(rel_path.parent) if rel_path.parent != Path('.') else 'root'
            unused_by_dir[dir_name].append(rel_path.name)

        for dir_name, files in sorted(unused_by_dir.items()):
            print(f"\n  {dir_name}/ ({len(files)} files)")
            for file in sorted(files)[:10]:  # Show first 10
                print(f"    - {file}")
            if len(files) > 10:
                print(f"    ... and {len(files) - 10} more")

        # Explicit encoding so non-ASCII file names do not depend on the
        # platform default.
        report_file = self.base_dir / "unused_assets.txt"
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("Unused Assets Report\n")
            f.write("="*60 + "\n\n")
            for asset in sorted(unused):
                rel_path = asset.relative_to(self.assets_dir)
                f.write(f"{rel_path}\n")

        print(f"\nReport saved to: {report_file}")
        return unused

if __name__ == "__main__":
    # Executed as a script: scan the current working directory, print the
    # usage summary and write unused_assets.txt.
    unused = AssetFinder().generate_report()
