import re

# Page to patch in place, relative to the current working directory.
# Forward slashes are used so the path resolves on POSIX as well as Windows:
# outside Windows a backslash is an ordinary filename character, so the
# backslash-separated form could only be opened on Windows.
html_path = 'es/eventos/j-balvin-made-bucaramanga.html'

# Load the whole document as text; the rest of the script edits `content`.
with open(html_path, 'r', encoding='utf-8') as f:
    content = f.read()

# Baseline figures, printed before any substitution is applied.
print(f"Original file size: {len(content)} chars")
print(f"SVG placeholders found: {content.count('data:image/svg+xml')}")

# Fix 1: Replace Blazy lazy-loaded images
# Pattern: src="data:image/svg+xml..." ... data-src="/sites/default/files/..."
# Replace the SVG placeholder src with the actual tuboleta.com URL

def fix_blazy_img(match):
    """Return the matched ``<img>`` tag with its SVG placeholder ``src`` replaced.

    Written to be used as the replacement callable of ``re.sub``. The tag's
    ``data-src`` attribute is looked up; when it holds a path that starts with
    ``/sites/default/files/``, every
    ``src="data:image/svg+xml;charset=utf-8,..."`` attribute in the tag is
    replaced with ``src="https://tuboleta.com<path>"``.

    Args:
        match: Match object whose group 0 is the full ``<img ...>`` tag text.

    Returns:
        The rewritten tag text, or the tag unchanged when it has no
        ``data-src`` attribute of the expected form.
    """
    full_tag = match.group(0)
    data_src_match = re.search(r'data-src="(/sites/default/files/[^"]+)"', full_tag)
    if data_src_match is None:
        return full_tag

    new_src_attr = f'src="https://tuboleta.com{data_src_match.group(1)}"'
    # Substitute through a callable so the URL is inserted literally. A plain
    # replacement string is parsed as a template, in which a backslash in the
    # path would be read as an escape or group reference (or raise re.error).
    return re.sub(
        r'src="data:image/svg\+xml;charset=utf-8,[^"]*"',
        lambda _m: new_src_attr,
        full_tag,
    )

# Match entire <img> tags that have the SVG placeholder
# Hand every <img> tag that still carries the SVG placeholder src to
# fix_blazy_img and splice its return value back into the document.
content = re.compile(
    r'<img[^>]*src="data:image/svg\+xml;charset=utf-8,[^"]*"[^>]*>'
).sub(fix_blazy_img, content)

# Report how many placeholder markers are still present after the pass.
print(f"SVG placeholders remaining: {content.count('data:image/svg+xml')}")

# Fix 2: Fix <source> tags that use about:blank with data-srcset
# Pattern: srcset="about:blank" ... data-srcset="/sites/default/files/..."
def fix_blazy_source(match):
    """Return the matched ``<source>`` tag with ``srcset="about:blank"`` filled in.

    Written to be used as the replacement callable of ``re.sub``. The tag's
    ``data-srcset`` attribute is looked up; when its value starts with
    ``/sites/default/files/``, each root-relative candidate in it is prefixed
    with ``https://tuboleta.com`` and the result replaces every
    ``srcset="about:blank"`` in the tag. Descriptors such as ``1x`` / ``2x``
    are carried over untouched.

    Args:
        match: Match object whose group 0 is the full ``<source ...>`` tag text.

    Returns:
        The rewritten tag text, or the tag unchanged when it has no
        ``data-srcset`` attribute of the expected form.
    """
    full_tag = match.group(0)
    data_srcset_match = re.search(r'data-srcset="(/sites/default/files/[^"]+)"', full_tag)
    if data_srcset_match is None:
        return full_tag

    # Prefix every root-relative candidate, not only the first one, so that a
    # value like "/a.webp 1x, /b.webp 2x" does not keep a relative second URL.
    # A candidate is recognised at the start of the value or after a comma
    # followed by whitespace; "//host/..." (protocol-relative) is left alone.
    absolute_srcset = re.sub(
        r'(^|,\s+)/(?!/)',
        lambda m: f"{m.group(1)}https://tuboleta.com/",
        data_srcset_match.group(1),
    )

    new_srcset_attr = f'srcset="{absolute_srcset}"'
    # A callable replacement inserts the value literally; a replacement string
    # would be parsed as a template and mishandle any backslash in the path.
    return re.sub(r'srcset="about:blank"', lambda _m: new_srcset_attr, full_tag)

# Match entire <source> tags that have about:blank
# Hand every <source> tag whose srcset is still about:blank to
# fix_blazy_source and splice its return value back into the document.
content = re.compile(
    r'<source[^>]*srcset="about:blank"[^>]*>'
).sub(fix_blazy_source, content)

# Report how many about:blank markers are still present after the pass.
print(f"'about:blank' remaining: {content.count('about:blank')}")

# Fix 3: Strip the " is-b-loading" class token everywhere it appears; the
# intent is to keep Blazy from treating these images as still loading.
content = content.replace(' is-b-loading', '')

# Fix 4: Inline script that, once the DOM is ready, points any remaining lazy
# <img>/<source> elements at their real tuboleta.com URLs.
disable_blazy_script = '''
<script>
// Disable Blazy lazy loading - show all lazy images immediately
document.addEventListener('DOMContentLoaded', function() {
    // Force all lazy images to load
    document.querySelectorAll('img[data-src]').forEach(function(img) {
        // getAttribute returns null when the img has no src attribute at all
        var currentSrc = img.getAttribute('src') || '';
        if (img.dataset.src && !currentSrc.startsWith('http')) {
            img.src = 'https://tuboleta.com' + img.dataset.src;
        }
    });
    // Force all lazy sources to load
    document.querySelectorAll('source[data-srcset]').forEach(function(source) {
        if (source.dataset.srcset) {
            var parts = source.dataset.srcset.split(' ');
            if (parts[0] && !parts[0].startsWith('http')) {
                parts[0] = 'https://tuboleta.com' + parts[0];
                source.srcset = parts.join(' ');
            }
        }
    });
});
</script>
'''

# Insert the script before the first </head> only, and skip the insertion when
# the script's marker comment is already in the page, so that running this
# tool again on an already-patched file does not stack duplicate <script> tags.
if '// Disable Blazy lazy loading' not in content:
    content = content.replace('</head>', disable_blazy_script + '</head>', 1)

# Fix 5 (no change needed): the main event image uses a direct, non-lazy src
# with a relative path, e.g.
#   src="../../sites/default/files/2025-12/DETALLE%20EVENTO_4.png"
# This should work as-is with the relative path, since the page is in es/eventos/.

# Fix 6 (skipped): adding a <base> tag to help with absolute paths was
# considered, but relative paths are kept working as they are instead.

# Fix 7 (no change needed): the "Comprar" button links go to external URLs,
# which is fine.

# Overwrite the source page with the patched markup.
with open(html_path, mode='w', encoding='utf-8') as out_file:
    out_file.write(content)

# Confirm the write and show the resulting document length.
print("File saved successfully!")
print(f"New file size: {len(content)} chars")
