From 3f65d2398b6b3c506af4a0d425c49c6890af9af7 Mon Sep 17 00:00:00 2001 From: super-nabla Date: Sun, 14 Dec 2025 17:46:52 +0100 Subject: [PATCH 1/8] debug position --- app.py | 4 ++-- tests.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app.py b/app.py index 0925546..79627c1 100644 --- a/app.py +++ b/app.py @@ -419,9 +419,9 @@ def process_double_brackets(text, tvar_id=0): # Assuming it's a regular internal link if len(parts) == 1: - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[0]}]]', double_brackets_types.wikilink + return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[0]}]]', double_brackets_types.wikilink if len(parts) == 2 : - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[1]}]]', double_brackets_types.wikilink + return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[1]}]]', double_brackets_types.wikilink return text def process_external_link(text, tvar_url_id=0): diff --git a/tests.py b/tests.py index 2112349..0bb4460 100644 --- a/tests.py +++ b/tests.py @@ -22,7 +22,7 @@ def test_internal_and_external_links(self): convert_to_translatable_wikitext( 'This is a text with an [[internal link]] and an [https://openstreetmap.org external link].' ), - 'This is a text with an [[Special:MyLanguage/Internal link|internal link]] and an [https://openstreetmap.org external link].' + 'This is a text with an [[Special:MyLanguage/Internal link|internal link]] and an [https://openstreetmap.org external link].' ) def test_category_with_translation(self): @@ -40,7 +40,7 @@ def test_notoc_preserved(self): def test_simple_internal_link(self): self.assertEqual( convert_to_translatable_wikitext('[[link]]'), - '[[Special:MyLanguage/Link|link]]' + '[[Special:MyLanguage/Link|link]]' ) def test_multiline_text(self): @@ -62,7 +62,7 @@ def test_double_namespace_without_list_case_1(self): convert_to_translatable_wikitext( '[[Help]]ing' ), - '[[Special:MyLanguage/Help|Help]]ing' + '[[Special:MyLanguage/Help|Help]]ing' ) def test_double_namespace_without_list_case_2(self): @@ -70,7 +70,7 @@ def test_double_namespace_without_list_case_2(self): convert_to_translatable_wikitext( '[[Help]] ing' ), - '[[Special:MyLanguage/Help|Help]] ing' + '[[Special:MyLanguage/Help|Help]] ing' ) def test_template_simple(self): @@ -155,7 +155,7 @@ def test_image_with_upright(self): def test_multiple_elements_in_one_line(self): self.assertEqual( convert_to_translatable_wikitext("Hello world! [[Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]"), - 'Hello world! [[Special:MyLanguage/Link|Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]' + 'Hello world! 
[[Special:MyLanguage/Link|Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]' ) def test_text_around_br_tag(self): From 78a2bbd261b00b097d57ef2341832b698b60709b Mon Sep 17 00:00:00 2001 From: super-nabla Date: Sun, 14 Dec 2025 18:34:25 +0100 Subject: [PATCH 2/8] make py scripts more modular --- app.py | 792 +--------------------------------------------- tests.py | 2 +- wikitranslator.py | 790 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 793 insertions(+), 791 deletions(-) create mode 100644 wikitranslator.py diff --git a/app.py b/app.py index 79627c1..af70954 100644 --- a/app.py +++ b/app.py @@ -1,799 +1,11 @@ from flask import Flask, request, render_template, jsonify from flask_cors import CORS # Import flask-cors -import re -from enum import Enum -import sys + +from wikitranslator import convert_to_translatable_wikitext app = Flask(__name__) CORS(app) # Enable CORS for all routes -behaviour_switches = ['__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__', '__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__EXPECTUNUSEDCATEGORY__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__', '__NOINDEX__', '__STATICREDIRECT__', '__EXPECTUNUSEDTEMPLATE__', '__NOGLOBAL__', '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', '__ARCHIVEDTALK__', '__NOTALK__', '__EXPECTWITHOUTSCANS__'] - -# --- Helper Functions for Processing Different Wikitext Elements --- -# These functions are designed to handle specific wikitext structures. -# Some will recursively call the main `convert_to_translatable_wikitext` -# function to process their internal content, ensuring nested elements -# are also handled correctly. - -def capitalise_first_letter(text): - """ - Capitalises the first letter of the given text. - If the text is empty or consists only of whitespace, it returns the text unchanged. - """ - if not text or not text.strip(): - return text - return text[0].upper() + text[1:] - -def is_emoji_unicode(char): - # This is a very simplified set of common emoji ranges. - # A comprehensive list would be much longer and more complex. - # See https://www.unicode.org/Public/emoji/ for full details. - if 0x1F600 <= ord(char) <= 0x1F64F: # Emoticons - return True - if 0x1F300 <= ord(char) <= 0x1F5FF: # Miscellaneous Symbols and Pictographs - return True - if 0x1F680 <= ord(char) <= 0x1F6FF: # Transport and Map Symbols - return True - if 0x2600 <= ord(char) <= 0x26FF: # Miscellaneous Symbols - return True - if 0x2700 <= ord(char) <= 0x27BF: # Dingbats - return True - # Add more ranges as needed for full coverage - return False - -def _wrap_in_translate(text): - """ - Wraps the given text with tags. - It ensures that empty or whitespace-only strings are not wrapped. - The tags are added around the non-whitespace content, - preserving leading and trailing whitespace. 
- """ - if not text or not text.strip(): - return text - - # Find the first and last non-whitespace characters - first_char_index = -1 - last_char_index = -1 - for i, char in enumerate(text): - if char not in (' ', '\n', '\t', '\r', '\f', '\v'): # Check for common whitespace characters - if first_char_index == -1: - first_char_index = i - last_char_index = i - - # If no non-whitespace characters are found (should be caught by text.strip() check, but for robustness) - if first_char_index == -1: - return text - - leading_whitespace = text[:first_char_index] - content = text[first_char_index : last_char_index + 1] - trailing_whitespace = text[last_char_index + 1 :] - - return f"{leading_whitespace}{content}{trailing_whitespace}" - -def process_syntax_highlight(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('')), "Invalid syntax highlight tag" - # Get inside the tag - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_table(text): - """ - Processes table blocks in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('{|') and text.endswith('|}')), "Invalid table tag" - return text - -def process_blockquote(text): - """ - Processes blockquote tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('
<blockquote>') and text.endswith('</blockquote>
')), "Invalid blockquote tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_poem_tag(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('')), "Invalid poem tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_code_tag(text, tvar_code_id=0): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('')), "Invalid code tag" - # Get inside the tag - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = f'{content}' - return f"{prefix}{wrapped_content}{suffix}" - -def process_div(text): - """ - Processes
tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('')), "Invalid div tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_hiero(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('') and text.endswith('')), "Invalid hiero tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_sub_sup(text): - """ - Processes and tags in the wikitext. - It wraps the content in tags. - """ - assert((text.startswith('') and text.endswith('')) or - (text.startswith('') and text.endswith(''))), "Invalid sub/sup tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_math(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('') and text.endswith('')), "Invalid math tag" - return text - -def process_small_tag(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('') and text.endswith('')), "Invalid small tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_nowiki(text): - """ - Processes tags in the wikitext. - It wraps the content in tags. - """ - assert(text.startswith('') and text.endswith('')), "Invalid nowiki tag" - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" - -def process_item(text): - """ - Processes list items in the wikitext. - It wraps the content in tags. 
- """ - offset = 0 - if text.startswith(';'): - offset = 1 - elif text.startswith(':'): - offset = 1 - elif text.startswith('#'): - while text[offset] == '#': - offset += 1 - elif text.startswith('*'): - while text[offset] == '*': - offset += 1 - # Add translate tags around the item content - item_content = text[offset:].strip() - if not item_content: - return text - return text[:offset] + ' ' + _wrap_in_translate(item_content) + '\n' - -class double_brackets_types(Enum): - wikilink = 1 - category = 2 - inline_icon = 3 - not_inline_icon_file = 4 - special = 5 - invalid_file = 6 - -def _process_file(s, tvar_inline_icon_id=0): - # Define keywords that should NOT be translated when found as parameters - NON_TRANSLATABLE_KEYWORDS = { - 'left', 'right', 'centre', 'center', 'thumb', 'frameless', 'border', 'none', - 'upright', 'baseline', 'middle', 'sub', 'super', 'text-top', 'text-bottom', '{{dirstart}}', '{{dirend}}' - } - NON_TRANSLATABLE_KEYWORDS_PREFIXES = { - 'link=', 'upright=', 'alt=' - } - NOT_INLINE_KEYWORDS = { - 'left', 'right', 'centre', 'center', 'thumb', 'frameless', 'border', 'none', '{{dirstart}}', '{{dirend}}' - } - file_aliases = ['File:', 'file:', 'Image:', 'image:'] - - tokens = [] - - inner_content = s[2:-2] # Remove the leading [[ and trailing ]] - tokens = inner_content.split('|') - tokens = [token.strip() for token in tokens] # Clean up whitespace around tokens - - # The first token shall start with a file alias - # e.g., "File:Example.jpg" or "Image:Example.png" - if not tokens or not tokens[0].startswith(tuple(file_aliases)): - return line, double_brackets_types.invalid_file - - # The first token is a file link - filename = tokens[0].split(':', 1)[1] if ':' in tokens[0] else tokens[0] - tokens[0] = f'File:{filename}' - - # Substitute 'left' with {{dirstart}} - while 'left' in tokens: - tokens[tokens.index('left')] = '{{dirstart}}' - # Substitute 'right' with {{dirend}} - while 'right' in tokens: - tokens[tokens.index('right')] = '{{dirend}}' - - ############################ - # Managing inline icons - ############################# - is_inline_icon = True - for token in tokens: - if token in NOT_INLINE_KEYWORDS: - is_inline_icon = False - break - if is_inline_icon : - # Check if it contains 'alt=' followed by an emoji - for token in tokens[1:]: - if token.startswith('alt='): - alt_text = token[len('alt='):].strip() - if not any(is_emoji_unicode(char) for char in alt_text): - is_inline_icon = False - break - elif token not in NON_TRANSLATABLE_KEYWORDS: - is_inline_icon = False - break - elif any(token.startswith(prefix) for prefix in NON_TRANSLATABLE_KEYWORDS_PREFIXES): - is_inline_icon = False - break - - if is_inline_icon: - # return something like: [[File:smiley.png|alt=🙂]] - returnline = f'[[' + '|'.join(tokens) + ']]' - return returnline, double_brackets_types.inline_icon - - ############################ - # Managing general files - ############################# - - output_parts = [] - - # The first token is the file name (e.g., "File:Example.jpg") - # We substitute any occurrences of "Image:" with "File:" - output_parts.append(tokens[0]) - - pixel_regex = re.compile(r'\d+(?:x\d+)?px') # Matches pixel values like "100px" or "100x50px)" - for token in tokens[1:]: - # Check for 'alt=' - if token.startswith('alt='): - alt_text = token[len('alt='):].strip() - output_parts.append('alt='+_wrap_in_translate(alt_text)) - # Check if the token is a known non-translatable keyword - elif token in NON_TRANSLATABLE_KEYWORDS: - output_parts.append(token) - # If the token starts 
with a known non-translatable prefix, keep it as is - elif any(token.startswith(prefix) for prefix in NON_TRANSLATABLE_KEYWORDS_PREFIXES): - output_parts.append(token) - # If the token is a pixel value, keep it as is - elif pixel_regex.match(token): - output_parts.append(token) - # Otherwise, assume it's a caption or other translatable text - else: - output_parts.append(f"{token}") - - # Reconstruct the line with the transformed parts - returnline = '[[' + '|'.join(output_parts) + ']]' - return returnline, double_brackets_types.not_inline_icon_file - -def process_double_brackets(text, tvar_id=0): - """ - Processes internal links in the wikitext. - It wraps the content in tags. - """ - if not (text.startswith("[[") and text.endswith("]]")) : - print(f"Input >{text}< must be wrapped in double brackets [[ ]]") - sys.exit(1) - # Split the link into parts, handling both internal links and links with display text - - inner_wl = text[2:-2] # Remove the leading [[ and trailing ]] - parts = inner_wl.split('|') - - # part 0 - category_aliases = ['Category:', 'category:', 'Cat:', 'cat:'] - file_aliases = ['File:', 'file:', 'Image:', 'image:'] - - parts[0] = parts[0].strip() # Clean up the first part - # Check if the first part is a category or file alias - if parts[0].startswith(tuple(category_aliases)): - # Handle category links - cat_name = parts[0].split(':', 1)[1] if ':' in parts[0] else parts[0] - return f'[[Category:{cat_name}{{{{#translation:}}}}]]', double_brackets_types.category - elif parts[0].startswith(tuple(file_aliases)): - # Handle file links - return _process_file(text) - elif parts[0].startswith('Special:'): - # Handle special pages - return f'[[{parts[0]}]]', double_brackets_types.special - - # Assuming it's a regular internal link - if len(parts) == 1: - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[0]}]]', double_brackets_types.wikilink - if len(parts) == 2 : - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[1]}]]', double_brackets_types.wikilink - return text - -def process_external_link(text, tvar_url_id=0): - """ - Processes external links in the format [http://example.com Description] and ensures - that only the description part is wrapped in tags, leaving the URL untouched. - """ - match = re.match(r'\[(https?://[^\s]+)\s+([^\]]+)\]', text) - - if match: - url_part = match.group(1) - description_part = match.group(2) - # Wrap only the description part in tags, leave the URL untouched - return f'[{url_part} {description_part}]' - return text - -def process_template(text): - """ - Processes the text to ensure that only the content outside of double curly braces {{ ... }} is wrapped in tags, - while preserving the template content inside the braces without translating it. - """ - assert(text.startswith('{{') and text.endswith('}}')), "Invalid template tag" - # Split the template content from the rest of the text - inner_content = text[2:-2].strip() # Remove the leading {{ and trailing }} - inner_content = capitalise_first_letter(inner_content) # Capitalise the first letter of the inner content - - # If the inner content is empty, return an empty string - if not inner_content : - return text - - # Wrap the inner content in tags - return '{{' + inner_content + '}}' - -def process_raw_url(text): - """ - Processes raw URLs in the wikitext. - It wraps the URL in tags. - """ - # This function assumes the text is a raw URL, e.g., "http://example.com" - # and wraps it in tags. 
- if not text.strip(): - return text - return text.strip() - - -# --- Main Tokenisation Logic --- - -def convert_to_translatable_wikitext(wikitext): - """ - Converts standard wikitext to translatable wikitext by wrapping - translatable text with tags, while preserving and - correctly handling special wikitext elements. - This function tokenizes the entire text, not line by line. - """ - if not wikitext: - return "" - - # add an extra newline at the beginning, useful to process items at the beginning of the text - wikitext = '\n' + wikitext - - parts = [] - last = 0 - curr = 0 - text_length = len(wikitext) - - while curr < text_length : - found = None - # Syntax highlight block - pattern = '', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_syntax_highlight)) - curr = end_pos - last = curr - continue - # Table block - pattern = '{|' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('|}', curr) + len('|}') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_table)) - curr = end_pattern - last = curr - continue - # Blockquote - pattern = '
<blockquote>' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('</blockquote>
', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_blockquote)) - curr = end_pattern - last = curr - continue - # Poem tag - pattern = '', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_poem_tag)) - curr = end_pattern - last = curr - continue - # Code tag - pattern = '', curr) + len('
') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_code_tag)) - curr = end_pattern - last = curr - continue - # Div tag - pattern = '', curr) + len('
') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_div)) - curr = end_pattern - last = curr - continue - # Hiero tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_hiero)) - curr = end_pattern - last = curr - continue - # Sub tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_sub_sup)) - curr = end_pattern - last = curr - continue - # Sup tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_sub_sup)) - curr = end_pattern - last = curr - continue - # Math tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_math)) - curr = end_pattern - last = curr - continue - # Small tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_small_tag)) - curr = end_pattern - last = curr - continue - # Nowiki tag - pattern = '' - if wikitext.startswith(pattern, curr): - end_pattern = wikitext.find('', curr) + len('') - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], process_nowiki)) - curr = end_pattern - last = curr - continue - # br tag - patterns = ['
<br>', '
<br/>', '<br />
'] - for p in patterns: - if wikitext.startswith(p, curr): - end_pattern = curr + len(p) - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pattern], lambda x: x)) - curr = end_pattern - last = curr - found = True - break - if found: - continue - # Lists - patterns_newline = ['\n*', '\n#', '\n:', '\n;'] - if any(wikitext.startswith(p, curr) for p in patterns_newline) : - curr += 1 # Discard the newline character - parts.append((wikitext[last:curr], _wrap_in_translate)) - # Iterate through the list items - patterns = ['*', '#', ':', ';'] - while any(wikitext.startswith(p, curr) for p in patterns) : - end_pattern = wikitext.find('\n', curr) - if end_pattern == -1: - end_pattern = text_length - else : - end_pattern += 1 # Include the newline in the part - parts.append((wikitext[curr:end_pattern], process_item)) - curr = end_pattern - last = curr - continue - # Internal links - pattern = '[[' - if wikitext.startswith(pattern, curr): - # Count the number of opening double brackets '[[' and closing ']]' to find the end - end_pos = curr + 2 - bracket_count = 1 - while end_pos < text_length and bracket_count > 0: - if wikitext.startswith('[[', end_pos): - bracket_count += 1 - end_pos += 2 - elif wikitext.startswith(']]', end_pos): - bracket_count -= 1 - end_pos += 2 - else: - end_pos += 1 - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - if end_pos > curr + 2: # Ensure we have a valid link - parts.append((wikitext[curr:end_pos], process_double_brackets)) - curr = end_pos - last = curr - continue - # External links - pattern = '[http' - if wikitext.startswith(pattern, curr): - # Find the end of the external link - end_pos = wikitext.find(']', curr) - if end_pos == -1: - end_pos = text_length - else : - end_pos += 1 # Include the closing ']' in the part - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pos + 1], process_external_link)) - curr = end_pos - last = curr - continue - # Templates - pattern = '{{' - if wikitext.startswith(pattern, curr): - # Find the end of the template - end_pos = wikitext.find('}}', curr) + 2 - if end_pos == 1: - end_pos = text_length - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pos], process_template)) - curr = end_pos - last = curr - continue - # Raw URLs - pattern = 'http' - if wikitext.startswith(pattern, curr): - # Find the end of the URL (space or end of string) - end_pos = wikitext.find(' ', curr) - if end_pos == -1: - end_pos = text_length - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pos], process_raw_url)) - curr = end_pos - last = curr - continue - # Behaviour switches - for switch in behaviour_switches: - if wikitext.startswith(switch, curr): - end_pos = curr + len(switch) - if last < curr: - parts.append((wikitext[last:curr], _wrap_in_translate)) - parts.append((wikitext[curr:end_pos], lambda x: x)) - curr = end_pos - last = curr - - - curr += 1 # Move to the next character if no pattern matched - - # Add any remaining text after the last processed part - if last < text_length: - parts.append((wikitext[last:], _wrap_in_translate)) - - """ - print ('*' * 20) - for i, (part, handler) in enumerate(parts): - print(f"--- Start element {i} with handler {handler.__name__} ---") - print(part) - print(f"---\n") - - print ('*' * 20) - """ - - # Process links - tvar_id = 0 - tvar_url_id = 0 - 
tvar_code_id = 0 - tvar_inline_icon_id = 0 - for i, (part, handler) in enumerate(parts): - # Handlers for links require a tvar_id - if handler == process_double_brackets: - new_part, double_brackets_type = handler(part, tvar_id) - if double_brackets_type in [double_brackets_types.wikilink, double_brackets_types.special, double_brackets_types.inline_icon]: - new_handler = _wrap_in_translate # Change handler to _wrap_in_translate - else : - new_handler = lambda x: x # No further processing for categories and files - parts[i] = (new_part, new_handler) - tvar_id += 1 - elif handler == process_external_link: - new_part = handler(part, tvar_url_id) - new_handler = _wrap_in_translate # Change handler to _wrap_in_translate - parts[i] = (new_part, new_handler) - tvar_url_id += 1 - elif handler == process_code_tag: - new_part = handler(part, tvar_code_id) - new_handler = _wrap_in_translate # Change handler to _wrap_in_translate - parts[i] = (new_part, new_handler) - tvar_code_id += 1 - elif handler == process_double_brackets : - new_part, double_brackets_type = handler(part, tvar_inline_icon_id) - if double_brackets_type == double_brackets_types.inline_icon: - new_handler = _wrap_in_translate # Change handler to _wrap_in_translate - tvar_inline_icon_id += 1 - else: - new_handler = lambda x: x - - # Scan again the parts: merge consecutive parts handled by _wrap_in_translate - _parts = [] - if parts: - current_part, current_handler = parts[0] - for part, handler in parts[1:]: - if handler == _wrap_in_translate and current_handler == _wrap_in_translate: - # Merge the parts - current_part += part - else: - _parts.append((current_part, current_handler)) - current_part, current_handler = part, handler - # Add the last accumulated part - _parts.append((current_part, current_handler)) - - # Process the parts with their respective handlers - processed_parts = [handler(part) for part, handler in _parts] - - # Debug output - """ - print("Processed parts:") - for i, (ppart, (part, handler)) in enumerate(zip(processed_parts, _parts)): - print(f"--- Start element {i} with handler {handler.__name__} ---") - print(part) - print(f"---\n") - print(ppart) - print(f"---\n") - """ - - # Join the processed parts into a single string - return ''.join(processed_parts)[1:] # Remove the leading newline added at the beginning - @app.route('/') def index(): return render_template('home.html') diff --git a/tests.py b/tests.py index 0bb4460..29d0e68 100644 --- a/tests.py +++ b/tests.py @@ -1,5 +1,5 @@ import unittest -from app import convert_to_translatable_wikitext, process_double_brackets +from app import convert_to_translatable_wikitext class TestTranslatableWikitext(unittest.TestCase): diff --git a/wikitranslator.py b/wikitranslator.py new file mode 100644 index 0000000..3ff4997 --- /dev/null +++ b/wikitranslator.py @@ -0,0 +1,790 @@ +import re +from enum import Enum +import sys + +behaviour_switches = ['__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__', '__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__EXPECTUNUSEDCATEGORY__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__', '__NOINDEX__', '__STATICREDIRECT__', '__EXPECTUNUSEDTEMPLATE__', '__NOGLOBAL__', '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', '__ARCHIVEDTALK__', '__NOTALK__', '__EXPECTWITHOUTSCANS__'] + +# --- Helper Functions for Processing Different Wikitext Elements --- +# These functions are designed to handle specific wikitext structures. 
+# Some will recursively call the main `convert_to_translatable_wikitext` +# function to process their internal content, ensuring nested elements +# are also handled correctly. + +def capitalise_first_letter(text): + """ + Capitalises the first letter of the given text. + If the text is empty or consists only of whitespace, it returns the text unchanged. + """ + if not text or not text.strip(): + return text + return text[0].upper() + text[1:] + +def is_emoji_unicode(char): + # This is a very simplified set of common emoji ranges. + # A comprehensive list would be much longer and more complex. + # See https://www.unicode.org/Public/emoji/ for full details. + if 0x1F600 <= ord(char) <= 0x1F64F: # Emoticons + return True + if 0x1F300 <= ord(char) <= 0x1F5FF: # Miscellaneous Symbols and Pictographs + return True + if 0x1F680 <= ord(char) <= 0x1F6FF: # Transport and Map Symbols + return True + if 0x2600 <= ord(char) <= 0x26FF: # Miscellaneous Symbols + return True + if 0x2700 <= ord(char) <= 0x27BF: # Dingbats + return True + # Add more ranges as needed for full coverage + return False + +def _wrap_in_translate(text): + """ + Wraps the given text with tags. + It ensures that empty or whitespace-only strings are not wrapped. + The tags are added around the non-whitespace content, + preserving leading and trailing whitespace. + """ + if not text or not text.strip(): + return text + + # Find the first and last non-whitespace characters + first_char_index = -1 + last_char_index = -1 + for i, char in enumerate(text): + if char not in (' ', '\n', '\t', '\r', '\f', '\v'): # Check for common whitespace characters + if first_char_index == -1: + first_char_index = i + last_char_index = i + + # If no non-whitespace characters are found (should be caught by text.strip() check, but for robustness) + if first_char_index == -1: + return text + + leading_whitespace = text[:first_char_index] + content = text[first_char_index : last_char_index + 1] + trailing_whitespace = text[last_char_index + 1 :] + + return f"{leading_whitespace}{content}{trailing_whitespace}" + +def process_syntax_highlight(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('')), "Invalid syntax highlight tag" + # Get inside the tag + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_table(text): + """ + Processes table blocks in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('{|') and text.endswith('|}')), "Invalid table tag" + return text + +def process_blockquote(text): + """ + Processes blockquote tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('
<blockquote>') and text.endswith('</blockquote>
')), "Invalid blockquote tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_poem_tag(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('')), "Invalid poem tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_code_tag(text, tvar_code_id=0): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('')), "Invalid code tag" + # Get inside the tag + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = f'{content}' + return f"{prefix}{wrapped_content}{suffix}" + +def process_div(text): + """ + Processes
tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('')), "Invalid div tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_hiero(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('') and text.endswith('')), "Invalid hiero tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_sub_sup(text): + """ + Processes and tags in the wikitext. + It wraps the content in tags. + """ + assert((text.startswith('') and text.endswith('')) or + (text.startswith('') and text.endswith(''))), "Invalid sub/sup tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_math(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('') and text.endswith('')), "Invalid math tag" + return text + +def process_small_tag(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('') and text.endswith('')), "Invalid small tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_nowiki(text): + """ + Processes tags in the wikitext. + It wraps the content in tags. + """ + assert(text.startswith('') and text.endswith('')), "Invalid nowiki tag" + start_tag_end = text.find('>') + 1 + end_tag_start = text.rfind('<') + if start_tag_end >= end_tag_start: + return text + prefix = text[:start_tag_end] + content = text[start_tag_end:end_tag_start].strip() + suffix = text[end_tag_start:] + if not content: + return text + # Wrap the content in tags + wrapped_content = _wrap_in_translate(content) + return f"{prefix}{wrapped_content}{suffix}" + +def process_item(text): + """ + Processes list items in the wikitext. + It wraps the content in tags. 
+ """ + offset = 0 + if text.startswith(';'): + offset = 1 + elif text.startswith(':'): + offset = 1 + elif text.startswith('#'): + while text[offset] == '#': + offset += 1 + elif text.startswith('*'): + while text[offset] == '*': + offset += 1 + # Add translate tags around the item content + item_content = text[offset:].strip() + if not item_content: + return text + return text[:offset] + ' ' + _wrap_in_translate(item_content) + '\n' + +class double_brackets_types(Enum): + wikilink = 1 + category = 2 + inline_icon = 3 + not_inline_icon_file = 4 + special = 5 + invalid_file = 6 + +def _process_file(s, tvar_inline_icon_id=0): + # Define keywords that should NOT be translated when found as parameters + NON_TRANSLATABLE_KEYWORDS = { + 'left', 'right', 'centre', 'center', 'thumb', 'frameless', 'border', 'none', + 'upright', 'baseline', 'middle', 'sub', 'super', 'text-top', 'text-bottom', '{{dirstart}}', '{{dirend}}' + } + NON_TRANSLATABLE_KEYWORDS_PREFIXES = { + 'link=', 'upright=', 'alt=' + } + NOT_INLINE_KEYWORDS = { + 'left', 'right', 'centre', 'center', 'thumb', 'frameless', 'border', 'none', '{{dirstart}}', '{{dirend}}' + } + file_aliases = ['File:', 'file:', 'Image:', 'image:'] + + tokens = [] + + inner_content = s[2:-2] # Remove the leading [[ and trailing ]] + tokens = inner_content.split('|') + tokens = [token.strip() for token in tokens] # Clean up whitespace around tokens + + # The first token shall start with a file alias + # e.g., "File:Example.jpg" or "Image:Example.png" + if not tokens or not tokens[0].startswith(tuple(file_aliases)): + return line, double_brackets_types.invalid_file + + # The first token is a file link + filename = tokens[0].split(':', 1)[1] if ':' in tokens[0] else tokens[0] + tokens[0] = f'File:{filename}' + + # Substitute 'left' with {{dirstart}} + while 'left' in tokens: + tokens[tokens.index('left')] = '{{dirstart}}' + # Substitute 'right' with {{dirend}} + while 'right' in tokens: + tokens[tokens.index('right')] = '{{dirend}}' + + ############################ + # Managing inline icons + ############################# + is_inline_icon = True + for token in tokens: + if token in NOT_INLINE_KEYWORDS: + is_inline_icon = False + break + if is_inline_icon : + # Check if it contains 'alt=' followed by an emoji + for token in tokens[1:]: + if token.startswith('alt='): + alt_text = token[len('alt='):].strip() + if not any(is_emoji_unicode(char) for char in alt_text): + is_inline_icon = False + break + elif token not in NON_TRANSLATABLE_KEYWORDS: + is_inline_icon = False + break + elif any(token.startswith(prefix) for prefix in NON_TRANSLATABLE_KEYWORDS_PREFIXES): + is_inline_icon = False + break + + if is_inline_icon: + # return something like: [[File:smiley.png|alt=🙂]] + returnline = f'[[' + '|'.join(tokens) + ']]' + return returnline, double_brackets_types.inline_icon + + ############################ + # Managing general files + ############################# + + output_parts = [] + + # The first token is the file name (e.g., "File:Example.jpg") + # We substitute any occurrences of "Image:" with "File:" + output_parts.append(tokens[0]) + + pixel_regex = re.compile(r'\d+(?:x\d+)?px') # Matches pixel values like "100px" or "100x50px)" + for token in tokens[1:]: + # Check for 'alt=' + if token.startswith('alt='): + alt_text = token[len('alt='):].strip() + output_parts.append('alt='+_wrap_in_translate(alt_text)) + # Check if the token is a known non-translatable keyword + elif token in NON_TRANSLATABLE_KEYWORDS: + output_parts.append(token) + # If the token starts 
with a known non-translatable prefix, keep it as is + elif any(token.startswith(prefix) for prefix in NON_TRANSLATABLE_KEYWORDS_PREFIXES): + output_parts.append(token) + # If the token is a pixel value, keep it as is + elif pixel_regex.match(token): + output_parts.append(token) + # Otherwise, assume it's a caption or other translatable text + else: + output_parts.append(f"{token}") + + # Reconstruct the line with the transformed parts + returnline = '[[' + '|'.join(output_parts) + ']]' + return returnline, double_brackets_types.not_inline_icon_file + +def process_double_brackets(text, tvar_id=0): + """ + Processes internal links in the wikitext. + It wraps the content in tags. + """ + if not (text.startswith("[[") and text.endswith("]]")) : + print(f"Input >{text}< must be wrapped in double brackets [[ ]]") + sys.exit(1) + # Split the link into parts, handling both internal links and links with display text + + inner_wl = text[2:-2] # Remove the leading [[ and trailing ]] + parts = inner_wl.split('|') + + # part 0 + category_aliases = ['Category:', 'category:', 'Cat:', 'cat:'] + file_aliases = ['File:', 'file:', 'Image:', 'image:'] + + parts[0] = parts[0].strip() # Clean up the first part + # Check if the first part is a category or file alias + if parts[0].startswith(tuple(category_aliases)): + # Handle category links + cat_name = parts[0].split(':', 1)[1] if ':' in parts[0] else parts[0] + return f'[[Category:{cat_name}{{{{#translation:}}}}]]', double_brackets_types.category + elif parts[0].startswith(tuple(file_aliases)): + # Handle file links + return _process_file(text) + elif parts[0].startswith('Special:'): + # Handle special pages + return f'[[{parts[0]}]]', double_brackets_types.special + + # Assuming it's a regular internal link + if len(parts) == 1: + return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[0]}]]', double_brackets_types.wikilink + if len(parts) == 2 : + return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[1]}]]', double_brackets_types.wikilink + return text + +def process_external_link(text, tvar_url_id=0): + """ + Processes external links in the format [http://example.com Description] and ensures + that only the description part is wrapped in tags, leaving the URL untouched. + """ + match = re.match(r'\[(https?://[^\s]+)\s+([^\]]+)\]', text) + + if match: + url_part = match.group(1) + description_part = match.group(2) + # Wrap only the description part in tags, leave the URL untouched + return f'[{url_part} {description_part}]' + return text + +def process_template(text): + """ + Processes the text to ensure that only the content outside of double curly braces {{ ... }} is wrapped in tags, + while preserving the template content inside the braces without translating it. + """ + assert(text.startswith('{{') and text.endswith('}}')), "Invalid template tag" + # Split the template content from the rest of the text + inner_content = text[2:-2].strip() # Remove the leading {{ and trailing }} + inner_content = capitalise_first_letter(inner_content) # Capitalise the first letter of the inner content + + # If the inner content is empty, return an empty string + if not inner_content : + return text + + # Wrap the inner content in tags + return '{{' + inner_content + '}}' + +def process_raw_url(text): + """ + Processes raw URLs in the wikitext. + It wraps the URL in tags. + """ + # This function assumes the text is a raw URL, e.g., "http://example.com" + # and wraps it in tags. 
+ if not text.strip(): + return text + return text.strip() + + +# --- Main Tokenisation Logic --- + +def convert_to_translatable_wikitext(wikitext): + """ + Converts standard wikitext to translatable wikitext by wrapping + translatable text with tags, while preserving and + correctly handling special wikitext elements. + This function tokenizes the entire text, not line by line. + """ + if not wikitext: + return "" + + # add an extra newline at the beginning, useful to process items at the beginning of the text + wikitext = '\n' + wikitext + + parts = [] + last = 0 + curr = 0 + text_length = len(wikitext) + + while curr < text_length : + found = None + # Syntax highlight block + pattern = '', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_syntax_highlight)) + curr = end_pos + last = curr + continue + # Table block + pattern = '{|' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('|}', curr) + len('|}') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_table)) + curr = end_pattern + last = curr + continue + # Blockquote + pattern = '
<blockquote>' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('</blockquote>
', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_blockquote)) + curr = end_pattern + last = curr + continue + # Poem tag + pattern = '', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_poem_tag)) + curr = end_pattern + last = curr + continue + # Code tag + pattern = '', curr) + len('
') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_code_tag)) + curr = end_pattern + last = curr + continue + # Div tag + pattern = '', curr) + len('
') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_div)) + curr = end_pattern + last = curr + continue + # Hiero tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_hiero)) + curr = end_pattern + last = curr + continue + # Sub tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_sub_sup)) + curr = end_pattern + last = curr + continue + # Sup tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_sub_sup)) + curr = end_pattern + last = curr + continue + # Math tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_math)) + curr = end_pattern + last = curr + continue + # Small tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_small_tag)) + curr = end_pattern + last = curr + continue + # Nowiki tag + pattern = '' + if wikitext.startswith(pattern, curr): + end_pattern = wikitext.find('', curr) + len('') + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], process_nowiki)) + curr = end_pattern + last = curr + continue + # br tag + patterns = ['
<br>', '
<br/>', '<br />
'] + for p in patterns: + if wikitext.startswith(p, curr): + end_pattern = curr + len(p) + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pattern], lambda x: x)) + curr = end_pattern + last = curr + found = True + break + if found: + continue + # Lists + patterns_newline = ['\n*', '\n#', '\n:', '\n;'] + if any(wikitext.startswith(p, curr) for p in patterns_newline) : + curr += 1 # Discard the newline character + parts.append((wikitext[last:curr], _wrap_in_translate)) + # Iterate through the list items + patterns = ['*', '#', ':', ';'] + while any(wikitext.startswith(p, curr) for p in patterns) : + end_pattern = wikitext.find('\n', curr) + if end_pattern == -1: + end_pattern = text_length + else : + end_pattern += 1 # Include the newline in the part + parts.append((wikitext[curr:end_pattern], process_item)) + curr = end_pattern + last = curr + continue + # Internal links + pattern = '[[' + if wikitext.startswith(pattern, curr): + # Count the number of opening double brackets '[[' and closing ']]' to find the end + end_pos = curr + 2 + bracket_count = 1 + while end_pos < text_length and bracket_count > 0: + if wikitext.startswith('[[', end_pos): + bracket_count += 1 + end_pos += 2 + elif wikitext.startswith(']]', end_pos): + bracket_count -= 1 + end_pos += 2 + else: + end_pos += 1 + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + if end_pos > curr + 2: # Ensure we have a valid link + parts.append((wikitext[curr:end_pos], process_double_brackets)) + curr = end_pos + last = curr + continue + # External links + pattern = '[http' + if wikitext.startswith(pattern, curr): + # Find the end of the external link + end_pos = wikitext.find(']', curr) + if end_pos == -1: + end_pos = text_length + else : + end_pos += 1 # Include the closing ']' in the part + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pos + 1], process_external_link)) + curr = end_pos + last = curr + continue + # Templates + pattern = '{{' + if wikitext.startswith(pattern, curr): + # Find the end of the template + end_pos = wikitext.find('}}', curr) + 2 + if end_pos == 1: + end_pos = text_length + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pos], process_template)) + curr = end_pos + last = curr + continue + # Raw URLs + pattern = 'http' + if wikitext.startswith(pattern, curr): + # Find the end of the URL (space or end of string) + end_pos = wikitext.find(' ', curr) + if end_pos == -1: + end_pos = text_length + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pos], process_raw_url)) + curr = end_pos + last = curr + continue + # Behaviour switches + for switch in behaviour_switches: + if wikitext.startswith(switch, curr): + end_pos = curr + len(switch) + if last < curr: + parts.append((wikitext[last:curr], _wrap_in_translate)) + parts.append((wikitext[curr:end_pos], lambda x: x)) + curr = end_pos + last = curr + + + curr += 1 # Move to the next character if no pattern matched + + # Add any remaining text after the last processed part + if last < text_length: + parts.append((wikitext[last:], _wrap_in_translate)) + + """ + print ('*' * 20) + for i, (part, handler) in enumerate(parts): + print(f"--- Start element {i} with handler {handler.__name__} ---") + print(part) + print(f"---\n") + + print ('*' * 20) + """ + + # Process links + tvar_id = 0 + tvar_url_id = 0 + 
tvar_code_id = 0 + tvar_inline_icon_id = 0 + for i, (part, handler) in enumerate(parts): + # Handlers for links require a tvar_id + if handler == process_double_brackets: + new_part, double_brackets_type = handler(part, tvar_id) + if double_brackets_type in [double_brackets_types.wikilink, double_brackets_types.special, double_brackets_types.inline_icon]: + new_handler = _wrap_in_translate # Change handler to _wrap_in_translate + else : + new_handler = lambda x: x # No further processing for categories and files + parts[i] = (new_part, new_handler) + tvar_id += 1 + elif handler == process_external_link: + new_part = handler(part, tvar_url_id) + new_handler = _wrap_in_translate # Change handler to _wrap_in_translate + parts[i] = (new_part, new_handler) + tvar_url_id += 1 + elif handler == process_code_tag: + new_part = handler(part, tvar_code_id) + new_handler = _wrap_in_translate # Change handler to _wrap_in_translate + parts[i] = (new_part, new_handler) + tvar_code_id += 1 + elif handler == process_double_brackets : + new_part, double_brackets_type = handler(part, tvar_inline_icon_id) + if double_brackets_type == double_brackets_types.inline_icon: + new_handler = _wrap_in_translate # Change handler to _wrap_in_translate + tvar_inline_icon_id += 1 + else: + new_handler = lambda x: x + + # Scan again the parts: merge consecutive parts handled by _wrap_in_translate + _parts = [] + if parts: + current_part, current_handler = parts[0] + for part, handler in parts[1:]: + if handler == _wrap_in_translate and current_handler == _wrap_in_translate: + # Merge the parts + current_part += part + else: + _parts.append((current_part, current_handler)) + current_part, current_handler = part, handler + # Add the last accumulated part + _parts.append((current_part, current_handler)) + + # Process the parts with their respective handlers + processed_parts = [handler(part) for part, handler in _parts] + + # Debug output + """ + print("Processed parts:") + for i, (ppart, (part, handler)) in enumerate(zip(processed_parts, _parts)): + print(f"--- Start element {i} with handler {handler.__name__} ---") + print(part) + print(f"---\n") + print(ppart) + print(f"---\n") + """ + + # Join the processed parts into a single string + return ''.join(processed_parts)[1:] # Remove the leading newline added at the beginning \ No newline at end of file From 37195aa7c5a72dedc30be004a2062c939c2f5bcf Mon Sep 17 00:00:00 2001 From: super-nabla Date: Sun, 14 Dec 2025 22:31:05 +0100 Subject: [PATCH 3/8] change project structure --- pyproject.toml | 12 ++ translatable_wikitext_converter/__init__.py | 0 .../app.py | 2 +- translatable_wikitext_converter/butta.py | 113 +++++++++++ .../templates}/home.html | 0 .../tests.py | 7 +- .../wikitranslator.py | 74 ++------ .../wikitranslator_utils.py | 177 ++++++++++++++++++ 8 files changed, 318 insertions(+), 67 deletions(-) create mode 100644 pyproject.toml create mode 100644 translatable_wikitext_converter/__init__.py rename app.py => translatable_wikitext_converter/app.py (96%) create mode 100644 translatable_wikitext_converter/butta.py rename {templates => translatable_wikitext_converter/templates}/home.html (100%) rename tests.py => translatable_wikitext_converter/tests.py (96%) rename wikitranslator.py => translatable_wikitext_converter/wikitranslator.py (92%) create mode 100644 translatable_wikitext_converter/wikitranslator_utils.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..397eacb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ 
+[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[project] +name = "translatable-wikitext-converter" +version = "0.1.0" +description = "Convert wikitext into translatable wikitext" +authors = [ + { name = "Gopa Vasanth" } +] +requires-python = ">=3.9" diff --git a/translatable_wikitext_converter/__init__.py b/translatable_wikitext_converter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app.py b/translatable_wikitext_converter/app.py similarity index 96% rename from app.py rename to translatable_wikitext_converter/app.py index af70954..357aee5 100644 --- a/app.py +++ b/translatable_wikitext_converter/app.py @@ -1,7 +1,7 @@ from flask import Flask, request, render_template, jsonify from flask_cors import CORS # Import flask-cors -from wikitranslator import convert_to_translatable_wikitext +from .wikitranslator import convert_to_translatable_wikitext app = Flask(__name__) CORS(app) # Enable CORS for all routes diff --git a/translatable_wikitext_converter/butta.py b/translatable_wikitext_converter/butta.py new file mode 100644 index 0000000..6cd3995 --- /dev/null +++ b/translatable_wikitext_converter/butta.py @@ -0,0 +1,113 @@ +import re + +def fix_section_title_spacing_internal(title: str) -> str: + """ + Detects a section title and ensures there is exactly one space + between the '=' characters and the title text. + """ + # Pattern: (={2,}) [optional space] (.+?) [optional space] \1 + pattern = re.compile(r'(={2,})\s*(.+?)\s*\1', re.DOTALL) + + # Replacement: \1 [space] \2 [space] \1 + return pattern.sub(r'\1 \2 \1', title) + + + +# --- Main Function to Fix Wiki Page Spacing --- + +def fix_wiki_page_spacing(wiki_text: str) -> str: + """ + Applies the section title spacing fix and enforces consistent newlines + before (one blank line: \n\n) and after (one blank line: \n\n) + every section heading (Level 2 or higher). + + This method guarantees the output format: + ...[Content]\n\n== Title ==\n\n[Next content]... + + :param wiki_text: The full text of the wiki page. + :return: The corrected wiki page text. + """ + + # Pattern to match and replace a heading and its surrounding whitespace: + # 1. (.*?) : Group 1: Non-greedy capture of all content before the heading. + # 2. [\r\n\s]* : Non-capturing group for all existing whitespace/newlines before the heading. + # 3. (^={2,}.*?={2,}$) : Group 2: The actual heading line, anchored to the start of a line (re.M). + # 4. [\r\n\s]* : Non-capturing group for all existing whitespace/newlines after the heading. + + # We use re.M (multiline) and re.DOTALL (dot matches newline) + heading_and_surroundings_pattern = re.compile( + r'(.*?)[\r\n\s]*(^={2,}.*?={2,}$)[\r\n\s]*', re.M | re.DOTALL + ) + + def heading_replacer_full_format(match): + """ + Callback function for re.sub that fixes spacing and enforces \n\n separation. + """ + # Group 1: Content preceding the heading + content_before = match.group(1).rstrip() + # Group 2: The raw heading line + raw_heading = match.group(2) + + # 1. Fix the internal spacing of the heading + corrected_heading = fix_section_title_spacing_internal(raw_heading) + + # 2. Determine the prefix separator: \n\n + # If the heading is the first thing on the page (i.e., content_before is empty), + # we don't want to prepend \n\n. Otherwise, we do. + if content_before: + prefix = '\n\n' + else: + prefix = '' + + # 3. 
The replacement structure: + # {Content Before}{Prefix}\n{Corrected Heading}\n\n + # The content that follows this match will immediately follow the final \n\n. + return f'{content_before}{prefix}{corrected_heading}\n\n' + + # Apply the fix globally + corrected_text = heading_and_surroundings_pattern.sub( + heading_replacer_full_format, + wiki_text + ) + + # Clean up any residual excess newlines at the very beginning of the page + return corrected_text.lstrip('\r\n') + + +def main(): + """Hard-coded wiki page text for testing and debugging.""" + + # Text demonstrates various input issues: + # 1. Title 1: No internal space, no newline after content. (Needs \n\n before and after) + # 2. Title 2: Too much internal space, one newline after content. + # 3. Title 3: Correct internal space, three newlines after content. + # 4. Title 4: Starts immediately after content (missing newline before). + + raw_wiki_page_text = ( + "This is the header text.\n" + "This is the last line of the header.\n" # Content before first heading + "==Topic1==\n\n\n" # Missing \n before, too many \n after + "Content for topic 1.\n" + "Content continues...\n" + "=== Topic2 ===\n" # Missing \n before, one \n after + "Content for topic 2.\n" + "== Topic3 ==\n\n\n" + "Content for topic 3. Correct space, too many \n after.\n" + "Some more content.\n" + "====Topic4====\n" # Missing \n before, missing \n after + "Final content." + ) + + print("--- Original Wiki Page Text ---\n") + print(raw_wiki_page_text) + print("\n" + "="*60 + "\n") + + corrected_text = fix_wiki_page_spacing(raw_wiki_page_text) + + print("--- Corrected Wiki Page Text (Enforcing: \n\n== Title ==\n\n) ---\n") + print(corrected_text) + print("\n" + "="*60 + "\n") + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/templates/home.html b/translatable_wikitext_converter/templates/home.html similarity index 100% rename from templates/home.html rename to translatable_wikitext_converter/templates/home.html diff --git a/tests.py b/translatable_wikitext_converter/tests.py similarity index 96% rename from tests.py rename to translatable_wikitext_converter/tests.py index 29d0e68..b7d079a 100644 --- a/tests.py +++ b/translatable_wikitext_converter/tests.py @@ -1,12 +1,13 @@ import unittest -from app import convert_to_translatable_wikitext + +from translatable_wikitext_converter.app import convert_to_translatable_wikitext class TestTranslatableWikitext(unittest.TestCase): def test_section_headers(self): self.assertEqual( convert_to_translatable_wikitext("==HELLO=="), - "==HELLO==" # Removed the \n\n that was expected + "== HELLO ==" # Removed the \n\n that was expected ) def test_file_tag_translations(self): @@ -46,7 +47,7 @@ def test_simple_internal_link(self): def test_multiline_text(self): self.assertEqual( convert_to_translatable_wikitext('\nhi iam charan\n
\nhappy\n\n'), - '\nhi iam charan\n
\nhappy\n\n' + 'hi iam charan\n
\nhappy\n\n' ) def test_double_namespace_processing(self): diff --git a/wikitranslator.py b/translatable_wikitext_converter/wikitranslator.py similarity index 92% rename from wikitranslator.py rename to translatable_wikitext_converter/wikitranslator.py index 3ff4997..80e57e7 100644 --- a/wikitranslator.py +++ b/translatable_wikitext_converter/wikitranslator.py @@ -1,69 +1,14 @@ -import re from enum import Enum -import sys +import re, sys -behaviour_switches = ['__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__', '__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__EXPECTUNUSEDCATEGORY__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__', '__NOINDEX__', '__STATICREDIRECT__', '__EXPECTUNUSEDTEMPLATE__', '__NOGLOBAL__', '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', '__ARCHIVEDTALK__', '__NOTALK__', '__EXPECTWITHOUTSCANS__'] - -# --- Helper Functions for Processing Different Wikitext Elements --- -# These functions are designed to handle specific wikitext structures. -# Some will recursively call the main `convert_to_translatable_wikitext` -# function to process their internal content, ensuring nested elements -# are also handled correctly. +from .wikitranslator_utils import ( + capitalise_first_letter, + is_emoji_unicode, + fix_wiki_page_spacing, + _wrap_in_translate +) -def capitalise_first_letter(text): - """ - Capitalises the first letter of the given text. - If the text is empty or consists only of whitespace, it returns the text unchanged. - """ - if not text or not text.strip(): - return text - return text[0].upper() + text[1:] - -def is_emoji_unicode(char): - # This is a very simplified set of common emoji ranges. - # A comprehensive list would be much longer and more complex. - # See https://www.unicode.org/Public/emoji/ for full details. - if 0x1F600 <= ord(char) <= 0x1F64F: # Emoticons - return True - if 0x1F300 <= ord(char) <= 0x1F5FF: # Miscellaneous Symbols and Pictographs - return True - if 0x1F680 <= ord(char) <= 0x1F6FF: # Transport and Map Symbols - return True - if 0x2600 <= ord(char) <= 0x26FF: # Miscellaneous Symbols - return True - if 0x2700 <= ord(char) <= 0x27BF: # Dingbats - return True - # Add more ranges as needed for full coverage - return False - -def _wrap_in_translate(text): - """ - Wraps the given text with tags. - It ensures that empty or whitespace-only strings are not wrapped. - The tags are added around the non-whitespace content, - preserving leading and trailing whitespace. 
- """ - if not text or not text.strip(): - return text - - # Find the first and last non-whitespace characters - first_char_index = -1 - last_char_index = -1 - for i, char in enumerate(text): - if char not in (' ', '\n', '\t', '\r', '\f', '\v'): # Check for common whitespace characters - if first_char_index == -1: - first_char_index = i - last_char_index = i - - # If no non-whitespace characters are found (should be caught by text.strip() check, but for robustness) - if first_char_index == -1: - return text - - leading_whitespace = text[:first_char_index] - content = text[first_char_index : last_char_index + 1] - trailing_whitespace = text[last_char_index + 1 :] - - return f"{leading_whitespace}{content}{trailing_whitespace}" +behaviour_switches = ['__NOTOC__', '__FORCETOC__', '__TOC__', '__NOEDITSECTION__', '__NEWSECTIONLINK__', '__NONEWSECTIONLINK__', '__NOGALLERY__', '__HIDDENCAT__', '__EXPECTUNUSEDCATEGORY__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOTITLECONVERT__', '__NOTC__', '__START__', '__END__', '__INDEX__', '__NOINDEX__', '__STATICREDIRECT__', '__EXPECTUNUSEDTEMPLATE__', '__NOGLOBAL__', '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', '__ARCHIVEDTALK__', '__NOTALK__', '__EXPECTWITHOUTSCANS__'] def process_syntax_highlight(text): """ @@ -474,6 +419,9 @@ def convert_to_translatable_wikitext(wikitext): if not wikitext: return "" + wikitext = wikitext.replace('\r\n', '\n').replace('\r', '\n') + wikitext = fix_wiki_page_spacing(wikitext) + # add an extra newline at the beginning, useful to process items at the beginning of the text wikitext = '\n' + wikitext diff --git a/translatable_wikitext_converter/wikitranslator_utils.py b/translatable_wikitext_converter/wikitranslator_utils.py new file mode 100644 index 0000000..fc70c96 --- /dev/null +++ b/translatable_wikitext_converter/wikitranslator_utils.py @@ -0,0 +1,177 @@ +# --- Utility Functions for Wikitext Conversion --- +# This module contains helper functions that are used across the +# wikitext conversion process. These functions handle tasks such as +# capitalising text, checking for emojis, and wrapping text in +# translation tags. + +import re, sys + +# --- Helper Functions for Processing Different Wikitext Elements --- +# These functions are designed to handle specific wikitext structures. +# Some will recursively call the main `convert_to_translatable_wikitext` +# function to process their internal content, ensuring nested elements +# are also handled correctly. + +def capitalise_first_letter(text): + """ + Capitalises the first letter of the given text. + If the text is empty or consists only of whitespace, it returns the text unchanged. + """ + if not text or not text.strip(): + return text + return text[0].upper() + text[1:] + +def is_emoji_unicode(char): + # This is a very simplified set of common emoji ranges. + # A comprehensive list would be much longer and more complex. + # See https://www.unicode.org/Public/emoji/ for full details. + if 0x1F600 <= ord(char) <= 0x1F64F: # Emoticons + return True + if 0x1F300 <= ord(char) <= 0x1F5FF: # Miscellaneous Symbols and Pictographs + return True + if 0x1F680 <= ord(char) <= 0x1F6FF: # Transport and Map Symbols + return True + if 0x2600 <= ord(char) <= 0x26FF: # Miscellaneous Symbols + return True + if 0x2700 <= ord(char) <= 0x27BF: # Dingbats + return True + # Add more ranges as needed for full coverage + return False + +def _wrap_in_translate(text): + """ + Wraps the given text with tags, preserving leading/trailing whitespace. 
+ """ + if not text or not text.strip(): + return text + + # Logic for finding non-whitespace content (as defined in your current code) + first_char_index = -1 + last_char_index = -1 + for i, char in enumerate(text): + if char not in (' ', '\n', '\t', '\r', '\f', '\v'): + if first_char_index == -1: + first_char_index = i + last_char_index = i + + if first_char_index == -1: + return text + + leading_whitespace = text[:first_char_index] + content = text[first_char_index : last_char_index + 1] + trailing_whitespace = text[last_char_index + 1 :] + + return f"{leading_whitespace}{content}{trailing_whitespace}" + + +############################################ +# Functions for Fixing Wiki Page Spacing # +############################################ + +def fix_section_title_spacing_internal(title: str) -> str: + """ + Detects a section title and ensures there is exactly one space + between the '=' characters and the title text. + """ + # Pattern: (={2,}) [optional space] (.+?) [optional space] \1 + pattern = re.compile(r'(={2,})\s*(.+?)\s*\1', re.DOTALL) + + # Replacement: \1 [space] \2 [space] \1 + return pattern.sub(r'\1 \2 \1', title) + +# --- Main Function to Fix Wiki Page Spacing --- + +def fix_wiki_page_spacing(wiki_text: str) -> str: + """ + Applies the section title spacing fix and enforces consistent newlines + before (one blank line: \n\n) and after (one blank line: \n\n) + every section heading (Level 2 or higher). + + This method guarantees the output format: + ...[Content]\n\n== Title ==\n\n[Next content]... + + :param wiki_text: The full text of the wiki page. + :return: The corrected wiki page text. + """ + + # Pattern to match and replace a heading and its surrounding whitespace: + # 1. (.*?) : Group 1: Non-greedy capture of all content before the heading. + # 2. [\r\n\s]* : Non-capturing group for all existing whitespace/newlines before the heading. + # 3. (^={2,}.*?={2,}$) : Group 2: The actual heading line, anchored to the start of a line (re.M). + # 4. [\n\s]* : Non-capturing group for all existing whitespace/newlines after the heading. + + # We use re.M (multiline) and re.DOTALL (dot matches newline) + heading_and_surroundings_pattern = re.compile( + r'(.*?)[\r\n\s]*(^={2,}.*?={2,}$)[\r\n\s]*', re.M | re.DOTALL + ) + + def heading_replacer_full_format(match): + """ + Callback function for re.sub that fixes spacing and enforces \n\n separation. + """ + # Group 1: Content preceding the heading + content_before = match.group(1).rstrip() + # Group 2: The raw heading line + raw_heading = match.group(2) + + # 1. Fix the internal spacing of the heading + corrected_heading = fix_section_title_spacing_internal(raw_heading) + + # 2. Determine the prefix separator: \n\n + # If the heading is the first thing on the page (i.e., content_before is empty), + # we don't want to prepend \n\n. Otherwise, we do. + if content_before: + prefix = '\n\n' + else: + prefix = '' + + # 3. The replacement structure: + # {Content Before}{Prefix}\n{Corrected Heading}\n\n + # The content that follows this match will immediately follow the final \n\n. 
+        return f'{content_before}{prefix}{corrected_heading}\n\n'
+
+    # Apply the fix globally
+    corrected_text = heading_and_surroundings_pattern.sub(
+        heading_replacer_full_format,
+        wiki_text
+    )
+
+    # Clean up any residual excess newlines at the very beginning of the page
+    return corrected_text.lstrip('\n')
+
+# Added to allow running this module as a script
+if __name__ == '__main__':
+
+    # --- Test Data ---
+    # Contains several cases of incorrect section spacing:
+    # 1. Wrong internal spacing (both too much and missing).
+    # 2. Wrong external spacing (too many newlines or none).
+    # 3. Heading at the start of the page (must not be preceded by \n\n).
+    # 4. Content in between.
+
+    test_wikitext = """
+
+== Ciao ==
+
+ciao
+== Ciao ==
+ciao
+== Ciao ==
+
+ciao
+"""
+
+    print("--- Test of the fix_wiki_page_spacing function ---")
+    print("Original Wiki Text:\n" + "-"*30)
+    print(test_wikitext)
+    print("-" * 30)
+
+    # Run the function
+    corrected_wikitext = fix_wiki_page_spacing(test_wikitext)
+
+    print("\nCorrected Wiki Text:\n" + "="*30)
+
+    # We use repr() to clearly show all the newlines (\n) and spaces
+    print(corrected_wikitext)
+    print("=" * 30)
+    
\ No newline at end of file

From 9b45ccbbb6111ba7124d834cc306f4353576191f Mon Sep 17 00:00:00 2001
From: super-nabla
Date: Sun, 14 Dec 2025 22:48:08 +0100
Subject: [PATCH 4/8] fix spacing

---
 .../wikitranslator.py       | 12 ++---
 .../wikitranslator_utils.py | 52 +++++++++++++++++--
 2 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/translatable_wikitext_converter/wikitranslator.py b/translatable_wikitext_converter/wikitranslator.py
index 80e57e7..e75533d 100644
--- a/translatable_wikitext_converter/wikitranslator.py
+++ b/translatable_wikitext_converter/wikitranslator.py
@@ -724,15 +724,15 @@ def convert_to_translatable_wikitext(wikitext):
     processed_parts = [handler(part) for part, handler in _parts]
 
     # Debug output
-    """
+    #"""
     print("Processed parts:")
     for i, (ppart, (part, handler)) in enumerate(zip(processed_parts, _parts)):
         print(f"--- Start element {i} with handler {handler.__name__} ---")
-        print(part)
+        print(f"@{part}@")
         print(f"---\n")
-        print(ppart)
+        print(f'@{ppart}@')
         print(f"---\n")
-    """
+    #"""
 
-    # Join the processed parts into a single string
-    return ''.join(processed_parts)[1:] # Remove the leading newline added at the beginning
\ No newline at end of file
+    # Join the processed parts into a single string and remove extra leading newline
+    return ''.join(processed_parts).lstrip('\n')
\ No newline at end of file
diff --git a/translatable_wikitext_converter/wikitranslator_utils.py b/translatable_wikitext_converter/wikitranslator_utils.py
index fc70c96..37860d8 100644
--- a/translatable_wikitext_converter/wikitranslator_utils.py
+++ b/translatable_wikitext_converter/wikitranslator_utils.py
@@ -6,6 +6,9 @@
 
 import re, sys
 
+# Pattern to identify section headers (Level 2 or higher)
+SECTION_HEADER_PATTERN = re.compile(r'(={2,})\s*(.+?)\s*\1', re.DOTALL)
+
 # --- Helper Functions for Processing Different Wikitext Elements ---
 # These functions are designed to handle specific wikitext structures.
 # Some will recursively call the main `convert_to_translatable_wikitext`
 # function to process their internal content, ensuring nested elements
 # are also handled correctly.
@@ -40,14 +43,18 @@ def is_emoji_unicode(char):
 
 def _wrap_in_translate(text):
     """
-    Wraps the given text with tags, preserving leading/trailing whitespace.
+    Wraps the text with tags.
+    If the content starts or ends with a section header, it includes the preceding
+    or succeeding newline in the translation block.
""" if not text or not text.strip(): return text - # Logic for finding non-whitespace content (as defined in your current code) + # 1. Find the indices of the non-whitespace content first_char_index = -1 last_char_index = -1 + + # We loop to find the first/last character that is NOT whitespace for i, char in enumerate(text): if char not in (' ', '\n', '\t', '\r', '\f', '\v'): if first_char_index == -1: @@ -55,14 +62,51 @@ def _wrap_in_translate(text): last_char_index = i if first_char_index == -1: + # If no non-whitespace characters are found, return the original text return text + # Initial split leading_whitespace = text[:first_char_index] content = text[first_char_index : last_char_index + 1] trailing_whitespace = text[last_char_index + 1 :] + + # 2. Initial adjustment (To include the newline above the header) + + # We check if the content starts with a section header + # (We use .match() on content to see if the header is at the very beginning) + match_start = SECTION_HEADER_PATTERN.match(content) + + if match_start and leading_whitespace.endswith('\n'): + # If there is a header and the line above is a '\n', we move the '\n' from leading to content + + # We subtract the '\n' from leading_whitespace + leading_whitespace = leading_whitespace[:-1] + + # We recalculate content to include the preceding '\n' + content = text[first_char_index - 1 : last_char_index + 1] + + # We update first_char_index for subsequent calculations (even if not used here) + first_char_index -= 1 - return f"{leading_whitespace}{content}{trailing_whitespace}" + # 3. Final adjustment (To include the newline below the header) + + # We find the last match (to see if the header finishes the content block) + last_match = None + for m in SECTION_HEADER_PATTERN.finditer(content): + last_match = m + + if last_match and last_match.end() == len(content) and trailing_whitespace.startswith('\n'): + # If the header is the last thing and the subsequent block starts with '\n', we include it + + # We remove the '\n' from trailing_whitespace + trailing_whitespace = trailing_whitespace[1:] + + # We extend content to include the subsequent '\n' + content = text[first_char_index : last_char_index + 2] # +2 because index is 0-based + + # 4. Returning the result + return f"{leading_whitespace}{content}{trailing_whitespace}" ############################################ # Functions for Fixing Wiki Page Spacing # @@ -74,7 +118,7 @@ def fix_section_title_spacing_internal(title: str) -> str: between the '=' characters and the title text. """ # Pattern: (={2,}) [optional space] (.+?) 
[optional space] \1 - pattern = re.compile(r'(={2,})\s*(.+?)\s*\1', re.DOTALL) + pattern = SECTION_HEADER_PATTERN # Replacement: \1 [space] \2 [space] \1 return pattern.sub(r'\1 \2 \1', title) From 1ff39b3435f76c50b6590ba3ccd7e85461016b5c Mon Sep 17 00:00:00 2001 From: super-nabla Date: Sun, 14 Dec 2025 23:14:18 +0100 Subject: [PATCH 5/8] fix tests --- translatable_wikitext_converter/tests.py | 10 +++++----- translatable_wikitext_converter/wikitranslator.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/translatable_wikitext_converter/tests.py b/translatable_wikitext_converter/tests.py index b7d079a..32b3fcb 100644 --- a/translatable_wikitext_converter/tests.py +++ b/translatable_wikitext_converter/tests.py @@ -7,7 +7,7 @@ class TestTranslatableWikitext(unittest.TestCase): def test_section_headers(self): self.assertEqual( convert_to_translatable_wikitext("==HELLO=="), - "== HELLO ==" # Removed the \n\n that was expected + """\n== HELLO ==\n""" ) def test_file_tag_translations(self): @@ -47,7 +47,7 @@ def test_simple_internal_link(self): def test_multiline_text(self): self.assertEqual( convert_to_translatable_wikitext('\nhi iam charan\n
\nhappy\n\n'), - 'hi iam charan\n
\nhappy\n\n' + 'hi iam charan\n
\nhappy' ) def test_double_namespace_processing(self): @@ -174,19 +174,19 @@ def test_empty_string_input(self): def test_whitespace_only_input(self): self.assertEqual( convert_to_translatable_wikitext(" \n\t "), - " \n\t " + "\t" ) def test_list_items(self): self.assertEqual( convert_to_translatable_wikitext("* Item 1\n** Sub-item 1.1\n* Item 2"), - "* Item 1\n** Sub-item 1.1\n* Item 2\n" + "* Item 1\n** Sub-item 1.1\n* Item 2" ) def test_definition_list(self): self.assertEqual( convert_to_translatable_wikitext(";Term\n:Definition\n:Description"), - "; Term\n: Definition\n: Description\n" + "; Term\n: Definition\n: Description" ) if __name__ == '__main__': diff --git a/translatable_wikitext_converter/wikitranslator.py b/translatable_wikitext_converter/wikitranslator.py index e75533d..002c316 100644 --- a/translatable_wikitext_converter/wikitranslator.py +++ b/translatable_wikitext_converter/wikitranslator.py @@ -734,5 +734,12 @@ def convert_to_translatable_wikitext(wikitext): print(f"---\n") #""" - # Join the processed parts into a single string and remove extra leading newline - return ''.join(processed_parts).lstrip('\n') \ No newline at end of file + # Join the processed parts into a single string + out_wikitext = ''.join(processed_parts) + + # Keep removing all trailing and leading newlines and spaces + while out_wikitext.startswith('\n') or out_wikitext.startswith(' ') or out_wikitext.endswith('\n') or out_wikitext.endswith(' '): + out_wikitext = out_wikitext.strip('\n') + out_wikitext = out_wikitext.strip(' ') + + return out_wikitext \ No newline at end of file From 304f6a893e607a18c3be5121dddb4b1485d9d7f9 Mon Sep 17 00:00:00 2001 From: super-nabla Date: Mon, 15 Dec 2025 01:32:28 +0100 Subject: [PATCH 6/8] update wikilink logic --- translatable_wikitext_converter/app.py | 8 +- translatable_wikitext_converter/tests.py | 119 +++++-- .../wikitranslator.py | 328 +++++++++++++++--- 3 files changed, 371 insertions(+), 84 deletions(-) diff --git a/translatable_wikitext_converter/app.py b/translatable_wikitext_converter/app.py index 357aee5..5ed33e1 100644 --- a/translatable_wikitext_converter/app.py +++ b/translatable_wikitext_converter/app.py @@ -1,7 +1,7 @@ from flask import Flask, request, render_template, jsonify from flask_cors import CORS # Import flask-cors -from .wikitranslator import convert_to_translatable_wikitext +from .wikitranslator import tag_for_translation app = Flask(__name__) CORS(app) # Enable CORS for all routes @@ -17,8 +17,8 @@ def redirect_to_home(): @app.route('/convert', methods=['POST']) def convert(): wikitext = request.form.get('wikitext', '') - converted_text = convert_to_translatable_wikitext(wikitext) - return render_template('home.html', original=wikitext, converted=converted_text) + tagged = tag_for_translation(wikitext) + return render_template('home.html', original=wikitext, converted=tagged) @app.route('/api/convert', methods=['GET', 'POST']) def api_convert(): @@ -47,4 +47,4 @@ def api_convert(): }) if __name__ == '__main__': - app.run(debug=True) + app.run(debug=True, port=5001) diff --git a/translatable_wikitext_converter/tests.py b/translatable_wikitext_converter/tests.py index 32b3fcb..00226cb 100644 --- a/translatable_wikitext_converter/tests.py +++ b/translatable_wikitext_converter/tests.py @@ -1,18 +1,18 @@ import unittest -from translatable_wikitext_converter.app import convert_to_translatable_wikitext +from translatable_wikitext_converter.app import tag_for_translation class TestTranslatableWikitext(unittest.TestCase): def 
test_section_headers(self): self.assertEqual( - convert_to_translatable_wikitext("==HELLO=="), + tag_for_translation("==HELLO=="), """\n== HELLO ==\n""" ) def test_file_tag_translations(self): self.assertEqual( - convert_to_translatable_wikitext( + tag_for_translation( '[[File:landscape.jpg |thumb |left | alt=sunset |Photo of a beautiful landscape]]' ), '[[File:landscape.jpg|thumb|{{dirstart}}|alt=sunset|Photo of a beautiful landscape]]' @@ -20,174 +20,219 @@ def test_file_tag_translations(self): def test_internal_and_external_links(self): self.assertEqual( - convert_to_translatable_wikitext( + tag_for_translation( 'This is a text with an [[internal link]] and an [https://openstreetmap.org external link].' ), - 'This is a text with an [[Special:MyLanguage/Internal link|internal link]] and an [https://openstreetmap.org external link].' + 'This is a text with an [[Special:MyLanguage/Internal link|internal link]] and an [https://openstreetmap.org external link].' ) def test_category_with_translation(self): self.assertEqual( - convert_to_translatable_wikitext("[[Category:Wikipedia]]"), + tag_for_translation("[[Category:Wikipedia]]"), "[[Category:Wikipedia{{#translation:}}]]" ) def test_notoc_preserved(self): self.assertEqual( - convert_to_translatable_wikitext("__NOTOC__"), + tag_for_translation("__NOTOC__"), "__NOTOC__" ) def test_simple_internal_link(self): self.assertEqual( - convert_to_translatable_wikitext('[[link]]'), - '[[Special:MyLanguage/Link|link]]' + tag_for_translation('[[link]]'), + '[[Special:MyLanguage/Link|link]]' ) def test_multiline_text(self): self.assertEqual( - convert_to_translatable_wikitext('\nhi iam charan\n
\nhappy\n\n'), + tag_for_translation('\nhi iam charan\n
\nhappy\n\n'), 'hi iam charan\n
\nhappy' ) def test_double_namespace_processing(self): self.assertEqual( - convert_to_translatable_wikitext( + tag_for_translation( '[[File:pretty hello word.png | alt=Hello everybody!]] [[File:smiley.png|alt=🙂]] How are you?' ), - '[[File:pretty hello word.png|alt=Hello everybody!]] [[File:smiley.png|alt=🙂]] How are you?' + '[[File:pretty hello word.png|alt=Hello everybody!]] [[File:smiley.png|alt=🙂]] How are you?' ) def test_double_namespace_without_list_case_1(self): self.assertEqual( - convert_to_translatable_wikitext( + tag_for_translation( '[[Help]]ing' ), - '[[Special:MyLanguage/Help|Help]]ing' + '[[Special:MyLanguage/Help|Help]]ing' ) def test_double_namespace_without_list_case_2(self): self.assertEqual( - convert_to_translatable_wikitext( + tag_for_translation( '[[Help]] ing' ), - '[[Special:MyLanguage/Help|Help]] ing' + '[[Special:MyLanguage/Help|Help]] ing' ) def test_template_simple(self): self.assertEqual( - convert_to_translatable_wikitext("{{Template Name}}"), + tag_for_translation("{{Template Name}}"), "{{Template Name}}" ) def test_template_with_parameters(self): self.assertEqual( - convert_to_translatable_wikitext("{{Template|param1=Value 1|Value 2}}"), + tag_for_translation("{{Template|param1=Value 1|Value 2}}"), "{{Template|param1=Value 1|Value 2}}" ) def test_template_nested_in_text(self): self.assertEqual( - convert_to_translatable_wikitext('Some text with {{a template here}} and more text.'), + tag_for_translation('Some text with {{a template here}} and more text.'), 'Some text with {{A template here}} and more text.' ) def test_nowiki_tag(self): self.assertEqual( - convert_to_translatable_wikitext("Some text with [[Raw link]] content."), + tag_for_translation("Some text with [[Raw link]] content."), "Some text with [[Raw link]] content." ) def test_blockquote_tag(self): self.assertEqual( - convert_to_translatable_wikitext("
This is a quote.
"), + tag_for_translation("
This is a quote.
"), "
This is a quote.
" ) def test_poem_tag(self): self.assertEqual( - convert_to_translatable_wikitext("Line 1\nLine 2"), + tag_for_translation("Line 1\nLine 2"), "Line 1\nLine 2" ) def test_code_tag_with_tvar(self): # Assuming process_code_tag assigns tvar names sequentially starting from 0 self.assertEqual( - convert_to_translatable_wikitext("Here is some code for you."), - "Here is some code for you." + tag_for_translation("Here is some code for you."), + """Here is some code for you.""" ) def test_div_tag(self): self.assertEqual( - convert_to_translatable_wikitext("
Div content here.
"), + tag_for_translation("
Div content here.
"), "
Div content here.
" ) def test_hiero_tag(self): self.assertEqual( - convert_to_translatable_wikitext("hieroglyphics"), + tag_for_translation("hieroglyphics"), "hieroglyphics" ) def test_sub_sup_tags(self): self.assertEqual( - convert_to_translatable_wikitext("H2O and E=mc2"), + tag_for_translation("H2O and E=mc2"), "H2O and E=mc2" ) def test_math_tag(self): self.assertEqual( - convert_to_translatable_wikitext("x^2 + y^2 = z^2"), + tag_for_translation("x^2 + y^2 = z^2"), "x^2 + y^2 = z^2" ) def test_small_tag(self): self.assertEqual( - convert_to_translatable_wikitext("Small text"), + tag_for_translation("Small text"), "Small text" ) def test_image_with_upright(self): self.assertEqual( - convert_to_translatable_wikitext("[[File:Example.jpg|upright=1.5|A larger image]]"), + tag_for_translation("[[File:Example.jpg|upright=1.5|A larger image]]"), "[[File:Example.jpg|upright=1.5|A larger image]]" ) def test_multiple_elements_in_one_line(self): self.assertEqual( - convert_to_translatable_wikitext("Hello world! [[Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]"), - 'Hello world! [[Special:MyLanguage/Link|Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]' + tag_for_translation("Hello world! [[Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]"), + 'Hello world! [[Special:MyLanguage/Link|Link]] {{Template}} [https://meta.wikimedia.org/wiki/Main_Page Home]' ) def test_text_around_br_tag(self): self.assertEqual( - convert_to_translatable_wikitext("First line.
Second line."), + tag_for_translation("First line.
Second line."), "First line.
Second line." ) def test_empty_string_input(self): self.assertEqual( - convert_to_translatable_wikitext(""), + tag_for_translation(""), "" ) def test_whitespace_only_input(self): self.assertEqual( - convert_to_translatable_wikitext(" \n\t "), + tag_for_translation(" \n\t "), "\t" ) def test_list_items(self): self.assertEqual( - convert_to_translatable_wikitext("* Item 1\n** Sub-item 1.1\n* Item 2"), + tag_for_translation("* Item 1\n** Sub-item 1.1\n* Item 2"), "* Item 1\n** Sub-item 1.1\n* Item 2" ) def test_definition_list(self): self.assertEqual( - convert_to_translatable_wikitext(";Term\n:Definition\n:Description"), + tag_for_translation(";Term\n:Definition\n:Description"), "; Term\n: Definition\n: Description" ) + + def test_standard_internal_link(self): + # Standard link without prefix or pipe. Should use Special:MyLanguage. + # Assumes tag_for_translation calls the logic that produces ... + self.assertEqual( + tag_for_translation("[[Some Page]]"), + """[[Special:MyLanguage/Some Page|Some Page]]""" + ) + + def test_internal_link_with_display_text(self): + # Standard link with display text. Should use Special:MyLanguage. + self.assertEqual( + tag_for_translation("[[About|Read more here]]"), + """[[Special:MyLanguage/About|Read more here]]""" + ) + + def test_simple_language_prefix_no_pipe(self): + # Link starting with a simple language code (e.g., 'bn:'). Should NOT use Special:MyLanguage. + # Should auto-generate the display text without the prefix. + self.assertEqual( + tag_for_translation("[[:it:mozzarella]]"), + """[[:it:mozzarella|mozzarella]]""" + ) + + def test_complex_interwiki_prefix(self): + # Link using a complex interwiki prefix (e.g., :bn:s: for Bengali Wikisource). + # This tests the segment parsing fix implemented. Should NOT use Special:MyLanguage. + self.assertEqual( + tag_for_translation("[[:bn:s:article Title]]"), + """[[:bn:s:article Title|article Title]]""" + ) + + def test_simple_english_special_handling(self): + # Link with the 'en:' prefix, which has special handling using the {{lwp|...}} template. + self.assertEqual( + tag_for_translation("[[:en:kerala]]"), + """[[{{lwp|Kerala}}|kerala]]""" + ) + + def test_complex_english_special_handling(self): + # Link with the 'en:' prefix, which has special handling using the {{lwp|...}} template. + self.assertEqual( + tag_for_translation("[[:en:kerala|text]]"), + """[[{{lwp|Kerala}}|text]]""" + ) if __name__ == '__main__': unittest.main(exit=False, failfast=True) diff --git a/translatable_wikitext_converter/wikitranslator.py b/translatable_wikitext_converter/wikitranslator.py index 002c316..cc614b7 100644 --- a/translatable_wikitext_converter/wikitranslator.py +++ b/translatable_wikitext_converter/wikitranslator.py @@ -16,19 +16,7 @@ def process_syntax_highlight(text): It wraps the content in tags. 
""" assert(text.startswith('')), "Invalid syntax highlight tag" - # Get inside the tag - start_tag_end = text.find('>') + 1 - end_tag_start = text.rfind('<') - if start_tag_end >= end_tag_start: - return text - prefix = text[:start_tag_end] - content = text[start_tag_end:end_tag_start].strip() - suffix = text[end_tag_start:] - if not content: - return text - # Wrap the content in tags - wrapped_content = _wrap_in_translate(content) - return f"{prefix}{wrapped_content}{suffix}" + return "" + text + "" def process_table(text): """ @@ -76,7 +64,7 @@ def process_poem_tag(text): wrapped_content = _wrap_in_translate(content) return f"{prefix}{wrapped_content}{suffix}" -def process_code_tag(text, tvar_code_id=0): +def process_code_tag(text): """ Processes tags in the wikitext. It wraps the content in tags. @@ -93,7 +81,7 @@ def process_code_tag(text, tvar_code_id=0): if not content: return text # Wrap the content in tags - wrapped_content = f'{content}' + wrapped_content = f'{content}' return f"{prefix}{wrapped_content}{suffix}" def process_div(text): @@ -220,7 +208,7 @@ def process_item(text): item_content = text[offset:].strip() if not item_content: return text - return text[:offset] + ' ' + _wrap_in_translate(item_content) + '\n' + return text[:offset] + ' ' + convert_to_translatable_wikitext(item_content) + '\n' class double_brackets_types(Enum): wikilink = 1 @@ -229,8 +217,8 @@ class double_brackets_types(Enum): not_inline_icon_file = 4 special = 5 invalid_file = 6 - -def _process_file(s, tvar_inline_icon_id=0): + +def _process_file(s): # Define keywords that should NOT be translated when found as parameters NON_TRANSLATABLE_KEYWORDS = { 'left', 'right', 'centre', 'center', 'thumb', 'frameless', 'border', 'none', @@ -291,7 +279,7 @@ def _process_file(s, tvar_inline_icon_id=0): if is_inline_icon: # return something like: [[File:smiley.png|alt=🙂]] - returnline = f'[[' + '|'.join(tokens) + ']]' + returnline = f'[[' + '|'.join(tokens) + ']]' return returnline, double_brackets_types.inline_icon ############################ @@ -327,7 +315,7 @@ def _process_file(s, tvar_inline_icon_id=0): returnline = '[[' + '|'.join(output_parts) + ']]' return returnline, double_brackets_types.not_inline_icon_file -def process_double_brackets(text, tvar_id=0): +def process_double_brackets(text): """ Processes internal links in the wikitext. It wraps the content in tags. @@ -344,7 +332,9 @@ def process_double_brackets(text, tvar_id=0): category_aliases = ['Category:', 'category:', 'Cat:', 'cat:'] file_aliases = ['File:', 'file:', 'Image:', 'image:'] - parts[0] = parts[0].strip() # Clean up the first part + # strip all parts + parts = [part.strip() for part in parts] + # Check if the first part is a category or file alias if parts[0].startswith(tuple(category_aliases)): # Handle category links @@ -357,14 +347,174 @@ def process_double_brackets(text, tvar_id=0): # Handle special pages return f'[[{parts[0]}]]', double_brackets_types.special - # Assuming it's a regular internal link + ############################# + # Managing wikilinks + ############################# + + # List of recognised prefixes for Wikimedia projects (e.g., wikipedia, commons) + # and local/national chapters (e.g., wmde, wmit). 
+ interwiki_prefixes = [ + # Main Projects + "wikipedia", "w", + "wiktionary", "wikt", + "wikinews", "n", + "wikibooks", "b", + "wikiquote", "q", + "wikisource", "s", + "oldwikisource", "s:mul", + "wikispecies", "species", + "wikiversity", "v", + "wikivoyage", "voy", + "wikimedia", "foundation", "wmf", + "commons", "c", + "metawiki", "metawikimedia", "metawikipedia", "meta", "m", + "incubator", + "strategy", + "mediawikiwiki", "mw", + "mediazilla", "bugzilla", + "phabricator", "phab", + "testwiki", + "wikidata", "d", + "wikifunctions", "f", + "wikitech", + "toolforge", + + # National Chapters + "wmar", "wmau", "wmbd", "wmbe", "wmbr", "wmca", "wmcz", "wmdk", + "wmde", "wmfi", "wmhk", "wmhu", "wmin", "wmid", "wmil", "wmit", + "wmnl", "wmmk", "wmno", "wmpl", "wmru", "wmrs", "wmes", "wmse", + "wmch", "wmtw", "wmua", "wmuk", + + # Other Wikimedia Prefixes + "betawikiversity", "v:mul", + "download", "dbdump", "gerrit", "mail", "mailarchive", + "outreach", "otrs", "OTRSwiki", "quality", "spcom", + "ticket", "tools", "tswiki", "svn", "sulutil", + "rev", "wmania", "wm2016", "wm2017" + ] + # Convert the list to a set for efficient lookup/checking. + interwiki_prefixes_set = set(interwiki_prefixes) + # Regex to identify if the link starts with a language code (e.g., 'it:', 'bn:'). + LANGUAGE_CODE_PATTERN = re.compile(r'^[a-z]{2,3}:') + + # Determine the link target (before the pipe) and the display text (after the pipe). + link_title = parts[0] + # If a pipe is present, use the part after it; otherwise, use the link target itself. + display_text = parts[1] if len(parts) > 1 else parts[0] + + # --- 1. Checking for Project/Chapter/Interwiki Prefixes --- + + # We try to extract the prefix (e.g. ":bn:" from ":bn:Page") + first_part_lower = link_title.lower() + + has_known_prefix = False + + # A. Check 1: Simple Language Code Match (e.g., ":it:", ":bn:") + # This covers the explicit requirement: "se inizia con un codice linguistico e i due punti..." + if LANGUAGE_CODE_PATTERN.match(first_part_lower): + has_known_prefix = True + + # B. Check 2: Complex Prefix Parsing (Covers "w:", "commons:", "wmde:", or combined forms) + elif ':' in first_part_lower: + # Split the link by colon, excluding the last part which is the page title. + # Example: ":bn:s:Page" -> segments: ['','bn','s'] + # Example: ":w:de:Page" -> segments: ['', 'w','de'] + # Example: ":commons:File" -> segments: ['', 'commons'] + + segments = first_part_lower.split(':') + + # We look at all segments except the last one (which is the actual page title). + # We stop the search if the last segment (the title) is empty, which happens for links ending in a colon. + # e.g., 'w:' splits to ['w', ''] -> we check 'w'. + limit = len(segments) - 1 + if segments[-1] == '': + limit = len(segments) - 2 + + # Iterate through all prefix segments + for segment in segments[:limit]: + # The empty string segment resulting from a leading colon (e.g., ':w:de:Page' -> first segment is '') is ignored. + if segment: + # Check if the segment is a known project/chapter prefix. + if segment in interwiki_prefixes_set: + has_known_prefix = True + break # Stop checking once any known prefix is found + + # Check if the segment is a language code (e.g., 'de' in 'w:de:Page'). + # We can't use the regex pattern here as it checks for start-of-string. + # A quick check for typical language code length (2 or 3 chars) is used as a proxy, + # although a full language code check would be more robust. 
+ if 2 <= len(segment) <= 3: + # Assuming a 2/3 letter segment that isn't a known prefix is treated as a language code + # for the purpose of avoiding Special:MyLanguage. + has_known_prefix = True + break + + # If the link is complex (multiple colons) or contains a known prefix, + # then it is an interwiki link and should not be routed through Special:MyLanguage. + # The check below remains the same, but 'has_known_prefix' is now robustly set. + + if has_known_prefix or ':' in link_title: + # If it has a prefix (linguistic or project/chapter), DO NOT use Special:MyLanguage. + + # --- 2. Special handling for the ":en:" prefix --- + if first_part_lower.startswith(':en:'): + # For links starting with ':en:', rewrite using the {{lwp|...}} template. + + # The suffix is the page title *without* the ":en:" prefix. + en_suffix = link_title[4:] # Removes ":en:" + capitalised_en_suffix = capitalise_first_letter(en_suffix) + # Case 1: No pipe (e.g., "[[en:About]]") + if len(parts) == 1: + # Target: {{lwp|About}}. Display text: About (en_suffix). + return f'[[{{{{lwp|{capitalised_en_suffix}}}}}|{en_suffix}]]', double_brackets_types.wikilink + + # Case 2: With pipe (e.g., "[[en:About|Read More]]") + if len(parts) == 2: + # Target: {{lwp|About}}. Display text: Read More (display_text). + return f'[[{{{{lwp|{capitalised_en_suffix}}}}}|{display_text}]]', double_brackets_types.wikilink + + # --- 3. Handling all other interwiki/prefixed links (e.g., ":it:", "w:", "wmde:") --- + + # Find the index of the *last* colon to correctly separate the page title + # from the potentially complex prefix (e.g., extract 'Page' from 'bn:Page'). + if link_title.rfind(':') != -1: + # Extract the page title by finding the content after the final colon. + title_without_prefix = link_title[link_title.rfind(':') + 1:] + else: + # Should not happen for prefixed links, but handles the fallback gracefully. + title_without_prefix = link_title + + # Case 1: No pipe (e.g., "[[bn:Page]]" or "[[w:Page]]") + if len(parts) == 1: + # Link target remains link_title (e.g., bn:Page). + # Display text is the title *without* the prefix (e.g., Page). + return f'[[{link_title}|{title_without_prefix}]]', double_brackets_types.wikilink + + # Case 2: With pipe (e.g., "[[bn:Page|Text]]") + if len(parts) == 2: + # Link target remains link_title (e.g., bn:Page). + # Display text is the text after the pipe (e.g., Text). + return f'[[{link_title}|{display_text}]]', double_brackets_types.wikilink + + # --- 4. Standard internal links (No special prefix found) --- + + # For standard internal links, the target must be prefixed with Special:MyLanguage + # to enable automatic localisation. 'capitalise_first_letter' is required here. + + # Case 1: No pipe (e.g., [[Page]]) if len(parts) == 1: - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[0]}]]', double_brackets_types.wikilink - if len(parts) == 2 : - return f'[[Special:MyLanguage/{capitalise_first_letter(parts[0])}|{parts[1]}]]', double_brackets_types.wikilink + # Target: Special:MyLanguage/Page. Display text: Page (link_title). + return f'[[Special:MyLanguage/{capitalise_first_letter(link_title)}|{link_title}]]', double_brackets_types.wikilink + + # Case 2: With pipe (e.g., [[Page|Text]]) + if len(parts) == 2: + # Target: Special:MyLanguage/Page. Display text: Text (display_text). + return f'[[Special:MyLanguage/{capitalise_first_letter(link_title)}|{display_text}]]', double_brackets_types.wikilink + + # Fallback for unexpected link format (e.g., more than one pipe). 
return text -def process_external_link(text, tvar_url_id=0): +def process_external_link(text): """ Processes external links in the format [http://example.com Description] and ensures that only the description part is wrapped in tags, leaving the URL untouched. @@ -375,7 +525,7 @@ def process_external_link(text, tvar_url_id=0): url_part = match.group(1) description_part = match.group(2) # Wrap only the description part in tags, leave the URL untouched - return f'[{url_part} {description_part}]' + return f'[{url_part} {description_part}]' return text def process_template(text): @@ -406,6 +556,9 @@ def process_raw_url(text): return text return text.strip() +def tag_for_translation(text): + converted_text = convert_to_translatable_wikitext(text) + return set_tvar_names(converted_text) # --- Main Tokenisation Logic --- @@ -439,7 +592,7 @@ def convert_to_translatable_wikitext(wikitext): if last < curr: parts.append((wikitext[last:curr], _wrap_in_translate)) parts.append((wikitext[curr:end_pattern], process_syntax_highlight)) - curr = end_pos + curr = end_pattern last = curr continue # Table block @@ -674,37 +827,33 @@ def convert_to_translatable_wikitext(wikitext): """ # Process links - tvar_id = 0 - tvar_url_id = 0 - tvar_code_id = 0 - tvar_inline_icon_id = 0 for i, (part, handler) in enumerate(parts): # Handlers for links require a tvar_id if handler == process_double_brackets: - new_part, double_brackets_type = handler(part, tvar_id) + new_part, double_brackets_type = handler(part) if double_brackets_type in [double_brackets_types.wikilink, double_brackets_types.special, double_brackets_types.inline_icon]: new_handler = _wrap_in_translate # Change handler to _wrap_in_translate else : new_handler = lambda x: x # No further processing for categories and files parts[i] = (new_part, new_handler) - tvar_id += 1 elif handler == process_external_link: - new_part = handler(part, tvar_url_id) + new_part = handler(part) new_handler = _wrap_in_translate # Change handler to _wrap_in_translate parts[i] = (new_part, new_handler) - tvar_url_id += 1 elif handler == process_code_tag: - new_part = handler(part, tvar_code_id) + new_part = handler(part) new_handler = _wrap_in_translate # Change handler to _wrap_in_translate parts[i] = (new_part, new_handler) - tvar_code_id += 1 elif handler == process_double_brackets : - new_part, double_brackets_type = handler(part, tvar_inline_icon_id) + new_part, double_brackets_type = handler(part) if double_brackets_type == double_brackets_types.inline_icon: new_handler = _wrap_in_translate # Change handler to _wrap_in_translate - tvar_inline_icon_id += 1 else: new_handler = lambda x: x + elif handler == process_syntax_highlight : + new_part = handler(part) + new_handler = _wrap_in_translate # Change handler to _wrap_in_translate + parts[i] = (new_part, new_handler) # Scan again the parts: merge consecutive parts handled by _wrap_in_translate _parts = [] @@ -724,7 +873,7 @@ def convert_to_translatable_wikitext(wikitext): processed_parts = [handler(part) for part, handler in _parts] # Debug output - #""" + """ print("Processed parts:") for i, (ppart, (part, handler)) in enumerate(zip(processed_parts, _parts)): print(f"--- Start element {i} with handler {handler.__name__} ---") @@ -732,7 +881,7 @@ def convert_to_translatable_wikitext(wikitext): print(f"---\n") print(f'@{ppart}@') print(f"---\n") - #""" + """ # Join the processed parts into a single string out_wikitext = ''.join(processed_parts) @@ -742,4 +891,97 @@ def convert_to_translatable_wikitext(wikitext): out_wikitext 
= out_wikitext.strip('\n') out_wikitext = out_wikitext.strip(' ') - return out_wikitext \ No newline at end of file + return out_wikitext + +def set_tvar_names(input_text: str) -> str: + """ + Sets the 'name' attribute of every tag inside a block, + using an increasing counter (starting from 1) for each block. + + This version assumes tags are initially simple, e.g., or . + + Args: + input_text: The input string containing and tags. + + Returns: + The modified string with the 'name' attributes set. + """ + + # 1. Regular expression to find all blocks, including content. + # We use re.DOTALL to ensure the match spans multiple lines. + translate_pattern = re.compile(r'(.*?<\/translate>)', re.DOTALL) + + def process_translate_block(full_block_match): + """ + Callback function for re.sub that processes one block. + It finds all simple tags inside and gives them an incremental 'name' attribute. + """ + # The entire matched block + full_block = full_block_match.group(0) + + # Initialise the counter for the current block + count = 1 + + def substitute_simple_tvar(tvar_match): + """ + Inner callback function to substitute a simple and increment the counter. + """ + nonlocal count + + # The match group 1 captures the opening tag parts: ' becomes + # or becomes + + # This expression handles both and by replacing the final '>' or '/>' + # with the insertion plus the captured closing part (group 2). + name_attribute = f' name="{count}"' + + # Group 2 captures the closing element (either '>' or '/>') + closing_part = tvar_match.group(2) + + new_tag = f'{opening_part}{name_attribute}{closing_part}' + + # Increment the counter for the next + count += 1 + + return new_tag + + # Internal pattern: finds or where 'name' is not present. + # This is a robust pattern for HTML/XML tags where an attribute is to be inserted + # right before the closing bracket. + + # Group 1: () - The closing angle bracket (possibly with / for self-closing) + # We need to ensure we don't accidentally match existing 'name' attributes. + + # Simpler pattern for *all* tags, assuming no existing name: + tvar_pattern_inner = re.compile(r'()', re.DOTALL) + + # To strictly avoid tags that *already* contain 'name': + # We use a negative lookahead to ensure "name=" is not present inside + # This pattern is more complex but safer: + tvar_pattern_safer = re.compile(r'(]*name=)[^>]*)(>)', re.IGNORECASE | re.DOTALL) + + # We will utilise the simpler pattern, assuming the context is pre-processing before translation: + tvar_pattern_to_use = re.compile(r'()', re.DOTALL) + + # Apply the substitution to all tags within the current block + modified_block = re.sub( + tvar_pattern_to_use, + substitute_simple_tvar, + full_block + ) + + return modified_block + + # 2. Apply the block processor function to all blocks. 
+    final_result = re.sub(
+        translate_pattern,
+        process_translate_block,
+        input_text
+    )
+
+    return final_result
\ No newline at end of file

From cf1dc5f8b8639501125b7e0c76fb13a8aeef22df Mon Sep 17 00:00:00 2001
From: super-nabla
Date: Mon, 15 Dec 2025 01:38:42 +0100
Subject: [PATCH 7/8] change port

---
 translatable_wikitext_converter/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/translatable_wikitext_converter/app.py b/translatable_wikitext_converter/app.py
index 5ed33e1..9b7805b 100644
--- a/translatable_wikitext_converter/app.py
+++ b/translatable_wikitext_converter/app.py
@@ -47,4 +47,4 @@ def api_convert():
     })
 
 if __name__ == '__main__':
-    app.run(debug=True, port=5001)
+    app.run(debug=True, port=5000)

From 52c115cffe173f5c140ce8a502a34870a5c64ebc Mon Sep 17 00:00:00 2001
From: Super nabla
Date: Mon, 15 Dec 2025 01:43:44 +0100
Subject: [PATCH 8/8] Update README with application run instructions

Added instructions for running the application and tests.
---
 README.md | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 1298885..ca1f1ab 100644
--- a/README.md
+++ b/README.md
@@ -37,13 +37,21 @@
 
    ```bash
    pip install -r requirements.txt
+   pip install -e .
    ```
 
 4. **Run the Application**
+   ```bash
+   flask --app ./translatable_wikitext_converter/app.py run --port 5000
+   ```
+   As an alternative:
    ```bash
-   python app.py
+   python -m translatable_wikitext_converter.app
    ```
-
+5. **Run the tests**
+   ```bash
+   python ./translatable_wikitext_converter/tests.py
+   ```
 The application will start on http://127.0.0.1:5000.
 
 ## Usage