diff --git a/HTMLBeautify.py b/HTMLBeautify.py
index 00f6638..c07798a 100644
--- a/HTMLBeautify.py
+++ b/HTMLBeautify.py
@@ -7,149 +7,183 @@
# url: http://reyman.name/
# e-mail: ross[at]reyman[dot]name
-import sublime, sublime_plugin, re
+import sublime, sublime_plugin, re, bs4
class HtmlBeautifyCommand(sublime_plugin.TextCommand):
- def run(self, edit):
- # this file contains the tags that will be indented/unindented, etc.
- settings = sublime.load_settings('HTMLBeautify.sublime-settings')
+ def regex_beautify(self, settings, rawcode):
+
+ # the contents of these tags will not be indented
+ ignored_tag_opening = settings.get('ignored_tag_opening')
+ ignored_tag_closing = settings.get('ignored_tag_closing')
+
+ # the content of these tags will be indented
+ tag_indent = settings.get('tag_indent')
+
+ # these tags will be un-indented
+ tag_unindent = settings.get('tag_unindent')
+
+ # the line will be un-indented and next line will be indented
+ tag_unindent_line = settings.get('tag_unindent_line')
+
+ # these tags may occur inline and should not indent/unindent
+ tag_pos_inline = settings.get('tag_pos_inline')
+
+ # remove extra line (empty)
+ remove_extraline = settings.get('remove_extraline')
+
+ # flatten tags and contents to column 1, removing tabs!
+ tag_raw_flat_opening = settings.get('tag_raw_flat_opening')
+ tag_raw_flat_closing = settings.get('tag_raw_flat_closing')
+
+ indent_with = settings.get('indent_with', " ")
+
+ # put each line into a list
+ rawcode_list = rawcode.split('\n')
+ # print rawcode_list
+
+ # cycle through each list item (line of rawcode_list)
+ rawcode_flat = ""
+ is_block_ignored = False
+ is_block_raw = False
+
+ for item in rawcode_list:
+ # print item.strip()
+ # remove extra "spacer" lines
+ if item == "" and remove_extraline:
+ continue
+ # ignore raw code
+ if re.search(tag_raw_flat_closing, item, re.IGNORECASE):
+ tmp = item.strip()
+ is_block_raw = False
+ elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
+ tmp = item.strip()
+ is_block_raw = True
+ # find ignored blocks and retain indentation, otherwise: strip whitespace
+ if re.search(ignored_tag_closing, item, re.IGNORECASE):
+ tmp = item.strip()
+ is_block_ignored = False
+ elif re.search(ignored_tag_opening, item, re.IGNORECASE):
+ # count tabs used in ignored tags (for use later)
+ ignored_block_tab_count = item.count('\t')
+ tmp = item.strip()
+ is_block_ignored = True
+ # not filtered so just output it
+ else:
+ if is_block_raw == True:
+ # remove tabs from raw_flat content
+ tmp = re.sub('\t', '', item)
+ elif is_block_ignored == True:
+ tab_count = item.count('\t') - ignored_block_tab_count
+ tmp = '\t' * tab_count + item.strip()
+ else:
+ tmp = item.strip()
- # the contents of these tags will not be indented
- ignored_tag_opening = settings.get('ignored_tag_opening')
- ignored_tag_closing = settings.get('ignored_tag_closing')
+ rawcode_flat = rawcode_flat + tmp + '\n'
+
+ # print rawcode_flat
+
+ # put each line into a list (again)
+ rawcode_flat_list = rawcode_flat.split('\n')
+ # print rawcode_flat_list
+
+ # cycle through each list item (line of rawcode_flat_list) again - this time: add indentation!
+ beautified_code = ""
+
+ indent_level = 0
+ is_block_ignored = False
+ is_block_raw = False
+
+ for item in rawcode_flat_list:
+ # if a one-line, inline tag, just process it
+ if re.search(tag_pos_inline, item, re.IGNORECASE):
+ tmp = (indent_with * indent_level) + item
+ # if unindent, move left
+ elif re.search(tag_unindent, item, re.IGNORECASE):
+ indent_level = indent_level - 1
+ tmp = (indent_with * indent_level) + item
+ elif re.search(tag_unindent_line, item, re.IGNORECASE):
+ tmp = (indent_with * (indent_level - 1)) + item
+ # if indent, move right
+ elif re.search(tag_indent, item, re.IGNORECASE):
+ tmp = (indent_with * indent_level) + item
+ indent_level = indent_level + 1
+ # if raw, flatten! no indenting!
+ elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
+ tmp = item
+ is_block_raw = True
+ elif re.search(tag_raw_flat_closing, item, re.IGNORECASE):
+ tmp = item
+ is_block_raw = False
+ else:
+ if is_block_raw == True:
+ tmp = item
+ # otherwise, just leave same level
+ else:
+ tmp = (indent_with * indent_level) + item
- # the content of these tags will be indented
- tag_indent = settings.get('tag_indent')
+ beautified_code = beautified_code + tmp + '\n'
- # these tags will be un-indented
- tag_unindent = settings.get('tag_unindent')
+ # remove leading and trailing white space
+ beautified_code = beautified_code.strip()
- # the line will be un-indented and next line will be indented
- tag_unindent_line = settings.get('tag_unindent_line')
+ return beautified_code
- # these tags may occur inline and should not indent/unindent
- tag_pos_inline = settings.get('tag_pos_inline')
+ def parser_beautify(self, settings, markup):
- # remove extra line (empty)
- remove_extraline = settings.get('remove_extraline')
+ indent_with = settings.get('indent_with', " ")
- # flatten tags and contents to column 1, removing tabs!
- tag_raw_flat_opening = settings.get('tag_raw_flat_opening')
- tag_raw_flat_closing = settings.get('tag_raw_flat_closing')
+ soup = bs4.BeautifulSoup(markup)
+ fixed = soup.prettify()
- # determine if applying to a selection or applying to the whole document
- if self.view.sel()[0].empty():
- # nothing selected: process the entire file
- region = sublime.Region(0, self.view.size())
- sublime.status_message('Beautifying Entire File')
- rawcode = self.view.substr(region)
- # print region
- else:
- # process only selected region
- region = self.view.line(self.view.sel()[0])
- sublime.status_message('Beautifying Selection Only')
- rawcode = self.view.substr(self.view.sel()[0])
- # print region
-
- # print rawcode
-
- # remove leading and trailing white space
- rawcode = rawcode.strip()
- # print rawcode
-
- # put each line into a list
- rawcode_list = re.split('\n', rawcode)
- # print rawcode_list
-
- # cycle through each list item (line of rawcode_list)
- rawcode_flat = ""
- is_block_ignored = False
- is_block_raw = False
-
- for item in rawcode_list:
- # print item.strip()
- # remove extra "spacer" lines
- if item == "" and remove_extraline:
- continue
- # ignore raw code
- if re.search(tag_raw_flat_closing, item, re.IGNORECASE):
- tmp = item.strip()
- is_block_raw = False
- elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
- tmp = item.strip()
- is_block_raw = True
- # find ignored blocks and retain indentation, otherwise: strip whitespace
- if re.search(ignored_tag_closing, item, re.IGNORECASE):
- tmp = item.strip()
- is_block_ignored = False
- elif re.search(ignored_tag_opening, item, re.IGNORECASE):
- # count tabs used in ignored tags (for use later)
- ignored_block_tab_count = item.count('\t')
- tmp = item.strip()
- is_block_ignored = True
- # not filtered so just output it
- else:
- if is_block_raw == True:
- # remove tabs from raw_flat content
- tmp = re.sub('\t', '', item)
- elif is_block_ignored == True:
- tab_count = item.count('\t') - ignored_block_tab_count
- tmp = '\t' * tab_count + item.strip()
- else:
- tmp = item.strip()
-
- rawcode_flat = rawcode_flat + tmp + '\n'
-
- # print rawcode_flat
-
- # put each line into a list (again)
- rawcode_flat_list = re.split('\n', rawcode_flat)
- # print rawcode_flat_list
-
- # cycle through each list item (line of rawode_flat_list) again - this time: add indentation!
- beautified_code = ""
-
- indent_level = 0
- is_block_ignored = False
- is_block_raw = False
-
- for item in rawcode_flat_list:
- # if a one-line, inline tag, just process it
- if re.search(tag_pos_inline, item, re.IGNORECASE):
- tmp = ("\t" * indent_level) + item
- # if unindent, move left
- elif re.search(tag_unindent, item, re.IGNORECASE):
- indent_level = indent_level - 1
- tmp = ("\t" * indent_level) + item
- elif re.search(tag_unindent_line, item, re.IGNORECASE):
- tmp = ("\t" * (indent_level - 1)) + item
- # if indent, move right
- elif re.search(tag_indent, item, re.IGNORECASE):
- tmp = ("\t" * indent_level) + item
- indent_level = indent_level + 1
- # if raw, flatten! no indenting!
- elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
- tmp = item
- is_block_raw = True
- elif re.search(tag_raw_flat_closing, item, re.IGNORECASE):
- tmp = item
- is_block_raw = False
- else:
- if is_block_raw == True:
- tmp = item
- # otherwise, just leave same level
- else:
- tmp = ("\t" * indent_level) + item
+ # BS4 has no way to override the indentation char for the beautified
+ # output; it is hard-coded to a single space per level. As such, we have to reprocess
+ # the resulting output and reindent it with whatever indentation char
+ # we want.
+ code_lines = fixed.split('\n')
+
+ out = []
+ for cline in code_lines:
+ short = cline.lstrip(" ")
+ out.append(indent_with * (len(cline) - len(short)) + short)
+
+ ret = "\n".join(out)
+ return ret.strip()
+
+
+ def run(self, edit):
+
+ # this file contains the tags that will be indented/unindented, etc.
+ settings = sublime.load_settings('HTMLBeautify.sublime-settings')
+
+ use_parser = settings.get('use_parser', False)
+
+ # determine if applying to a selection or applying to the whole document
+ if self.view.sel()[0].empty():
+ # nothing selected: process the entire file
+ region = sublime.Region(0, self.view.size())
+ sublime.status_message('Beautifying Entire File')
+ rawcode = self.view.substr(region)
+ # print region
+ else:
+ # process only selected region
+ region = self.view.line(self.view.sel()[0])
+ sublime.status_message('Beautifying Selection Only')
+ rawcode = self.view.substr(self.view.sel()[0])
+ # print region
- beautified_code = beautified_code + tmp + '\n'
- # remove leading and trailing white space
- beautified_code = beautified_code.strip()
+ # remove leading and trailing white space
+ rawcode = rawcode.strip()
+ # print rawcode
- # print beautified_code
+ if use_parser:
+ beautified_code = self.parser_beautify(settings, rawcode)
+ else:
+ beautified_code = self.regex_beautify(settings, rawcode)
+ # print beautified_code
- # replace the code in Sublime Text
- self.view.replace(edit, region, beautified_code)
+ # replace the code in Sublime Text
+ self.view.replace(edit, region, beautified_code)
- # done
+ # done
diff --git a/dependencies.json b/dependencies.json
new file mode 100644
index 0000000..5559a35
--- /dev/null
+++ b/dependencies.json
@@ -0,0 +1,7 @@
+{
+ "*": {
+ "*": [
+ "bs4"
+ ]
+ }
+}
\ No newline at end of file