From a0f2e0faaf7d1e31084addac66bb9ffece9393d4 Mon Sep 17 00:00:00 2001 From: Fake-Name Date: Wed, 26 Jul 2017 20:17:43 -0700 Subject: [PATCH 1/2] Optionally use BS4/prettify() to format code. It's more robust, and a lot more performant. This significantly satisfies https://github.com/rareyman/HTMLBeautify/issues/49. --- HTMLBeautify.py | 279 +++++++++++++++++++++++++--------------------- dependencies.json | 7 ++ 2 files changed, 162 insertions(+), 124 deletions(-) create mode 100644 dependencies.json diff --git a/HTMLBeautify.py b/HTMLBeautify.py index 00f6638..da72cd0 100644 --- a/HTMLBeautify.py +++ b/HTMLBeautify.py @@ -7,149 +7,180 @@ # url: http://reyman.name/ # e-mail: ross[at]reyman[dot]name -import sublime, sublime_plugin, re +import sublime, sublime_plugin, re, bs4 class HtmlBeautifyCommand(sublime_plugin.TextCommand): - def run(self, edit): - # this file contains the tags that will be indented/unindented, etc. - settings = sublime.load_settings('HTMLBeautify.sublime-settings') + def regex_beautify(self, settings, rawcode): + + # the contents of these tags will not be indented + ignored_tag_opening = settings.get('ignored_tag_opening') + ignored_tag_closing = settings.get('ignored_tag_closing') + + # the content of these tags will be indented + tag_indent = settings.get('tag_indent') + + # these tags will be un-indented + tag_unindent = settings.get('tag_unindent') + + # the line will be un-indented and next line will be indented + tag_unindent_line = settings.get('tag_unindent_line') + + # these tags may occur inline and should not indent/unindent + tag_pos_inline = settings.get('tag_pos_inline') + + # remove extra line (empty) + remove_extraline = settings.get('remove_extraline') + + # flatten tags and contents to column 1, removing tabs! + tag_raw_flat_opening = settings.get('tag_raw_flat_opening') + tag_raw_flat_closing = settings.get('tag_raw_flat_closing') + + + # put each line into a list + rawcode_list = rawcode.split('\n') + # print rawcode_list + + # cycle through each list item (line of rawcode_list) + rawcode_flat = "" + is_block_ignored = False + is_block_raw = False + + for item in rawcode_list: + # print item.strip() + # remove extra "spacer" lines + if item == "" and remove_extraline: + continue + # ignore raw code + if re.search(tag_raw_flat_closing, item, re.IGNORECASE): + tmp = item.strip() + is_block_raw = False + elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): + tmp = item.strip() + is_block_raw = True + # find ignored blocks and retain indentation, otherwise: strip whitespace + if re.search(ignored_tag_closing, item, re.IGNORECASE): + tmp = item.strip() + is_block_ignored = False + elif re.search(ignored_tag_opening, item, re.IGNORECASE): + # count tabs used in ignored tags (for use later) + ignored_block_tab_count = item.count('\t') + tmp = item.strip() + is_block_ignored = True + # not filtered so just output it + else: + if is_block_raw == True: + # remove tabs from raw_flat content + tmp = re.sub('\t', '', item) + elif is_block_ignored == True: + tab_count = item.count('\t') - ignored_block_tab_count + tmp = '\t' * tab_count + item.strip() + else: + tmp = item.strip() - # the contents of these tags will not be indented - ignored_tag_opening = settings.get('ignored_tag_opening') - ignored_tag_closing = settings.get('ignored_tag_closing') + rawcode_flat = rawcode_flat + tmp + '\n' + + # print rawcode_flat + + # put each line into a list (again) + rawcode_flat_list = rawcode_flat.split('\n') + # print rawcode_flat_list + + # cycle through each list item (line of rawode_flat_list) again - this time: add indentation! + beautified_code = "" + + indent_level = 0 + is_block_ignored = False + is_block_raw = False + + for item in rawcode_flat_list: + # if a one-line, inline tag, just process it + if re.search(tag_pos_inline, item, re.IGNORECASE): + tmp = ("\t" * indent_level) + item + # if unindent, move left + elif re.search(tag_unindent, item, re.IGNORECASE): + indent_level = indent_level - 1 + tmp = ("\t" * indent_level) + item + elif re.search(tag_unindent_line, item, re.IGNORECASE): + tmp = ("\t" * (indent_level - 1)) + item + # if indent, move right + elif re.search(tag_indent, item, re.IGNORECASE): + tmp = ("\t" * indent_level) + item + indent_level = indent_level + 1 + # if raw, flatten! no indenting! + elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): + tmp = item + is_block_raw = True + elif re.search(tag_raw_flat_closing, item, re.IGNORECASE): + tmp = item + is_block_raw = False + else: + if is_block_raw == True: + tmp = item + # otherwise, just leave same level + else: + tmp = ("\t" * indent_level) + item - # the content of these tags will be indented - tag_indent = settings.get('tag_indent') + beautified_code = beautified_code + tmp + '\n' - # these tags will be un-indented - tag_unindent = settings.get('tag_unindent') + # remove leading and trailing white space + beautified_code = beautified_code.strip() - # the line will be un-indented and next line will be indented - tag_unindent_line = settings.get('tag_unindent_line') + def parser_beautify(self, settings, markup): - # these tags may occur inline and should not indent/unindent - tag_pos_inline = settings.get('tag_pos_inline') + indent_with = settings.get('indent_with', " ") - # remove extra line (empty) - remove_extraline = settings.get('remove_extraline') + soup = bs4.BeautifulSoup(markup) + fixed = soup.prettify() - # flatten tags and contents to column 1, removing tabs! - tag_raw_flat_opening = settings.get('tag_raw_flat_opening') - tag_raw_flat_closing = settings.get('tag_raw_flat_closing') + # So, BS4 is annoying and has no way to override the indentation char for the beautified + # output, it's hard coded to single-space-per-level. As such, we have to reprocess + # the resulting output and reindent it with whatever indentation char + # we want. + code_lines = fixed.split('\n') - # determine if applying to a selection or applying to the whole document - if self.view.sel()[0].empty(): - # nothing selected: process the entire file - region = sublime.Region(0, self.view.size()) - sublime.status_message('Beautifying Entire File') - rawcode = self.view.substr(region) - # print region - else: - # process only selected region - region = self.view.line(self.view.sel()[0]) - sublime.status_message('Beautifying Selection Only') - rawcode = self.view.substr(self.view.sel()[0]) - # print region - - # print rawcode - - # remove leading and trailing white space - rawcode = rawcode.strip() - # print rawcode - - # put each line into a list - rawcode_list = re.split('\n', rawcode) - # print rawcode_list - - # cycle through each list item (line of rawcode_list) - rawcode_flat = "" - is_block_ignored = False - is_block_raw = False - - for item in rawcode_list: - # print item.strip() - # remove extra "spacer" lines - if item == "" and remove_extraline: - continue - # ignore raw code - if re.search(tag_raw_flat_closing, item, re.IGNORECASE): - tmp = item.strip() - is_block_raw = False - elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): - tmp = item.strip() - is_block_raw = True - # find ignored blocks and retain indentation, otherwise: strip whitespace - if re.search(ignored_tag_closing, item, re.IGNORECASE): - tmp = item.strip() - is_block_ignored = False - elif re.search(ignored_tag_opening, item, re.IGNORECASE): - # count tabs used in ignored tags (for use later) - ignored_block_tab_count = item.count('\t') - tmp = item.strip() - is_block_ignored = True - # not filtered so just output it - else: - if is_block_raw == True: - # remove tabs from raw_flat content - tmp = re.sub('\t', '', item) - elif is_block_ignored == True: - tab_count = item.count('\t') - ignored_block_tab_count - tmp = '\t' * tab_count + item.strip() - else: - tmp = item.strip() + out = [] + for cline in code_lines: + short = cline.lstrip(" ") + out.append(indent_with * (len(cline) - len(short)) + short) - rawcode_flat = rawcode_flat + tmp + '\n' + ret = "\n".join(out) + return ret.strip() - # print rawcode_flat - # put each line into a list (again) - rawcode_flat_list = re.split('\n', rawcode_flat) - # print rawcode_flat_list + def run(self, edit): - # cycle through each list item (line of rawode_flat_list) again - this time: add indentation! - beautified_code = "" + # this file contains the tags that will be indented/unindented, etc. + settings = sublime.load_settings('HTMLBeautify.sublime-settings') - indent_level = 0 - is_block_ignored = False - is_block_raw = False + use_parser = settings.get('use_parser', False) - for item in rawcode_flat_list: - # if a one-line, inline tag, just process it - if re.search(tag_pos_inline, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item - # if unindent, move left - elif re.search(tag_unindent, item, re.IGNORECASE): - indent_level = indent_level - 1 - tmp = ("\t" * indent_level) + item - elif re.search(tag_unindent_line, item, re.IGNORECASE): - tmp = ("\t" * (indent_level - 1)) + item - # if indent, move right - elif re.search(tag_indent, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item - indent_level = indent_level + 1 - # if raw, flatten! no indenting! - elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): - tmp = item - is_block_raw = True - elif re.search(tag_raw_flat_closing, item, re.IGNORECASE): - tmp = item - is_block_raw = False - else: - if is_block_raw == True: - tmp = item - # otherwise, just leave same level - else: - tmp = ("\t" * indent_level) + item + # determine if applying to a selection or applying to the whole document + if self.view.sel()[0].empty(): + # nothing selected: process the entire file + region = sublime.Region(0, self.view.size()) + sublime.status_message('Beautifying Entire File') + rawcode = self.view.substr(region) + # print region + else: + # process only selected region + region = self.view.line(self.view.sel()[0]) + sublime.status_message('Beautifying Selection Only') + rawcode = self.view.substr(self.view.sel()[0]) + # print region - beautified_code = beautified_code + tmp + '\n' - # remove leading and trailing white space - beautified_code = beautified_code.strip() + # remove leading and trailing white space + rawcode = rawcode.strip() + # print rawcode - # print beautified_code + if use_parser: + beautified_code = self.parser_beautify(settings, rawcode) + else: + beautified_code = self.regex_beautify(settings, rawcode) + # print beautified_code - # replace the code in Sublime Text - self.view.replace(edit, region, beautified_code) + # replace the code in Sublime Text + self.view.replace(edit, region, beautified_code) - # done + # done diff --git a/dependencies.json b/dependencies.json new file mode 100644 index 0000000..5559a35 --- /dev/null +++ b/dependencies.json @@ -0,0 +1,7 @@ +{ + "*": { + "*": [ + "bs4" + ] + } +} \ No newline at end of file From 0e77ff6e165234f694df54df6b9cedf66bea890a Mon Sep 17 00:00:00 2001 From: Fake-Name Date: Wed, 26 Jul 2017 20:26:39 -0700 Subject: [PATCH 2/2] Make the regex reindenter also use the indent-with param, if present. --- HTMLBeautify.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/HTMLBeautify.py b/HTMLBeautify.py index da72cd0..c07798a 100644 --- a/HTMLBeautify.py +++ b/HTMLBeautify.py @@ -36,6 +36,7 @@ def regex_beautify(self, settings, rawcode): tag_raw_flat_opening = settings.get('tag_raw_flat_opening') tag_raw_flat_closing = settings.get('tag_raw_flat_closing') + indent_with = settings.get('indent_with', " ") # put each line into a list rawcode_list = rawcode.split('\n') @@ -96,16 +97,16 @@ def regex_beautify(self, settings, rawcode): for item in rawcode_flat_list: # if a one-line, inline tag, just process it if re.search(tag_pos_inline, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item + tmp = (indent_with * indent_level) + item # if unindent, move left elif re.search(tag_unindent, item, re.IGNORECASE): indent_level = indent_level - 1 - tmp = ("\t" * indent_level) + item + tmp = (indent_with * indent_level) + item elif re.search(tag_unindent_line, item, re.IGNORECASE): - tmp = ("\t" * (indent_level - 1)) + item + tmp = (indent_with * (indent_level - 1)) + item # if indent, move right elif re.search(tag_indent, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item + tmp = (indent_with * indent_level) + item indent_level = indent_level + 1 # if raw, flatten! no indenting! elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): @@ -119,13 +120,15 @@ def regex_beautify(self, settings, rawcode): tmp = item # otherwise, just leave same level else: - tmp = ("\t" * indent_level) + item + tmp = (indent_with * indent_level) + item beautified_code = beautified_code + tmp + '\n' # remove leading and trailing white space beautified_code = beautified_code.strip() + return beautified_code + def parser_beautify(self, settings, markup): indent_with = settings.get('indent_with', " ")