diff --git a/HTMLBeautify.py b/HTMLBeautify.py index 00f6638..c07798a 100644 --- a/HTMLBeautify.py +++ b/HTMLBeautify.py @@ -7,149 +7,183 @@ # url: http://reyman.name/ # e-mail: ross[at]reyman[dot]name -import sublime, sublime_plugin, re +import sublime, sublime_plugin, re, bs4 class HtmlBeautifyCommand(sublime_plugin.TextCommand): - def run(self, edit): - # this file contains the tags that will be indented/unindented, etc. - settings = sublime.load_settings('HTMLBeautify.sublime-settings') + def regex_beautify(self, settings, rawcode): + + # the contents of these tags will not be indented + ignored_tag_opening = settings.get('ignored_tag_opening') + ignored_tag_closing = settings.get('ignored_tag_closing') + + # the content of these tags will be indented + tag_indent = settings.get('tag_indent') + + # these tags will be un-indented + tag_unindent = settings.get('tag_unindent') + + # the line will be un-indented and next line will be indented + tag_unindent_line = settings.get('tag_unindent_line') + + # these tags may occur inline and should not indent/unindent + tag_pos_inline = settings.get('tag_pos_inline') + + # remove extra line (empty) + remove_extraline = settings.get('remove_extraline') + + # flatten tags and contents to column 1, removing tabs! + tag_raw_flat_opening = settings.get('tag_raw_flat_opening') + tag_raw_flat_closing = settings.get('tag_raw_flat_closing') + + indent_with = settings.get('indent_with', " ") + + # put each line into a list + rawcode_list = rawcode.split('\n') + # print rawcode_list + + # cycle through each list item (line of rawcode_list) + rawcode_flat = "" + is_block_ignored = False + is_block_raw = False + + for item in rawcode_list: + # print item.strip() + # remove extra "spacer" lines + if item == "" and remove_extraline: + continue + # ignore raw code + if re.search(tag_raw_flat_closing, item, re.IGNORECASE): + tmp = item.strip() + is_block_raw = False + elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): + tmp = item.strip() + is_block_raw = True + # find ignored blocks and retain indentation, otherwise: strip whitespace + if re.search(ignored_tag_closing, item, re.IGNORECASE): + tmp = item.strip() + is_block_ignored = False + elif re.search(ignored_tag_opening, item, re.IGNORECASE): + # count tabs used in ignored tags (for use later) + ignored_block_tab_count = item.count('\t') + tmp = item.strip() + is_block_ignored = True + # not filtered so just output it + else: + if is_block_raw == True: + # remove tabs from raw_flat content + tmp = re.sub('\t', '', item) + elif is_block_ignored == True: + tab_count = item.count('\t') - ignored_block_tab_count + tmp = '\t' * tab_count + item.strip() + else: + tmp = item.strip() - # the contents of these tags will not be indented - ignored_tag_opening = settings.get('ignored_tag_opening') - ignored_tag_closing = settings.get('ignored_tag_closing') + rawcode_flat = rawcode_flat + tmp + '\n' + + # print rawcode_flat + + # put each line into a list (again) + rawcode_flat_list = rawcode_flat.split('\n') + # print rawcode_flat_list + + # cycle through each list item (line of rawode_flat_list) again - this time: add indentation! + beautified_code = "" + + indent_level = 0 + is_block_ignored = False + is_block_raw = False + + for item in rawcode_flat_list: + # if a one-line, inline tag, just process it + if re.search(tag_pos_inline, item, re.IGNORECASE): + tmp = (indent_with * indent_level) + item + # if unindent, move left + elif re.search(tag_unindent, item, re.IGNORECASE): + indent_level = indent_level - 1 + tmp = (indent_with * indent_level) + item + elif re.search(tag_unindent_line, item, re.IGNORECASE): + tmp = (indent_with * (indent_level - 1)) + item + # if indent, move right + elif re.search(tag_indent, item, re.IGNORECASE): + tmp = (indent_with * indent_level) + item + indent_level = indent_level + 1 + # if raw, flatten! no indenting! + elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): + tmp = item + is_block_raw = True + elif re.search(tag_raw_flat_closing, item, re.IGNORECASE): + tmp = item + is_block_raw = False + else: + if is_block_raw == True: + tmp = item + # otherwise, just leave same level + else: + tmp = (indent_with * indent_level) + item - # the content of these tags will be indented - tag_indent = settings.get('tag_indent') + beautified_code = beautified_code + tmp + '\n' - # these tags will be un-indented - tag_unindent = settings.get('tag_unindent') + # remove leading and trailing white space + beautified_code = beautified_code.strip() - # the line will be un-indented and next line will be indented - tag_unindent_line = settings.get('tag_unindent_line') + return beautified_code - # these tags may occur inline and should not indent/unindent - tag_pos_inline = settings.get('tag_pos_inline') + def parser_beautify(self, settings, markup): - # remove extra line (empty) - remove_extraline = settings.get('remove_extraline') + indent_with = settings.get('indent_with', " ") - # flatten tags and contents to column 1, removing tabs! - tag_raw_flat_opening = settings.get('tag_raw_flat_opening') - tag_raw_flat_closing = settings.get('tag_raw_flat_closing') + soup = bs4.BeautifulSoup(markup) + fixed = soup.prettify() - # determine if applying to a selection or applying to the whole document - if self.view.sel()[0].empty(): - # nothing selected: process the entire file - region = sublime.Region(0, self.view.size()) - sublime.status_message('Beautifying Entire File') - rawcode = self.view.substr(region) - # print region - else: - # process only selected region - region = self.view.line(self.view.sel()[0]) - sublime.status_message('Beautifying Selection Only') - rawcode = self.view.substr(self.view.sel()[0]) - # print region - - # print rawcode - - # remove leading and trailing white space - rawcode = rawcode.strip() - # print rawcode - - # put each line into a list - rawcode_list = re.split('\n', rawcode) - # print rawcode_list - - # cycle through each list item (line of rawcode_list) - rawcode_flat = "" - is_block_ignored = False - is_block_raw = False - - for item in rawcode_list: - # print item.strip() - # remove extra "spacer" lines - if item == "" and remove_extraline: - continue - # ignore raw code - if re.search(tag_raw_flat_closing, item, re.IGNORECASE): - tmp = item.strip() - is_block_raw = False - elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): - tmp = item.strip() - is_block_raw = True - # find ignored blocks and retain indentation, otherwise: strip whitespace - if re.search(ignored_tag_closing, item, re.IGNORECASE): - tmp = item.strip() - is_block_ignored = False - elif re.search(ignored_tag_opening, item, re.IGNORECASE): - # count tabs used in ignored tags (for use later) - ignored_block_tab_count = item.count('\t') - tmp = item.strip() - is_block_ignored = True - # not filtered so just output it - else: - if is_block_raw == True: - # remove tabs from raw_flat content - tmp = re.sub('\t', '', item) - elif is_block_ignored == True: - tab_count = item.count('\t') - ignored_block_tab_count - tmp = '\t' * tab_count + item.strip() - else: - tmp = item.strip() - - rawcode_flat = rawcode_flat + tmp + '\n' - - # print rawcode_flat - - # put each line into a list (again) - rawcode_flat_list = re.split('\n', rawcode_flat) - # print rawcode_flat_list - - # cycle through each list item (line of rawode_flat_list) again - this time: add indentation! - beautified_code = "" - - indent_level = 0 - is_block_ignored = False - is_block_raw = False - - for item in rawcode_flat_list: - # if a one-line, inline tag, just process it - if re.search(tag_pos_inline, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item - # if unindent, move left - elif re.search(tag_unindent, item, re.IGNORECASE): - indent_level = indent_level - 1 - tmp = ("\t" * indent_level) + item - elif re.search(tag_unindent_line, item, re.IGNORECASE): - tmp = ("\t" * (indent_level - 1)) + item - # if indent, move right - elif re.search(tag_indent, item, re.IGNORECASE): - tmp = ("\t" * indent_level) + item - indent_level = indent_level + 1 - # if raw, flatten! no indenting! - elif re.search(tag_raw_flat_opening, item, re.IGNORECASE): - tmp = item - is_block_raw = True - elif re.search(tag_raw_flat_closing, item, re.IGNORECASE): - tmp = item - is_block_raw = False - else: - if is_block_raw == True: - tmp = item - # otherwise, just leave same level - else: - tmp = ("\t" * indent_level) + item + # So, BS4 is annoying and has no way to override the indentation char for the beautified + # output, it's hard coded to single-space-per-level. As such, we have to reprocess + # the resulting output and reindent it with whatever indentation char + # we want. + code_lines = fixed.split('\n') + + out = [] + for cline in code_lines: + short = cline.lstrip(" ") + out.append(indent_with * (len(cline) - len(short)) + short) + + ret = "\n".join(out) + return ret.strip() + + + def run(self, edit): + + # this file contains the tags that will be indented/unindented, etc. + settings = sublime.load_settings('HTMLBeautify.sublime-settings') + + use_parser = settings.get('use_parser', False) + + # determine if applying to a selection or applying to the whole document + if self.view.sel()[0].empty(): + # nothing selected: process the entire file + region = sublime.Region(0, self.view.size()) + sublime.status_message('Beautifying Entire File') + rawcode = self.view.substr(region) + # print region + else: + # process only selected region + region = self.view.line(self.view.sel()[0]) + sublime.status_message('Beautifying Selection Only') + rawcode = self.view.substr(self.view.sel()[0]) + # print region - beautified_code = beautified_code + tmp + '\n' - # remove leading and trailing white space - beautified_code = beautified_code.strip() + # remove leading and trailing white space + rawcode = rawcode.strip() + # print rawcode - # print beautified_code + if use_parser: + beautified_code = self.parser_beautify(settings, rawcode) + else: + beautified_code = self.regex_beautify(settings, rawcode) + # print beautified_code - # replace the code in Sublime Text - self.view.replace(edit, region, beautified_code) + # replace the code in Sublime Text + self.view.replace(edit, region, beautified_code) - # done + # done diff --git a/dependencies.json b/dependencies.json new file mode 100644 index 0000000..5559a35 --- /dev/null +++ b/dependencies.json @@ -0,0 +1,7 @@ +{ + "*": { + "*": [ + "bs4" + ] + } +} \ No newline at end of file