From a0f2e0faaf7d1e31084addac66bb9ffece9393d4 Mon Sep 17 00:00:00 2001
From: Fake-Name <something@fake-url.com>
Date: Wed, 26 Jul 2017 20:17:43 -0700
Subject: [PATCH 1/2] Optionally use BS4/prettify() to format code. It's more
 robust, and a lot more performant. This largely addresses
 https://github.com/rareyman/HTMLBeautify/issues/49.

---
 HTMLBeautify.py   | 279 +++++++++++++++++++++++++---------------------
 dependencies.json |   7 ++
 2 files changed, 162 insertions(+), 124 deletions(-)
 create mode 100644 dependencies.json

diff --git a/HTMLBeautify.py b/HTMLBeautify.py
index 00f6638..da72cd0 100644
--- a/HTMLBeautify.py
+++ b/HTMLBeautify.py
@@ -7,149 +7,180 @@
 # url:			http://reyman.name/
 # e-mail:		ross[at]reyman[dot]name
 
-import sublime, sublime_plugin, re
+import sublime, sublime_plugin, re,  bs4
 
 class HtmlBeautifyCommand(sublime_plugin.TextCommand):
-		def run(self, edit):
 
-			# this file contains the tags that will be indented/unindented, etc.
-			settings = sublime.load_settings('HTMLBeautify.sublime-settings')
+	def regex_beautify(self, settings, rawcode):
+
+		# the contents of these tags will not be indented
+		ignored_tag_opening = settings.get('ignored_tag_opening')
+		ignored_tag_closing = settings.get('ignored_tag_closing')
+
+		# the content of these tags will be indented
+		tag_indent 					= settings.get('tag_indent')
+
+		# these tags will be un-indented
+		tag_unindent 				= settings.get('tag_unindent')
+
+		# the line will be un-indented and next line will be indented
+		tag_unindent_line			= settings.get('tag_unindent_line')
+
+		# these tags may occur inline and should not indent/unindent
+		tag_pos_inline 				= settings.get('tag_pos_inline')
+
+		# remove extra line (empty)
+		remove_extraline 			= settings.get('remove_extraline')
+
+		# flatten tags and contents to column 1, removing tabs!
+		tag_raw_flat_opening 			= settings.get('tag_raw_flat_opening')
+		tag_raw_flat_closing 			= settings.get('tag_raw_flat_closing')
+
+
+		# put each line into a list
+		rawcode_list = rawcode.split('\n')
+		# print rawcode_list
+
+		# cycle through each list item (line of rawcode_list)
+		rawcode_flat = ""
+		is_block_ignored = False
+		is_block_raw = False
+
+		for item in rawcode_list:
+			# print item.strip()
+			# remove extra "spacer" lines
+			if item == "" and remove_extraline:
+				continue
+			# ignore raw code
+			if re.search(tag_raw_flat_closing, item, re.IGNORECASE):
+				tmp = item.strip()
+				is_block_raw = False
+			elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
+				tmp = item.strip()
+				is_block_raw = True
+			# find ignored blocks and retain indentation, otherwise: strip whitespace
+			if re.search(ignored_tag_closing, item, re.IGNORECASE):
+				tmp = item.strip()
+				is_block_ignored = False
+			elif re.search(ignored_tag_opening, item, re.IGNORECASE):
+				# count tabs used in ignored tags (for use later)
+				ignored_block_tab_count = item.count('\t')
+				tmp = item.strip()
+				is_block_ignored = True
+			# not filtered so just output it
+			else:
+				if is_block_raw == True:
+					# remove tabs from raw_flat content
+					tmp = re.sub('\t', '', item)
+				elif is_block_ignored == True:
+					tab_count = item.count('\t') - ignored_block_tab_count
+					tmp = '\t' * tab_count + item.strip()
+				else:
+					tmp = item.strip()
 
-			# the contents of these tags will not be indented
-			ignored_tag_opening = settings.get('ignored_tag_opening')
-			ignored_tag_closing = settings.get('ignored_tag_closing')
+			rawcode_flat = rawcode_flat + tmp + '\n'
+
+		# print rawcode_flat
+
+		# put each line into a list (again)
+		rawcode_flat_list = rawcode_flat.split('\n')
+		# print rawcode_flat_list
+
+		# cycle through each list item (line of rawcode_flat_list) again - this time: add indentation!
+		beautified_code = ""
+
+		indent_level = 0
+		is_block_ignored = False
+		is_block_raw = False
+
+		for item in rawcode_flat_list:
+			# if a one-line, inline tag, just process it
+			if re.search(tag_pos_inline, item, re.IGNORECASE):
+				tmp = ("\t" * indent_level) + item
+			# if unindent, move left
+			elif re.search(tag_unindent, item, re.IGNORECASE):
+				indent_level = indent_level - 1
+				tmp = ("\t" * indent_level) + item
+			elif re.search(tag_unindent_line, item, re.IGNORECASE):
+				tmp = ("\t" * (indent_level - 1)) + item
+			# if indent, move right
+			elif re.search(tag_indent, item, re.IGNORECASE):
+				tmp = ("\t" * indent_level) + item
+				indent_level = indent_level + 1
+			# if raw, flatten! no indenting!
+			elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
+				tmp = item
+				is_block_raw = True
+			elif re.search(tag_raw_flat_closing, item, re.IGNORECASE):
+				tmp = item
+				is_block_raw = False
+			else:
+				if is_block_raw == True:
+					tmp = item
+				# otherwise, just leave same level
+				else:
+					tmp = ("\t" * indent_level) + item
 
-			# the content of these tags will be indented
-			tag_indent 					= settings.get('tag_indent')
+			beautified_code = beautified_code + tmp + '\n'
 
-			# these tags will be un-indented
-			tag_unindent 				= settings.get('tag_unindent')
+		# remove leading and trailing white space
+		beautified_code = beautified_code.strip()
 
-			# the line will be un-indented and next line will be indented
-			tag_unindent_line			= settings.get('tag_unindent_line')
+	def parser_beautify(self, settings, markup):
 
-			# these tags may occur inline and should not indent/unindent
-			tag_pos_inline 				= settings.get('tag_pos_inline')
+		indent_with = settings.get('indent_with', "    ")
 
-			# remove extra line (empty)
-			remove_extraline 			= settings.get('remove_extraline')
+		soup = bs4.BeautifulSoup(markup)
+		fixed = soup.prettify()
 
-			# flatten tags and contents to column 1, removing tabs!
-			tag_raw_flat_opening 			= settings.get('tag_raw_flat_opening')
-			tag_raw_flat_closing 			= settings.get('tag_raw_flat_closing')
+		# BS4's prettify() gives us no way to override the indentation string used for
+		# the beautified output; it is hard-coded to a single space per nesting level.
+		# As such, we have to reprocess the resulting output and re-indent it with
+		# whatever indentation string we want.
+		code_lines = fixed.split('\n')
 
-			# determine if applying to a selection or applying to the whole document
-			if self.view.sel()[0].empty():
-				# nothing selected: process the entire file
-				region = sublime.Region(0, self.view.size())
-				sublime.status_message('Beautifying Entire File')
-				rawcode = self.view.substr(region)
-				# print region
-			else:
-				# process only selected region
-				region = self.view.line(self.view.sel()[0])
-				sublime.status_message('Beautifying Selection Only')
-				rawcode = self.view.substr(self.view.sel()[0])
-				# print region
-
-			# print rawcode
-
-			# remove leading and trailing white space
-			rawcode = rawcode.strip()
-			# print rawcode
-
-			# put each line into a list
-			rawcode_list = re.split('\n', rawcode)
-			# print rawcode_list
-
-			# cycle through each list item (line of rawcode_list)
-			rawcode_flat = ""
-			is_block_ignored = False
-			is_block_raw = False
-
-			for item in rawcode_list:
-				# print item.strip()
-				# remove extra "spacer" lines
-				if item == "" and remove_extraline:
-					continue
-				# ignore raw code
-				if re.search(tag_raw_flat_closing, item, re.IGNORECASE):
-					tmp = item.strip()
-					is_block_raw = False
-				elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
-					tmp = item.strip()
-					is_block_raw = True
-				# find ignored blocks and retain indentation, otherwise: strip whitespace
-				if re.search(ignored_tag_closing, item, re.IGNORECASE):
-					tmp = item.strip()
-					is_block_ignored = False
-				elif re.search(ignored_tag_opening, item, re.IGNORECASE):
-					# count tabs used in ignored tags (for use later)
-					ignored_block_tab_count = item.count('\t')
-					tmp = item.strip()
-					is_block_ignored = True
-				# not filtered so just output it
-				else:
-					if is_block_raw == True:
-						# remove tabs from raw_flat content
-						tmp = re.sub('\t', '', item)
-					elif is_block_ignored == True:
-						tab_count = item.count('\t') - ignored_block_tab_count
-						tmp = '\t' * tab_count + item.strip()
-					else:
-						tmp = item.strip()
+		out = []
+		for cline in code_lines:
+			short = cline.lstrip(" ")
+			out.append(indent_with * (len(cline) - len(short)) + short)
 
-				rawcode_flat = rawcode_flat + tmp + '\n'
+		ret = "\n".join(out)
+		return ret.strip()
 
-			# print rawcode_flat
 
-			# put each line into a list (again)
-			rawcode_flat_list = re.split('\n', rawcode_flat)
-			# print rawcode_flat_list
+	def run(self, edit):
 
-			# cycle through each list item (line of rawode_flat_list) again - this time: add indentation!
-			beautified_code = ""
+		# this file contains the tags that will be indented/unindented, etc.
+		settings = sublime.load_settings('HTMLBeautify.sublime-settings')
 
-			indent_level = 0
-			is_block_ignored = False
-			is_block_raw = False
+		use_parser = settings.get('use_parser', False)
 
-			for item in rawcode_flat_list:
-				# if a one-line, inline tag, just process it
-				if re.search(tag_pos_inline, item, re.IGNORECASE):
-					tmp = ("\t" * indent_level) + item
-				# if unindent, move left
-				elif re.search(tag_unindent, item, re.IGNORECASE):
-					indent_level = indent_level - 1
-					tmp = ("\t" * indent_level) + item
-				elif re.search(tag_unindent_line, item, re.IGNORECASE):
-					tmp = ("\t" * (indent_level - 1)) + item
-				# if indent, move right
-				elif re.search(tag_indent, item, re.IGNORECASE):
-					tmp = ("\t" * indent_level) + item
-					indent_level = indent_level + 1
-				# if raw, flatten! no indenting!
-				elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
-					tmp = item
-					is_block_raw = True
-				elif re.search(tag_raw_flat_closing, item, re.IGNORECASE):
-					tmp = item
-					is_block_raw = False
-				else:
-					if is_block_raw == True:
-						tmp = item
-					# otherwise, just leave same level
-					else:
-						tmp = ("\t" * indent_level) + item
+		# determine if applying to a selection or applying to the whole document
+		if self.view.sel()[0].empty():
+			# nothing selected: process the entire file
+			region = sublime.Region(0, self.view.size())
+			sublime.status_message('Beautifying Entire File')
+			rawcode = self.view.substr(region)
+			# print region
+		else:
+			# process only selected region
+			region = self.view.line(self.view.sel()[0])
+			sublime.status_message('Beautifying Selection Only')
+			rawcode = self.view.substr(self.view.sel()[0])
+			# print region
 
-				beautified_code = beautified_code + tmp + '\n'
 
-			# remove leading and trailing white space
-			beautified_code = beautified_code.strip()
+		# remove leading and trailing white space
+		rawcode = rawcode.strip()
+		# print rawcode
 
-			# print beautified_code
+		if use_parser:
+			beautified_code = self.parser_beautify(settings, rawcode)
+		else:
+			beautified_code = self.regex_beautify(settings, rawcode)
+		# print beautified_code
 
-			# replace the code in Sublime Text
-			self.view.replace(edit, region, beautified_code)
+		# replace the code in Sublime Text
+		self.view.replace(edit, region, beautified_code)
 
-			# done
+		# done
diff --git a/dependencies.json b/dependencies.json
new file mode 100644
index 0000000..5559a35
--- /dev/null
+++ b/dependencies.json
@@ -0,0 +1,7 @@
+{
+   "*": {
+      "*": [
+         "bs4"
+      ]
+   }
+}
\ No newline at end of file

From 0e77ff6e165234f694df54df6b9cedf66bea890a Mon Sep 17 00:00:00 2001
From: Fake-Name <something@fake-url.com>
Date: Wed, 26 Jul 2017 20:26:39 -0700
Subject: [PATCH 2/2] Make the regex reindenter also use the indent-with param,
 if present.

---
 HTMLBeautify.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/HTMLBeautify.py b/HTMLBeautify.py
index da72cd0..c07798a 100644
--- a/HTMLBeautify.py
+++ b/HTMLBeautify.py
@@ -36,6 +36,7 @@ def regex_beautify(self, settings, rawcode):
 		tag_raw_flat_opening 			= settings.get('tag_raw_flat_opening')
 		tag_raw_flat_closing 			= settings.get('tag_raw_flat_closing')
 
+		indent_with = settings.get('indent_with', "    ")
 
 		# put each line into a list
 		rawcode_list = rawcode.split('\n')
@@ -96,16 +97,16 @@ def regex_beautify(self, settings, rawcode):
 		for item in rawcode_flat_list:
 			# if a one-line, inline tag, just process it
 			if re.search(tag_pos_inline, item, re.IGNORECASE):
-				tmp = ("\t" * indent_level) + item
+				tmp = (indent_with * indent_level) + item
 			# if unindent, move left
 			elif re.search(tag_unindent, item, re.IGNORECASE):
 				indent_level = indent_level - 1
-				tmp = ("\t" * indent_level) + item
+				tmp = (indent_with * indent_level) + item
 			elif re.search(tag_unindent_line, item, re.IGNORECASE):
-				tmp = ("\t" * (indent_level - 1)) + item
+				tmp = (indent_with * (indent_level - 1)) + item
 			# if indent, move right
 			elif re.search(tag_indent, item, re.IGNORECASE):
-				tmp = ("\t" * indent_level) + item
+				tmp = (indent_with * indent_level) + item
 				indent_level = indent_level + 1
 			# if raw, flatten! no indenting!
 			elif re.search(tag_raw_flat_opening, item, re.IGNORECASE):
@@ -119,13 +120,15 @@ def regex_beautify(self, settings, rawcode):
 					tmp = item
 				# otherwise, just leave same level
 				else:
-					tmp = ("\t" * indent_level) + item
+					tmp = (indent_with * indent_level) + item
 
 			beautified_code = beautified_code + tmp + '\n'
 
 		# remove leading and trailing white space
 		beautified_code = beautified_code.strip()
 
+		return beautified_code
+
 	def parser_beautify(self, settings, markup):
 
 		indent_with = settings.get('indent_with', "    ")