singlebrook · johnnyshields · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -6,19 +6,19 @@ name: Test
   - pull_request
 
 jobs:
-  # rubocop:
-  #   runs-on: ubuntu-latest
-  #   env:
-  #     CI: true
-  #   steps:
-  #     - uses: actions/checkout@v3
-  #     - name: Set up Ruby 3.4
-  #       uses: ruby/setup-ruby@v1
-  #       with:
-  #         ruby-version: 3.4
-  #         bundler-cache: true
-  #     - name: Run RuboCop
-  #       run: bundle exec rubocop --parallel
+  rubocop:
+    runs-on: ubuntu-latest
+    env:
+      CI: true
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Ruby 3.3
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 3.3
+          bundler-cache: true
+      - name: Run RuboCop
+        run: bundle exec rubocop --parallel
 
   test:
     name: "${{matrix.ruby}} ${{matrix.os || 'ubuntu-latest'}}"

diff --git a/.rubocop.yml b/.rubocop.yml
@@ -0,0 +1,42 @@
+# TODO: Enable plugins after upgrading to a RuboCop release that supports the `plugins:` key
+# plugins:
+#   - rubocop-performance
+#   - rubocop-rake
+#   - rubocop-rspec
+
+AllCops:
+  TargetRubyVersion: 2.4
+  NewCops: enable
+  SuggestExtensions: false
+
+Layout/LineLength:
+  Exclude:
+    - 'spec/**/*'
+
+Metrics/AbcSize:
+  Enabled: false
+
+Metrics/BlockLength:
+  Enabled: false
+
+Metrics/BlockNesting:
+  Enabled: false
+
+Metrics/MethodLength:
+  Enabled: false
+
+Naming/FileName:
+  Exclude:
+    - 'lib/utf8-cleaner.rb'
+
+Style/AsciiComments:
+  Enabled: false
+
+# RSpec/IndexedLet:
+#   Enabled: false
+#
+# RSpec/MessageSpies:
+#   EnforcedStyle: receive
+#
+# RSpec/MultipleMemoizedHelpers:
+#   Enabled: false
diff --git a/Gemfile b/Gemfile
@@ -2,4 +2,12 @@
 
 source 'https://rubygems.org'
 
+gem 'rack-test'
+gem 'rake'
+gem 'rspec'
+gem 'rubocop', '< 1.13.0'
+gem 'rubocop-performance'
+gem 'rubocop-rake'
+gem 'rubocop-rspec'
+
 gemspec
diff --git a/lib/utf8-cleaner/middleware.rb b/lib/utf8-cleaner/middleware.rb
@@ -3,6 +3,8 @@
 require 'active_support/multibyte/unicode'
 
 module UTF8Cleaner
+  # Rack middleware to sanitize non-UTF8 chars in
+  # environment variables and request input.
   class Middleware
     SANITIZE_ENV_KEYS = %w[
       HTTP_REFERER
@@ -35,6 +37,7 @@ def sanitize_env(env)
     def sanitize_env_keys(env)
       SANITIZE_ENV_KEYS.each do |key|
         next unless (value = env[key])
+
         env[key] = cleaned_string(value)
       end
     end
@@ -57,10 +60,10 @@ def sanitize_env_rack_input(env)
         return unless input_data && !input_data.ascii_only?
 
         env['rack.input'] = StringIO.new(tidy_bytes(input_data))
-      else
-        # Do not process multipart/form-data since it may contain binary content.
-        # Leave all other unknown content types alone.
       end
+      # Else:
+      # - Do not process multipart/form-data since it may contain binary content.
+      # - Leave all other unknown content types alone.
     end
 
     def read_input(input)

diff --git a/lib/utf8-cleaner/railtie.rb b/lib/utf8-cleaner/railtie.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 module UTF8Cleaner
+  # Railtie to bootstrap UTF8Cleaner::Middleware in a Rails application.
   class Railtie < Rails::Railtie
     initializer('utf8-cleaner.insert_middleware') do |app|
       app.config.middleware.insert_before(0, UTF8Cleaner::Middleware)

diff --git a/lib/utf8-cleaner/uri_string.rb b/lib/utf8-cleaner/uri_string.rb
@@ -6,8 +6,8 @@ class URIString
     attr_accessor :data
 
     HEX_CHARS = '0-9a-fA-F'
-    HEX_CHARS_REGEX = /[#{HEX_CHARS}]/
-    INVALID_PERCENT_ENCODING_REGEX = /%(?![#{HEX_CHARS}]{2})/
+    HEX_CHARS_REGEX = /[#{HEX_CHARS}]/.freeze
+    INVALID_PERCENT_ENCODING_REGEX = /%(?![#{HEX_CHARS}]{2})/.freeze
 
     def initialize(data)
       self.data = data
@@ -34,7 +34,7 @@ def encoded_char_array
       char_array = []
       index = 0
 
-      while index < data.length do
+      while index < data.length
         char = data[index]
 
         if char == '%'
@@ -43,20 +43,20 @@ def encoded_char_array
           skip_next = 2
 
           # If the next character is not a hex char, drop the percent and it
-          unless data[index + 1] =~ HEX_CHARS_REGEX
+          unless HEX_CHARS_REGEX.match?(data[index + 1])
             index += 2
             next
           end
 
           # If the character after that is not a hex char, drop the percent and
           # both of the following chars.
-          unless data[index + 2] =~ HEX_CHARS_REGEX
+          unless HEX_CHARS_REGEX.match?(data[index + 2])
             index += 3
             next
           end
 
           # How long is this character?
-          first_byte = '0x' + (data[index + 1] + data[index + 2]).upcase
+          first_byte = "0x#{(data[index + 1] + data[index + 2]).upcase}"
           bytes = utf8_char_length_in_bytes(first_byte)
 
           # Grab the specified number of encoded bytes
@@ -74,7 +74,7 @@ def encoded_char_array
 
               # If we're dealing with a multibyte character, skip more than two
               # of the next characters, which have already been processed.
-              skip_next = bytes * 3 - 1
+              skip_next = (bytes * 3) - 1
             end
           end
           index += skip_next
@@ -92,9 +92,8 @@ def valid_uri_encoded_utf8(string)
       URI::DEFAULT_PARSER.unescape(string).force_encoding('UTF-8').valid_encoding? &&
         string !~ INVALID_PERCENT_ENCODING_REGEX
     rescue ArgumentError => e
-      if e.message =~ /invalid byte sequence/
-        return false
-      end
+      return false if e.message.include?('invalid byte sequence')
+
       raise e
     end
 
@@ -103,16 +102,13 @@ def valid_uri_encoded_utf8(string)
     def next_n_bytes_from(index, num_bytes)
       return [] if data.length < index + (3 * num_bytes)
 
-      num_bytes.times.map do |n|
+      Array.new(num_bytes) do |n|
         # Look for percent signs in the right places
         pct_index = index + (3 * n)
-        if data[pct_index] == '%'
-          byte = data[pct_index + 1..pct_index + 2]
-        else
-          # An expected percent sign was missing. The whole character is invalid.
-          return []
-        end
-        '%' + byte
+        # If an expected percent sign is missing, the whole character is invalid.
+        return [] unless data[pct_index] == '%'
+
+        "%#{data[(pct_index + 1)..(pct_index + 2)]}"
       end
     end