Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,19 @@ name: Test
- pull_request

jobs:
# rubocop:
# runs-on: ubuntu-latest
# env:
# CI: true
# steps:
# - uses: actions/checkout@v3
# - name: Set up Ruby 3.4
# uses: ruby/setup-ruby@v1
# with:
# ruby-version: 3.4
# bundler-cache: true
# - name: Run RuboCop
# run: bundle exec rubocop --parallel
rubocop:
runs-on: ubuntu-latest
env:
CI: true
steps:
- uses: actions/checkout@v3
- name: Set up Ruby 3.3
uses: ruby/setup-ruby@v1
with:
ruby-version: 3.3
bundler-cache: true
- name: Run RuboCop
run: bundle exec rubocop --parallel

test:
name: "${{matrix.ruby}} ${{matrix.os || 'ubuntu-latest'}}"
Expand Down
42 changes: 42 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
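# TODO: Enable these plugins once RuboCop is upgraded to a version that
# supports the `plugins:` key (the Gemfile currently pins rubocop < 1.13.0).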
# plugins:
# - rubocop-performance
# - rubocop-rake
# - rubocop-rspec

AllCops:
TargetRubyVersion: 2.4
NewCops: enable
SuggestExtensions: false

Layout/LineLength:
Exclude:
- 'spec/**/*'

Metrics/AbcSize:
Enabled: false

Metrics/BlockLength:
Enabled: false

Metrics/BlockNesting:
Enabled: false

Metrics/MethodLength:
Enabled: false

Naming/FileName:
Exclude:
- 'lib/utf8-cleaner.rb'

Style/AsciiComments:
Enabled: false

# RSpec/IndexedLet:
# Enabled: false
#
# RSpec/MessageSpies:
# EnforcedStyle: receive
#
# RSpec/MultipleMemoizedHelpers:
# Enabled: false
8 changes: 8 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,12 @@

source 'https://rubygems.org'

gem 'rack-test'
gem 'rake'
gem 'rspec'
gem 'rubocop', '< 1.13.0'
gem 'rubocop-performance'
gem 'rubocop-rake'
gem 'rubocop-rspec'

gemspec
9 changes: 6 additions & 3 deletions lib/utf8-cleaner/middleware.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
require 'active_support/multibyte/unicode'

module UTF8Cleaner
# Rack middleware to sanitize non-UTF8 chars in
# environment variables and request input.
class Middleware
SANITIZE_ENV_KEYS = %w[
HTTP_REFERER
Expand Down Expand Up @@ -35,6 +37,7 @@ def sanitize_env(env)
def sanitize_env_keys(env)
SANITIZE_ENV_KEYS.each do |key|
next unless (value = env[key])

env[key] = cleaned_string(value)
end
end
Expand All @@ -57,10 +60,10 @@ def sanitize_env_rack_input(env)
return unless input_data && !input_data.ascii_only?

env['rack.input'] = StringIO.new(tidy_bytes(input_data))
else
# Do not process multipart/form-data since it may contain binary content.
# Leave all other unknown content types alone.
end
# Else:
# - Do not process multipart/form-data since it may contain binary content.
# - Leave all other unknown content types alone.
end

def read_input(input)
Expand Down
1 change: 1 addition & 0 deletions lib/utf8-cleaner/railtie.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

module UTF8Cleaner
# Railtie to bootstrap UTF8Cleaner::Middleware in a Rails application.
class Railtie < Rails::Railtie
initializer('utf8-cleaner.insert_middleware') do |app|
app.config.middleware.insert_before(0, UTF8Cleaner::Middleware)
Expand Down
32 changes: 14 additions & 18 deletions lib/utf8-cleaner/uri_string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ class URIString
attr_accessor :data

HEX_CHARS = '0-9a-fA-F'
HEX_CHARS_REGEX = /[#{HEX_CHARS}]/
INVALID_PERCENT_ENCODING_REGEX = /%(?![#{HEX_CHARS}]{2})/
HEX_CHARS_REGEX = /[#{HEX_CHARS}]/.freeze
INVALID_PERCENT_ENCODING_REGEX = /%(?![#{HEX_CHARS}]{2})/.freeze

def initialize(data)
self.data = data
Expand All @@ -34,7 +34,7 @@ def encoded_char_array
char_array = []
index = 0

while index < data.length do
while index < data.length
char = data[index]

if char == '%'
Expand All @@ -43,20 +43,20 @@ def encoded_char_array
skip_next = 2

# If the next character is not a hex char, drop both the percent sign and
# that character.
unless data[index + 1] =~ HEX_CHARS_REGEX
unless HEX_CHARS_REGEX.match?(data[index + 1])
index += 2
next
end

# If the character after that is not a hex char, drop the percent and
# both of the following chars.
unless data[index + 2] =~ HEX_CHARS_REGEX
unless HEX_CHARS_REGEX.match?(data[index + 2])
index += 3
next
end

# How long is this character?
first_byte = '0x' + (data[index + 1] + data[index + 2]).upcase
first_byte = "0x#{(data[index + 1] + data[index + 2]).upcase}"
bytes = utf8_char_length_in_bytes(first_byte)

# Grab the specified number of encoded bytes
Expand All @@ -74,7 +74,7 @@ def encoded_char_array

# If we're dealing with a multibyte character, skip more than two
# of the next characters, which have already been processed.
skip_next = bytes * 3 - 1
skip_next = (bytes * 3) - 1
end
end
index += skip_next
Expand All @@ -92,9 +92,8 @@ def valid_uri_encoded_utf8(string)
URI::DEFAULT_PARSER.unescape(string).force_encoding('UTF-8').valid_encoding? &&
string !~ INVALID_PERCENT_ENCODING_REGEX
rescue ArgumentError => e
if e.message =~ /invalid byte sequence/
return false
end
return false if e.message.include?('invalid byte sequence')

raise e
end

Expand All @@ -103,16 +102,13 @@ def valid_uri_encoded_utf8(string)
def next_n_bytes_from(index, num_bytes)
return [] if data.length < index + (3 * num_bytes)

num_bytes.times.map do |n|
Array.new(num_bytes) do |n|
# Look for percent signs in the right places
pct_index = index + (3 * n)
if data[pct_index] == '%'
byte = data[pct_index + 1..pct_index + 2]
else
# An expected percent sign was missing. The whole character is invalid.
return []
end
'%' + byte
return [] unless data[pct_index] == '%'

# The guard above returns [] when an expected percent sign is missing,
# because the whole character is then invalid. Here the sign is present.
"%#{data[(pct_index + 1)..(pct_index + 2)]}"
end
end

Expand Down
Loading
Loading