commit caa5c663befb123d158aa928716c194a11e8f51d
parent acae15af917fd4a27a924dcfe31d52bfe48bee22
Author: Dan Callaghan <djc@djc.id.au>
Date: Mon, 23 May 2022 20:20:46 +1000
scrub strings in fix_encoding! rather than triple-encoding
Scrubbing has the same effect as the old round trip through UTF-16, but
it should be more efficient since in the normal case there will be no
extra allocations. This matters for fix_encoding! since it's applied to
a lot of strings everywhere.
Diffstat:
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/lib/sup/util.rb b/lib/sup/util.rb
@@ -11,6 +11,7 @@ require 'benchmark'
require 'unicode'
require 'unicode/display_width'
require 'fileutils'
+require 'string-scrub' if /^2\.0\./ =~ RUBY_VERSION
module ExtendedLockfile
def gen_lock_id
@@ -315,11 +316,8 @@ class String
# first try to encode to utf-8 from whatever current encoding
encode!('UTF-8', :invalid => :replace, :undef => :replace)
- # do this anyway in case string is set to be UTF-8, encoding to
- # something else (UTF-16 which can fully represent UTF-8) and back
- # ensures invalid chars are replaced.
- encode!('UTF-16', 'UTF-8', :invalid => :replace, :undef => :replace)
- encode!('UTF-8', 'UTF-16', :invalid => :replace, :undef => :replace)
+ # ensure invalid chars are replaced
+ scrub!
fail "Could not create valid UTF-8 string out of: '#{self.to_s}'." unless valid_encoding?