From 81aa7b25bfec90f983a18cff2b5e46cbd7d5cdcb Mon Sep 17 00:00:00 2001 From: John Shahid Date: Sat, 21 Jul 2012 15:36:59 -0400 Subject: [PATCH 1/3] [encoding] add a new test to make sure that changing encoding of ASCII substrings that originated from binary strings doesn't fail. --- test/ruby/test_encoding.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/ruby/test_encoding.rb b/test/ruby/test_encoding.rb index ef2dc39c4d0328..850f21b4134a5a 100644 --- a/test/ruby/test_encoding.rb +++ b/test/ruby/test_encoding.rb @@ -17,6 +17,17 @@ def test_encoding end end + def test_encoding_of_ascii_originating_from_binary + binary_string = "\x82foo" + ascii_string = binary_string.bytes.to_a.pack('c*')[1..-1] + assert_equal "foo", ascii_string + assert_equal "ASCII-8BIT", ascii_string.encoding.name + utf8_string = ascii_string.encode("UTF-8") + assert_equal "foo", utf8_string + assert_equal "UTF-8", utf8_string.encoding.name + puts "End" + end + def test_enc_names aliases = Encoding.aliases aliases.each do |a, en| From 32c63d97add4d403d158b27de413bb4b0c51a681 Mon Sep 17 00:00:00 2001 From: John Shahid Date: Sun, 29 Jul 2012 14:45:46 -0400 Subject: [PATCH 2/3] move the test to test/ruby/test_transcode.rb and change the test to look like what @tenderlove and @nobu suggested. --- test/ruby/test_encoding.rb | 11 ----------- test/ruby/test_transcode.rb | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/test/ruby/test_encoding.rb b/test/ruby/test_encoding.rb index 850f21b4134a5a..ef2dc39c4d0328 100644 --- a/test/ruby/test_encoding.rb +++ b/test/ruby/test_encoding.rb @@ -17,17 +17,6 @@ def test_encoding end end - def test_encoding_of_ascii_originating_from_binary - binary_string = "\x82foo" - ascii_string = binary_string.bytes.to_a.pack('c*')[1..-1] - assert_equal "foo", ascii_string - assert_equal "ASCII-8BIT", ascii_string.encoding.name - utf8_string = ascii_string.encode("UTF-8") - assert_equal "foo", utf8_string - assert_equal "UTF-8", utf8_string.encoding.name - puts "End" - end - def test_enc_names aliases = Encoding.aliases aliases.each do |a, en| diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 9cb42c61d27f78..0cea999ffaa7f2 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -60,6 +60,30 @@ def check_both_ways2(str1, enc1, str2, enc2) assert_equal(str2.force_encoding(enc2), str1.encode(enc2, enc1)) end + def test_encoding_of_ascii_originating_from_binary + binary_string = [0x82, 0x66, 0x6f, 0x6f] + class << binary_string + # create a copy on write substring that contains + # just the ascii characters (i.e. foo), in JRuby + # the underlying string have the same buffer backing + # it up, but the offset of the string will be 1 instead + # of 0. + def make_cow_substring + pack('C4').slice(1, 3) + end + end + + ascii_string = binary_string.make_cow_substring + assert_equal("foo", ascii_string) + assert_equal(Encoding::ASCII_8BIT, ascii_string.encoding) + utf8_string = nil + assert_nothing_raised("JRUBY-6764") do + utf8_string = ascii_string.encode(Encoding::UTF_8) + end + assert_equal("foo", utf8_string) + assert_equal(Encoding::UTF_8, utf8_string.encoding) + end + def test_encodings check_both_ways("\u307E\u3064\u3082\u3068 \u3086\u304D\u3072\u308D", "\x82\xdc\x82\xc2\x82\xe0\x82\xc6 \x82\xe4\x82\xab\x82\xd0\x82\xeb", 'shift_jis') # まつもと ゆきひろ From a8012ee3b2f56f1213709dae9c55150a640f2ff0 Mon Sep 17 00:00:00 2001 From: John Shahid Date: Thu, 2 Aug 2012 07:40:04 -0400 Subject: [PATCH 3/3] make the test string a little longer. --- test/ruby/test_transcode.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 0cea999ffaa7f2..8052c821942aa8 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -61,26 +61,28 @@ def check_both_ways2(str1, enc1, str2, enc2) end def test_encoding_of_ascii_originating_from_binary - binary_string = [0x82, 0x66, 0x6f, 0x6f] + binary_string = [0x82, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, + 0x61, 0x20, 0x76, 0x65, 0x72, 0x79, 0x20, 0x6c, 0x6f, + 0x6e, 0x67, 0x20, 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67] class << binary_string # create a copy on write substring that contains - # just the ascii characters (i.e. foo), in JRuby + # just the ascii characters (i.e. this is...), in JRuby # the underlying string have the same buffer backing # it up, but the offset of the string will be 1 instead # of 0. def make_cow_substring - pack('C4').slice(1, 3) + pack('C27').slice(1, 26) end end ascii_string = binary_string.make_cow_substring - assert_equal("foo", ascii_string) + assert_equal("this is a very long string", ascii_string) assert_equal(Encoding::ASCII_8BIT, ascii_string.encoding) utf8_string = nil assert_nothing_raised("JRUBY-6764") do utf8_string = ascii_string.encode(Encoding::UTF_8) end - assert_equal("foo", utf8_string) + assert_equal("this is a very long string", utf8_string) assert_equal(Encoding::UTF_8, utf8_string.encoding) end