From mboxrd@z Thu Jan 1 00:00:00 1970
From: nicolas.pouillard@gmail.com (Nicolas Pouillard)
Date: Tue, 19 Feb 2008 11:17:50 +0100
Subject: [sup-talk] [PATCH] Unwrap br0ken URLs.
Message-ID: <12034162702820-git-send-email-nicolas.pouillard@gmail.com>

---
 lib/sup/message-chunks.rb |    2 +-
 lib/sup/util.rb           |   25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/lib/sup/message-chunks.rb b/lib/sup/message-chunks.rb
index 0606395..98b829c 100644
--- a/lib/sup/message-chunks.rb
+++ b/lib/sup/message-chunks.rb
@@ -147,7 +147,7 @@ EOS
 
     attr_reader :lines
     def initialize lines
-      @lines = lines.map { |l| l.chomp.wrap WRAP_LEN }.flatten # wrap
+      @lines = lines.unwrap_urls.map { |l| l.chomp.wrap WRAP_LEN }.flatten # wrap
 
       ## trim off all empty lines except one
       @lines.pop while @lines.length > 1 && @lines[-1] =~ /^\s*$/ && @lines[-2] =~ /^\s*$/
diff --git a/lib/sup/util.rb b/lib/sup/util.rb
index ceaf0b8..99e73b4 100644
--- a/lib/sup/util.rb
+++ b/lib/sup/util.rb
@@ -401,6 +401,31 @@ class Array
 
   def last= e; self[-1] = e end
   def nonempty?; !empty? end
+
+  URL_CHAR = /[a-zA-Z0-9\-@;\/?:&=%$_.+!*\x27()~,#]/
+  URL_CHAR_LESS = /[a-zA-Z0-9\-@;\/?:&=%$_.+!*\x27()~]/
+  URL_RE = %r{(?:https?://|ftp://|news://|mailto:|file://)#{URL_CHAR}+}
+  TRAILING_URL_RE = /#{URL_RE}$/
+  LEADING_URL_RE = /^#{URL_RE}/
+  URL_PART_RE = /^#{URL_CHAR}+#{URL_CHAR_LESS}/
+
+  def unwrap_urls
+    res = []
+    len = size
+    i = 0
+    while i < len do
+      x = self[i]
+      y = self[i+1]
+      if y && x =~ TRAILING_URL_RE && y =~ URL_PART_RE && y !~ LEADING_URL_RE
+        res << x.chomp + y
+        i += 2
+      else
+        res << x
+        i += 1
+      end
+    end
+    res
+  end
 end
 
 class Time
-- 
1.5.3.1.109.gacd69