commit 45d06190590cb21d67868b0cfa1e4aa254a637d3
parent ea9aedd24fc0985a136ab4002d85680d37aabde8
Author: William Morgan <wmorgan-sup@masanjin.net>
Date: Mon, 8 Jun 2009 14:07:51 -0400
reduce quote parsing worst-case behavior
Split a quote regex into two to increase performance on certain long strings.
Thanks to Edward Z. Yang.
Diffstat:
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/lib/sup/message.rb b/lib/sup/message.rb
@@ -26,7 +26,6 @@ class Message
QUOTE_PATTERN = /^\s{0,4}[>|\}]/
BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
- QUOTE_START_PATTERN = /\w.*:$/
SIG_PATTERN = /(^-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/
MAX_SIG_DISTANCE = 15 # lines from the end
@@ -449,7 +448,11 @@ private
when :text
newstate = nil
- if line =~ QUOTE_PATTERN || (line =~ QUOTE_START_PATTERN && nextline =~ QUOTE_PATTERN)
+ ## detect the start of a quote: a line that ends in ":" and contains a
+ ## word character, followed by a quoted line. the check is split into two
+ ## regexen because the original single regex /\w.*:$/ performed very poorly
+ ## on long lines like ":a:a:a:a:a" that occurred in certain emails.
+ if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN)
newstate = :quote
elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
newstate = :sig