commit 02e71b982e039fca40b98e4d6d4d3d8fa482240c
parent d97c127e9f43354f883d7b4215df20478f41b991
Author: Eric Weikl <eric.weikl@gmx.net>
Date: Sun, 4 Aug 2013 15:24:59 +0200
Merge branch 'sup-heliotrope/develop' into maildir-sync
Diffstat:
15 files changed, 174 insertions(+), 85 deletions(-)
diff --git a/Gemfile b/Gemfile
@@ -1,3 +1,3 @@
-source 'http://rubygems.org/'
+source 'https://rubygems.org/'
gemspec
diff --git a/bin/sup b/bin/sup
@@ -158,7 +158,11 @@ begin
$die = false
trap("TERM") { |x| $die = true }
- trap("WINCH") { |x| BufferManager.sigwinch_happened! }
+ trap("WINCH") do |x|
+ ::Thread.new do
+ BufferManager.sigwinch_happened!
+ end
+ end
if(s = Redwood::SourceManager.source_for DraftManager.source_name)
DraftManager.source = s
@@ -290,8 +294,8 @@ begin
b.mode.load_in_background if new
when :search
completions = LabelManager.all_labels.map { |l| "label:#{LabelManager.string_for l}" }
- completions = completions.each { |l| l.force_encoding 'UTF-8' if l.methods.include?(:encoding) }
- completions += ["from:", "to:", "after:", "before:", "date:", "limit:", "AND", "OR", "NOT"]
+ completions = completions.each { |l| l.fix_encoding }
+ completions += Index::COMPL_PREFIXES
query = BufferManager.ask_many_with_completions :search, "Search all messages (enter for saved searches): ", completions
unless query.nil?
if query.empty?
@@ -304,7 +308,7 @@ begin
SearchResultsMode.spawn_from_query "is:unread"
when :list_labels
labels = LabelManager.all_labels.map { |l| LabelManager.string_for l }
- labels = labels.each { |l| l.force_encoding 'UTF-8' if l.methods.include?(:encoding) }
+ labels = labels.each { |l| l.fix_encoding }
user_label = bm.ask_with_completions :label, "Show threads with label (enter for listing): ", labels
unless user_label.nil?
diff --git a/lib/sup/buffer.rb b/lib/sup/buffer.rb
@@ -126,14 +126,11 @@ class Buffer
@w.attrset Colormap.color_for(opts[:color] || :none, opts[:highlight])
s ||= ""
maxl = @width - x # maximum display width width
- stringl = maxl # string "length"
# fill up the line with blanks to overwrite old screen contents
@w.mvaddstr y, x, " " * maxl unless opts[:no_fill]
- ## the next horribleness is thanks to ruby's lack of widechar support
- stringl += 1 while stringl < s.length && s[0 ... stringl].display_length < maxl
- @w.mvaddstr y, x, s[0 ... stringl]
+ @w.mvaddstr y, x, s.slice_by_display_length(maxl)
end
def clear
@@ -450,7 +447,7 @@ EOS
def ask_with_completions domain, question, completions, default=nil
ask domain, question, default do |s|
- s.force_encoding 'UTF-8' if s.methods.include?(:encoding)
+ s.fix_encoding
completions.select { |x| x =~ /^#{Regexp::escape s}/iu }.map { |x| [x, x] }
end
end
@@ -467,8 +464,8 @@ EOS
raise "william screwed up completion: #{partial.inspect}"
end
- prefix.force_encoding 'UTF-8' if prefix.methods.include?(:encoding)
- target.force_encoding 'UTF-8' if target.methods.include?(:encoding)
+ prefix.fix_encoding
+ target.fix_encoding
completions.select { |x| x =~ /^#{Regexp::escape target}/i }.map { |x| [prefix + x, x] }
end
end
@@ -477,10 +474,10 @@ EOS
ask domain, question, default do |partial|
prefix, target = partial.split_on_commas_with_remainder
target ||= prefix.pop || ""
- target.force_encoding 'UTF-8' if target.methods.include?(:encoding)
+ target.fix_encoding
prefix = prefix.join(", ") + (prefix.empty? ? "" : ", ")
- prefix.force_encoding 'UTF-8' if prefix.methods.include?(:encoding)
+ prefix.fix_encoding
completions.select { |x| x =~ /^#{Regexp::escape target}/i }.sort_by { |c| [ContactManager.contact_for(c) ? 0 : 1, c] }.map { |x| [prefix + x, x] }
end
@@ -622,7 +619,7 @@ EOS
tf.deactivate
draw_screen :sync => false, :status => status, :title => title
end
- tf.value.tap { |x| x.force_encoding Encoding::UTF_8 if x && x.respond_to?(:encoding) }
+ tf.value.tap { |x| x.fix_encoding if x }
end
def ask_getch question, accept=nil
diff --git a/lib/sup/crypto.rb b/lib/sup/crypto.rb
@@ -275,7 +275,7 @@ EOS
end
plain_data.seek(0, IO::SEEK_SET)
output = plain_data.read
- output.force_encoding Encoding::ASCII_8BIT if output.respond_to? :force_encoding
+ output.transcode(Encoding::ASCII_8BIT, output.encoding)
## TODO: test to see if it is still necessary to do a 2nd run if verify
## fails.
@@ -314,7 +314,7 @@ EOS
msg = RMail::Parser.read output
if msg.header.content_type =~ %r{^multipart/} && !msg.multipart?
output = "MIME-Version: 1.0\n" + output
- output.force_encoding Encoding::ASCII_8BIT if output.respond_to? :force_encoding
+ output.fix_encoding
msg = RMail::Parser.read output
end
end
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
@@ -1,4 +1,5 @@
ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
+ENV["XAPIAN_CJK_NGRAM"] = "1"
require 'xapian'
require 'set'
@@ -350,6 +351,48 @@ EOS
class ParseError < StandardError; end
+ # Stemmed
+ NORMAL_PREFIX = {
+ 'subject' => {:prefix => 'S', :exclusive => false},
+ 'body' => {:prefix => 'B', :exclusive => false},
+ 'from_name' => {:prefix => 'FN', :exclusive => false},
+ 'to_name' => {:prefix => 'TN', :exclusive => false},
+ 'name' => {:prefix => %w(FN TN), :exclusive => false},
+ 'attachment' => {:prefix => 'A', :exclusive => false},
+ 'email_text' => {:prefix => 'E', :exclusive => false},
+ '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
+ }
+
+ # Unstemmed
+ BOOLEAN_PREFIX = {
+ 'type' => {:prefix => 'K', :exclusive => true},
+ 'from_email' => {:prefix => 'FE', :exclusive => false},
+ 'to_email' => {:prefix => 'TE', :exclusive => false},
+ 'email' => {:prefix => %w(FE TE), :exclusive => false},
+ 'date' => {:prefix => 'D', :exclusive => true},
+ 'label' => {:prefix => 'L', :exclusive => false},
+ 'source_id' => {:prefix => 'I', :exclusive => true},
+ 'attachment_extension' => {:prefix => 'O', :exclusive => false},
+ 'msgid' => {:prefix => 'Q', :exclusive => true},
+ 'id' => {:prefix => 'Q', :exclusive => true},
+ 'thread' => {:prefix => 'H', :exclusive => false},
+ 'ref' => {:prefix => 'R', :exclusive => false},
+ 'location' => {:prefix => 'J', :exclusive => false},
+ }
+
+ PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
+
+ COMPL_OPERATORS = %w[AND OR NOT]
+ COMPL_PREFIXES = (
+ %w[
+ from to
+ is has label
+ filename filetype
+ before on in during after
+ limit
+ ] + NORMAL_PREFIX.keys + BOOLEAN_PREFIX.keys
+ ).reject(&:empty?).uniq.map{|p|"#{p}:"} + COMPL_OPERATORS # skip the '' catch-all key and the doubled "label"
+
## parse a query string from the user. returns a query object
## that can be passed to any index method with a 'query'
## argument.
@@ -524,37 +567,6 @@ EOS
private
- # Stemmed
- NORMAL_PREFIX = {
- 'subject' => {:prefix => 'S', :exclusive => false},
- 'body' => {:prefix => 'B', :exclusive => false},
- 'from_name' => {:prefix => 'FN', :exclusive => false},
- 'to_name' => {:prefix => 'TN', :exclusive => false},
- 'name' => {:prefix => %w(FN TN), :exclusive => false},
- 'attachment' => {:prefix => 'A', :exclusive => false},
- 'email_text' => {:prefix => 'E', :exclusive => false},
- '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
- }
-
- # Unstemmed
- BOOLEAN_PREFIX = {
- 'type' => {:prefix => 'K', :exclusive => true},
- 'from_email' => {:prefix => 'FE', :exclusive => false},
- 'to_email' => {:prefix => 'TE', :exclusive => false},
- 'email' => {:prefix => %w(FE TE), :exclusive => false},
- 'date' => {:prefix => 'D', :exclusive => true},
- 'label' => {:prefix => 'L', :exclusive => false},
- 'source_id' => {:prefix => 'I', :exclusive => true},
- 'attachment_extension' => {:prefix => 'O', :exclusive => false},
- 'msgid' => {:prefix => 'Q', :exclusive => true},
- 'id' => {:prefix => 'Q', :exclusive => true},
- 'thread' => {:prefix => 'H', :exclusive => false},
- 'ref' => {:prefix => 'R', :exclusive => false},
- 'location' => {:prefix => 'J', :exclusive => false},
- }
-
- PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
-
MSGID_VALUENO = 0
THREAD_VALUENO = 1
DATE_VALUENO = 2
diff --git a/lib/sup/message.rb b/lib/sup/message.rb
@@ -264,6 +264,8 @@ class Message
rescue SourceError, SocketError, RMail::EncodingUnsupportedError => e
warn "problem reading message #{id}"
+ ## log first: the fallback chunks below must remain the value of this clause
+ debug "could not load message: #{location.inspect}, exception: #{e.inspect}"
[Chunk::Text.new(error_message.split("\n"))]
end
end
diff --git a/lib/sup/modes/thread_index_mode.rb b/lib/sup/modes/thread_index_mode.rb
@@ -886,14 +886,14 @@ protected
abbrev =
if cur_width + name.display_length > from_width
- name[0 ... (from_width - cur_width - 1)] + "."
+ name.slice_by_display_length(from_width - cur_width - 1) + "."
elsif cur_width + name.display_length == from_width
- name[0 ... (from_width - cur_width)]
+ name.slice_by_display_length(from_width - cur_width)
else
if last
- name[0 ... (from_width - cur_width)]
+ name.slice_by_display_length(from_width - cur_width)
else
- name[0 ... (from_width - cur_width - 1)] + ","
+ name.slice_by_display_length(from_width - cur_width - 1) + ","
end
end
diff --git a/lib/sup/sent.rb b/lib/sup/sent.rb
@@ -25,7 +25,10 @@ class SentManager
end
def write_sent_message date, from_email, &block
- @source.store_message date, from_email, &block
+ debug "store the sent message (locking sent source..)"
+ @source.poll_lock.synchronize do
+ @source.store_message date, from_email, &block
+ end
PollManager.poll_from @source
end
end
diff --git a/lib/sup/source.rb b/lib/sup/source.rb
@@ -22,30 +22,23 @@ class Source
## read, delete them, or anything else. (Well, it's nice to be able
## to delete them, but that is optional.)
##
- ## On the other hand, Sup assumes that you can assign each message a
- ## unique integer id, such that newer messages have higher ids than
- ## earlier ones, and that those ids stay constant across sessions
- ## (in the absence of some other client going in and fucking
- ## everything up). For example, for mboxes I use the file offset of
- ## the start of the message. If a source does NOT have that
- ## capability, e.g. IMAP, then you have to do a little more work to
- ## simulate it.
+ ## Messages are identified internally based on the message id, and stored
+ ## with a unique document id. Along with the message, source information
+ ## that can contain arbitrary fields (set up by the source) is stored. This
+ ## information will be passed back to the source when a message in the
+ ## index (Sup database) needs to be located in its source, e.g. when
+ ## re-reading or modifying a unique message.
##
## To write a new source, subclass this class, and implement:
##
- ## - start_offset
- ## - end_offset (exclusive!) (or, #done?)
+ ## - initialize
## - load_header offset
## - load_message offset
## - raw_header offset
## - raw_message offset
- ## - check (optional)
+ ## - store_message (optional)
+ ## - poll (loads new messages)
## - go_idle (optional)
- ## - next (or each, if you prefer): should return a message and an
- ## array of labels.
- ##
- ## ... where "offset" really means unique id. (You can tell I
- ## started with mbox.)
##
## All exceptions relating to accessing the source must be caught
## and rethrown as FatalSourceErrors or OutOfSyncSourceErrors.
@@ -57,8 +50,7 @@ class Source
## Finally, be sure the source is thread-safe, since it WILL be
## pummelled from multiple threads at once.
##
- ## Examples for you to look at: mbox/loader.rb, imap.rb, and
- ## maildir.rb.
+ ## Examples for you to look at: mbox.rb and maildir.rb.
bool_accessor :usual, :archived
attr_reader :uri
diff --git a/lib/sup/util.rb b/lib/sup/util.rb
@@ -7,6 +7,7 @@ require 'pathname'
require 'set'
require 'enumerator'
require 'benchmark'
+require 'unicode'
## time for some monkeypatching!
class Symbol
@@ -254,14 +255,14 @@ class Object
end
class String
- ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
- ## the utf8 regex and count those. otherwise, use the byte length.
def display_length
- if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
- # scan hack is somewhat slow, worth trying to cache
- @display_length ||= scan(/./u).size
- else
- size
+ Unicode.width(self, false) # not memoized: a cached width goes stale once the string is mutated, and frozen strings cannot hold the ivar
+ end
+
+ def slice_by_display_length len
+ each_char.each_with_object "" do |c, buffer|
+ len -= c.display_length
+ buffer << c if len >= 0
end
end
@@ -348,14 +349,14 @@ class String
def wrap len
ret = []
s = self
- while s.length > len
- cut = s[0 ... len].rindex(/\s/)
+ while s.display_length > len
+ cut = s.slice_by_display_length(len).rindex(/\s/)
if cut
ret << s[0 ... cut]
s = s[(cut + 1) .. -1]
else
- ret << s[0 ... len]
- s = s[len .. -1]
+ ret << [s.slice_by_display_length(len), s[0, 1]].max_by(&:length) # always consume >= 1 char: a glyph wider than len would otherwise loop forever
+ s = s[ret.last.length .. -1]
end
end
ret << s
@@ -379,7 +380,7 @@ class String
# now convert to $encoding
encode!($encoding, :invalid => :replace, :undef => :replace)
- fail "Could not create valid #{$encoding.inspect?} string out of: '#{self.to_s}'." unless valid_encoding?
+ fail "Could not create valid #{$encoding.inspect} string out of: '#{self.to_s}'." unless valid_encoding?
self
end
@@ -403,7 +404,7 @@ class String
fix_encoding
end
- fail "Could not create valid #{to_encoding.inspect?} string out of: '#{self.to_s}'." unless valid_encoding?
+ fail "Could not create valid #{to_encoding.inspect} string out of: '#{self.to_s}'." unless valid_encoding?
self
end
diff --git a/lib/sup/util/query.rb b/lib/sup/util/query.rb
@@ -1,8 +1,13 @@
module Redwood
module Util
module Query
+ class QueryDescriptionError < ArgumentError; end
+
def self.describe query
- query.description.force_encoding("UTF-8")
+ d = query.description.force_encoding("UTF-8")
+
+ raise QueryDescriptionError.new(d) unless d.valid_encoding?
+ return d
end
end
end
diff --git a/sup.gemspec b/sup.gemspec
@@ -52,6 +52,7 @@ SUP: Please run `sup-psych-ify-config-files` to migrate from 0.13 to 0.14
s.add_runtime_dependency "mime-types", "~> 1.0"
s.add_runtime_dependency "locale", "~> 2.0"
s.add_runtime_dependency "chronic", "~> 0.9.1"
+ s.add_runtime_dependency "unicode", "~> 0.4.4"
s.add_development_dependency "bundler", "~> 1.3"
s.add_development_dependency "rake"
diff --git a/test/unit/util/test_query.rb b/test/unit/util/test_query.rb
@@ -18,5 +18,20 @@ describe Redwood::Util::Query do
desc = Redwood::Util::Query.describe(query)
_ = (life + desc) # No exception thrown
end
+
+ it "returns a valid UTF-8 description of bad input" do
+ msg = "asdfa \xc3\x28 åasdf"
+ query = Xapian::Query.new msg
+ life = 'hæi'
+
+ # the description is now tagged as UTF-8 but may contain invalid byte sequences
+ assert_raises Redwood::Util::Query::QueryDescriptionError do
+ desc = Redwood::Util::Query.describe (query)
+ end
+
+ assert_raises Encoding::CompatibilityError do
+ _ = life + query.description
+ end
+ end
end
end
diff --git a/test/unit/util/test_string.rb b/test/unit/util/test_string.rb
@@ -0,0 +1,57 @@
+# encoding: utf-8
+
+require "test_helper"
+
+require "sup/util"
+
+describe "Sup's String extension" do
+ describe "#display_length" do
+ let :data do
+ [
+ ['some words', 10,],
+ ['中文', 4,],
+ ['ä', 1,],
+ ]
+ end
+
+ it "calculates display length of a string" do
+ data.each do |(str, length)|
+ str.display_length.must_equal length
+ end
+ end
+ end
+
+ describe "#slice_by_display_length(len)" do
+ let :data do
+ [
+ ['some words', 6, 'some w'],
+ ['中文', 2, '中'],
+ ['älpha', 3, 'älp'],
+ ]
+ end
+
+ it "slices string by display length" do
+ data.each do |(str, length, sliced)|
+ str.slice_by_display_length(length).must_equal sliced
+ end
+ end
+ end
+
+ describe "#wrap" do
+ let :data do
+ [
+ ['some words', 6, ['some', 'words']],
+ ['some words', 80, ['some words']],
+ ['中文', 2, ['中', '文']],
+ ['中文', 5, ['中文']],
+ ['älpha', 3, ['älp', 'ha']],
+ ]
+ end
+
+ it "wraps string by display length" do
+ data.each do |(str, length, wrapped)|
+ str.wrap(length).must_equal wrapped
+ end
+ end
+ end
+end
diff --git a/test/unit/util/uri.rb b/test/unit/util/test_uri.rb