diff --git a/docdiff.rb b/docdiff.rb index c3ef35e..d504390 100755 --- a/docdiff.rb +++ b/docdiff.rb @@ -61,6 +61,26 @@ class DocDiff doc1.encoding, doc1.eol) after_change = Document.new(line[2].to_s, doc2.encoding, doc2.eol) + Difference.new(before_change.split_to_word_verbose, + after_change.split_to_word_verbose).each{|word| + words << word + } + else # :common_elt_elt, :del_elt, or :add_elt + words << line + end + } + words + end + + def compare_by_wordwrap(doc1, doc2) + lines = compare_by_line(doc1, doc2) + words = Difference.new + lines.each{|line| + if line.first == :change_elt + before_change = Document.new(line[1].to_s, + doc1.encoding, doc1.eol) + after_change = Document.new(line[2].to_s, + doc2.encoding, doc2.eol) Difference.new(before_change.split_to_word, after_change.split_to_word).each{|word| words << word @@ -120,6 +140,7 @@ class DocDiff case option[:resolution] when "line"; then difference = compare_by_line(doc1, doc2) when "word"; then difference = compare_by_line_word(doc1, doc2) + when "wordwrap"; then difference = compare_by_wordwrap(doc1, doc2) when "char"; then difference = compare_by_line_word_char(doc1, doc2) else raise "Unsupported resolution: #{option[:resolution].inspect}" @@ -184,7 +205,7 @@ if $0 == __FILE__ # do_config_stuff default_config = { - :resolution => "word", + :resolution => "wordwrap", :encoding => "auto", :eol => "auto", :format => "html", @@ -198,18 +219,20 @@ if $0 == __FILE__ # if invoked as "worddiff" or "chardiff", # appropriate resolution is set respectively. 
case File.basename($0, ".*") + when "wordwrapdiff" then; clo[:resolution] = "wordwrap" when "worddiff" then; clo[:resolution] = "word" when "chardiff" then; clo[:resolution] = "char" end ARGV.options {|o| o.def_option('--resolution=RESOLUTION', - possible_resolutions = ['line', 'word', 'char'], + possible_resolutions = ['line', 'word', 'wordwrap', 'char'], 'specify resolution (granularity)', possible_resolutions.join('|') + ' (default is word)' ){|clo[:resolution]| clo[:resolution] ||= "word"} o.def_option('--line', 'set resolution to line'){clo[:resolution] = "line"} o.def_option('--word', 'set resolution to word'){clo[:resolution] = "word"} + o.def_option('--wordwrap', 'set resolution to wordwrap'){clo[:resolution] = "wordwrap"} o.def_option('--char', 'set resolution to char'){clo[:resolution] = "char"} o.def_option('--encoding=ENCODING', diff --git a/docdiff/document.rb b/docdiff/document.rb index 092080a..d3bf0b9 100644 --- a/docdiff/document.rb +++ b/docdiff/document.rb @@ -55,6 +55,9 @@ class Document def split_to_word() @body.split_to_word end + def split_to_word_verbose() + @body.split_to_word_verbose + end def split_to_char() @body.split_to_char end