diff --git a/lib/mapi/msg.rb b/lib/mapi/msg.rb index 6414db7..5dd54e0 100644 --- a/lib/mapi/msg.rb +++ b/lib/mapi/msg.rb @@ -173,7 +173,9 @@ def self.parse_nameid obj # parse guids # this is the guids for named properities (other than builtin ones) # i think PS_PUBLIC_STRINGS, and PS_MAPI are builtin. - guids = [PS_PUBLIC_STRINGS] + guids_obj.read.scan(/.{16}/m).map do |str| + # Scan using an ascii pattern - it's binary data we're looking + # at, so we don't want to look for unicode characters + guids = [PS_PUBLIC_STRINGS] + guids_obj.read.scan(/.{16}/mn).map do |str| Ole::Types.load_guid str end @@ -187,7 +189,9 @@ def self.parse_nameid obj # parse actual props. # not sure about any of this stuff really. # should flip a few bits in the real msg, to get a better understanding of how this works. - props = props_obj.read.scan(/.{8}/m).map do |str| + # Scan using an ascii pattern - it's binary data we're looking + # at, so we don't want to look for unicode characters + props = props_obj.read.scan(/.{8}/mn).map do |str| flags, offset = str[4..-1].unpack 'v2' # the property will be serialised as this pseudo property, mapping it to this named property pseudo_prop = 0x8000 + offset @@ -249,11 +253,14 @@ def parse_substg key, encoding, offset, obj def parse_properties obj data = obj.read # don't really understand this that well... + pad = data.length % 16 unless (pad == 0 || pad == 8) and data[0...pad] == "\000" * pad Log.warn "padding was not as expected #{pad} (#{data.length}) -> #{data[0...pad].inspect}" end - data[pad..-1].scan(/.{16}/m).each do |data| + # Scan using an ascii pattern - it's binary data we're looking + # at, so we don't want to look for unicode characters + data[pad..-1].scan(/.{16}/mn).each do |data| property, encoding = ('%08x' % data.unpack('V')).scan /.{4}/ key = property.hex # doesn't make any sense to me. probably because its a serialization of some internal diff --git a/lib/mime.rb b/lib/mime.rb index 99b9fbc..c738aa3 100644 --- a/lib/mime.rb +++ b/lib/mime.rb @@ -21,124 +21,126 @@ # I don't want to lower case things, just for starters. # * Mime was the original place I wrote #to_tree, intended as a quick debug hack. # -class Mime - Hash = begin - require 'orderedhash' - OrderedHash - rescue LoadError - Hash - end - - attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue - - # Create a Mime object using +str+ as an initial serialization, which must contain headers - # and a body (even if empty). Needs work. - def initialize str, ignore_body=false - headers, @body = $~[1..-1] if str[/(.*?\r?\n)(?:\r?\n(.*))?\Z/m] - - @headers = Hash.new { |hash, key| hash[key] = [] } - @body ||= '' - headers.to_s.scan(/^\S+:\s*.*(?:\n\t.*)*/).each do |header| - @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong - end - - # don't have to have content type i suppose - @content_type, attrs = nil, {} - if content_type = @headers['Content-Type'][0] - @content_type, attrs = Mime.split_header content_type - end - - return if ignore_body - - if multipart? - if body.empty? - @preamble = '' - @epilogue = '' - @parts = [] - else - # we need to split the message at the boundary - boundary = attrs['boundary'] or raise "no boundary for multipart message" - - # splitting the body: - parts = body.split(/--#{Regexp.quote boundary}/m) - unless parts[-1] =~ /^--/; warn "bad multipart boundary (missing trailing --)" - else parts[-1][0..1] = '' - end - parts.each_with_index do |part, i| - part =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m - part.replace $2 - warn "bad multipart boundary" if (1...parts.length-1) === i and !($1 && $3) - end - @preamble = parts.shift - @epilogue = parts.pop - @parts = parts.map { |part| Mime.new part } - end - end - end - - def multipart? - @content_type && @content_type =~ /^multipart/ ? true : false - end - - def inspect - # add some extra here. - "#" - end - - def to_tree - if multipart? - str = "- #{inspect}\n" - parts.each_with_index do |part, i| - last = i == parts.length - 1 - part.to_tree.split(/\n/).each_with_index do |line, j| - str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n" - end - end - str - else - "- #{inspect}\n" - end - end - - def to_s opts={} - opts = {:boundary_counter => 0}.merge opts - if multipart? - boundary = Mime.make_boundary opts[:boundary_counter] += 1, self - @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. - flatten.join("\r\n--" + boundary) - content_type, attrs = Mime.split_header @headers['Content-Type'][0] - attrs['boundary'] = boundary - @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')] - end - - str = '' - @headers.each do |key, vals| - vals.each { |val| str << "#{key}: #{val}\r\n" } - end - str << "\r\n" + @body - end - - def self.split_header header - # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other - # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more - # accurate parser later. - # maybe move to some sort of Header class. but not all headers should be of it i suppose. - # at least add a join_header then, taking name and {}. for use in Mime#to_s (for boundary - # rewrite), and Attachment#to_mime, among others... - attrs = {} - header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value| - if attrs[key]; warn "ignoring duplicate header attribute #{key.inspect}" - else attrs[key] = value[/^"/] ? value[1..-2] : value - end - end - - [header[/^[^;]+/].strip, attrs] - end - - # +i+ is some value that should be unique for all multipart boundaries for a given message - def self.make_boundary i, extra_obj = Mime - "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now}" - end +module Mapi + class Mime + Hash = begin + require 'orderedhash' + OrderedHash + rescue LoadError + Hash + end + + attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue + + # Create a Mime object using +str+ as an initial serialization, which must contain headers + # and a body (even if empty). Needs work. + def initialize str, ignore_body=false + headers, @body = $~[1..-1] if str[/(.*?\r?\n)(?:\r?\n(.*))?\Z/m] + + @headers = Hash.new { |hash, key| hash[key] = [] } + @body ||= '' + headers.to_s.scan(/^\S+:\s*.*(?:\n\t.*)*/).each do |header| + @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong + end + + # don't have to have content type i suppose + @content_type, attrs = nil, {} + if content_type = @headers['Content-Type'][0] + @content_type, attrs = Mime.split_header content_type + end + + return if ignore_body + + if multipart? + if body.empty? + @preamble = '' + @epilogue = '' + @parts = [] + else + # we need to split the message at the boundary + boundary = attrs['boundary'] or raise "no boundary for multipart message" + + # splitting the body: + parts = body.split(/--#{Regexp.quote boundary}/m) + unless parts[-1] =~ /^--/; warn "bad multipart boundary (missing trailing --)" + else parts[-1][0..1] = '' + end + parts.each_with_index do |part, i| + part =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m + part.replace $2 + warn "bad multipart boundary" if (1...parts.length-1) === i and !($1 && $3) + end + @preamble = parts.shift + @epilogue = parts.pop + @parts = parts.map { |part| Mime.new part } + end + end + end + + def multipart? + @content_type && @content_type =~ /^multipart/ ? true : false + end + + def inspect + # add some extra here. + "#" + end + + def to_tree + if multipart? + str = "- #{inspect}\n" + parts.each_with_index do |part, i| + last = i == parts.length - 1 + part.to_tree.split(/\n/).each_with_index do |line, j| + str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n" + end + end + str + else + "- #{inspect}\n" + end + end + + def to_s opts={} + opts = {:boundary_counter => 0}.merge opts + if multipart? + boundary = Mime.make_boundary opts[:boundary_counter] += 1, self + @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. + flatten.join("\r\n--" + boundary) + content_type, attrs = Mime.split_header @headers['Content-Type'][0] + attrs['boundary'] = boundary + @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')] + end + + str = '' + @headers.each do |key, vals| + vals.each { |val| str << "#{key}: #{val}\r\n" } + end + str << "\r\n" + @body + end + + def self.split_header header + # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other + # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more + # accurate parser later. + # maybe move to some sort of Header class. but not all headers should be of it i suppose. + # at least add a join_header then, taking name and {}. for use in Mime#to_s (for boundary + # rewrite), and Attachment#to_mime, among others... + attrs = {} + header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value| + if attrs[key]; warn "ignoring duplicate header attribute #{key.inspect}" + else attrs[key] = value[/^"/] ? value[1..-2] : value + end + end + + [header[/^[^;]+/].strip, attrs] + end + + # +i+ is some value that should be unique for all multipart boundaries for a given message + def self.make_boundary i, extra_obj = Mime + "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now}" + end + end end =begin @@ -163,3 +165,4 @@ def self.make_boundary i, extra_obj = Mime =end + diff --git a/test/test_mime.rb b/test/test_mime.rb index 92e6be8..e7f4348 100644 --- a/test/test_mime.rb +++ b/test/test_mime.rb @@ -9,7 +9,7 @@ class TestMime < Test::Unit::TestCase # test out the way it partitions a message into parts def test_parsing_no_multipart - mime = Mime.new "Header1: Value1\r\nHeader2: Value2\r\n\r\nBody text." + mime = Mapi::Mime.new "Header1: Value1\r\nHeader2: Value2\r\n\r\nBody text." assert_equal ['Value1'], mime.headers['Header1'] assert_equal 'Body text.', mime.body assert_equal false, mime.multipart? @@ -18,7 +18,7 @@ def test_parsing_no_multipart end def test_boundaries - assert_match(/^----_=_NextPart_001_/, Mime.make_boundary(1)) + assert_match(/^----_=_NextPart_001_/, Mapi::Mime.make_boundary(1)) end end