Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions warc/tests/test_warc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def f(type):
"Content-Type: application/http; msgtype=response\r\n" +
"P3P: policyref=\"http://www.w3.org/2001/05/P3P/p3p.xml\"\r\n" +
"Page.Ly: v4.1\r\n" +
"BadHeader: \n" +
"WARC-Type: response\r\n" +
"WARC-Record-ID: <urn:uuid:80fb9262-5402-11e1-8206-545200690126>\r\n" +
"WARC-Target-URI: http://example.com/\r\n" +
Expand All @@ -75,6 +76,7 @@ def test_read_header1(self):
assert h.record_id == "<urn:uuid:80fb9262-5402-11e1-8206-545200690126>"
assert h.type == "response"
assert h.content_length == 10
assert 'BadHeader' not in h

def test_empty(self):
reader = WARCReader(StringIO(""))
Expand Down
11 changes: 7 additions & 4 deletions warc/warc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from . import gzip2
from .utils import CaseInsensitiveDict, FilePart

logger = logging.getLogger(__name__)

class WARCHeader(CaseInsensitiveDict):
"""The WARC Header object represents the headers of a WARC record.

Expand Down Expand Up @@ -340,10 +342,11 @@ def read_header(self, fileobj):
if line == "\r\n": # end of headers
break
m = self.RE_HEADER.match(line)
if not m:
raise IOError("Bad header line: %r" % line)
name, value = m.groups()
headers[name] = value
if m:
name, value = m.groups()
headers[name] = value
else:
logger.warning("Bad header line: %r" % line)
return WARCHeader(headers)

def expect(self, fileobj, expected_line, message=None):
Expand Down