diff --git a/src/libextra/json.rs b/src/libextra/json.rs index 75bbd3d8a019d..3938f1a89942b 100644 --- a/src/libextra/json.rs +++ b/src/libextra/json.rs @@ -229,7 +229,6 @@ fn main() { */ use std::char; -use std::cast::transmute; use std::f64; use std::hashmap::HashMap; use std::io; @@ -718,7 +717,7 @@ impl Json { pub struct Parser { priv rdr: T, - priv ch: char, + priv ch: Option, priv line: uint, priv col: uint, } @@ -728,7 +727,7 @@ impl> Parser { pub fn new(rdr: T) -> Parser { let mut p = Parser { rdr: rdr, - ch: '\x00', + ch: Some('\x00'), line: 1, col: 0, }; @@ -756,16 +755,12 @@ impl> Parser { } impl> Parser { - // FIXME: #8971: unsound - fn eof(&self) -> bool { self.ch == unsafe { transmute(-1u32) } } - + fn eof(&self) -> bool { self.ch.is_none() } + fn ch_or_null(&self) -> char { self.ch.unwrap_or('\x00') } fn bump(&mut self) { - match self.rdr.next() { - Some(ch) => self.ch = ch, - None() => self.ch = unsafe { transmute(-1u32) }, // FIXME: #8971: unsound - } + self.ch = self.rdr.next(); - if self.ch == '\n' { + if self.ch_is('\n') { self.line += 1u; self.col = 1u; } else { @@ -773,10 +768,13 @@ impl> Parser { } } - fn next_char(&mut self) -> char { + fn next_char(&mut self) -> Option { self.bump(); self.ch } + fn ch_is(&self, c: char) -> bool { + self.ch == Some(c) + } fn error(&self, msg: ~str) -> Result { Err(Error { line: self.line, col: self.col, msg: msg }) @@ -787,31 +785,32 @@ impl> Parser { if self.eof() { return self.error(~"EOF while parsing value"); } - match self.ch { - 'n' => self.parse_ident("ull", Null), - 't' => self.parse_ident("rue", Boolean(true)), - 'f' => self.parse_ident("alse", Boolean(false)), - '0' .. '9' | '-' => self.parse_number(), - '"' => - match self.parse_str() { - Ok(s) => Ok(String(s)), - Err(e) => Err(e), + match self.ch_or_null() { + 'n' => self.parse_ident("ull", Null), + 't' => self.parse_ident("rue", Boolean(true)), + 'f' => self.parse_ident("alse", Boolean(false)), + '0' .. '9' | '-' => self.parse_number(), + '"' => { + match self.parse_str() { + Ok(s) => Ok(String(s)), + Err(e) => Err(e), + } }, - '[' => self.parse_list(), - '{' => self.parse_object(), - _ => self.error(~"invalid syntax") + '[' => self.parse_list(), + '{' => self.parse_object(), + _ => self.error(~"invalid syntax"), } } fn parse_whitespace(&mut self) { - while self.ch == ' ' || - self.ch == '\n' || - self.ch == '\t' || - self.ch == '\r' { self.bump(); } + while self.ch_is(' ') || + self.ch_is('\n') || + self.ch_is('\t') || + self.ch_is('\r') { self.bump(); } } fn parse_ident(&mut self, ident: &str, value: Json) -> Result { - if ident.chars().all(|c| c == self.next_char()) { + if ident.chars().all(|c| Some(c) == self.next_char()) { self.bump(); Ok(value) } else { @@ -822,7 +821,7 @@ impl> Parser { fn parse_number(&mut self) -> Result { let mut neg = 1.0; - if self.ch == '-' { + if self.ch_is('-') { self.bump(); neg = -1.0; } @@ -832,14 +831,14 @@ impl> Parser { Err(e) => return Err(e) }; - if self.ch == '.' { + if self.ch_is('.') { match self.parse_decimal(res) { Ok(r) => res = r, Err(e) => return Err(e) } } - if self.ch == 'e' || self.ch == 'E' { + if self.ch_is('e') || self.ch_is('E') { match self.parse_exponent(res) { Ok(r) => res = r, Err(e) => return Err(e) @@ -852,32 +851,31 @@ impl> Parser { fn parse_integer(&mut self) -> Result { let mut res = 0.0; - match self.ch { - '0' => { - self.bump(); - - // There can be only one leading '0'. - match self.ch { - '0' .. '9' => return self.error(~"invalid number"), - _ => () - } - } - '1' .. '9' => { - while !self.eof() { - match self.ch { - '0' .. '9' => { - res *= 10.0; - res += ((self.ch as int) - ('0' as int)) as f64; + match self.ch_or_null() { + '0' => { + self.bump(); - self.bump(); - } - _ => break + // There can be only one leading '0'. + match self.ch_or_null() { + '0' .. '9' => return self.error(~"invalid number"), + _ => () + } + }, + '1' .. '9' => { + while !self.eof() { + match self.ch_or_null() { + c @ '0' .. '9' => { + res *= 10.0; + res += ((c as int) - ('0' as int)) as f64; + + self.bump(); + } + _ => break, + } } } - } - _ => return self.error(~"invalid number") + _ => return self.error(~"invalid number"), } - Ok(res) } @@ -885,22 +883,22 @@ impl> Parser { self.bump(); // Make sure a digit follows the decimal place. - match self.ch { - '0' .. '9' => (), - _ => return self.error(~"invalid number") + match self.ch_or_null() { + '0' .. '9' => (), + _ => return self.error(~"invalid number") } let mut res = res; let mut dec = 1.0; while !self.eof() { - match self.ch { - '0' .. '9' => { - dec /= 10.0; - res += (((self.ch as int) - ('0' as int)) as f64) * dec; + match self.ch_or_null() { + c @ '0' .. '9' => { + dec /= 10.0; + res += (((c as int) - ('0' as int)) as f64) * dec; - self.bump(); - } - _ => break + self.bump(); + } + _ => break, } } @@ -913,27 +911,27 @@ impl> Parser { let mut exp = 0u; let mut neg_exp = false; - match self.ch { - '+' => self.bump(), - '-' => { self.bump(); neg_exp = true; } - _ => () + if self.ch_is('+') { + self.bump(); + } else if self.ch_is('-') { + self.bump(); + neg_exp = true; } // Make sure a digit follows the exponent place. - match self.ch { - '0' .. '9' => (), - _ => return self.error(~"invalid number") + match self.ch_or_null() { + '0' .. '9' => (), + _ => return self.error(~"invalid number") } - while !self.eof() { - match self.ch { - '0' .. '9' => { - exp *= 10u; - exp += (self.ch as uint) - ('0' as uint); + match self.ch_or_null() { + c @ '0' .. '9' => { + exp *= 10; + exp += (c as uint) - ('0' as uint); - self.bump(); - } - _ => break + self.bump(); + } + _ => break } } @@ -958,56 +956,55 @@ impl> Parser { } if escape { - match self.ch { - '"' => res.push_char('"'), - '\\' => res.push_char('\\'), - '/' => res.push_char('/'), - 'b' => res.push_char('\x08'), - 'f' => res.push_char('\x0c'), - 'n' => res.push_char('\n'), - 'r' => res.push_char('\r'), - 't' => res.push_char('\t'), - 'u' => { - // Parse \u1234. - let mut i = 0u; - let mut n = 0u; - while i < 4u { - match self.next_char() { - '0' .. '9' => { - n = n * 16u + (self.ch as uint) - - ('0' as uint); - }, - 'a' | 'A' => n = n * 16u + 10u, - 'b' | 'B' => n = n * 16u + 11u, - 'c' | 'C' => n = n * 16u + 12u, - 'd' | 'D' => n = n * 16u + 13u, - 'e' | 'E' => n = n * 16u + 14u, - 'f' | 'F' => n = n * 16u + 15u, - _ => return self.error( - ~"invalid \\u escape (unrecognized hex)") - } - i += 1u; - } - - // Error out if we didn't parse 4 digits. - if i != 4u { - return self.error( - ~"invalid \\u escape (not four digits)"); - } - - res.push_char(char::from_u32(n as u32).unwrap()); - } - _ => return self.error(~"invalid escape") + match self.ch_or_null() { + '"' => res.push_char('"'), + '\\' => res.push_char('\\'), + '/' => res.push_char('/'), + 'b' => res.push_char('\x08'), + 'f' => res.push_char('\x0c'), + 'n' => res.push_char('\n'), + 'r' => res.push_char('\r'), + 't' => res.push_char('\t'), + 'u' => { + // Parse \u1234. + let mut i = 0u; + let mut n = 0u; + while i < 4u && !self.eof() { + self.bump(); + n = match self.ch_or_null() { + c @ '0' .. '9' => n * 16u + (c as uint) - ('0' as uint), + 'a' | 'A' => n * 16u + 10u, + 'b' | 'B' => n * 16u + 11u, + 'c' | 'C' => n * 16u + 12u, + 'd' | 'D' => n * 16u + 13u, + 'e' | 'E' => n * 16u + 14u, + 'f' | 'F' => n * 16u + 15u, + _ => return self.error( + ~"invalid \\u escape (unrecognized hex)") + }; + + i += 1u; + } + + // Error out if we didn't parse 4 digits. + if i != 4u { + return self.error( + ~"invalid \\u escape (not four digits)"); + } + + res.push_char(char::from_u32(n as u32).unwrap()); + } + _ => return self.error(~"invalid escape"), } escape = false; - } else if self.ch == '\\' { + } else if self.ch_is('\\') { escape = true; } else { - if self.ch == '"' { - self.bump(); - return Ok(res); + match self.ch { + Some('"') => { self.bump(); return Ok(res); }, + Some(c) => res.push_char(c), + None => unreachable!() } - res.push_char(self.ch); } } } @@ -1018,7 +1015,7 @@ impl> Parser { let mut values = ~[]; - if self.ch == ']' { + if self.ch_is(']') { self.bump(); return Ok(List(values)); } @@ -1034,10 +1031,13 @@ impl> Parser { return self.error(~"EOF while parsing list"); } - match self.ch { - ',' => self.bump(), - ']' => { self.bump(); return Ok(List(values)); } - _ => return self.error(~"expected `,` or `]`") + if self.ch_is(',') { + self.bump(); + } else if self.ch_is(']') { + self.bump(); + return Ok(List(values)); + } else { + return self.error(~"expected `,` or `]`") } }; } @@ -1048,7 +1048,7 @@ impl> Parser { let mut values = ~TreeMap::new(); - if self.ch == '}' { + if self.ch_is('}') { self.bump(); return Ok(Object(values)); } @@ -1056,7 +1056,7 @@ impl> Parser { while !self.eof() { self.parse_whitespace(); - if self.ch != '"' { + if !self.ch_is('"') { return self.error(~"key must be a string"); } @@ -1067,7 +1067,7 @@ impl> Parser { self.parse_whitespace(); - if self.ch != ':' { + if !self.ch_is(':') { if self.eof() { break; } return self.error(~"expected `:`"); } @@ -1079,13 +1079,13 @@ impl> Parser { } self.parse_whitespace(); - match self.ch { - ',' => self.bump(), - '}' => { self.bump(); return Ok(Object(values)); } - _ => { - if self.eof() { break; } - return self.error(~"expected `,` or `}`"); - } + match self.ch_or_null() { + ',' => self.bump(), + '}' => { self.bump(); return Ok(Object(values)); }, + _ => { + if self.eof() { break; } + return self.error(~"expected `,` or `}`"); + } } } diff --git a/src/librustc/middle/trans/datum.rs b/src/librustc/middle/trans/datum.rs index 2a2421077a8c6..e7ebc2ef526c4 100644 --- a/src/librustc/middle/trans/datum.rs +++ b/src/librustc/middle/trans/datum.rs @@ -545,7 +545,11 @@ fn load<'a>(bcx: &'a Block<'a>, llptr: ValueRef, ty: ty::t) -> ValueRef { if type_is_zero_size(bcx.ccx(), ty) { C_undef(type_of::type_of(bcx.ccx(), ty)) } else if ty::type_is_bool(ty) { - LoadRangeAssert(bcx, llptr, 0, 2, lib::llvm::True) + LoadRangeAssert(bcx, llptr, 0, 2, lib::llvm::False) + } else if ty::type_is_char(ty) { + // a char is a unicode codepoint, and so takes values from 0 + // to 0x10FFFF inclusive only. + LoadRangeAssert(bcx, llptr, 0, 0x10FFFF + 1, lib::llvm::False) } else { Load(bcx, llptr) } diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 77b047a63c386..138f9d7164067 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -12,7 +12,7 @@ use ast; use codemap::{BytePos, CharPos, CodeMap, Pos}; use diagnostic; use parse::lexer::{is_whitespace, with_str_from, Reader}; -use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan}; +use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan}; use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment}; use parse::lexer; use parse::token; @@ -136,11 +136,11 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str { fn read_to_eol(rdr: &StringReader) -> ~str { let mut val = ~""; - while rdr.curr.get() != '\n' && !is_eof(rdr) { - val.push_char(rdr.curr.get()); + while !rdr.curr_is('\n') && !is_eof(rdr) { + val.push_char(rdr.curr.get().unwrap()); bump(rdr); } - if rdr.curr.get() == '\n' { bump(rdr); } + if rdr.curr_is('\n') { bump(rdr); } return val; } @@ -152,7 +152,7 @@ fn read_one_line_comment(rdr: &StringReader) -> ~str { } fn consume_non_eol_whitespace(rdr: &StringReader) { - while is_whitespace(rdr.curr.get()) && rdr.curr.get() != '\n' && + while is_whitespace(rdr.curr.get()) && !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); } @@ -171,7 +171,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut ~[Comment]) { fn consume_whitespace_counting_blank_lines(rdr: &StringReader, comments: &mut ~[Comment]) { while is_whitespace(rdr.curr.get()) && !is_eof(rdr) { - if rdr.col.get() == CharPos(0u) && rdr.curr.get() == '\n' { + if rdr.col.get() == CharPos(0u) && rdr.curr_is('\n') { push_blank_line_comment(rdr, &mut *comments); } bump(rdr); @@ -196,7 +196,7 @@ fn read_line_comments(rdr: &StringReader, code_to_the_left: bool, debug!(">>> line comments"); let p = rdr.last_pos.get(); let mut lines: ~[~str] = ~[]; - while rdr.curr.get() == '/' && nextch(rdr) == '/' { + while rdr.curr_is('/') && nextch_is(rdr, '/') { let line = read_one_line_comment(rdr); debug!("{}", line); if is_doc_comment(line) { // doc-comments are not put in comments @@ -261,9 +261,9 @@ fn read_block_comment(rdr: &StringReader, let mut curr_line = ~"/*"; // doc-comments are not really comments, they are attributes - if rdr.curr.get() == '*' || rdr.curr.get() == '!' { - while !(rdr.curr.get() == '*' && nextch(rdr) == '/') && !is_eof(rdr) { - curr_line.push_char(rdr.curr.get()); + if rdr.curr_is('*') || rdr.curr_is('!') { + while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) { + curr_line.push_char(rdr.curr.get().unwrap()); bump(rdr); } if !is_eof(rdr) { @@ -281,20 +281,20 @@ fn read_block_comment(rdr: &StringReader, if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); } - if rdr.curr.get() == '\n' { + if rdr.curr_is('\n') { trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); curr_line = ~""; bump(rdr); } else { - curr_line.push_char(rdr.curr.get()); - if rdr.curr.get() == '/' && nextch(rdr) == '*' { + curr_line.push_char(rdr.curr.get().unwrap()); + if rdr.curr_is('/') && nextch_is(rdr, '*') { bump(rdr); bump(rdr); curr_line.push_char('*'); level += 1; } else { - if rdr.curr.get() == '*' && nextch(rdr) == '/' { + if rdr.curr_is('*') && nextch_is(rdr, '/') { bump(rdr); bump(rdr); curr_line.push_char('/'); @@ -310,7 +310,7 @@ fn read_block_comment(rdr: &StringReader, let mut style = if code_to_the_left { Trailing } else { Isolated }; consume_non_eol_whitespace(rdr); - if !is_eof(rdr) && rdr.curr.get() != '\n' && lines.len() == 1u { + if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u { style = Mixed; } debug!("<<< block comment"); @@ -318,20 +318,20 @@ fn read_block_comment(rdr: &StringReader, } fn peeking_at_comment(rdr: &StringReader) -> bool { - return ((rdr.curr.get() == '/' && nextch(rdr) == '/') || - (rdr.curr.get() == '/' && nextch(rdr) == '*')) || - (rdr.curr.get() == '#' && nextch(rdr) == '!'); + return (rdr.curr_is('/') && nextch_is(rdr, '/')) || + (rdr.curr_is('/') && nextch_is(rdr, '*')) || + (rdr.curr_is('#') && nextch_is(rdr, '!')); } fn consume_comment(rdr: &StringReader, code_to_the_left: bool, comments: &mut ~[Comment]) { debug!(">>> consume comment"); - if rdr.curr.get() == '/' && nextch(rdr) == '/' { + if rdr.curr_is('/') && nextch_is(rdr, '/') { read_line_comments(rdr, code_to_the_left, comments); - } else if rdr.curr.get() == '/' && nextch(rdr) == '*' { + } else if rdr.curr_is('/') && nextch_is(rdr, '*') { read_block_comment(rdr, code_to_the_left, comments); - } else if rdr.curr.get() == '#' && nextch(rdr) == '!' { + } else if rdr.curr_is('#') && nextch_is(rdr, '!') { read_shebang_comment(rdr, code_to_the_left, comments); } else { fail!(); } debug!("<<< consume comment"); @@ -363,7 +363,7 @@ pub fn gather_comments_and_literals(span_diagnostic: loop { let mut code_to_the_left = !first_read; consume_non_eol_whitespace(&rdr); - if rdr.curr.get() == '\n' { + if rdr.curr_is('\n') { code_to_the_left = false; consume_whitespace_counting_blank_lines(&rdr, &mut comments); } diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 1bcff324e082f..f6ebfbfcc3b2e 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -16,7 +16,6 @@ use ext::tt::transcribe::{dup_tt_reader, tt_next_token}; use parse::token; use parse::token::{str_to_ident}; -use std::cast::transmute; use std::cell::{Cell, RefCell}; use std::char; use std::num::from_str_radix; @@ -48,13 +47,19 @@ pub struct StringReader { // The column of the next character to read col: Cell, // The last character to be read - curr: Cell, + curr: Cell>, filemap: @codemap::FileMap, /* cached: */ peek_tok: RefCell, peek_span: RefCell, } +impl StringReader { + pub fn curr_is(&self, c: char) -> bool { + self.curr.get() == Some(c) + } +} + pub fn new_string_reader(span_diagnostic: @SpanHandler, filemap: @codemap::FileMap) -> StringReader { @@ -74,7 +79,7 @@ pub fn new_low_level_string_reader(span_diagnostic: @SpanHandler, pos: Cell::new(filemap.start_pos), last_pos: Cell::new(filemap.start_pos), col: Cell::new(CharPos(0)), - curr: Cell::new(initial_char), + curr: Cell::new(Some(initial_char)), filemap: filemap, /* dummy values; not read */ peek_tok: RefCell::new(token::EOF), @@ -246,14 +251,12 @@ pub fn bump(rdr: &StringReader) { rdr.last_pos.set(rdr.pos.get()); let current_byte_offset = byte_offset(rdr, rdr.pos.get()).to_uint(); if current_byte_offset < (rdr.filemap.src).len() { - assert!(rdr.curr.get() != unsafe { - transmute(-1u32) - }); // FIXME: #8971: unsound - let last_char = rdr.curr.get(); + assert!(rdr.curr.get().is_some()); + let last_char = rdr.curr.get().unwrap(); let next = rdr.filemap.src.char_range_at(current_byte_offset); let byte_offset_diff = next.next - current_byte_offset; rdr.pos.set(rdr.pos.get() + Pos::from_uint(byte_offset_diff)); - rdr.curr.set(next.ch); + rdr.curr.set(Some(next.ch)); rdr.col.set(rdr.col.get() + CharPos(1u)); if last_char == '\n' { rdr.filemap.next_line(rdr.last_pos.get()); @@ -265,37 +268,50 @@ pub fn bump(rdr: &StringReader) { Pos::from_uint(current_byte_offset), byte_offset_diff); } } else { - rdr.curr.set(unsafe { transmute(-1u32) }); // FIXME: #8971: unsound + rdr.curr.set(None); } } pub fn is_eof(rdr: &StringReader) -> bool { - rdr.curr.get() == unsafe { transmute(-1u32) } // FIXME: #8971: unsound + rdr.curr.get().is_none() } -pub fn nextch(rdr: &StringReader) -> char { +pub fn nextch(rdr: &StringReader) -> Option { let offset = byte_offset(rdr, rdr.pos.get()).to_uint(); if offset < (rdr.filemap.src).len() { - return rdr.filemap.src.char_at(offset); - } else { return unsafe { transmute(-1u32) }; } // FIXME: #8971: unsound + Some(rdr.filemap.src.char_at(offset)) + } else { + None + } +} +pub fn nextch_is(rdr: &StringReader, c: char) -> bool { + nextch(rdr) == Some(c) } -fn hex_digit_val(c: char) -> int { - if in_range(c, '0', '9') { return (c as int) - ('0' as int); } - if in_range(c, 'a', 'f') { return (c as int) - ('a' as int) + 10; } - if in_range(c, 'A', 'F') { return (c as int) - ('A' as int) + 10; } +fn hex_digit_val(c: Option) -> int { + let d = c.unwrap_or('\x00'); + + if in_range(c, '0', '9') { return (d as int) - ('0' as int); } + if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; } + if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; } fail!(); } -pub fn is_whitespace(c: char) -> bool { - return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +pub fn is_whitespace(c: Option) -> bool { + match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace + ' ' | '\n' | '\t' | '\r' => true, + _ => false + } } -fn in_range(c: char, lo: char, hi: char) -> bool { - return lo <= c && c <= hi +fn in_range(c: Option, lo: char, hi: char) -> bool { + match c { + Some(c) => lo <= c && c <= hi, + _ => false + } } -fn is_dec_digit(c: char) -> bool { return in_range(c, '0', '9'); } +fn is_dec_digit(c: Option) -> bool { return in_range(c, '0', '9'); } -fn is_hex_digit(c: char) -> bool { +fn is_hex_digit(c: Option) -> bool { return in_range(c, '0', '9') || in_range(c, 'a', 'f') || in_range(c, 'A', 'F'); } @@ -317,15 +333,15 @@ pub fn is_line_non_doc_comment(s: &str) -> bool { // returns a Some(sugared-doc-attr) if one exists, None otherwise fn consume_any_line_comment(rdr: &StringReader) -> Option { - if rdr.curr.get() == '/' { + if rdr.curr_is('/') { match nextch(rdr) { - '/' => { + Some('/') => { bump(rdr); bump(rdr); // line comments starting with "///" or "//!" are doc-comments - if rdr.curr.get() == '/' || rdr.curr.get() == '!' { + if rdr.curr_is('/') || rdr.curr_is('!') { let start_bpos = rdr.pos.get() - BytePos(3); - while rdr.curr.get() != '\n' && !is_eof(rdr) { + while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); } let ret = with_str_from(rdr, start_bpos, |string| { @@ -344,16 +360,16 @@ fn consume_any_line_comment(rdr: &StringReader) return ret; } } else { - while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); } + while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); } } // Restart whitespace munch. return consume_whitespace_and_comments(rdr); } - '*' => { bump(rdr); bump(rdr); return consume_block_comment(rdr); } + Some('*') => { bump(rdr); bump(rdr); return consume_block_comment(rdr); } _ => () } - } else if rdr.curr.get() == '#' { - if nextch(rdr) == '!' { + } else if rdr.curr_is('#') { + if nextch_is(rdr, '!') { // I guess this is the only way to figure out if // we're at the beginning of the file... let cmap = @CodeMap::new(); @@ -363,7 +379,7 @@ fn consume_any_line_comment(rdr: &StringReader) } let loc = cmap.lookup_char_pos_adj(rdr.last_pos.get()); if loc.line == 1u && loc.col == CharPos(0u) { - while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); } + while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); } return consume_whitespace_and_comments(rdr); } } @@ -378,7 +394,7 @@ pub fn is_block_non_doc_comment(s: &str) -> bool { // might return a sugared-doc-attr fn consume_block_comment(rdr: &StringReader) -> Option { // block comments starting with "/**" or "/*!" are doc-comments - let is_doc_comment = rdr.curr.get() == '*' || rdr.curr.get() == '!'; + let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!'); let start_bpos = rdr.pos.get() - BytePos(if is_doc_comment {3} else {2}); let mut level: int = 1; @@ -390,11 +406,11 @@ fn consume_block_comment(rdr: &StringReader) -> Option { ~"unterminated block comment" }; fatal_span(rdr, start_bpos, rdr.last_pos.get(), msg); - } else if rdr.curr.get() == '/' && nextch(rdr) == '*' { + } else if rdr.curr_is('/') && nextch_is(rdr, '*') { level += 1; bump(rdr); bump(rdr); - } else if rdr.curr.get() == '*' && nextch(rdr) == '/' { + } else if rdr.curr_is('*') && nextch_is(rdr, '/') { level -= 1; bump(rdr); bump(rdr); @@ -424,12 +440,13 @@ fn consume_block_comment(rdr: &StringReader) -> Option { } fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> { - let mut c = rdr.curr.get(); + // \x00 hits the `return None` case immediately, so this is fine. + let mut c = rdr.curr.get().unwrap_or('\x00'); let mut rslt = ~""; if c == 'e' || c == 'E' { rslt.push_char(c); bump(rdr); - c = rdr.curr.get(); + c = rdr.curr.get().unwrap_or('\x00'); if c == '-' || c == '+' { rslt.push_char(c); bump(rdr); @@ -448,10 +465,10 @@ fn scan_digits(rdr: &StringReader, radix: uint) -> ~str { let mut rslt = ~""; loop { let c = rdr.curr.get(); - if c == '_' { bump(rdr); continue; } - match char::to_digit(c, radix) { + if c == Some('_') { bump(rdr); continue; } + match c.and_then(|cc| char::to_digit(cc, radix)) { Some(_) => { - rslt.push_char(c); + rslt.push_char(c.unwrap()); bump(rdr); } _ => return rslt @@ -476,7 +493,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token { let mut num_str; let mut base = 10u; let mut c = c; - let mut n = nextch(rdr); + let mut n = nextch(rdr).unwrap_or('\x00'); let start_bpos = rdr.last_pos.get(); if c == '0' && n == 'x' { bump(rdr); @@ -492,7 +509,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token { base = 2u; } num_str = scan_digits(rdr, base); - c = rdr.curr.get(); + c = rdr.curr.get().unwrap_or('\x00'); nextch(rdr); if c == 'u' || c == 'i' { enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) } @@ -502,13 +519,13 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token { else { Unsigned(ast::TyU) } }; bump(rdr); - c = rdr.curr.get(); + c = rdr.curr.get().unwrap_or('\x00'); if c == '8' { bump(rdr); tp = if signed { Signed(ast::TyI8) } else { Unsigned(ast::TyU8) }; } - n = nextch(rdr); + n = nextch(rdr).unwrap_or('\x00'); if c == '1' && n == '6' { bump(rdr); bump(rdr); @@ -541,8 +558,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token { } } let mut is_float = false; - if rdr.curr.get() == '.' && !(ident_start(nextch(rdr)) || nextch(rdr) == - '.') { + if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) { is_float = true; bump(rdr); let dec_part = scan_digits(rdr, 10u); @@ -557,10 +573,10 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token { None => () } - if rdr.curr.get() == 'f' { + if rdr.curr_is('f') { bump(rdr); - c = rdr.curr.get(); - n = nextch(rdr); + c = rdr.curr.get().unwrap_or('\x00'); + n = nextch(rdr).unwrap_or('\x00'); if c == '3' && n == '2' { bump(rdr); bump(rdr); @@ -602,18 +618,23 @@ fn scan_numeric_escape(rdr: &StringReader, n_hex_digits: uint) -> char { let mut accum_int = 0; let mut i = n_hex_digits; let start_bpos = rdr.last_pos.get(); - while i != 0u { + while i != 0u && !is_eof(rdr) { let n = rdr.curr.get(); if !is_hex_digit(n) { fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(), ~"illegal character in numeric character escape", - n); + n.unwrap()); } bump(rdr); accum_int *= 16; accum_int += hex_digit_val(n); i -= 1u; } + if i != 0 && is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos.get(), + ~"unterminated numeric character escape"); + } + match char::from_u32(accum_int as u32) { Some(x) => x, None => fatal_span(rdr, start_bpos, rdr.last_pos.get(), @@ -621,14 +642,18 @@ fn scan_numeric_escape(rdr: &StringReader, n_hex_digits: uint) -> char { } } -fn ident_start(c: char) -> bool { +fn ident_start(c: Option) -> bool { + let c = match c { Some(c) => c, None => return false }; + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && char::is_XID_start(c)) } -fn ident_continue(c: char) -> bool { +fn ident_continue(c: Option) -> bool { + let c = match c { Some(c) => c, None => return false }; + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') @@ -641,7 +666,7 @@ fn ident_continue(c: char) -> bool { // EFFECT: updates the interner fn next_token_inner(rdr: &StringReader) -> token::Token { let c = rdr.curr.get(); - if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' { + if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') { // Note: r as in r" or r#" is part of a raw string literal, // not an identifier, and is handled further down. @@ -654,7 +679,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { if string == "_" { token::UNDERSCORE } else { - let is_mod_name = rdr.curr.get() == ':' && nextch(rdr) == ':'; + let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':'); // FIXME: perform NFKC normalization here. (Issue #2253) token::IDENT(str_to_ident(string), is_mod_name) @@ -662,16 +687,16 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { }) } if is_dec_digit(c) { - return scan_number(c, rdr); + return scan_number(c.unwrap(), rdr); } fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token { bump(rdr); - if rdr.curr.get() == '=' { + if rdr.curr_is('=') { bump(rdr); return token::BINOPEQ(op); } else { return token::BINOP(op); } } - match c { + match c.expect("next_token_inner called at EOF") { @@ -682,9 +707,9 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { ',' => { bump(rdr); return token::COMMA; } '.' => { bump(rdr); - return if rdr.curr.get() == '.' { + return if rdr.curr_is('.') { bump(rdr); - if rdr.curr.get() == '.' { + if rdr.curr_is('.') { bump(rdr); token::DOTDOTDOT } else { @@ -705,7 +730,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { '~' => { bump(rdr); return token::TILDE; } ':' => { bump(rdr); - if rdr.curr.get() == ':' { + if rdr.curr_is(':') { bump(rdr); return token::MOD_SEP; } else { return token::COLON; } @@ -720,10 +745,10 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { // Multi-byte tokens. '=' => { bump(rdr); - if rdr.curr.get() == '=' { + if rdr.curr_is('=') { bump(rdr); return token::EQEQ; - } else if rdr.curr.get() == '>' { + } else if rdr.curr_is('>') { bump(rdr); return token::FAT_ARROW; } else { @@ -732,19 +757,19 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { } '!' => { bump(rdr); - if rdr.curr.get() == '=' { + if rdr.curr_is('=') { bump(rdr); return token::NE; } else { return token::NOT; } } '<' => { bump(rdr); - match rdr.curr.get() { + match rdr.curr.get().unwrap_or('\x00') { '=' => { bump(rdr); return token::LE; } '<' => { return binop(rdr, token::SHL); } '-' => { bump(rdr); - match rdr.curr.get() { + match rdr.curr.get().unwrap_or('\x00') { '>' => { bump(rdr); return token::DARROW; } _ => { return token::LARROW; } } @@ -754,7 +779,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { } '>' => { bump(rdr); - match rdr.curr.get() { + match rdr.curr.get().unwrap_or('\x00') { '=' => { bump(rdr); return token::GE; } '>' => { return binop(rdr, token::SHR); } _ => { return token::GT; } @@ -764,12 +789,14 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { // Either a character constant 'a' OR a lifetime name 'abc bump(rdr); let start = rdr.last_pos.get(); - let mut c2 = rdr.curr.get(); + + // the eof will be picked up by the final `'` check below + let mut c2 = rdr.curr.get().unwrap_or('\x00'); bump(rdr); // If the character is an ident start not followed by another single // quote, then this is a lifetime name: - if ident_start(c2) && rdr.curr.get() != '\'' { + if ident_start(Some(c2)) && !rdr.curr_is('\'') { while ident_continue(rdr.curr.get()) { bump(rdr); } @@ -798,19 +825,24 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { let escaped_pos = rdr.last_pos.get(); bump(rdr); match escaped { - 'n' => { c2 = '\n'; } - 'r' => { c2 = '\r'; } - 't' => { c2 = '\t'; } - '\\' => { c2 = '\\'; } - '\'' => { c2 = '\''; } - '"' => { c2 = '"'; } - '0' => { c2 = '\x00'; } - 'x' => { c2 = scan_numeric_escape(rdr, 2u); } - 'u' => { c2 = scan_numeric_escape(rdr, 4u); } - 'U' => { c2 = scan_numeric_escape(rdr, 8u); } - c2 => { - fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(), - ~"unknown character escape", c2); + None => {} + Some(e) => { + c2 = match e { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '\'' => '\'', + '"' => '"', + '0' => '\x00', + 'x' => scan_numeric_escape(rdr, 2u), + 'u' => scan_numeric_escape(rdr, 4u), + 'U' => scan_numeric_escape(rdr, 8u), + c2 => { + fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(), + ~"unknown character escape", c2) + } + } } } } @@ -820,7 +852,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { } _ => {} } - if rdr.curr.get() != '\'' { + if !rdr.curr_is('\'') { fatal_span_verbose(rdr, // Byte offsetting here is okay because the // character before position `start` is an @@ -836,17 +868,22 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { let mut accum_str = ~""; let start_bpos = rdr.last_pos.get(); bump(rdr); - while rdr.curr.get() != '"' { + while !rdr.curr_is('"') { if is_eof(rdr) { fatal_span(rdr, start_bpos, rdr.last_pos.get(), ~"unterminated double quote string"); } - let ch = rdr.curr.get(); + let ch = rdr.curr.get().unwrap(); bump(rdr); match ch { '\\' => { - let escaped = rdr.curr.get(); + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos.get(), + ~"unterminated double quote string"); + } + + let escaped = rdr.curr.get().unwrap(); let escaped_pos = rdr.last_pos.get(); bump(rdr); match escaped { @@ -883,15 +920,19 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { let start_bpos = rdr.last_pos.get(); bump(rdr); let mut hash_count = 0u; - while rdr.curr.get() == '#' { + while rdr.curr_is('#') { bump(rdr); hash_count += 1; } - if rdr.curr.get() != '"' { + + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos.get(), + ~"unterminated raw string"); + } else if !rdr.curr_is('"') { fatal_span_char(rdr, start_bpos, rdr.last_pos.get(), ~"only `#` is allowed in raw string delimitation; \ found illegal character", - rdr.curr.get()); + rdr.curr.get().unwrap()); } bump(rdr); let content_start_bpos = rdr.last_pos.get(); @@ -901,11 +942,11 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { fatal_span(rdr, start_bpos, rdr.last_pos.get(), ~"unterminated raw string"); } - if rdr.curr.get() == '"' { + if rdr.curr_is('"') { content_end_bpos = rdr.last_pos.get(); for _ in range(0, hash_count) { bump(rdr); - if rdr.curr.get() != '#' { + if !rdr.curr_is('#') { continue 'outer; } } @@ -921,14 +962,14 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { return token::LIT_STR_RAW(str_content, hash_count); } '-' => { - if nextch(rdr) == '>' { + if nextch_is(rdr, '>') { bump(rdr); bump(rdr); return token::RARROW; } else { return binop(rdr, token::MINUS); } } '&' => { - if nextch(rdr) == '&' { + if nextch_is(rdr, '&') { bump(rdr); bump(rdr); return token::ANDAND; @@ -936,7 +977,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token { } '|' => { match nextch(rdr) { - '|' => { bump(rdr); bump(rdr); return token::OROR; } + Some('|') => { bump(rdr); bump(rdr); return token::OROR; } _ => { return binop(rdr, token::OR); } } }