summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2017-11-04 23:09:19 +0000
committer bors <bors@rust-lang.org> 2017-11-04 23:09:19 +0000
commit 12e6b53744a0101f28855f25e6e4551e2d664857 (patch)
tree efa9116ee7711cb5512a48470041b99e23003d25
parent d762b1d6c67db12e117186d94d70e46cddb22965 (diff)
parent 272c2faa1d766fd4185141106959cdb58b88e6e9 (diff)
download rust-12e6b53744a0101f28855f25e6e4551e2d664857.tar.gz
Auto merge of #45711 - tirr-c:unicode-span, r=estebank
Display spans correctly when there are zero-width or wide characters

Hopefully...
* fixes #45211
* fixes #8706

---

Before:
```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                         ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                    ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

After:
```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                     ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                      ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

Spans might display incorrectly on the browser.

r? @estebank
-rw-r--r-- src/Cargo.lock 1
-rw-r--r-- src/librustc/ich/impls_syntax.rs 16
-rw-r--r-- src/librustc_errors/emitter.rs 14
-rw-r--r-- src/librustc_metadata/decoder.rs 8
-rw-r--r-- src/libsyntax/codemap.rs 42
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 1
-rw-r--r-- src/libsyntax_pos/Cargo.toml 1
-rw-r--r-- src/libsyntax_pos/lib.rs 95
-rw-r--r-- src/test/ui/codemap_tests/unicode.stderr 2
-rw-r--r-- src/test/ui/codemap_tests/unicode_2.rs 17
-rw-r--r-- src/test/ui/codemap_tests/unicode_2.stderr 24
-rw-r--r-- src/test/ui/codemap_tests/unicode_3.rs 14
-rw-r--r-- src/test/ui/codemap_tests/unicode_3.stderr 10
-rw-r--r-- src/test/ui/issue-44078.stderr 2
14 files changed, 231 insertions, 16 deletions
diff --git a/src/Cargo.lock b/src/Cargo.lock
index 70266df79eb..f8418b77f61 100644
--- a/src/Cargo.lock
+++ b/src/Cargo.lock
@@ -2230,6 +2230,7 @@ version = "0.0.0"
dependencies = [
"rustc_data_structures 0.0.0",
"serialize 0.0.0",
+ "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs
index 799e790b85f..fea4e283db1 100644
--- a/src/librustc/ich/impls_syntax.rs
+++ b/src/librustc/ich/impls_syntax.rs
@@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
end_pos: _,
ref lines,
ref multibyte_chars,
+ ref non_narrow_chars,
} = *self;
name.hash_stable(hcx, hasher);
@@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
+
+ let non_narrow_chars = non_narrow_chars.borrow();
+ non_narrow_chars.len().hash_stable(hcx, hasher);
+ for &char_pos in non_narrow_chars.iter() {
+ stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
+ }
}
}
@@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,
(pos.0 - filemap_start.0, bytes as u32)
}
+
+fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
+ filemap_start: ::syntax_pos::BytePos)
+ -> (u32, u32) {
+ let pos = swc.pos();
+ let width = swc.width();
+
+ (pos.0 - filemap_start.0, width as u32)
+}
diff --git a/src/librustc_errors/emitter.rs b/src/librustc_errors/emitter.rs
index 011be74ee7c..6c43c60686e 100644
--- a/src/librustc_errors/emitter.rs
+++ b/src/librustc_errors/emitter.rs
@@ -10,7 +10,7 @@
use self::Destination::*;
-use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
+use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};
use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper, DiagnosticId};
use RenderSpan::*;
@@ -201,8 +201,8 @@ impl EmitterWriter {
// 6..7. This is degenerate input, but it's best to degrade
// gracefully -- and the parser likes to supply a span like
// that for EOF, in particular.
- if lo.col == hi.col && lo.line == hi.line {
- hi.col = CharPos(lo.col.0 + 1);
+ if lo.col_display == hi.col_display && lo.line == hi.line {
+ hi.col_display += 1;
}
let ann_type = if lo.line != hi.line {
@@ -210,8 +210,8 @@ impl EmitterWriter {
depth: 1,
line_start: lo.line,
line_end: hi.line,
- start_col: lo.col.0,
- end_col: hi.col.0,
+ start_col: lo.col_display,
+ end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
};
@@ -221,8 +221,8 @@ impl EmitterWriter {
AnnotationType::Singleline
};
let ann = Annotation {
- start_col: lo.col.0,
- end_col: hi.col.0,
+ start_col: lo.col_display,
+ end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
annotation_type: ann_type,
diff --git a/src/librustc_metadata/decoder.rs b/src/librustc_metadata/decoder.rs
index fa9b166ca94..b129356311c 100644
--- a/src/librustc_metadata/decoder.rs
+++ b/src/librustc_metadata/decoder.rs
@@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
end_pos,
lines,
multibyte_chars,
+ non_narrow_chars,
.. } = filemap_to_import;
let source_length = (end_pos - start_pos).to_usize();
@@ -1206,6 +1207,10 @@ impl<'a, 'tcx> CrateMetadata {
for mbc in &mut multibyte_chars {
mbc.pos = mbc.pos - start_pos;
}
+ let mut non_narrow_chars = non_narrow_chars.into_inner();
+ for swc in &mut non_narrow_chars {
+ *swc = *swc - start_pos;
+ }
let local_version = local_codemap.new_imported_filemap(name,
name_was_remapped,
@@ -1213,7 +1218,8 @@ impl<'a, 'tcx> CrateMetadata {
src_hash,
source_length,
lines,
- multibyte_chars);
+ multibyte_chars,
+ non_narrow_chars);
debug!("CrateMetaData::imported_filemaps alloc \
filemap {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index ad78c550cf6..3464db2a811 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -242,7 +242,8 @@ impl CodeMap {
src_hash: u128,
source_len: usize,
mut file_local_lines: Vec<BytePos>,
- mut file_local_multibyte_chars: Vec<MultiByteChar>)
+ mut file_local_multibyte_chars: Vec<MultiByteChar>,
+ mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
-> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
@@ -258,6 +259,10 @@ impl CodeMap {
mbc.pos = mbc.pos + start_pos;
}
+ for swc in &mut file_local_non_narrow_chars {
+ *swc = *swc + start_pos;
+ }
+
let filemap = Rc::new(FileMap {
name: filename,
name_was_remapped,
@@ -270,6 +275,7 @@ impl CodeMap {
end_pos,
lines: RefCell::new(file_local_lines),
multibyte_chars: RefCell::new(file_local_multibyte_chars),
+ non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
});
files.push(filemap.clone());
@@ -297,6 +303,24 @@ impl CodeMap {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
+ let col = chpos - linechpos;
+
+ let col_display = {
+ let non_narrow_chars = f.non_narrow_chars.borrow();
+ let start_width_idx = non_narrow_chars
+ .binary_search_by_key(&linebpos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let end_width_idx = non_narrow_chars
+ .binary_search_by_key(&pos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let special_chars = end_width_idx - start_width_idx;
+ let non_narrow: usize =
+ non_narrow_chars[start_width_idx..end_width_idx]
+ .into_iter()
+ .map(|x| x.width())
+ .sum();
+ col.0 - special_chars + non_narrow
+ };
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
@@ -306,14 +330,28 @@ impl CodeMap {
Loc {
file: f,
line,
- col: chpos - linechpos,
+ col,
+ col_display,
}
}
Err(f) => {
+ let col_display = {
+ let non_narrow_chars = f.non_narrow_chars.borrow();
+ let end_width_idx = non_narrow_chars
+ .binary_search_by_key(&pos, |x| x.pos())
+ .unwrap_or_else(|x| x);
+ let non_narrow: usize =
+ non_narrow_chars[0..end_width_idx]
+ .into_iter()
+ .map(|x| x.width())
+ .sum();
+ chpos.0 - end_width_idx + non_narrow
+ };
Loc {
file: f,
line: 0,
col: chpos,
+ col_display,
}
}
}
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index d9c3dbb630d..951163d35fa 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
self.filemap.record_multibyte_char(self.pos, new_ch_len);
}
}
+ self.filemap.record_width(self.pos, new_ch);
} else {
self.ch = None;
self.pos = new_pos;
diff --git a/src/libsyntax_pos/Cargo.toml b/src/libsyntax_pos/Cargo.toml
index dd8129bab51..aad2155157d 100644
--- a/src/libsyntax_pos/Cargo.toml
+++ b/src/libsyntax_pos/Cargo.toml
@@ -11,3 +11,4 @@ crate-type = ["dylib"]
[dependencies]
serialize = { path = "../libserialize" }
rustc_data_structures = { path = "../librustc_data_structures" }
+unicode-width = "0.1.4"
diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs
index 44e73d876e8..47755dc1d54 100644
--- a/src/libsyntax_pos/lib.rs
+++ b/src/libsyntax_pos/lib.rs
@@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
extern crate serialize;
extern crate serialize as rustc_serialize; // used by deriving
+extern crate unicode_width;
+
pub mod hygiene;
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};
@@ -494,6 +496,63 @@ pub struct MultiByteChar {
pub bytes: usize,
}
+/// Identifies an offset of a non-narrow character in a FileMap
+#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
+pub enum NonNarrowChar {
+ /// Represents a zero-width character
+ ZeroWidth(BytePos),
+ /// Represents a wide (fullwidth) character
+ Wide(BytePos),
+}
+
+impl NonNarrowChar {
+ fn new(pos: BytePos, width: usize) -> Self {
+ match width {
+ 0 => NonNarrowChar::ZeroWidth(pos),
+ 2 => NonNarrowChar::Wide(pos),
+ _ => panic!("width {} given for non-narrow character", width),
+ }
+ }
+
+ /// Returns the absolute offset of the character in the CodeMap
+ pub fn pos(&self) -> BytePos {
+ match *self {
+ NonNarrowChar::ZeroWidth(p) |
+ NonNarrowChar::Wide(p) => p,
+ }
+ }
+
+ /// Returns the width of the character, 0 (zero-width) or 2 (wide)
+ pub fn width(&self) -> usize {
+ match *self {
+ NonNarrowChar::ZeroWidth(_) => 0,
+ NonNarrowChar::Wide(_) => 2,
+ }
+ }
+}
+
+impl Add<BytePos> for NonNarrowChar {
+ type Output = Self;
+
+ fn add(self, rhs: BytePos) -> Self {
+ match self {
+ NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
+ NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
+ }
+ }
+}
+
+impl Sub<BytePos> for NonNarrowChar {
+ type Output = Self;
+
+ fn sub(self, rhs: BytePos) -> Self {
+ match self {
+ NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
+ NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
+ }
+ }
+}
+
/// The state of the lazy external source loading mechanism of a FileMap.
#[derive(PartialEq, Eq, Clone)]
pub enum ExternalSource {
@@ -552,11 +611,13 @@ pub struct FileMap {
pub lines: RefCell<Vec<BytePos>>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
+ /// Width of characters that are not narrow in the source code
+ pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
}
impl Encodable for FileMap {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
- s.emit_struct("FileMap", 7, |s| {
+ s.emit_struct("FileMap", 8, |s| {
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
@@ -610,6 +671,9 @@ impl Encodable for FileMap {
})?;
s.emit_struct_field("multibyte_chars", 5, |s| {
(*self.multibyte_chars.borrow()).encode(s)
+ })?;
+ s.emit_struct_field("non_narrow_chars", 7, |s| {
+ (*self.non_narrow_chars.borrow()).encode(s)
})
})
}
@@ -618,7 +682,7 @@ impl Encodable for FileMap {
impl Decodable for FileMap {
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {
- d.read_struct("FileMap", 6, |d| {
+ d.read_struct("FileMap", 8, |d| {
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
let name_was_remapped: bool =
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
@@ -657,6 +721,8 @@ impl Decodable for FileMap {
})?;
let multibyte_chars: Vec<MultiByteChar> =
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
+ let non_narrow_chars: Vec<NonNarrowChar> =
+ d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
Ok(FileMap {
name,
name_was_remapped,
@@ -671,7 +737,8 @@ impl Decodable for FileMap {
src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
lines: RefCell::new(lines),
- multibyte_chars: RefCell::new(multibyte_chars)
+ multibyte_chars: RefCell::new(multibyte_chars),
+ non_narrow_chars: RefCell::new(non_narrow_chars)
})
})
}
@@ -709,6 +776,7 @@ impl FileMap {
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
+ non_narrow_chars: RefCell::new(Vec::new()),
}
}
@@ -798,6 +866,23 @@ impl FileMap {
self.multibyte_chars.borrow_mut().push(mbc);
}
+ pub fn record_width(&self, pos: BytePos, ch: char) {
+ let width = match ch {
+ '\t' | '\n' =>
+ // Tabs will consume one column.
+ // Make newlines take one column so that displayed spans can point them.
+ 1,
+ ch =>
+ // Assume control characters are zero width.
+ // FIXME: How can we decide between `width` and `width_cjk`?
+ unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
+ };
+ // Only record non-narrow characters.
+ if width != 1 {
+ self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
+ }
+ }
+
pub fn is_real_file(&self) -> bool {
!(self.name.starts_with("<") &&
self.name.ends_with(">"))
@@ -944,7 +1029,9 @@ pub struct Loc {
/// The (1-based) line number
pub line: usize,
/// The (0-based) column offset
- pub col: CharPos
+ pub col: CharPos,
+ /// The (0-based) column offset when displayed
+ pub col_display: usize,
}
/// A source code location used as the result of lookup_char_pos_adj
diff --git a/src/test/ui/codemap_tests/unicode.stderr b/src/test/ui/codemap_tests/unicode.stderr
index 0828fd28b58..02a9d7ee0ef 100644
--- a/src/test/ui/codemap_tests/unicode.stderr
+++ b/src/test/ui/codemap_tests/unicode.stderr
@@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
--> $DIR/unicode.rs:11:8
|
11 | extern "路濫狼á́́" fn foo() {}
- | ^^^^^^^^
+ | ^^^^^^^^^
error: aborting due to previous error
diff --git a/src/test/ui/codemap_tests/unicode_2.rs b/src/test/ui/codemap_tests/unicode_2.rs
new file mode 100644
index 00000000000..cc3eae90f90
--- /dev/null
+++ b/src/test/ui/codemap_tests/unicode_2.rs
@@ -0,0 +1,17 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(non_ascii_idents)]
+
+fn main() {
+ let _ = ("a̐éö̲", 0u7);
+ let _ = ("아あ", 1i42);
+ let _ = a̐é;
+}
diff --git a/src/test/ui/codemap_tests/unicode_2.stderr b/src/test/ui/codemap_tests/unicode_2.stderr
new file mode 100644
index 00000000000..6cfa66730a2
--- /dev/null
+++ b/src/test/ui/codemap_tests/unicode_2.stderr
@@ -0,0 +1,24 @@
+error: invalid width `7` for integer literal
+ --> $DIR/unicode_2.rs:14:25
+ |
+14 | let _ = ("a̐éö̲", 0u7);
+ | ^^^
+ |
+ = help: valid widths are 8, 16, 32, 64 and 128
+
+error: invalid width `42` for integer literal
+ --> $DIR/unicode_2.rs:15:20
+ |
+15 | let _ = ("아あ", 1i42);
+ | ^^^^
+ |
+ = help: valid widths are 8, 16, 32, 64 and 128
+
+error[E0425]: cannot find value `a̐é` in this scope
+ --> $DIR/unicode_2.rs:16:13
+ |
+16 | let _ = a̐é;
+ | ^^ not found in this scope
+
+error: aborting due to 3 previous errors
+
diff --git a/src/test/ui/codemap_tests/unicode_3.rs b/src/test/ui/codemap_tests/unicode_3.rs
new file mode 100644
index 00000000000..5294eedb845
--- /dev/null
+++ b/src/test/ui/codemap_tests/unicode_3.rs
@@ -0,0 +1,14 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+ let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
+ println!("{}", s);
+}
diff --git a/src/test/ui/codemap_tests/unicode_3.stderr b/src/test/ui/codemap_tests/unicode_3.stderr
new file mode 100644
index 00000000000..a7514a6b792
--- /dev/null
+++ b/src/test/ui/codemap_tests/unicode_3.stderr
@@ -0,0 +1,10 @@
+warning: denote infinite loops with `loop { ... }`
+ --> $DIR/unicode_3.rs:12:45
+ |
+12 | let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
+ | ----------^^^^^^^^^^^
+ | |
+ | help: use `loop`
+ |
+ = note: #[warn(while_true)] on by default
+
diff --git a/src/test/ui/issue-44078.stderr b/src/test/ui/issue-44078.stderr
index 389f3b2479a..2ed4578d538 100644
--- a/src/test/ui/issue-44078.stderr
+++ b/src/test/ui/issue-44078.stderr
@@ -2,7 +2,7 @@ error: unterminated double quote string
--> $DIR/issue-44078.rs:12:8
|
12 | "😊"";
- | ________^
+ | _________^
13 | | }
| |__^