ee9256409f
contrib/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * unicode/from_glibc/unicode_utils.py: Support script from glibc (commit 464cd3) to extract character widths from Unicode data files. * unicode/from_glibc/utf8_gen.py: Likewise. * unicode/UnicodeData.txt: Unicode v. 12.1.0 data file. * unicode/EastAsianWidth.txt: Likewise. * unicode/PropList.txt: Likewise. * unicode/gen_wcwidth.py: New utility to generate libcpp/generated_cpp_wcwidth.h with help from the glibc support scripts and the Unicode data files. * unicode/unicode-license.txt: Added. * unicode/README: New explanatory file. libcpp/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * generated_cpp_wcwidth.h: New file generated by ../contrib/unicode/gen_wcwidth.py, supports new cpp_wcwidth function. * charset.c (compute_next_display_width): New function to help implement display columns. (cpp_byte_column_to_display_column): Likewise. (cpp_display_column_to_byte_column): Likewise. (cpp_wcwidth): Likewise. * include/cpplib.h (cpp_byte_column_to_display_column): Declare. (cpp_display_column_to_byte_column): Declare. (cpp_wcwidth): Declare. (cpp_display_width): New function. gcc/ChangeLog 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * input.c (location_compute_display_column): New function to help with multibyte awareness in diagnostics. (test_cpp_utf8): New self-test. (input_c_tests): Call the new test. * input.h (location_compute_display_column): Declare. * diagnostic-show-locus.c: Pervasive changes to add multibyte awareness to all classes and functions. (enum column_unit): New enum. (class exploc_with_display_col): New class. (class layout_point): Convert m_column member to array m_columns[2]. (layout_range::contains_point): Add col_unit argument. (test_layout_range_for_single_point): Pass new argument. (test_layout_range_for_single_line): Likewise. (test_layout_range_for_multiple_lines): Likewise. 
(line_bounds::convert_to_display_cols): New function. (layout::get_state_at_point): Add col_unit argument. (make_range): Use empty filename rather than dummy filename. (get_line_width_without_trailing_whitespace): Rename to... (get_line_bytes_without_trailing_whitespace): ...this. (test_get_line_width_without_trailing_whitespace): Rename to... (test_get_line_bytes_without_trailing_whitespace): ...this. (class layout): m_exploc changed to exploc_with_display_col from plain expanded_location. (layout::get_linenum_width): New accessor member function. (layout::get_x_offset_display): Likewise. (layout::calculate_linenum_width): New subroutine for the constuctor. (layout::calculate_x_offset_display): Likewise. (layout::layout): Use the new subroutines. Add multibyte awareness. (layout::print_source_line): Add multibyte awareness. (layout::print_line): Likewise. (layout::print_annotation_line): Likewise. (line_label::line_label): Likewise. (layout::print_any_labels): Likewise. (layout::annotation_line_showed_range_p): Likewise. (get_printed_columns): Likewise. (class line_label): Rename m_length to m_display_width. (get_affected_columns): Rename to... (get_affected_range): ...this; add col_unit argument and multibyte awareness. (class correction): Add m_affected_bytes and m_display_cols members. Rename m_len to m_byte_length for clarity. Add multibyte awareness throughout. (correction::insertion_p): Add multibyte awareness. (correction::compute_display_cols): New function. (correction::ensure_terminated): Use new member name m_byte_length. (line_corrections::add_hint): Add multibyte awareness. (layout::print_trailing_fixits): Likewise. (layout::get_x_bound_for_row): Likewise. (test_one_liner_simple_caret_utf8): New self-test analogous to the one with _utf8 suffix removed, testing multibyte awareness. (test_one_liner_caret_and_range_utf8): Likewise. (test_one_liner_multiple_carets_and_ranges_utf8): Likewise. (test_one_liner_fixit_insert_before_utf8): Likewise. 
(test_one_liner_fixit_insert_after_utf8): Likewise. (test_one_liner_fixit_remove_utf8): Likewise. (test_one_liner_fixit_replace_utf8): Likewise. (test_one_liner_fixit_replace_non_equal_range_utf8): Likewise. (test_one_liner_fixit_replace_equal_secondary_range_utf8): Likewise. (test_one_liner_fixit_validation_adhoc_locations_utf8): Likewise. (test_one_liner_many_fixits_1_utf8): Likewise. (test_one_liner_many_fixits_2_utf8): Likewise. (test_one_liner_labels_utf8): Likewise. (test_diagnostic_show_locus_one_liner_utf8): Likewise. (test_overlapped_fixit_printing_utf8): Likewise. (test_overlapped_fixit_printing): Adapt for changes to get_affected_columns, get_printed_columns and class corrections. (test_overlapped_fixit_printing_2): Likewise. (test_linenum_sep): New constant. (test_left_margin): Likewise. (test_offset_impl): Helper function for new test. (test_layout_x_offset_display_utf8): New test. (diagnostic_show_locus_c_tests): Call new tests. gcc/testsuite/ChangeLog: 2019-12-09 Lewis Hyatt <lhyatt@gmail.com> PR preprocessor/49973 * gcc.dg/plugin/diagnostic_plugin_test_show_locus.c (test_show_locus): Tweak so that expected output is the same as before the diagnostic-show-locus.c changes. * gcc.dg/cpp/pr66415-1.c: Likewise. From-SVN: r279137
107 lines
3.2 KiB
Python
Executable File
107 lines
3.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Script to generate tables for cpp_wcwidth, leveraging glibc's utf8_gen.py.
|
|
#
|
|
# This file is part of GCC.
|
|
#
|
|
# GCC is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free
|
|
# Software Foundation; either version 3, or (at your option) any later
|
|
# version.
|
|
#
|
|
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
# for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with GCC; see the file COPYING3. If not see
|
|
# <http://www.gnu.org/licenses/>.
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Exactly one command-line argument is required: the Unicode version
# string, which is forwarded to glibc's utf8_gen.py and echoed in the
# generated header comment.
if len(sys.argv) != 2:
    # Substitute the program name into the usage text; without the format
    # argument a literal "%s" would be printed.
    print("usage: %s <unicode version>" % sys.argv[0], file=sys.stderr)
    sys.exit(1)
unicode_version = sys.argv[1]
|
|
|
|
# Parse a codepoint in the format output by glibc tools.
|
|
def parse_ucn(s):
    """Return the integer codepoint named by S.

    S must look like "<UXXXX>", where XXXX is the codepoint in hexadecimal.
    ValueError is raised when the "<U" prefix or the ">" suffix is missing,
    or when the text between them is not a valid base-16 integer.
    """
    prefix, suffix = "<U", ">"
    if s.startswith(prefix) and s.endswith(suffix):
        return int(s[len(prefix):-len(suffix)], base=16)
    raise ValueError
|
|
|
|
# Process a line of width output from utf8_gen.py and update the global
# array.  The array has one entry per codepoint (0 through 0x10FFFF) and
# every entry starts out as width 1.
widths = [1] * (0x10FFFF + 1)


def process_width(line):
    """Store the width described by LINE into the global widths array.

    LINE holds a codepoint or an inclusive codepoint range, followed by
    whitespace and the width.  Example lines:

        <UA8FF> 0
        <UA926>...<UA92D> 0

    Raises IndexError when LINE has fewer than two fields, and ValueError
    when the width is not an integer, when a codepoint cannot be parsed by
    parse_ucn, or when the first field contains more than one "...".
    """
    fields = line.split()
    width = int(fields[1])

    endpoints = fields[0].split("...")
    if len(endpoints) not in (1, 2):
        raise ValueError

    # A lone codepoint is simply a range whose two ends coincide.
    first = parse_ucn(endpoints[0])
    last = parse_ucn(endpoints[-1])

    # The range is inclusive, hence the +1 on the slice bound.
    widths[first:last + 1] = [width] * (last + 1 - first)
|
|
|
|
# To keep things simple, we use glibc utf8_gen.py as-is.  It only outputs to a
# file named UTF-8, which is not configurable.  Then we parse this into the
# form we want it.
status = os.system("from_glibc/utf8_gen.py --unicode_version %s"
                   % unicode_version)
if status != 0:
    # Do not go on to parse a missing or stale UTF-8 file if the generator
    # did not run successfully.
    print("error: from_glibc/utf8_gen.py failed with status %d" % status,
          file=sys.stderr)
    sys.exit(1)

# Only the lines between a "WIDTH" line and the following "END WIDTH" line
# are handed to process_width; everything else in the file is skipped.
processing = False
with open("UTF-8", "r") as charmap:
    for line in charmap:
        if processing:
            if line == "END WIDTH\n":
                processing = False
            else:
                try:
                    process_width(line)
                except (ValueError, IndexError) as e:
                    # The exception must be bound to a name here; the
                    # warning below prints it.  LINE still carries its own
                    # newline, hence end="".
                    print(e, "warning: ignored unexpected line: %s" % line,
                          file=sys.stderr, end="")
        elif line == "WIDTH\n":
            processing = True
|
|
|
|
# All bytes < 256 we treat as width 1.  The slice end is exclusive, so it
# must be 256 for codepoint 255 to be included.
widths[0:256] = [1] * 256
|
|
|
|
# Condense the per-codepoint list into contiguous ranges.  Each entry of
# all_ranges is [last codepoint of the run, width shared by the run].  The
# scan starts as if a width-1 run ending at -1 were already open, so an
# initial stretch of width-1 codepoints simply extends it.
#
# Note that a run is only recorded when a codepoint with a different width
# is met, so the run still open when the loop finishes is not added to
# all_ranges.
# NOTE(review): presumably the consumer of the generated tables supplies
# the width for codepoints past the last listed range end -- confirm.
range_end, range_width = -1, 1
all_ranges = []
for codepoint, width in enumerate(widths):
    if width != range_width:
        # The width changed: close the previous run and open a new one.
        all_ranges.append([range_end, range_width])
        range_width = width
    range_end = codepoint
|
|
|
|
# Output the arrays for generated_cpp_wcwidth.h on standard output: first a
# comment naming the generator and the Unicode version, then two parallel
# tables built from all_ranges (range ends, and the width of each range).
print("/* Generated by contrib/unicode/gen_wcwidth.py, with the help of glibc's")
print(" utf8_gen.py, using version %s of the Unicode standard. */"
      % unicode_version)

# Range ends, eight per output line.
print("\nstatic const cppchar_t wcwidth_range_ends[] = {", end="")
for index, entry in enumerate(all_ranges):
    # Start a fresh line before every eighth entry, otherwise just a space.
    lead = " " if index % 8 else "\n "
    print("%s0x%x," % (lead, entry[0]), end="")
print("\n};\n")

# Widths, twenty-four per output line, in the same order as the range ends.
print("static const unsigned char wcwidth_widths[] = {", end="")
for index, entry in enumerate(all_ranges):
    lead = " " if index % 24 else "\n "
    print("%s%d," % (lead, entry[1]), end="")
print("\n};")
|