summary | log | tree | commit | diff | stats
path: root/chromium-use-fromUTF8-for-UnicodeString-construction.patch
blob: 0e0765a27a0fb02f8337680713a930fcf09e8428 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
From e58fa0ba66272c5f28828b15d06c7e42a9882b3b Mon Sep 17 00:00:00 2001
From: Jungshik Shin <jshin@chromium.org>
Date: Sat, 16 Dec 2017 04:19:27 +0000
Subject: [PATCH] Use fromUTF8() for UnicodeString construction from UTF-8

Chrome's copy of ICU is built with U_CHARSET_IS_UTF8=1 so that a |char *|
buffer is treated as UTF-8 when constructing a UnicodeString, regardless
of the default encoding of the current locale on Linux or the non-Unicode
code page on Windows.

However, some Linux distros do not set U_CHARSET_IS_UTF8=1 when building
ICU, and a Chromium build with system_icu crashes when Chromium is run in
a non-UTF-8 locale (e.g. 'C').

To make Chromium work in a non-UTF-8 locale (which is pretty rare these
days), use 'icu::UnicodeString::fromUTF8(StringPiece)' instead of
'icu::UnicodeString(const char*)'.

Bug: 772655
Test: components_unittests --gtest_filter=*IDN*
Test: Chromium built with system_icu does not crash in C locale.
Change-Id: I0daa284ec06b8e83814fc70eb8e9e5c96444ebfa
Reviewed-on: https://chromium-review.googlesource.com/831247
Reviewed-by: Peter Kasting <pkasting@chromium.org>
Commit-Queue: Jungshik Shin <jshin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#524586}
---
 components/url_formatter/idn_spoof_checker.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/components/url_formatter/idn_spoof_checker.cc b/components/url_formatter/idn_spoof_checker.cc
index a88c5e8f8331..aee748d8a4d5 100644
--- a/components/url_formatter/idn_spoof_checker.cc
+++ b/components/url_formatter/idn_spoof_checker.cc
@@ -110,8 +110,8 @@ IDNSpoofChecker::IDNSpoofChecker() {
 
   // These Cyrillic letters look like Latin. A domain label entirely made of
   // these letters is blocked as a simplified whole-script-spoofable.
-  cyrillic_letters_latin_alike_ =
-      icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status);
+  cyrillic_letters_latin_alike_ = icu::UnicodeSet(
+      icu::UnicodeString::fromUTF8("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status);
   cyrillic_letters_latin_alike_.freeze();
 
   cyrillic_letters_ =
@@ -141,8 +141,8 @@ IDNSpoofChecker::IDNSpoofChecker() {
   UParseError parse_error;
   diacritic_remover_.reset(icu::Transliterator::createFromRules(
       UNICODE_STRING_SIMPLE("DropAcc"),
-      icu::UnicodeString("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;"
-                         " ł > l; ø > o; đ > d;"),
+      icu::UnicodeString::fromUTF8("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;"
+                                   " ł > l; ø > o; đ > d;"),
       UTRANS_FORWARD, parse_error, status));
 
   // Supplement the Unicode confusable list by the following mapping.
@@ -158,7 +158,7 @@ IDNSpoofChecker::IDNSpoofChecker() {
   //   - U+0D1F (ട) => s
   extra_confusable_mapper_.reset(icu::Transliterator::createFromRules(
       UNICODE_STRING_SIMPLE("ExtraConf"),
-      icu::UnicodeString(
+      icu::UnicodeString::fromUTF8(
           "ӏ > l; [кĸκ] > k; п > n; [ƅь] > b; в > b; м > m; н > h; "
           "т > t; [шщ] > w; ട > s;"),
       UTRANS_FORWARD, parse_error, status));
-- 
2.15.1