Attachment 875923 Details for Bug 917618 – ICU_74.1.patch

[patch] ICU_74.1.patch

ICU_74.1.patch (text/plain), 34.61 KB, created by jospezial on 2023-11-29 17:27:47 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: jospezial

Created: 2023-11-29 17:27:47 UTC

Size: 34.61 KB

patch

obsolete

>From 6a2b1d989cbc0bc13d3e0905ca96cdcc2ccbaea7 Mon Sep 17 00:00:00 2001
>From: Taichi Haradaguchi <20001722@ymail.ne.jp>
>Date: Tue, 31 Oct 2023 19:46:23 +0900
>Subject: [PATCH] tdf#158108: Update to ICU 74.1
>
>https://icu.unicode.org/download/74
>
>Unicode 15.1
>https://blog.unicode.org/2023/09/announcing-unicode-standard-version-151.html
>
>CLDR 44
>https://cldr.unicode.org/index/downloads/cldr-44
>
>New Unicode blocks:
>UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I
>
>Update "i18npool/source/breakiterator/data/line.txt" and "i18npool/source/breakiterator/data/sent.txt".
>These were backported from <https://github.com/unicode-org/icu/tree/main/icu4c/source/data/brkitr/rules>.
>
>Change-Id: Ic9196e10138663d07235f5ebd9cc4bf3a9750824
>---
>
>diff --git a/configure.ac b/configure.ac
>index e41223f..ff72549 100644
>--- a/configure.ac
>+++ b/configure.ac
>@@ -10803,8 +10803,8 @@
> dnl ===================================================================
> dnl Check for system icu
> dnl ===================================================================
>-ICU_MAJOR=73
>-ICU_MINOR=2
>+ICU_MAJOR=74
>+ICU_MINOR=1
> ICU_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/icu/source/i18n -I${WORKDIR}/UnpackedTarball/icu/source/common"
> ICU_LIBS_internal="-L${WORKDIR}/UnpackedTarball/icu/source/lib"
> libo_CHECK_SYSTEM_MODULE([icu],[ICU],[icu-i18n >= 66])
>diff --git a/download.lst b/download.lst
>index 60ac664..faccfc7 100644
>--- a/download.lst
>+++ b/download.lst
>@@ -312,10 +312,10 @@
> # three static lines
> # so that git cherry-pick
> # will not run into conflicts
>-ICU_SHA256SUM := 818a80712ed3caacd9b652305e01afc7fa167e6f2e94996da44b90c2ab604ce1
>-ICU_TARBALL := icu4c-73_2-src.tgz
>-ICU_DATA_SHA256SUM := ca1ee076163b438461e484421a7679fc33a64cd0a54f9d4b401893fa1eb42701
>-ICU_DATA_TARBALL := icu4c-73_2-data.zip
>+ICU_SHA256SUM := 86ce8e60681972e60e4dcb2490c697463fcec60dd400a5f9bffba26d0b52b8d0
>+ICU_TARBALL := icu4c-74_1-src.tgz
>+ICU_DATA_SHA256SUM := 67d5ab39c5187e1dd0fed60a3fe52794dce9784b4c045cb85e19f5d317fd783f
>+ICU_DATA_TARBALL := icu4c-74_1-data.zip
> # three static lines
> # so that git cherry-pick
> # will not run into conflicts
>diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
>index 6059140..db8ac50 100644
>--- a/external/icu/icu4c-khmerbreakengine.patch.1
>+++ b/external/icu/icu4c-khmerbreakengine.patch.1
>@@ -796,7 +796,6 @@
>              if (wordCount < limit) {
>                  if (values != nullptr) {
>                      values[wordCount] = bt.getValue();
>-
> diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
> --- icu.org/source/common/dictionarydata.h	2023-06-14 06:23:55.000000000 +0900
> +++ icu/source/common/dictionarydata.h	2023-06-26 17:43:53.097724900 +0900
>diff --git a/i18npool/source/breakiterator/data/line.txt b/i18npool/source/breakiterator/data/line.txt
>index ff3f3ea..538e386 100644
>--- a/i18npool/source/breakiterator/data/line.txt
>+++ b/i18npool/source/breakiterator/data/line.txt
>@@ -1,177 +1,112 @@
>-# Copyright (c) 2002-2006  International Business Machines Corporation and
>+# Copyright (C) 2016 and later: Unicode, Inc. and others.
>+# License & terms of use: http://www.unicode.org/copyright.html
>+# Copyright (c) 2002-2016  International Business Machines Corporation and
> # others. All Rights Reserved.
> #
> #  file:  line.txt
> #
> #         Line Breaking Rules
>-#         Implement default line breaking as defined by Unicode Standard Annex #14 version 5.0.0
>-#         http://www.unicode.org/reports/tr14/
>-
>-
>+#         Implement default line breaking as defined by
>+#         Unicode Standard Annex #14 (https://www.unicode.org/reports/tr14/)
>+#         for Unicode 14.0, with the following modification:
>+#
>+#         Boundaries between hyphens and following letters are suppressed when
>+#         there is a boundary preceding the hyphen. See rule 20.9
>+#
>+#         This corresponds to CSS line-break=strict (BCP47 -u-lb-strict).
>+#         It sets characters of class CJ to behave like NS.
> 
> #
> #  Character Classes defined by TR 14.
> #
> 
> !!chain;
>-!!LBCMNoChain;
>-
>-
>-!!lookAheadHardBreak;
>-#
>-#  !!lookAheadHardBreak    Described here because it is (as yet) undocumented elsewhere
>-#                          and only used for the line break rules.
>-#
>-#           It is used in the implementation of the incredibly annoying rule LB 10
>-#           which says to treat any combining mark that is not attached to a base
>-#           character as if it were of class AL  (alphabetic).
>-#
>-#           The problem occurs in the reverse rules.
>-#
>-#           Consider a sequence like, with correct breaks as shown
>-#               LF  ID  CM  AL  AL
>-#                  ^       ^       ^
>-#           Then consider the sequence without the initial ID (ideographic)
>-#                 LF  CM  AL  AL
>-#                    ^           ^
>-#           Our CM, which in the first example was attached to the ideograph,
>-#           is now unattached, becomes an alpha, and joins in with the other
>-#           alphas.
>-#
>-#           When iterating forwards, these sequences do not present any problems
>-#           When iterating backwards, we need to look ahead when encountering
>-#           a CM to see whether it attaches to something further on or not.
>-#           (Look-ahead in a reverse rule is looking towards the start)
>-#
>-#           If the CM is unattached, we need to force a break.
>-#
>-#           !!lookAheadHardBreak forces the run time state machine to
>-#           stop immediately when a look ahead rule ( '/' operator) matches,
>-#           and set the match position to that of the look-ahead operator,
>-#           no matter what other rules may be in play at the time.
>-#
>-#           See rule LB 19 for an example.
>-#
>+!!quoted_literals_only;
> 
> $AI = [:LineBreak =  Ambiguous:];
>-$DG = \u00B0;
>-$AL = [[:LineBreak =  Alphabetic:] $DG];
>+$AK = [:LineBreak =  Aksara:];
>+$AL = [:LineBreak =  Alphabetic:];
>+$AP = [:LineBreak =  Aksara_Prebase:];
>+$AS = [:LineBreak =  Aksara_Start:];
> $BA = [:LineBreak =  Break_After:];
>+$HH = [\u2010];     # \u2010 is HYPHEN, default line break is BA.
> $BB = [:LineBreak =  Break_Before:];
> $BK = [:LineBreak =  Mandatory_Break:];
> $B2 = [:LineBreak =  Break_Both:];
> $CB = [:LineBreak =  Contingent_Break:];
> $CJ = [:LineBreak =  Conditional_Japanese_Starter:];
>-$CL = [[:LineBreak =  Close_Punctuation:] [:LineBreak = Close_Parenthesis:]]; # tdf#31271
>-$CM = [:LineBreak =  Combining_Mark:];
>+$CL = [:LineBreak =  Close_Punctuation:];
>+# $CM = [:LineBreak =  Combining_Mark:];
>+$CP = [:LineBreak =  Close_Parenthesis:];
> $CR = [:LineBreak =  Carriage_Return:];
>+$EB = [:LineBreak =  EB:];
>+$EM = [:LineBreak =  EM:];
> $EX = [:LineBreak =  Exclamation:];
> $GL = [:LineBreak =  Glue:];
> $HL = [:LineBreak =  Hebrew_Letter:];
> $HY = [:LineBreak =  Hyphen:];
> $H2 = [:LineBreak =  H2:];
> $H3 = [:LineBreak =  H3:];
>-$ID = [[:LineBreak =  Ideographic:] - [\ufe30]];
>-$IN = [:LineBreak =  Inseparable:];
>-$IS = [[:LineBreak =  Infix_Numeric:] [\ufe30]];
>+$ID = [:LineBreak =  Ideographic:];
>+$IN = [:LineBreak =  Inseperable:];
>+$IS = [:LineBreak =  Infix_Numeric:];
> $JL = [:LineBreak =  JL:];
> $JV = [:LineBreak =  JV:];
> $JT = [:LineBreak =  JT:];
> $LF = [:LineBreak =  Line_Feed:];
> $NL = [:LineBreak =  Next_Line:];
>+# NS includes CJ for CSS strict line breaking.
> $NS = [[:LineBreak =  Nonstarter:] $CJ];
> $NU = [:LineBreak =  Numeric:];
>-$OP = [[:LineBreak =  Open_Punctuation:] - $DG];
>+$OP = [:LineBreak =  Open_Punctuation:];
> $PO = [:LineBreak =  Postfix_Numeric:];
>-$BS = \u005C;
>-$PR = [[:LineBreak =  Prefix_Numeric:] - $BS];
>+$PR = [:LineBreak =  Prefix_Numeric:];
> $QU = [:LineBreak =  Quotation:];
>+$RI = [:LineBreak =  Regional_Indicator:];
> $SA = [:LineBreak =  Complex_Context:];
> $SG = [:LineBreak =  Surrogate:];
> $SP = [:LineBreak =  Space:];
>-$SY = [[:LineBreak =  Break_Symbols:] $BS];
>+$SY = [:LineBreak =  Break_Symbols:];
>+$VF = [:LineBreak =  Virama_Final:];
>+$VI = [:LineBreak =  Virama:];
> $WJ = [:LineBreak =  Word_Joiner:];
> $XX = [:LineBreak =  Unknown:];
> $ZW = [:LineBreak =  ZWSpace:];
>+$ZWJ = [:LineBreak = ZWJ:];
>+
>+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
>+# without a formal name. Because ICU rules require multiple uses of the expressions,
>+# give them a single definition with a name
>+
>+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
>+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
>+
>+$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
>+
>+# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
>+#         list it in the numerous rules that use CM.
>+# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
>+
>+$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
>+$CMX = [[$CM] - [$ZWJ]];
> 
> #   Dictionary character set, for triggering language-based break engines. Currently
>-#   limited to LineBreak=Complex_Context. Note that this set only works in Unicode
>-#   5.0 or later as the definition of Complex_Context was corrected to include all
>-#   characters requiring dictionary break.
>+#   limited to LineBreak=Complex_Context (SA).
> 
>-$dictionary = [:LineBreak = Complex_Context:];
>+$dictionary = [$SA];
> 
> #
> #  Rule LB1.  By default, treat AI  (characters with ambiguous east Asian width),
>-#                               SA  (South East Asian: Thai, Lao, Khmer)
>+#                               SA  (Dictionary chars, excluding Mn and Mc)
> #                               SG  (Unpaired Surrogates)
> #                               XX  (Unknown, unassigned)
> #                         as $AL  (Alphabetic)
> #
>-$ALPlus = [$AL $AI $SA $SG $XX];
>+$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]];
> 
>-#
>-#  Combining Marks.   X $CM*  behaves as if it were X.  Rule LB6.
>-#
>-$ALcm = $ALPlus $CM*;
>-$BAcm = $BA $CM*;
>-$BBcm = $BB $CM*;
>-$B2cm = $B2 $CM*;
>-$CLcm = $CL $CM*;
>-$EXcm = $EX $CM*;
>-$GLcm = $GL $CM*;
>-$HLcm = $HL $CM*;
>-$HYcm = $HY $CM*;
>-$H2cm = $H2 $CM*;
>-$H3cm = $H3 $CM*;
>-$IDcm = $ID $CM*;
>-$INcm = $IN $CM*;
>-$IScm = $IS $CM*;
>-$JLcm = $JL $CM*;
>-$JVcm = $JV $CM*;
>-$JTcm = $JT $CM*;
>-$NScm = $NS $CM*;
>-$NUcm = $NU $CM*;
>-$OPcm = $OP $CM*;
>-$POcm = $PO $CM*;
>-$PRcm = $PR $CM*;
>-$QUcm = $QU $CM*;
>-$SYcm = $SY $CM*;
>-$WJcm = $WJ $CM*;
> 
> ## -------------------------------------------------
> 
>-!!forward;
>-
>-#
>-#  Each class of character can stand by itself as an unbroken token, with trailing combining stuff
>-#
>-$ALPlus $CM+;
>-$BA $CM+;
>-$BB $CM+;
>-$B2 $CM+;
>-$CL $CM+;
>-$EX $CM+;
>-$GL $CM+;
>-$HL $CM+;
>-$HY $CM+;
>-$H2 $CM+;
>-$H3 $CM+;
>-$ID $CM+;
>-$IN $CM+;
>-$IS $CM+;
>-$JL $CM+;
>-$JV $CM+;
>-$JT $CM+;
>-$NS $CM+;
>-$NU $CM+;
>-$OP $CM+;
>-$PO $CM+;
>-$PR $CM+;
>-$QU $CM+;
>-$SY $CM+;
>-$WJ $CM+;
>-
> #
> # CAN_CM  is the set of characters that may combine with CM combining chars.
> #         Note that Linebreak UAX 14's concept of a combining char and the rules
>@@ -186,19 +121,15 @@
> #
> # AL_FOLLOW  set of chars that can unconditionally follow an AL
> #            Needed in rules where stand-alone $CM s are treated as AL.
>-#            Chaining is disabled with CM because it causes other failures,
>-#            so for this one case we need to manually list out longer sequences.
> #
>-$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];
>-$AL_FOLLOW_CM   = [$CL $EX $HL $IS $SY $WJ $GL $QU $BA $HY $NS $IN $NU $ALPlus $OP];
>-$AL_FOLLOW      = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];
>+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
> 
> 
> #
> #  Rule LB 4, 5    Mandatory (Hard) breaks.
> #
> $LB4Breaks    = [$BK $CR $LF $NL];
>-$LB4NonBreaks = [^$BK $CR $LF $NL];
>+$LB4NonBreaks = [^$BK $CR $LF $NL $CM];
> $CR $LF {100};
> 
> #
>@@ -206,91 +137,134 @@
> #
> $LB4NonBreaks?  $LB4Breaks {100};    # LB 5  do not break before hard breaks.
> $CAN_CM $CM*    $LB4Breaks {100};
>-$CM+            $LB4Breaks {100};
>+^$CM+           $LB4Breaks {100};
> 
> # LB 7         x SP
> #              x ZW
> $LB4NonBreaks [$SP $ZW];
> $CAN_CM $CM*  [$SP $ZW];
>-$CM+          [$SP $ZW];
>+^$CM+         [$SP $ZW];
> 
> #
> # LB 8         Break after zero width space
>+#              ZW SP* ÷
> #
> $LB8Breaks    = [$LB4Breaks $ZW];
> $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
>+$ZW $SP* / [^$SP $ZW $LB4Breaks];
> 
>+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
>+#
>+$ZWJ [^$CM];
> 
>-# LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL 
>-#                                $CM not covered by the above needs to behave like $AL   
>+# LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
>+#                                $CM not covered by the above needs to behave like $AL
> #                                See definition of $CAN_CM.
> 
> $CAN_CM $CM+;                   #  Stick together any combining sequences that don't match other rules.
>-$CM+;
>+^$CM+;
> 
> #
> # LB 11  Do not break before or after WORD JOINER & related characters.
> #
>-$CAN_CM $CM*  $WJcm;
>-$LB8NonBreaks $WJcm;
>-$CM+          $WJcm;
>+$CAN_CM $CM*  $WJ;
>+$LB8NonBreaks $WJ;
>+^$CM+         $WJ;
> 
>-$WJcm [^$CAN_CM];
>-$WJcm $CAN_CM $CM*;
>+$WJ $CM* .;
> 
> #
>-# LB 12  Do not break before or after NBSP and related characters.
>-#
>-#         (!SP) x GL
>-[$LB8NonBreaks-$SP] $CM* $GLcm;
>-$CM+               $GLcm;
>-
>+# LB 12  Do not break after NBSP and related characters.
> #         GL  x
>-$GLcm ($LB8Breaks | $SP);
>-$GLcm [$LB8NonBreaks-$SP] $CM*;     # Don't let a combining mark go onto $CR, $BK, etc.
>-                              #  TODO:  I don't think we need this rule.
>-                              #         All but $CM will chain off of preceding rule.
>-                              #         $GLcm will pick up the CM case by itself.
>-
>-
>-
>+#
>+$GL $CM* .;
> 
> #
>-# LB 13   Don't break before ']' or '!' or ';' or '/', even after spaces.
>+# LB 12a  Do not break before NBSP and related characters ...
>+#            [^SP BA HY] x GL
>+#
>+[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GL;
>+^$CM+ $GL;
>+
>+
>+
>+
>+# LB 13   Don't break before ']' or '!' or '/', even after spaces.
> #
> $LB8NonBreaks $CL;
> $CAN_CM $CM*  $CL;
>-$CM+          $CL;              # by rule 10, stand-alone CM behaves as AL
>+^$CM+         $CL;              # by rule 10, stand-alone CM behaves as AL
>+
>+$LB8NonBreaks $CP;
>+$CAN_CM $CM*  $CP;
>+^$CM+         $CP;              # by rule 10, stand-alone CM behaves as AL
> 
> $LB8NonBreaks $EX;
> $CAN_CM $CM*  $EX;
>-$CM+          $EX;              # by rule 10, stand-alone CM behaves as AL
>-
>-$LB8NonBreaks $IS;
>-$CAN_CM $CM*  $IS;
>-$CM+          $IS;              # by rule 10, stand-alone CM behaves as AL
>+^$CM+         $EX;              # by rule 10, stand-alone CM behaves as AL
> 
> $LB8NonBreaks $SY;
> $CAN_CM $CM*  $SY;
>-$CM+          $SY;              # by rule 10, stand-alone CM behaves as AL
>+^$CM+         $SY;              # by rule 10, stand-alone CM behaves as AL
> 
> 
> #
>-# LB 14  Do not break after OP, even after spaced
>+# LB 14  Do not break after OP, even after spaces
>+#        Note subtle interaction with "SP IS /" rules in LB14a.
>+#        This rule consumes the SP, chaining happens on the IS, effectively overriding the  SP IS rules,
>+#        which is the desired behavior.
> #
>-$OPcm $SP* $CAN_CM $CM*;
>-$OPcm $SP* $CANT_CM;
>+$OP $CM* $SP* .;
> 
>-$OPcm $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
>+$OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
>+                                   # by rule 8, CM following a SP is stand-alone.
> 
>-# LB 15
>-# $QUcm $SP* $OPcm;
>+
>+# LB 15a
>+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ .;
>+($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?;
>+^([\p{Pi} & $QU] $CM* $SP*)+ .;
>+^([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?;
>+
>+# LB 15b
>+$LB8NonBreaks [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}];
>+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}];
>+^$CM+  [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}];
>+
>+# Messy interaction: manually chain between LB 15b and LB 15a on Pf Pi.
>+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .;
>+$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?;
>+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .;
>+$CAN_CM $CM*  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?;
>+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .;
>+^$CM+  [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?;
>+
>+
>+# LB 15c Force a break before start of a number with a leading decimal pt, e.g. " .23"
>+#        Note: would be simpler to express as "$SP / $IS $CM* $NU;", but ICU rules have limitations.
>+#        See issue ICU-20303
>+
>+
>+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
>+$SP $IS           / [^ $CanFollowIS $NU $CM];
>+$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];
>+
>+#
>+# LB 15d Do not break before numeric separators (IS), even after spaces.
>+
>+[$LB8NonBreaks - $SP] $IS;
>+$SP $IS $CM* [$CanFollowIS {eof}];
>+$SP $IS $CM* $ZWJ [^$CM $NU];
>+
>+$CAN_CM $CM*  $IS;
>+^$CM+         $IS;              # by rule 10, stand-alone CM behaves as AL
>+
> 
> # LB 16
>-$CLcm $SP* $NScm;
>+($CL | $CP) $CM* $SP* $NS;
> 
> # LB 17
>-$B2cm $SP* $B2cm;
>+$B2 $CM* $SP* $B2;
> 
> #
> # LB 18  Break after spaces.
>@@ -301,347 +275,115 @@
> 
> # LB 19
> #         x QU
>-$LB18NonBreaks $CM* $QUcm;
>-$CM+                $QUcm;
>+$LB18NonBreaks $CM* $QU;
>+^$CM+               $QU;
> 
> #         QU  x
>-$QUcm .?;
>-$QUcm $LB18NonBreaks $CM*;    # Don't let a combining mark go onto $CR, $BK, etc.
>-                              #  TODO:  I don't think this rule is needed.
>-
>+$QU $CM* .;
> 
> # LB 20
> #        <break>  $CB
> #        $CB   <break>
>-
>+#
> $LB20NonBreaks = [$LB18NonBreaks - $CB];
> 
>+# LB 20.09    Don't break between Hyphens and Letters when there is a break preceding the hyphen.
>+#             Originally added as a Finnish tailoring, now promoted to default ICU behavior.
>+#             Note: this is not default UAX-14 behaviour. See issue ICU-8151.
>+#
>+^($HY | $HH) $CM* $ALPlus;
>+
> # LB 21        x   (BA | HY | NS)
> #           BB x
> #
>-$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); 
>+$LB20NonBreaks $CM* ($BA | $HY | $NS);
> 
>-$BBcm [^$CB];                                  #  $BB  x
>-$BBcm $LB20NonBreaks $CM*;
>+
>+^$CM+ ($BA | $HY | $NS);
>+
>+$BB $CM* [^$CB];                                  #  $BB  x
>+$BB $CM* $LB20NonBreaks;
> 
> # LB 21a Don't break after Hebrew + Hyphen
> #   HL (HY | BA) x
>-#  
>-$HLcm ($HYcm | $BAcm) [^$CB]?;
>+#
>+$HL $CM* ($HY | $BA) $CM* [^$CB]?;
> 
>-# LB 22
>-($ALcm | $HLcm) $INcm;
>-$CM+     $INcm;     #  by rule 10, any otherwise unattached CM behaves as AL
>-$IDcm    $INcm;
>-$INcm    $INcm;
>-$NUcm    $INcm;
>+# LB 21b (forward) Don't break between SY and HL
>+# (break between HL and SY already disallowed by LB 13 above)
>+$SY $CM* $HL;
>+
>+# LB 22  Do not break before ellipses
>+#
>+$LB20NonBreaks $CM*    $IN;
>+^$CM+ $IN;
> 
> 
>-# $LB 23
>-$IDcm  $POcm;
>-$ALcm  $NUcm;       # includes $LB19
>-$HLcm  $NUcm;
>-$CM+   $NUcm;       # Rule 10, any otherwise unattached CM behaves as AL
>-$NUcm  $ALcm;
>-$NUcm  $HLcm;
>+# LB 23
>+#
>+($ALPlus | $HL) $CM* $NU;
>+^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
>+$NU $CM* ($ALPlus | $HL);
>+
>+# LB 23a
>+#
>+$PR $CM* ($ID | $EB | $EM);
>+($ID | $EB | $EM) $CM*  $PO;
>+
> 
> #
> # LB 24
> #
>-$PRcm $IDcm;
>-$ALcm $PRcm;
>-$PRcm ($ALcm | $HLcm);
>-$POcm ($ALcm | $HLcm);
>+($PR | $PO) $CM* ($ALPlus | $HL);
>+($ALPlus | $HL) $CM* ($PR | $PO);
>+^$CM+ ($PR | $PO);       # Rule 10, any otherwise unattached CM behaves as AL
> 
> #
> # LB 25   Numbers.
> #
>-($PRcm | $POcm)? ($OPcm)? $NUcm ($NUcm | $SYcm | $IScm)* $CLcm? ($PRcm | $POcm)?;
>+(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? ($IS $CM*)? $NU ($CM* ($NU | $SY | $IS))*
>+    ($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
> 
> # LB 26  Do not break a Korean syllable
> #
>-$JLcm ($JLcm | $JVcm | $H2cm | $H3cm);
>-($JVcm | $H2cm) ($JVcm | $JTcm);
>-($JTcm | $H3cm) $JTcm;
>+$JL $CM* ($JL | $JV | $H2 | $H3);
>+($JV | $H2) $CM* ($JV | $JT);
>+($JT | $H3) $CM* $JT;
> 
> # LB 27  Treat korean Syllable Block the same as ID  (don't break it)
>-($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;
>-($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;
>-$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);
>+($JL | $JV | $JT | $H2 | $H3) $CM* $PO;
>+$PR $CM* ($JL | $JV | $JT | $H2 | $H3);
> 
> 
> # LB 28   Do not break between alphabetics
> #
>-($ALcm | $HLcm) ($ALcm | $HLcm);
>-$CM+ ($ALcm | $HLcm);      # The $CM+ is from rule 10, an unattached CM is treated as AL
>+($ALPlus | $HL) $CM* ($ALPlus | $HL);
>+^$CM+ ($ALPlus | $HL);      # The $CM+ is from rule 10, an unattached CM is treated as AL
>+
>+#LB 28a  Do not break Orthographic syllables
>+($AP $CM*)? ($AS | $AK | [\u25CC] ) ($CM* $VI $CM* ($AK | [\u25CC] ))* ($CM* $VI | (($CM* ($AS | $AK | [\u25CC] ) )? $CM* $VF))?;
> 
> # LB 29
>-$IScm ($ALcm | $NUcm);
>-
>-#
>-# Rule 30   Do not break between letters, numbers or ordinary symbols
>-#           and opening or closing punctuation
>-#
>-($ALcm | $HLcm | $NUcm) $OPcm;
>-$CM+ $OPcm;
>-$CLcm ($ALcm | $HLcm | $NUcm);
>-
>-#
>-#  Reverse Rules.
>-#
>-## -------------------------------------------------
>-
>-!!reverse;
>-
>-$CM+ $ALPlus;
>-$CM+ $BA;
>-$CM+ $BB;
>-$CM+ $B2;
>-$CM+ $CL;
>-$CM+ $EX;
>-$CM+ $GL;
>-$CM+ $HL;
>-$CM+ $HY;
>-$CM+ $H2;
>-$CM+ $H3;
>-$CM+ $ID;
>-$CM+ $IN;
>-$CM+ $IS;
>-$CM+ $JL;
>-$CM+ $JV;
>-$CM+ $JT;
>-$CM+ $NS;
>-$CM+ $NU;
>-$CM+ $OP;
>-$CM+ $PO;
>-$CM+ $PR;
>-$CM+ $QU;
>-$CM+ $SY;
>-$CM+ $WJ;
>-$CM+;
>-
>-
>-#
>-#  Sequences of the form  (shown forwards)
>-#      [CANT_CM]  <break>  [CM]  [whatever]
>-#  The CM needs to behave as an AL
>-#
>-$AL_FOLLOW $CM+ / (
>-          [$BK $CR $LF $NL $ZW {eof}] |
>-          $SP+ $CM+ $SP |
>-          $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to suppress this break.
>-                                               #  LB14 says    OP SP* x .        
>-                                               #    becomes    OP SP* x AL
>-                                               #    becomes    OP SP* x CM+ AL_FOLLOW
>-                                               #
>-                                               # Further note:  the $AL in [$AL {eof}] is only to work around
>-                                               #                a rule compiler bug which complains about
>-                                               #                empty sets otherwise.
>-          
>-#
>-#  Sequences of the form  (shown forwards)
>-#      [CANT_CM]  <break> [CM]  <break>  [PR]
>-#  The CM needs to behave as an AL
>-#  This rule is concerned about getting the second of the two <breaks> in place.
>-#
>-
>-[$PR   ] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];
>-
>-
>-
>-# LB 4, 5, 5
>-
>-$LB4Breaks [$LB4NonBreaks-$CM];
>-$LB4Breaks $CM+ $CAN_CM;
>-$LF $CR;
>-
>-
>-# LB 7         x SP
>-#              x ZW
>-[$SP $ZW] [$LB4NonBreaks-$CM];
>-[$SP $ZW] $CM+ $CAN_CM;
>-
>-# LB 8 Break after zero width space
>-
>-
>-# LB 9,10  Combining marks.
>-#    X   $CM needs to behave like X, where X is not $SP or controls.
>-#    $CM not covered by the above needs to behave like $AL
>-# Stick together any combining sequences that don't match other rules.
>-$CM+ $CAN_CM;
>-
>-
>-# LB 11
>-$CM* $WJ $CM* $CAN_CM;
>-$CM* $WJ      [$LB8NonBreaks-$CM];
>-
>-     $CANT_CM $CM* $WJ;
>-$CM* $CAN_CM  $CM* $WJ;
>-
>-# LB 12
>-#         x GL
>-#
>-$CM* $GL $CM* [$LB8NonBreaks-$CM-$SP];
>-
>-#
>-#     GL  x
>-#
>-$CANT_CM $CM* $GL;
>-$CM* $CAN_CM $CM* $GL;
>-
>-
>-# LB 13
>-$CL $CM+ $CAN_CM;
>-$EX $CM+ $CAN_CM;
>-$IS $CM+ $CAN_CM;
>-$SY $CM+ $CAN_CM;
>-
>-$CL [$LB8NonBreaks-$CM];
>-$EX [$LB8NonBreaks-$CM];
>-$IS [$LB8NonBreaks-$CM];
>-$SY [$LB8NonBreaks-$CM];
>-
>-# Rule 13 & 14 taken together for an edge case.
>-#   Match this, shown forward
>-#     OP SP+  ($CM+ behaving as $AL) (CL | EX | IS | IY)
>-#   This really wants to chain at the $CM+ (which is acting as an $AL)
>-#   except for $CM chaining being disabled.
>-[$CL $EX $IS $SY] $CM+ $SP+ $CM* $OP;  
>-
>-# LB 14    OP SP* x
>-#
>-$CM* $CAN_CM    $SP* $CM* $OP;
>-     $CANT_CM   $SP* $CM* $OP;
>-$AL_FOLLOW? $CM+  $SP $SP* $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP* $CM* $OP
>-     
>-     $AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;
>-$CM* $AL_FOLLOW_CM   $CM+ $SP+ $CM* $OP;
>-$SY $CM $SP+ $OP;   # TODO:  Experiment.  Remove.
>-
>-
>-
>-# LB 15
>-# $CM* $OP $SP* $CM* $QU;
>-
>-# LB 16
>-$CM* $NS $SP* $CM* $CL;
>-
>-# LB 17
>-$CM* $B2 $SP* $CM* $B2;
>-
>-# LB 18  break after spaces
>-#        Nothing explicit needed here.
>-
>-
>-#
>-# LB 19
>-#
>-$CM* $QU $CM* $CAN_CM;                                #   . x QU
>-$CM* $QU      $LB18NonBreaks;
>-
>-
>-$CM* $CAN_CM  $CM* $QU;                               #   QU x .
>-     $CANT_CM $CM* $QU;
>-     
>-#
>-#  LB 20  Break before and after CB.
>-#         nothing needed here.
>-#
>-
>-# LB 21
>-$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
>-
>-$CM* [$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
>-[^$CB] $CM* $BB;                                      # 
>-
>-# LB21a
>-[^$CB] $CM* ($HY | $BA) $CM* $HL;
>-
>-# LB 22
>-$CM* $IN $CM* ($ALPlus | $HL);
>-$CM* $IN $CM* $ID;
>-$CM* $IN $CM* $IN;
>-$CM* $IN $CM* $NU;
>-
>-# LB 23
>-$CM* $PO $CM* $ID;
>-$CM* $NU $CM* ($ALPlus | $HL);
>-$CM* ($ALPlus | $HL) $CM* $NU;
>-
>-# LB 24
>-$CM* $ID $CM* $PR;
>-$CM* $PR $CM* $ALPlus;
>-$CM* ($ALPlus | $HL) $CM* $PR;
>-$CM* ($ALPlus | $HL) $CM* $PO;
>-
>-$CM* $ALPlus $CM* ($IS | $SY | $HY)+ / $SP;
>-$CM* $NU+ $CM* $HY+ / $SP;
>-
>-# LB 25
>-($CM* ($PR | $PO))? ($CM* $CL)? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP))? ($CM* ($PR | $PO))?;
>-
>-# LB 26
>-$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;
>-$CM* ($JT | $JV) $CM* ($H2 | $JV);
>-$CM* $JT $CM* ($H3 | $JT);
>-
>-# LB 27
>-$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
>-$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
>-$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
>-
>-# LB 28
>-$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL);
>-
>-# LB 29
>-$CM* ($NU | $ALPlus) $CM* $IS+ [^$SP];
>+$IS $CM* ($ALPlus | $HL);
> 
> # LB 30
>-$CM* $OP $CM* ($ALPlus | $HL | $NU);
>-$CM* ($ALPlus | $HL | $NU) $CM* ($CL | $SY)+ [^$SP];
>+($ALPlus | $HL | $NU) $CM* $OP30;
>+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
>+$CP30 $CM* ($ALPlus | $HL | $NU);
> 
>+# LB 30a  Do not break between regional indicators. Break after pairs of them.
>+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
>+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
>+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
>+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
>+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
>+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
>+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
> 
>-## -------------------------------------------------
>+# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
>+$EB $CM* $EM;
>+$ExtPictUnassigned $CM* $EM;
> 
>-!!safe_reverse;
>-
>-# LB 7
>-$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
>-$CM+ $SP / .;
>-
>-# LB 9
>-$SP+ $CM* $OP;
>-
>-# LB 10
>-$SP+ $CM* $QU;
>-
>-# LB 11
>-$SP+ $CM* $CL;
>-$SP+ $CM* $B2;
>-
>-# LB 21
>-$CM* ($HY | $BA) $CM* $HL;
>-
>-# LB 18
>-($CM* ($IS | $SY))+ $CM* $NU;
>-$CL $CM* ($NU | $IS | $SY);
>-
>-# For dictionary-based break
>-$dictionary $dictionary;
>-
>-## -------------------------------------------------
>-
>-!!safe_forward;
>-
>-# Skip forward over all character classes that are involved in
>-#   rules containing patterns with possibly more than one char
>-#   of context.
>-#
>-#  It might be slightly more efficient to have specific rules
>-#  instead of one generic one, but only if we could
>-#  turn off rule chaining.  We don't want to move more
>-#  than necessary.
>-#
>-[$CM $OP $QU $CL $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $B2 $PR $HY $BA $dictionary];
>-$dictionary $dictionary;
>-
>+# LB 31 Break everywhere else.
>+#       Match a single code point if no other rule applies.
>+.;
>diff --git a/i18npool/source/breakiterator/data/sent.txt b/i18npool/source/breakiterator/data/sent.txt
>index 7fada89..eb1224e 100644
>--- a/i18npool/source/breakiterator/data/sent.txt
>+++ b/i18npool/source/breakiterator/data/sent.txt
>@@ -1,43 +1,40 @@
>+# Copyright (C) 2016 and later: Unicode, Inc. and others.
>+# License & terms of use: http://www.unicode.org/copyright.html
> #
>-#   Copyright (C) 2002-2006, International Business Machines Corporation and others.
>+#   Copyright (C) 2002-2015, International Business Machines Corporation and others.
> #       All Rights Reserved.
> #
> #   file:  sent.txt
> #
> #   ICU Sentence Break Rules
> #      See Unicode Standard Annex #29.
>-#      These rules are based on SA 29 version 5.0.0
>-#      Includes post 5.0 changes to treat Japanese half width voicing marks
>-#        as Grapheme Extend.
>+#      These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
> #
> 
>-
>-$VoiceMarks   = [\uff9e\uff9f];
>-$Thai         = [:Script = Thai:];
>+!!quoted_literals_only;
> 
> #
> # Character categories as defined in TR 29
> #
>+$CR        = [\p{Sentence_Break = CR}];
>+$LF        = [\p{Sentence_Break = LF}];
>+$Extend    = [\p{Sentence_Break = Extend}];
> $Sep       = [\p{Sentence_Break = Sep}];
> $Format    = [\p{Sentence_Break = Format}];
> $Sp        = [\p{Sentence_Break = Sp}];
> $Lower     = [\p{Sentence_Break = Lower}];
> $Upper     = [\p{Sentence_Break = Upper}];
>-$OLetter   = [\p{Sentence_Break = OLetter}-$VoiceMarks];
>+$OLetter   = [\p{Sentence_Break = OLetter}];
> $Numeric   = [\p{Sentence_Break = Numeric}];
> $ATerm     = [\p{Sentence_Break = ATerm}];
>+$SContinue = [\p{Sentence_Break = SContinue}];
> $STerm     = [\p{Sentence_Break = STerm}];
> $Close     = [\p{Sentence_Break = Close}];
> 
> #
> # Define extended forms of the character classes,
>-#   incorporate grapheme cluster + format chars.
>-#   Rules 4 and 5.  
>-
>-
>-$CR         = \u000d;
>-$LF         = \u000a;
>-$Extend     = [[:Grapheme_Extend = TRUE:]$VoiceMarks];
>+#   incorporate trailing Extend or Format chars.
>+#   Rules 4 and 5.
> 
> $SpEx       = $Sp      ($Extend | $Format)*;
> $LowerEx    = $Lower   ($Extend | $Format)*;
>@@ -45,6 +42,7 @@
> $OLetterEx  = $OLetter ($Extend | $Format)*;
> $NumericEx  = $Numeric ($Extend | $Format)*;
> $ATermEx    = $ATerm   ($Extend | $Format)*;
>+$SContinueEx= $SContinue ($Extend | $Format)*;
> $STermEx    = $STerm   ($Extend | $Format)*;
> $CloseEx    = $Close   ($Extend | $Format)*;
> 
>@@ -52,77 +50,34 @@
> ## -------------------------------------------------
> 
> !!chain;
>-!!forward;
> 
> # Rule 3 - break after separators.  Keep CR/LF together.
> #
> $CR $LF;
> 
>-$LettersEx = [$OLetter $Upper $Lower $Numeric $Close $STerm] ($Extend | $Format)*;
>-$LettersEx* $Thai $LettersEx* ($ATermEx | $SpEx)*;
> 
> # Rule 4 - Break after $Sep.
> # Rule 5 - Ignore $Format and $Extend
> #
>-[^$Sep]? ($Extend | $Format)*;
>+[^$Sep $CR $LF]? ($Extend | $Format)*;
> 
> 
> # Rule 6
> $ATermEx $NumericEx;
> 
> # Rule 7
>-$UpperEx $ATermEx $UpperEx;
>+($UpperEx | $LowerEx) $ATermEx $UpperEx;
> 
> #Rule 8
>-#  Note:  follows errata for Unicode 5.0 boundary rules.
>-$NotLettersEx = [^$OLetter $Upper $Lower $Sep $ATerm $STerm] ($Extend | $Format)*;
>+$NotLettersEx = [^$OLetter $Upper $Lower $Sep $CR $LF $ATerm $STerm] ($Extend | $Format)*;
> $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower;
> 
> # Rule 8a
>-($STermEx | $ATermEx) $CloseEx* $SpEx* ($STermEx | $ATermEx);
>+($STermEx | $ATermEx) $CloseEx* $SpEx* ($SContinueEx | $STermEx | $ATermEx);
> 
> #Rule 9, 10, 11
>-($STermEx | $ATermEx) $CloseEx* $SpEx* $Sep?;
>+($STermEx | $ATermEx) $CloseEx* $SpEx* ($Sep | $CR | $LF)?;
> 
>-#Rule 12
>-[[^$STerm $ATerm $Close $Sp $Sep $Format $Extend $Thai]{bof}] ($Extend | $Format | $Close | $Sp)* [^$Thai];
>-[[^$STerm $ATerm $Close $Sp $Sep $Format $Extend]{bof}] ($Extend | $Format | $Close | $Sp)* ([$Sep{eof}] | $CR $LF){100};
>-
>-## -------------------------------------------------
>-
>-!!reverse;
>-
>-$SpEx_R       = ($Extend | $Format)* $Sp;
>-$ATermEx_R    = ($Extend | $Format)* $ATerm;
>-$STermEx_R    = ($Extend | $Format)* $STerm;
>-$CloseEx_R    = ($Extend | $Format)* $Close;
>-
>-#
>-#  Reverse rules.
>-#     For now, use the old style inexact reverse rules, which are easier
>-#     to write, but less efficient.
>-#     TODO:  exact reverse rules.  It appears that exact reverse rules
>-#            may require improving support for look-ahead breaks in the
>-#            builder.  Needs more investigation.
>-#
>-
>-[{bof}] (.? | $LF $CR) [^$Sep]* [$Sep {eof}] ($SpEx_R* $CloseEx_R* ($STermEx_R | $ATermEx_R))*;
>-#.*;
>-
>-# Explanation for this rule:
>-#
>-#    It needs to back over
>-#        The $Sep at which we probably begin
>-#        All of the non $Sep chars leading to the preceding $Sep
>-#        The preceding $Sep, which will be the second one that the rule matches.
>-#        Any immediately preceding STerm or ATerm sequences.  We need to see these
>-#              to get the correct rule status when moving forwards again.
>-#        
>-# [{bof}]           inhibit rule chaining.  Without this, rule would loop on itself and match
>-#                   the entire string.
>-#
>-# (.? | $LF $CR)    Match one $Sep instance.  Use .? rather than $Sep because position might be
>-#                   at the beginning of the string at this point, and we don't want to fail.
>-#                   Can only use {eof} once, and it is used later.
>-#
>-
>+#Rule 998
>+[[^$STerm $ATerm $Close $Sp $Sep $LF $CR $Format $Extend]{bof}] ($Extend | $Format | $Close | $Sp)* .;
>+[[^$STerm $ATerm $Close $Sp $Sep $LF $CR $Format $Extend]{bof}] ($Extend | $Format | $Close | $Sp)* ([$Sep $LF $CR {eof}] | $CR $LF){100};
>diff --git a/include/svx/strings.hrc b/include/svx/strings.hrc
>index 8dc2aa0..b294bdf 100644
>--- a/include/svx/strings.hrc
>+++ b/include/svx/strings.hrc
>@@ -1800,6 +1800,7 @@
> #define RID_SUBSETSTR_KAKTOVIK_NUMERALS                     NC_("RID_SUBSETMAP", "Kaktovik Numerals")
> #define RID_SUBSETSTR_KAWI                                  NC_("RID_SUBSETMAP", "Kawi")
> #define RID_SUBSETSTR_NAG_MUNDARI                           NC_("RID_SUBSETMAP", "Nag Mundari")
>+#define RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I    NC_("RID_SUBSETMAP", "CJK Unified Ideographs Extension I")
> 
> #define RID_SVXSTR_FRAMEDIR_LTR                             NC_("RID_SVXSTR_FRAMEDIR_LTR", "Left-to-right (LTR)")
> #define RID_SVXSTR_FRAMEDIR_RTL                             NC_("RID_SVXSTR_FRAMEDIR_RTL", "Right-to-left (RTL)")
>diff --git a/svx/source/dialog/charmap.cxx b/svx/source/dialog/charmap.cxx
>index dcb1205..a5c84a7 100644
>--- a/svx/source/dialog/charmap.cxx
>+++ b/svx/source/dialog/charmap.cxx
>@@ -1924,6 +1924,11 @@
>                     aAllSubsets.emplace_back( 0x1E4D0, 0x1E4FF, SvxResId(RID_SUBSETSTR_NAG_MUNDARI) );
>                     break;
> #endif
>+#if (U_ICU_VERSION_MAJOR_NUM >= 74)
>+                case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I:
>+                    aAllSubsets.emplace_back( 0x2EBF0, 0x2EE5F, SvxResId(RID_SUBSETSTR_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I) );
>+                    break;
>+#endif
>             }
> 
> #if OSL_DEBUG_LEVEL > 0 && !defined NDEBUG

Actions: View | Diff

Attachments on bug 917618: 875923