Lines 1-177
Link Here
|
1 |
# Copyright (c) 2002-2006 International Business Machines Corporation and |
1 |
# Copyright (C) 2016 and later: Unicode, Inc. and others. |
|
|
2 |
# License & terms of use: http://www.unicode.org/copyright.html |
3 |
# Copyright (c) 2002-2016 International Business Machines Corporation and |
2 |
# others. All Rights Reserved. |
4 |
# others. All Rights Reserved. |
3 |
# |
5 |
# |
4 |
# file: line.txt |
6 |
# file: line.txt |
5 |
# |
7 |
# |
6 |
# Line Breaking Rules |
8 |
# Line Breaking Rules |
7 |
# Implement default line breaking as defined by Unicode Standard Annex #14 version 5.0.0 |
9 |
# Implement default line breaking as defined by |
8 |
# http://www.unicode.org/reports/tr14/ |
10 |
# Unicode Standard Annex #14 (https://www.unicode.org/reports/tr14/) |
9 |
|
11 |
# for Unicode 14.0, with the following modification: |
10 |
|
12 |
# |
|
|
13 |
# Boundaries between hyphens and following letters are suppressed when |
14 |
# there is a boundary preceding the hyphen. See rule 20.9 |
15 |
# |
16 |
# This corresponds to CSS line-break=strict (BCP47 -u-lb-strict). |
17 |
# It sets characters of class CJ to behave like NS. |
11 |
|
18 |
|
12 |
# |
19 |
# |
13 |
# Character Classes defined by TR 14. |
20 |
# Character Classes defined by TR 14. |
14 |
# |
21 |
# |
15 |
|
22 |
|
16 |
!!chain; |
23 |
!!chain; |
17 |
!!LBCMNoChain; |
24 |
!!quoted_literals_only; |
18 |
|
|
|
19 |
|
20 |
!!lookAheadHardBreak; |
21 |
# |
22 |
# !!lookAheadHardBreak Described here because it is (as yet) undocumented elsewhere |
23 |
# and only used for the line break rules. |
24 |
# |
25 |
# It is used in the implementation of the incredibly annoying rule LB 10 |
26 |
# which says to treat any combining mark that is not attached to a base |
27 |
# character as if it were of class AL (alphabetic). |
28 |
# |
29 |
# The problem occurs in the reverse rules. |
30 |
# |
31 |
# Consider a sequence like, with correct breaks as shown |
32 |
# LF ID CM AL AL |
33 |
# ^ ^ ^ |
34 |
# Then consider the sequence without the initial ID (ideographic) |
35 |
# LF CM AL AL |
36 |
# ^ ^ |
37 |
# Our CM, which in the first example was attached to the ideograph, |
38 |
# is now unattached, becomes an alpha, and joins in with the other |
39 |
# alphas. |
40 |
# |
41 |
# When iterating forwards, these sequences do not present any problems |
42 |
# When iterating backwards, we need to look ahead when encountering |
43 |
# a CM to see whether it attaches to something further on or not. |
44 |
# (Look-ahead in a reverse rule is looking towards the start) |
45 |
# |
46 |
# If the CM is unattached, we need to force a break. |
47 |
# |
48 |
# !!lookAheadHardBreak forces the run time state machine to |
49 |
# stop immediately when a look ahead rule ( '/' operator) matches, |
50 |
# and set the match position to that of the look-ahead operator, |
51 |
# no matter what other rules may be in play at the time. |
52 |
# |
53 |
# See rule LB 19 for an example. |
54 |
# |
55 |
|
25 |
|
56 |
$AI = [:LineBreak = Ambiguous:]; |
26 |
$AI = [:LineBreak = Ambiguous:]; |
57 |
$DG = \u00B0; |
27 |
$AK = [:LineBreak = Aksara:]; |
58 |
$AL = [[:LineBreak = Alphabetic:] $DG]; |
28 |
$AL = [:LineBreak = Alphabetic:]; |
|
|
29 |
$AP = [:LineBreak = Aksara_Prebase:]; |
30 |
$AS = [:LineBreak = Aksara_Start:]; |
59 |
$BA = [:LineBreak = Break_After:]; |
31 |
$BA = [:LineBreak = Break_After:]; |
|
|
32 |
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA. |
60 |
$BB = [:LineBreak = Break_Before:]; |
33 |
$BB = [:LineBreak = Break_Before:]; |
61 |
$BK = [:LineBreak = Mandatory_Break:]; |
34 |
$BK = [:LineBreak = Mandatory_Break:]; |
62 |
$B2 = [:LineBreak = Break_Both:]; |
35 |
$B2 = [:LineBreak = Break_Both:]; |
63 |
$CB = [:LineBreak = Contingent_Break:]; |
36 |
$CB = [:LineBreak = Contingent_Break:]; |
64 |
$CJ = [:LineBreak = Conditional_Japanese_Starter:]; |
37 |
$CJ = [:LineBreak = Conditional_Japanese_Starter:]; |
65 |
$CL = [[:LineBreak = Close_Punctuation:] [:LineBreak = Close_Parenthesis:]]; # tdf#31271 |
38 |
$CL = [:LineBreak = Close_Punctuation:]; |
66 |
$CM = [:LineBreak = Combining_Mark:]; |
39 |
# $CM = [:LineBreak = Combining_Mark:]; |
|
|
40 |
$CP = [:LineBreak = Close_Parenthesis:]; |
67 |
$CR = [:LineBreak = Carriage_Return:]; |
41 |
$CR = [:LineBreak = Carriage_Return:]; |
|
|
42 |
$EB = [:LineBreak = EB:]; |
43 |
$EM = [:LineBreak = EM:]; |
68 |
$EX = [:LineBreak = Exclamation:]; |
44 |
$EX = [:LineBreak = Exclamation:]; |
69 |
$GL = [:LineBreak = Glue:]; |
45 |
$GL = [:LineBreak = Glue:]; |
70 |
$HL = [:LineBreak = Hebrew_Letter:]; |
46 |
$HL = [:LineBreak = Hebrew_Letter:]; |
71 |
$HY = [:LineBreak = Hyphen:]; |
47 |
$HY = [:LineBreak = Hyphen:]; |
72 |
$H2 = [:LineBreak = H2:]; |
48 |
$H2 = [:LineBreak = H2:]; |
73 |
$H3 = [:LineBreak = H3:]; |
49 |
$H3 = [:LineBreak = H3:]; |
74 |
$ID = [[:LineBreak = Ideographic:] - [\ufe30]]; |
50 |
$ID = [:LineBreak = Ideographic:]; |
75 |
$IN = [:LineBreak = Inseparable:]; |
51 |
$IN = [:LineBreak = Inseperable:]; |
76 |
$IS = [[:LineBreak = Infix_Numeric:] [\ufe30]]; |
52 |
$IS = [:LineBreak = Infix_Numeric:]; |
77 |
$JL = [:LineBreak = JL:]; |
53 |
$JL = [:LineBreak = JL:]; |
78 |
$JV = [:LineBreak = JV:]; |
54 |
$JV = [:LineBreak = JV:]; |
79 |
$JT = [:LineBreak = JT:]; |
55 |
$JT = [:LineBreak = JT:]; |
80 |
$LF = [:LineBreak = Line_Feed:]; |
56 |
$LF = [:LineBreak = Line_Feed:]; |
81 |
$NL = [:LineBreak = Next_Line:]; |
57 |
$NL = [:LineBreak = Next_Line:]; |
|
|
58 |
# NS includes CJ for CSS strict line breaking. |
82 |
$NS = [[:LineBreak = Nonstarter:] $CJ]; |
59 |
$NS = [[:LineBreak = Nonstarter:] $CJ]; |
83 |
$NU = [:LineBreak = Numeric:]; |
60 |
$NU = [:LineBreak = Numeric:]; |
84 |
$OP = [[:LineBreak = Open_Punctuation:] - $DG]; |
61 |
$OP = [:LineBreak = Open_Punctuation:]; |
85 |
$PO = [:LineBreak = Postfix_Numeric:]; |
62 |
$PO = [:LineBreak = Postfix_Numeric:]; |
86 |
$BS = \u005C; |
63 |
$PR = [:LineBreak = Prefix_Numeric:]; |
87 |
$PR = [[:LineBreak = Prefix_Numeric:] - $BS]; |
|
|
88 |
$QU = [:LineBreak = Quotation:]; |
64 |
$QU = [:LineBreak = Quotation:]; |
|
|
65 |
$RI = [:LineBreak = Regional_Indicator:]; |
89 |
$SA = [:LineBreak = Complex_Context:]; |
66 |
$SA = [:LineBreak = Complex_Context:]; |
90 |
$SG = [:LineBreak = Surrogate:]; |
67 |
$SG = [:LineBreak = Surrogate:]; |
91 |
$SP = [:LineBreak = Space:]; |
68 |
$SP = [:LineBreak = Space:]; |
92 |
$SY = [[:LineBreak = Break_Symbols:] $BS]; |
69 |
$SY = [:LineBreak = Break_Symbols:]; |
|
|
70 |
$VF = [:LineBreak = Virama_Final:]; |
71 |
$VI = [:LineBreak = Virama:]; |
93 |
$WJ = [:LineBreak = Word_Joiner:]; |
72 |
$WJ = [:LineBreak = Word_Joiner:]; |
94 |
$XX = [:LineBreak = Unknown:]; |
73 |
$XX = [:LineBreak = Unknown:]; |
95 |
$ZW = [:LineBreak = ZWSpace:]; |
74 |
$ZW = [:LineBreak = ZWSpace:]; |
|
|
75 |
$ZWJ = [:LineBreak = ZWJ:]; |
76 |
|
77 |
# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14, |
78 |
# without a formal name. Because ICU rules require multiple uses of the expressions, |
79 |
# give them a single definition with a name |
80 |
|
81 |
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]]; |
82 |
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]]; |
83 |
|
84 |
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}]; |
85 |
|
86 |
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly |
87 |
# list it in the numerous rules that use CM. |
88 |
# By LB1, SA characters with general category of Mn or Mc also resolve to CM. |
89 |
|
90 |
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]]; |
91 |
$CMX = [[$CM] - [$ZWJ]]; |
96 |
|
92 |
|
97 |
# Dictionary character set, for triggering language-based break engines. Currently |
93 |
# Dictionary character set, for triggering language-based break engines. Currently |
98 |
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode |
94 |
# limited to LineBreak=Complex_Context (SA). |
99 |
# 5.0 or later as the definition of Complex_Context was corrected to include all |
|
|
100 |
# characters requiring dictionary break. |
101 |
|
95 |
|
102 |
$dictionary = [:LineBreak = Complex_Context:]; |
96 |
$dictionary = [$SA]; |
103 |
|
97 |
|
104 |
# |
98 |
# |
105 |
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width), |
99 |
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width), |
106 |
# SA (South East Asian: Thai, Lao, Khmer) |
100 |
# SA (Dictionary chars, excluding Mn and Mc) |
107 |
# SG (Unpaired Surrogates) |
101 |
# SG (Unpaired Surrogates) |
108 |
# XX (Unknown, unassigned) |
102 |
# XX (Unknown, unassigned) |
109 |
# as $AL (Alphabetic) |
103 |
# as $AL (Alphabetic) |
110 |
# |
104 |
# |
111 |
$ALPlus = [$AL $AI $SA $SG $XX]; |
105 |
$ALPlus = [$AL $AI $SG $XX [$SA-[[:Mn:][:Mc:]]]]; |
112 |
|
106 |
|
113 |
# |
|
|
114 |
# Combining Marks. X $CM* behaves as if it were X. Rule LB6. |
115 |
# |
116 |
$ALcm = $ALPlus $CM*; |
117 |
$BAcm = $BA $CM*; |
118 |
$BBcm = $BB $CM*; |
119 |
$B2cm = $B2 $CM*; |
120 |
$CLcm = $CL $CM*; |
121 |
$EXcm = $EX $CM*; |
122 |
$GLcm = $GL $CM*; |
123 |
$HLcm = $HL $CM*; |
124 |
$HYcm = $HY $CM*; |
125 |
$H2cm = $H2 $CM*; |
126 |
$H3cm = $H3 $CM*; |
127 |
$IDcm = $ID $CM*; |
128 |
$INcm = $IN $CM*; |
129 |
$IScm = $IS $CM*; |
130 |
$JLcm = $JL $CM*; |
131 |
$JVcm = $JV $CM*; |
132 |
$JTcm = $JT $CM*; |
133 |
$NScm = $NS $CM*; |
134 |
$NUcm = $NU $CM*; |
135 |
$OPcm = $OP $CM*; |
136 |
$POcm = $PO $CM*; |
137 |
$PRcm = $PR $CM*; |
138 |
$QUcm = $QU $CM*; |
139 |
$SYcm = $SY $CM*; |
140 |
$WJcm = $WJ $CM*; |
141 |
|
107 |
|
142 |
## ------------------------------------------------- |
108 |
## ------------------------------------------------- |
143 |
|
109 |
|
144 |
!!forward; |
|
|
145 |
|
146 |
# |
147 |
# Each class of character can stand by itself as an unbroken token, with trailing combining stuff |
148 |
# |
149 |
$ALPlus $CM+; |
150 |
$BA $CM+; |
151 |
$BB $CM+; |
152 |
$B2 $CM+; |
153 |
$CL $CM+; |
154 |
$EX $CM+; |
155 |
$GL $CM+; |
156 |
$HL $CM+; |
157 |
$HY $CM+; |
158 |
$H2 $CM+; |
159 |
$H3 $CM+; |
160 |
$ID $CM+; |
161 |
$IN $CM+; |
162 |
$IS $CM+; |
163 |
$JL $CM+; |
164 |
$JV $CM+; |
165 |
$JT $CM+; |
166 |
$NS $CM+; |
167 |
$NU $CM+; |
168 |
$OP $CM+; |
169 |
$PO $CM+; |
170 |
$PR $CM+; |
171 |
$QU $CM+; |
172 |
$SY $CM+; |
173 |
$WJ $CM+; |
174 |
|
175 |
# |
110 |
# |
176 |
# CAN_CM is the set of characters that may combine with CM combining chars. |
111 |
# CAN_CM is the set of characters that may combine with CM combining chars. |
177 |
# Note that Linebreak UAX 14's concept of a combining char and the rules |
112 |
# Note that Linebreak UAX 14's concept of a combining char and the rules |
Lines 186-204
Link Here
|
186 |
# |
121 |
# |
187 |
# AL_FOLLOW set of chars that can unconditionally follow an AL |
122 |
# AL_FOLLOW set of chars that can unconditionally follow an AL |
188 |
# Needed in rules where stand-alone $CM s are treated as AL. |
123 |
# Needed in rules where stand-alone $CM s are treated as AL. |
189 |
# Chaining is disabled with CM because it causes other failures, |
|
|
190 |
# so for this one case we need to manually list out longer sequences. |
191 |
# |
124 |
# |
192 |
$AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP]; |
125 |
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus]; |
193 |
$AL_FOLLOW_CM = [$CL $EX $HL $IS $SY $WJ $GL $QU $BA $HY $NS $IN $NU $ALPlus $OP]; |
|
|
194 |
$AL_FOLLOW = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM]; |
195 |
|
126 |
|
196 |
|
127 |
|
197 |
# |
128 |
# |
198 |
# Rule LB 4, 5 Mandatory (Hard) breaks. |
129 |
# Rule LB 4, 5 Mandatory (Hard) breaks. |
199 |
# |
130 |
# |
200 |
$LB4Breaks = [$BK $CR $LF $NL]; |
131 |
$LB4Breaks = [$BK $CR $LF $NL]; |
201 |
$LB4NonBreaks = [^$BK $CR $LF $NL]; |
132 |
$LB4NonBreaks = [^$BK $CR $LF $NL $CM]; |
202 |
$CR $LF {100}; |
133 |
$CR $LF {100}; |
203 |
|
134 |
|
204 |
# |
135 |
# |
Lines 206-296
Link Here
|
206 |
# |
137 |
# |
207 |
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks. |
138 |
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks. |
208 |
$CAN_CM $CM* $LB4Breaks {100}; |
139 |
$CAN_CM $CM* $LB4Breaks {100}; |
209 |
$CM+ $LB4Breaks {100}; |
140 |
^$CM+ $LB4Breaks {100}; |
210 |
|
141 |
|
211 |
# LB 7 x SP |
142 |
# LB 7 x SP |
212 |
# x ZW |
143 |
# x ZW |
213 |
$LB4NonBreaks [$SP $ZW]; |
144 |
$LB4NonBreaks [$SP $ZW]; |
214 |
$CAN_CM $CM* [$SP $ZW]; |
145 |
$CAN_CM $CM* [$SP $ZW]; |
215 |
$CM+ [$SP $ZW]; |
146 |
^$CM+ [$SP $ZW]; |
216 |
|
147 |
|
217 |
# |
148 |
# |
218 |
# LB 8 Break after zero width space |
149 |
# LB 8 Break after zero width space |
|
|
150 |
# ZW SP* ÷ |
219 |
# |
151 |
# |
220 |
$LB8Breaks = [$LB4Breaks $ZW]; |
152 |
$LB8Breaks = [$LB4Breaks $ZW]; |
221 |
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]]; |
153 |
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]]; |
|
|
154 |
$ZW $SP* / [^$SP $ZW $LB4Breaks]; |
222 |
|
155 |
|
|
|
156 |
# LB 8a ZWJ x Do not break Emoji ZWJ sequences. |
157 |
# |
158 |
$ZWJ [^$CM]; |
223 |
|
159 |
|
224 |
# LB 9 Combining marks. X $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL |
160 |
# LB 9 Combining marks. X $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL |
225 |
# $CM not covered by the above needs to behave like $AL |
161 |
# $CM not covered by the above needs to behave like $AL |
226 |
# See definition of $CAN_CM. |
162 |
# See definition of $CAN_CM. |
227 |
|
163 |
|
228 |
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules. |
164 |
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules. |
229 |
$CM+; |
165 |
^$CM+; |
230 |
|
166 |
|
231 |
# |
167 |
# |
232 |
# LB 11 Do not break before or after WORD JOINER & related characters. |
168 |
# LB 11 Do not break before or after WORD JOINER & related characters. |
233 |
# |
169 |
# |
234 |
$CAN_CM $CM* $WJcm; |
170 |
$CAN_CM $CM* $WJ; |
235 |
$LB8NonBreaks $WJcm; |
171 |
$LB8NonBreaks $WJ; |
236 |
$CM+ $WJcm; |
172 |
^$CM+ $WJ; |
237 |
|
173 |
|
238 |
$WJcm [^$CAN_CM]; |
174 |
$WJ $CM* .; |
239 |
$WJcm $CAN_CM $CM*; |
|
|
240 |
|
175 |
|
241 |
# |
176 |
# |
242 |
# LB 12 Do not break before or after NBSP and related characters. |
177 |
# LB 12 Do not break after NBSP and related characters. |
243 |
# |
|
|
244 |
# (!SP) x GL |
245 |
[$LB8NonBreaks-$SP] $CM* $GLcm; |
246 |
$CM+ $GLcm; |
247 |
|
248 |
# GL x |
178 |
# GL x |
249 |
$GLcm ($LB8Breaks | $SP); |
179 |
# |
250 |
$GLcm [$LB8NonBreaks-$SP] $CM*; # Don't let a combining mark go onto $CR, $BK, etc. |
180 |
$GL $CM* .; |
251 |
# TODO: I don't think we need this rule. |
|
|
252 |
# All but $CM will chain off of preceding rule. |
253 |
# $GLcm will pick up the CM case by itself. |
254 |
|
255 |
|
256 |
|
257 |
|
181 |
|
258 |
# |
182 |
# |
259 |
# LB 13 Don't break before ']' or '!' or ';' or '/', even after spaces. |
183 |
# LB 12a Do not break before NBSP and related characters ... |
|
|
184 |
# [^SP BA HY] x GL |
185 |
# |
186 |
[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GL; |
187 |
^$CM+ $GL; |
188 |
|
189 |
|
190 |
|
191 |
|
192 |
# LB 13 Don't break before ']' or '!' or '/', even after spaces. |
260 |
# |
193 |
# |
261 |
$LB8NonBreaks $CL; |
194 |
$LB8NonBreaks $CL; |
262 |
$CAN_CM $CM* $CL; |
195 |
$CAN_CM $CM* $CL; |
263 |
$CM+ $CL; # by rule 10, stand-alone CM behaves as AL |
196 |
^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL |
|
|
197 |
|
198 |
$LB8NonBreaks $CP; |
199 |
$CAN_CM $CM* $CP; |
200 |
^$CM+ $CP; # by rule 10, stand-alone CM behaves as AL |
264 |
|
201 |
|
265 |
$LB8NonBreaks $EX; |
202 |
$LB8NonBreaks $EX; |
266 |
$CAN_CM $CM* $EX; |
203 |
$CAN_CM $CM* $EX; |
267 |
$CM+ $EX; # by rule 10, stand-alone CM behaves as AL |
204 |
^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL |
268 |
|
|
|
269 |
$LB8NonBreaks $IS; |
270 |
$CAN_CM $CM* $IS; |
271 |
$CM+ $IS; # by rule 10, stand-alone CM behaves as AL |
272 |
|
205 |
|
273 |
$LB8NonBreaks $SY; |
206 |
$LB8NonBreaks $SY; |
274 |
$CAN_CM $CM* $SY; |
207 |
$CAN_CM $CM* $SY; |
275 |
$CM+ $SY; # by rule 10, stand-alone CM behaves as AL |
208 |
^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL |
276 |
|
209 |
|
277 |
|
210 |
|
278 |
# |
211 |
# |
279 |
# LB 14 Do not break after OP, even after spaced |
212 |
# LB 14 Do not break after OP, even after spaces |
|
|
213 |
# Note subtle interaction with "SP IS /" rules in LB14a. |
214 |
# This rule consumes the SP, chaining happens on the IS, effectively overriding the SP IS rules, |
215 |
# which is the desired behavior. |
280 |
# |
216 |
# |
281 |
$OPcm $SP* $CAN_CM $CM*; |
217 |
$OP $CM* $SP* .; |
282 |
$OPcm $SP* $CANT_CM; |
|
|
283 |
|
218 |
|
284 |
$OPcm $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL |
219 |
$OP $CM* $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL |
|
|
220 |
# by rule 8, CM following a SP is stand-alone. |
285 |
|
221 |
|
286 |
# LB 15 |
222 |
|
287 |
# $QUcm $SP* $OPcm; |
223 |
# LB 15a |
|
|
224 |
($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ .; |
225 |
($OP $CM* $SP+ | [$OP $QU $GL] $CM*) ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?; |
226 |
^([\p{Pi} & $QU] $CM* $SP*)+ .; |
227 |
^([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?; |
228 |
|
229 |
# LB 15b |
230 |
$LB8NonBreaks [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}]; |
231 |
$CAN_CM $CM* [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}]; |
232 |
^$CM+ [\p{Pf} & $QU] $CM* [$SP $GL $WJ $CL $QU $CP $EX $IS $SY $BK $CR $LF $NL $ZW {eof}]; |
233 |
|
234 |
# Messy interaction: manually chain between LB 15b and LB 15a on Pf Pi. |
235 |
$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .; |
236 |
$LB8NonBreaks [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?; |
237 |
$CAN_CM $CM* [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .; |
238 |
$CAN_CM $CM* [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?; |
239 |
^$CM+ [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ .; |
240 |
^$CM+ [\p{Pf} & $QU] $CM* ([\p{Pi} & $QU] $CM* $SP*)+ $SP $CM+ $AL_FOLLOW?; |
241 |
|
242 |
|
243 |
# LB 15c Force a break before start of a number with a leading decimal pt, e.g. " .23" |
244 |
# Note: would be simpler to express as "$SP / $IS $CM* $NU;", but ICU rules have limitations. |
245 |
# See issue ICU-20303 |
246 |
|
247 |
|
248 |
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN]; |
249 |
$SP $IS / [^ $CanFollowIS $NU $CM]; |
250 |
$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM]; |
251 |
|
252 |
# |
253 |
# LB 15d Do not break before numeric separators (IS), even after spaces. |
254 |
|
255 |
[$LB8NonBreaks - $SP] $IS; |
256 |
$SP $IS $CM* [$CanFollowIS {eof}]; |
257 |
$SP $IS $CM* $ZWJ [^$CM $NU]; |
258 |
|
259 |
$CAN_CM $CM* $IS; |
260 |
^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL |
261 |
|
288 |
|
262 |
|
289 |
# LB 16 |
263 |
# LB 16 |
290 |
$CLcm $SP* $NScm; |
264 |
($CL | $CP) $CM* $SP* $NS; |
291 |
|
265 |
|
292 |
# LB 17 |
266 |
# LB 17 |
293 |
$B2cm $SP* $B2cm; |
267 |
$B2 $CM* $SP* $B2; |
294 |
|
268 |
|
295 |
# |
269 |
# |
296 |
# LB 18 Break after spaces. |
270 |
# LB 18 Break after spaces. |
Lines 301-647
Link Here
|
301 |
|
275 |
|
302 |
# LB 19 |
276 |
# LB 19 |
303 |
# x QU |
277 |
# x QU |
304 |
$LB18NonBreaks $CM* $QUcm; |
278 |
$LB18NonBreaks $CM* $QU; |
305 |
$CM+ $QUcm; |
279 |
^$CM+ $QU; |
306 |
|
280 |
|
307 |
# QU x |
281 |
# QU x |
308 |
$QUcm .?; |
282 |
$QU $CM* .; |
309 |
$QUcm $LB18NonBreaks $CM*; # Don't let a combining mark go onto $CR, $BK, etc. |
|
|
310 |
# TODO: I don't think this rule is needed. |
311 |
|
312 |
|
283 |
|
313 |
# LB 20 |
284 |
# LB 20 |
314 |
# <break> $CB |
285 |
# <break> $CB |
315 |
# $CB <break> |
286 |
# $CB <break> |
316 |
|
287 |
# |
317 |
$LB20NonBreaks = [$LB18NonBreaks - $CB]; |
288 |
$LB20NonBreaks = [$LB18NonBreaks - $CB]; |
318 |
|
289 |
|
|
|
290 |
# LB 20.09 Don't break between Hyphens and Letters when there is a break preceding the hyphen. |
291 |
# Originally added as a Finnish tailoring, now promoted to default ICU behavior. |
292 |
# Note: this is not default UAX-14 behaviour. See issue ICU-8151. |
293 |
# |
294 |
^($HY | $HH) $CM* $ALPlus; |
295 |
|
319 |
# LB 21 x (BA | HY | NS) |
296 |
# LB 21 x (BA | HY | NS) |
320 |
# BB x |
297 |
# BB x |
321 |
# |
298 |
# |
322 |
$LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm); |
299 |
$LB20NonBreaks $CM* ($BA | $HY | $NS); |
323 |
|
300 |
|
324 |
$BBcm [^$CB]; # $BB x |
301 |
|
325 |
$BBcm $LB20NonBreaks $CM*; |
302 |
^$CM+ ($BA | $HY | $NS); |
|
|
303 |
|
304 |
$BB $CM* [^$CB]; # $BB x |
305 |
$BB $CM* $LB20NonBreaks; |
326 |
|
306 |
|
327 |
# LB 21a Don't break after Hebrew + Hyphen |
307 |
# LB 21a Don't break after Hebrew + Hyphen |
328 |
# HL (HY | BA) x |
308 |
# HL (HY | BA) x |
329 |
# |
309 |
# |
330 |
$HLcm ($HYcm | $BAcm) [^$CB]?; |
310 |
$HL $CM* ($HY | $BA) $CM* [^$CB]?; |
331 |
|
311 |
|
332 |
# LB 22 |
312 |
# LB 21b (forward) Don't break between SY and HL |
333 |
($ALcm | $HLcm) $INcm; |
313 |
# (break between HL and SY already disallowed by LB 13 above) |
334 |
$CM+ $INcm; # by rule 10, any otherwise unattached CM behaves as AL |
314 |
$SY $CM* $HL; |
335 |
$IDcm $INcm; |
315 |
|
336 |
$INcm $INcm; |
316 |
# LB 22 Do not break before ellipses |
337 |
$NUcm $INcm; |
317 |
# |
|
|
318 |
$LB20NonBreaks $CM* $IN; |
319 |
^$CM+ $IN; |
338 |
|
320 |
|
339 |
|
321 |
|
340 |
# $LB 23 |
322 |
# LB 23 |
341 |
$IDcm $POcm; |
323 |
# |
342 |
$ALcm $NUcm; # includes $LB19 |
324 |
($ALPlus | $HL) $CM* $NU; |
343 |
$HLcm $NUcm; |
325 |
^$CM+ $NU; # Rule 10, any otherwise unattached CM behaves as AL |
344 |
$CM+ $NUcm; # Rule 10, any otherwise unattached CM behaves as AL |
326 |
$NU $CM* ($ALPlus | $HL); |
345 |
$NUcm $ALcm; |
327 |
|
346 |
$NUcm $HLcm; |
328 |
# LB 23a |
|
|
329 |
# |
330 |
$PR $CM* ($ID | $EB | $EM); |
331 |
($ID | $EB | $EM) $CM* $PO; |
332 |
|
347 |
|
333 |
|
348 |
# |
334 |
# |
349 |
# LB 24 |
335 |
# LB 24 |
350 |
# |
336 |
# |
351 |
$PRcm $IDcm; |
337 |
($PR | $PO) $CM* ($ALPlus | $HL); |
352 |
$ALcm $PRcm; |
338 |
($ALPlus | $HL) $CM* ($PR | $PO); |
353 |
$PRcm ($ALcm | $HLcm); |
339 |
^$CM+ ($PR | $PO); # Rule 10, any otherwise unattached CM behaves as AL |
354 |
$POcm ($ALcm | $HLcm); |
|
|
355 |
|
340 |
|
356 |
# |
341 |
# |
357 |
# LB 25 Numbers. |
342 |
# LB 25 Numbers. |
358 |
# |
343 |
# |
359 |
($PRcm | $POcm)? ($OPcm)? $NUcm ($NUcm | $SYcm | $IScm)* $CLcm? ($PRcm | $POcm)?; |
344 |
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? ($IS $CM*)? $NU ($CM* ($NU | $SY | $IS))* |
|
|
345 |
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?; |
360 |
|
346 |
|
361 |
# LB 26 Do not break a Korean syllable |
347 |
# LB 26 Do not break a Korean syllable |
362 |
# |
348 |
# |
363 |
$JLcm ($JLcm | $JVcm | $H2cm | $H3cm); |
349 |
$JL $CM* ($JL | $JV | $H2 | $H3); |
364 |
($JVcm | $H2cm) ($JVcm | $JTcm); |
350 |
($JV | $H2) $CM* ($JV | $JT); |
365 |
($JTcm | $H3cm) $JTcm; |
351 |
($JT | $H3) $CM* $JT; |
366 |
|
352 |
|
367 |
# LB 27 Treat Korean syllable block the same as ID (don't break it) |
353 |
# LB 27 Treat Korean syllable block the same as ID (don't break it) |
368 |
($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm; |
354 |
($JL | $JV | $JT | $H2 | $H3) $CM* $PO; |
369 |
($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm; |
355 |
$PR $CM* ($JL | $JV | $JT | $H2 | $H3); |
370 |
$PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm); |
|
|
371 |
|
356 |
|
372 |
|
357 |
|
373 |
# LB 28 Do not break between alphabetics |
358 |
# LB 28 Do not break between alphabetics |
374 |
# |
359 |
# |
375 |
($ALcm | $HLcm) ($ALcm | $HLcm); |
360 |
($ALPlus | $HL) $CM* ($ALPlus | $HL); |
376 |
$CM+ ($ALcm | $HLcm); # The $CM+ is from rule 10, an unattached CM is treated as AL |
361 |
^$CM+ ($ALPlus | $HL); # The $CM+ is from rule 10, an unattached CM is treated as AL |
|
|
362 |
|
363 |
#LB 28a Do not break Orthographic syllables |
364 |
($AP $CM*)? ($AS | $AK | [◌] ) ($CM* $VI $CM* ($AK | [◌] ))* ($CM* $VI | (($CM* ($AS | $AK | [◌] ) )? $CM* $VF))?; |
377 |
|
365 |
|
378 |
# LB 29 |
366 |
# LB 29 |
379 |
$IScm ($ALcm | $NUcm); |
367 |
$IS $CM* ($ALPlus | $HL); |
380 |
|
|
|
381 |
# |
382 |
# Rule 30 Do not break between letters, numbers or ordinary symbols |
383 |
# and opening or closing punctuation |
384 |
# |
385 |
($ALcm | $HLcm | $NUcm) $OPcm; |
386 |
$CM+ $OPcm; |
387 |
$CLcm ($ALcm | $HLcm | $NUcm); |
388 |
|
389 |
# |
390 |
# Reverse Rules. |
391 |
# |
392 |
## ------------------------------------------------- |
393 |
|
394 |
!!reverse; |
395 |
|
396 |
$CM+ $ALPlus; |
397 |
$CM+ $BA; |
398 |
$CM+ $BB; |
399 |
$CM+ $B2; |
400 |
$CM+ $CL; |
401 |
$CM+ $EX; |
402 |
$CM+ $GL; |
403 |
$CM+ $HL; |
404 |
$CM+ $HY; |
405 |
$CM+ $H2; |
406 |
$CM+ $H3; |
407 |
$CM+ $ID; |
408 |
$CM+ $IN; |
409 |
$CM+ $IS; |
410 |
$CM+ $JL; |
411 |
$CM+ $JV; |
412 |
$CM+ $JT; |
413 |
$CM+ $NS; |
414 |
$CM+ $NU; |
415 |
$CM+ $OP; |
416 |
$CM+ $PO; |
417 |
$CM+ $PR; |
418 |
$CM+ $QU; |
419 |
$CM+ $SY; |
420 |
$CM+ $WJ; |
421 |
$CM+; |
422 |
|
423 |
|
424 |
# |
425 |
# Sequences of the form (shown forwards) |
426 |
# [CANT_CM] <break> [CM] [whatever] |
427 |
# The CM needs to behave as an AL |
428 |
# |
429 |
$AL_FOLLOW $CM+ / ( |
430 |
[$BK $CR $LF $NL $ZW {eof}] | |
431 |
$SP+ $CM+ $SP | |
432 |
$SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}])); # if LB 14 will match, need to suppress this break. |
433 |
# LB14 says OP SP* x . |
434 |
# becomes OP SP* x AL |
435 |
# becomes OP SP* x CM+ AL_FOLLOW |
436 |
# |
437 |
# Further note: the $AL in [$AL {eof}] is only to work around |
438 |
# a rule compiler bug which complains about |
439 |
# empty sets otherwise. |
440 |
|
441 |
# |
442 |
# Sequences of the form (shown forwards) |
443 |
# [CANT_CM] <break> [CM] <break> [PR] |
444 |
# The CM needs to behave as an AL |
445 |
# This rule is concerned about getting the second of the two <breaks> in place. |
446 |
# |
447 |
|
448 |
[$PR ] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}]; |
449 |
|
450 |
|
451 |
|
452 |
# LB 4, 5, 5 |
453 |
|
454 |
$LB4Breaks [$LB4NonBreaks-$CM]; |
455 |
$LB4Breaks $CM+ $CAN_CM; |
456 |
$LF $CR; |
457 |
|
458 |
|
459 |
# LB 7 x SP |
460 |
# x ZW |
461 |
[$SP $ZW] [$LB4NonBreaks-$CM]; |
462 |
[$SP $ZW] $CM+ $CAN_CM; |
463 |
|
464 |
# LB 8 Break after zero width space |
465 |
|
466 |
|
467 |
# LB 9,10 Combining marks. |
468 |
# X $CM needs to behave like X, where X is not $SP or controls. |
469 |
# $CM not covered by the above needs to behave like $AL |
470 |
# Stick together any combining sequences that don't match other rules. |
471 |
$CM+ $CAN_CM; |
472 |
|
473 |
|
474 |
# LB 11 |
475 |
$CM* $WJ $CM* $CAN_CM; |
476 |
$CM* $WJ [$LB8NonBreaks-$CM]; |
477 |
|
478 |
$CANT_CM $CM* $WJ; |
479 |
$CM* $CAN_CM $CM* $WJ; |
480 |
|
481 |
# LB 12 |
482 |
# x GL |
483 |
# |
484 |
$CM* $GL $CM* [$LB8NonBreaks-$CM-$SP]; |
485 |
|
486 |
# |
487 |
# GL x |
488 |
# |
489 |
$CANT_CM $CM* $GL; |
490 |
$CM* $CAN_CM $CM* $GL; |
491 |
|
492 |
|
493 |
# LB 13 |
494 |
$CL $CM+ $CAN_CM; |
495 |
$EX $CM+ $CAN_CM; |
496 |
$IS $CM+ $CAN_CM; |
497 |
$SY $CM+ $CAN_CM; |
498 |
|
499 |
$CL [$LB8NonBreaks-$CM]; |
500 |
$EX [$LB8NonBreaks-$CM]; |
501 |
$IS [$LB8NonBreaks-$CM]; |
502 |
$SY [$LB8NonBreaks-$CM]; |
503 |
|
504 |
# Rule 13 & 14 taken together for an edge case. |
505 |
# Match this, shown forward |
506 |
# OP SP+ ($CM+ behaving as $AL) (CL | EX | IS | SY) |
507 |
# This really wants to chain at the $CM+ (which is acting as an $AL) |
508 |
# except for $CM chaining being disabled. |
509 |
[$CL $EX $IS $SY] $CM+ $SP+ $CM* $OP; |
510 |
|
511 |
# LB 14 OP SP* x |
512 |
# |
513 |
$CM* $CAN_CM $SP* $CM* $OP; |
514 |
$CANT_CM $SP* $CM* $OP; |
515 |
$AL_FOLLOW? $CM+ $SP $SP* $CM* $OP; # by LB 10, behaves like $AL_FOLLOW? $AL $SP* $CM* $OP |
516 |
|
517 |
$AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP; |
518 |
$CM* $AL_FOLLOW_CM $CM+ $SP+ $CM* $OP; |
519 |
$SY $CM $SP+ $OP; # TODO: Experiment. Remove. |
520 |
|
521 |
|
522 |
|
523 |
# LB 15 |
524 |
# $CM* $OP $SP* $CM* $QU; |
525 |
|
526 |
# LB 16 |
527 |
$CM* $NS $SP* $CM* $CL; |
528 |
|
529 |
# LB 17 |
530 |
$CM* $B2 $SP* $CM* $B2; |
531 |
|
532 |
# LB 18 break after spaces |
533 |
# Nothing explicit needed here. |
534 |
|
535 |
|
536 |
# |
537 |
# LB 19 |
538 |
# |
539 |
$CM* $QU $CM* $CAN_CM; # . x QU |
540 |
$CM* $QU $LB18NonBreaks; |
541 |
|
542 |
|
543 |
$CM* $CAN_CM $CM* $QU; # QU x . |
544 |
$CANT_CM $CM* $QU; |
545 |
|
546 |
# |
547 |
# LB 20 Break before and after CB. |
548 |
# nothing needed here. |
549 |
# |
550 |
|
551 |
# LB 21 |
552 |
$CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM]; # . x (BA | HY | NS) |
553 |
|
554 |
$CM* [$LB20NonBreaks-$CM] $CM* $BB; # BB x . |
555 |
[^$CB] $CM* $BB; # |
556 |
|
557 |
# LB21a |
558 |
[^$CB] $CM* ($HY | $BA) $CM* $HL; |
559 |
|
560 |
# LB 22 |
561 |
$CM* $IN $CM* ($ALPlus | $HL); |
562 |
$CM* $IN $CM* $ID; |
563 |
$CM* $IN $CM* $IN; |
564 |
$CM* $IN $CM* $NU; |
565 |
|
566 |
# LB 23 |
567 |
$CM* $PO $CM* $ID; |
568 |
$CM* $NU $CM* ($ALPlus | $HL); |
569 |
$CM* ($ALPlus | $HL) $CM* $NU; |
570 |
|
571 |
# LB 24 |
572 |
$CM* $ID $CM* $PR; |
573 |
$CM* $PR $CM* $ALPlus; |
574 |
$CM* ($ALPlus | $HL) $CM* $PR; |
575 |
$CM* ($ALPlus | $HL) $CM* $PO; |
576 |
|
577 |
$CM* $ALPlus $CM* ($IS | $SY | $HY)+ / $SP; |
578 |
$CM* $NU+ $CM* $HY+ / $SP; |
579 |
|
580 |
# LB 25 |
581 |
($CM* ($PR | $PO))? ($CM* $CL)? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP))? ($CM* ($PR | $PO))?; |
582 |
|
583 |
# LB 26 |
584 |
$CM* ($H3 | $H2 | $JV | $JL) $CM* $JL; |
585 |
$CM* ($JT | $JV) $CM* ($H2 | $JV); |
586 |
$CM* $JT $CM* ($H3 | $JT); |
587 |
|
588 |
# LB 27 |
589 |
$CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL); |
590 |
$CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL); |
591 |
$CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR; |
592 |
|
593 |
# LB 28 |
594 |
$CM* ($ALPlus | $HL) $CM* ($ALPlus | $HL); |
595 |
|
596 |
# LB 29 |
597 |
$CM* ($NU | $ALPlus) $CM* $IS+ [^$SP]; |
598 |
|
368 |
|
599 |
# LB 30 |
369 |
# LB 30 |
600 |
$CM* $OP $CM* ($ALPlus | $HL | $NU); |
370 |
($ALPlus | $HL | $NU) $CM* $OP30; |
601 |
$CM* ($ALPlus | $HL | $NU) $CM* ($CL | $SY)+ [^$SP]; |
371 |
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL. |
|
|
372 |
$CP30 $CM* ($ALPlus | $HL | $NU); |
602 |
|
373 |
|
|
|
374 |
# LB 30a Do not break between regional indicators. Break after pairs of them. |
375 |
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM. |
376 |
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]]; |
377 |
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]]; |
378 |
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}]; |
379 |
# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?' |
380 |
# because of the chain-out behavior difference. The rule must chain out only from the [set characters], |
381 |
# not from the preceding $RI or $CM, which it would be able to do if the set were optional. |
603 |
|
382 |
|
604 |
## ------------------------------------------------- |
383 |
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier. |
|
|
384 |
$EB $CM* $EM; |
385 |
$ExtPictUnassigned $CM* $EM; |
605 |
|
386 |
|
606 |
!!safe_reverse; |
387 |
# LB 31 Break everywhere else. |
607 |
|
388 |
# Match a single code point if no other rule applies. |
608 |
# LB 7 |
389 |
.; |
609 |
$CM+ [^$CM $BK $CR $LF $NL $ZW $SP]; |
|
|
610 |
$CM+ $SP / .; |
611 |
|
612 |
# LB 9 |
613 |
$SP+ $CM* $OP; |
614 |
|
615 |
# LB 10 |
616 |
$SP+ $CM* $QU; |
617 |
|
618 |
# LB 11 |
619 |
$SP+ $CM* $CL; |
620 |
$SP+ $CM* $B2; |
621 |
|
622 |
# LB 21 |
623 |
$CM* ($HY | $BA) $CM* $HL; |
624 |
|
625 |
# LB 18 |
626 |
($CM* ($IS | $SY))+ $CM* $NU; |
627 |
$CL $CM* ($NU | $IS | $SY); |
628 |
|
629 |
# For dictionary-based break |
630 |
$dictionary $dictionary; |
631 |
|
632 |
## ------------------------------------------------- |
633 |
|
634 |
!!safe_forward; |
635 |
|
636 |
# Skip forward over all character classes that are involved in |
637 |
# rules containing patterns with possibly more than one char |
638 |
# of context. |
639 |
# |
640 |
# It might be slightly more efficient to have specific rules |
641 |
# instead of one generic one, but only if we could |
642 |
# turn off rule chaining. We don't want to move more |
643 |
# than necessary. |
644 |
# |
645 |
[$CM $OP $QU $CL $B2 $PR $HY $BA $SP $dictionary]+ [^$CM $OP $QU $CL $B2 $PR $HY $BA $dictionary]; |
646 |
$dictionary $dictionary; |
647 |
|