Attachment 475122 Details for Bug 614054 – Patch for CVE-2017-7244

[patch] Patch for CVE-2017-7244

CVE-2017-7244.patch (text/plain), 13.30 KB, created by Thomas Deutschmann (RETIRED) on 2017-06-03 21:02:19 UTC

(hide)

Description:

Filename:

MIME Type:

Creator: Thomas Deutschmann (RETIRED)

Created: 2017-06-03 21:02:19 UTC

Size: 13.30 KB

patch

obsolete

>Index: pcre_ucd.c
>===================================================================
>--- pcre_ucd.c	(revision 1687)
>+++ pcre_ucd.c	(revision 1688)
>@@ -38,6 +38,20 @@
> const pcre_uint32 PRIV(ucd_caseless_sets)[] = {0};
> #else
> 
>+/* If the 32-bit library is run in non-UTF mode, character values
>+greater than 0x10ffff may be encountered. For these we set up a
>+special record. */
>+
>+#ifdef COMPILE_PCRE32
>+const ucd_record PRIV(dummy_ucd_record)[] = {{
>+  ucp_Common,    /* script */
>+  ucp_Cn,        /* type unassigned */
>+  ucp_gbOther,   /* grapheme break property */
>+  0,             /* case set */
>+  0,             /* other case */
>+  }};
>+#endif
>+
> /* When recompiling tables with a new Unicode version, please check the
> types in this structure definition from pcre_internal.h (the actual
> field names will be different):
>Index: pcre_internal.h
>===================================================================
>--- pcre_internal.h	(revision 1687)
>+++ pcre_internal.h	(revision 1688)
>@@ -2772,6 +2772,9 @@
> extern const pcre_uint16 PRIV(ucd_stage2)[];
> extern const pcre_uint32 PRIV(ucp_gentype)[];
> extern const pcre_uint32 PRIV(ucp_gbtable)[];
>+#ifdef COMPILE_PCRE32
>+extern const ucd_record  PRIV(dummy_ucd_record)[];
>+#endif
> #ifdef SUPPORT_JIT
> extern const int         PRIV(ucp_typerange)[];
> #endif
>@@ -2780,9 +2783,15 @@
> /* UCD access macros */
> 
> #define UCD_BLOCK_SIZE 128
>-#define GET_UCD(ch) (PRIV(ucd_records) + \
>+#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
>         PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
>         UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
>+        
>+#ifdef COMPILE_PCRE32  /* values above 0x10ffff use the dummy record */
>+#define GET_UCD(ch) (((ch) > 0x10ffff)? PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
>+#else  /* !COMPILE_PCRE32: plain table lookup */
>+#define GET_UCD(ch) REAL_GET_UCD(ch)
>+#endif /* COMPILE_PCRE32 */
> 
> #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
> #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
>Index: ChangeLog
>===================================================================
>--- ChangeLog	(revision 1687)
>+++ ChangeLog	(revision 1688)
>@@ -10,7 +10,7 @@
> 1.  Fixed typo in CMakeLists.txt (wrong number of arguments for 
> PCRE_STATIC_RUNTIME (affects MSVC only).
> 
>-2. Issue 1 for 8.40 below was not correctly fixed. If pcregrep in multiline 
>+2.  Issue 1 for 8.40 below was not correctly fixed. If pcregrep in multiline 
> mode with --only-matching matched several lines, it restarted scanning at the 
> next line instead of moving on to the end of the matched string, which can be 
> several lines after the start.
>@@ -29,6 +29,10 @@
> 
>     (a) Check for values < 256 when calling isprint() in pcretest.
>     (b) Give an error for too big a number after \O. 
>+    
>+7.  In the 32-bit library in non-UTF mode, an attempt to find a Unicode 
>+property for a character with a code point greater than 0x10ffff (the Unicode 
>+maximum) caused a crash.
> 
> 
> Version 8.40 11-January-2017
>Index: maint/MultiStage2.py
>===================================================================
>--- maint/MultiStage2.py	(revision 1687)
>+++ maint/MultiStage2.py	(revision 1688)
>@@ -1,5 +1,7 @@
> #! /usr/bin/python
> 
>+# WARNING! This is a python 2 script.
>+
> # Multistage table builder
> # (c) Peter Kankowski, 2008
> 
>@@ -15,10 +17,10 @@
> # ./MultiStage2.py >../pcre_ucd.c
> #
> # It requires four Unicode data tables, DerivedGeneralCategory.txt,
>-# GraphemeBreakProperty.txt, Scripts.txt, and CaseFolding.txt, to be in the 
>-# Unicode.tables subdirectory. The first of these is found in the "extracted" 
>-# subdirectory of the Unicode database (UCD) on the Unicode web site; the 
>-# second is in the "auxiliary" subdirectory; the other two are directly in the 
>+# GraphemeBreakProperty.txt, Scripts.txt, and CaseFolding.txt, to be in the
>+# Unicode.tables subdirectory. The first of these is found in the "extracted"
>+# subdirectory of the Unicode database (UCD) on the Unicode web site; the
>+# second is in the "auxiliary" subdirectory; the other two are directly in the
> # UCD directory.
> #
> # Minor modifications made to this script:
>@@ -42,7 +44,7 @@
> #  code scans CaseFolding.txt instead of UnicodeData.txt.
> #
> # The main tables generated by this script are used by macros defined in
>-# pcre_internal.h. They look up Unicode character properties using short 
>+# pcre_internal.h. They look up Unicode character properties using short
> # sequences of code that contains no branches, which makes for greater speed.
> #
> # Conceptually, there is a table of records (of type ucd_record), containing a
>@@ -69,13 +71,13 @@
> # Example: lowercase "a" (U+0061) is in block 0
> #          lookup 0 in stage1 table yields 0
> #          lookup 97 in the first table in stage2 yields 16
>-#          record 17 is { 33, 5, 11, 0, -32 } 
>+#          record 17 is { 33, 5, 11, 0, -32 }
> #            33 = ucp_Latin   => Latin script
> #             5 = ucp_Ll      => Lower case letter
> #            11 = ucp_gbOther => Grapheme break property "Other"
> #             0               => not part of a caseless set
> #           -32               => Other case is U+0041
>-#         
>+#
> # Almost all lowercase latin characters resolve to the same record. One or two
> # are different because they are part of a multi-character caseless set (for
> # example, k, K and the Kelvin symbol are such a set).
>@@ -83,17 +85,17 @@
> # Example: hiragana letter A (U+3042) is in block 96 (0x60)
> #          lookup 96 in stage1 table yields 88
> #          lookup 66 in the 88th table in stage2 yields 467
>-#          record 470 is { 26, 7, 11, 0, 0 } 
>+#          record 470 is { 26, 7, 11, 0, 0 }
> #            26 = ucp_Hiragana => Hiragana script
> #             7 = ucp_Lo       => Other letter
> #            11 = ucp_gbOther  => Grapheme break property "Other"
> #             0                => not part of a caseless set
>-#             0                => No other case 
>+#             0                => No other case
> #
> # In these examples, no other blocks resolve to the same "virtual" block, as it
> # happens, but plenty of other blocks do share "virtual" blocks.
> #
>-# There is a fourth table, maintained by hand, which translates from the 
>+# There is a fourth table, maintained by hand, which translates from the
> # individual character types such as ucp_Cc to the general types like ucp_C.
> #
> #  Philip Hazel, 03 July 2008
>@@ -101,8 +103,8 @@
> # 01-March-2010:     Updated list of scripts for Unicode 5.2.0
> # 30-April-2011:     Updated list of scripts for Unicode 6.0.0
> #     July-2012:     Updated list of scripts for Unicode 6.1.0
>-# 20-August-2012:    Added scan of GraphemeBreakProperty.txt and added a new 
>-#                      field in the record to hold the value. Luckily, the 
>+# 20-August-2012:    Added scan of GraphemeBreakProperty.txt and added a new
>+#                      field in the record to hold the value. Luckily, the
> #                      structure had a hole in it, so the resulting table is
> #                      not much bigger than before.
> # 18-September-2012: Added code for multiple caseless sets. This uses the
>@@ -144,14 +146,14 @@
>                 if m.group(3) is None:
>                         last = char
>                 else:
>-                        last = int(m.group(3), 16)            
>+                        last = int(m.group(3), 16)
>                 for i in range(char, last + 1):
>                         # It is important not to overwrite a previously set
>                         # value because in the CaseFolding file there are lines
>-                        # to be ignored (returning the default value of 0) 
>-                        # which often come after a line which has already set 
>-                        # data.   
>-                        if table[i] == default_value: 
>+                        # to be ignored (returning the default value of 0)
>+                        # which often come after a line which has already set
>+                        # data.
>+                        if table[i] == default_value:
>                           table[i] = value
>         file.close()
>         return table
>@@ -192,7 +194,7 @@
>                         stage2 += block
>                         blocks[block] = start
>                 stage1.append(start)
>-        
>+
>         return stage1, stage2
> 
> # Print a table
>@@ -199,7 +201,7 @@
> def print_table(table, table_name, block_size = None):
>         type, size = get_type_size(table)
>         ELEMS_PER_LINE = 16
>-        
>+
>         s = "const %s %s[] = { /* %d bytes" % (type, table_name, size * len(table))
>         if block_size:
>                 s += ", block = %d" % block_size
>@@ -245,15 +247,15 @@
>                 size = (size + slice_size - 1) & -slice_size
>                 size += slice_size
>                 structure += '%s property_%d;\n' % (slice_type, i)
>-        
>+
>         # round up to the first item of the next structure in array
>         record_slice = map(lambda record: record[0], records)
>         slice_type, slice_size = get_type_size(record_slice)
>         size = (size + slice_size - 1) & -slice_size
>-        
>+
>         structure += '} ucd_record;\n*/\n\n'
>         return size, structure
>-        
>+
> def test_record_size():
>         tests = [ \
>           ( [(3,), (6,), (6,), (1,)], 1 ), \
>@@ -305,7 +307,7 @@
>  'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
>  'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi'
>  ]
>- 
>+
> category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
>   'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps',
>   'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ]
>@@ -321,20 +323,20 @@
> other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
> 
> 
>-# This block of code was added by PH in September 2012. I am not a Python 
>-# programmer, so the style is probably dreadful, but it does the job. It scans 
>-# the other_case table to find sets of more than two characters that must all 
>-# match each other caselessly. Later in this script a table of these sets is 
>-# written out. However, we have to do this work here in order to compute the 
>+# This block of code was added by PH in September 2012. I am not a Python
>+# programmer, so the style is probably dreadful, but it does the job. It scans
>+# the other_case table to find sets of more than two characters that must all
>+# match each other caselessly. Later in this script a table of these sets is
>+# written out. However, we have to do this work here in order to compute the
> # offsets in the table that are inserted into the main table.
> 
> # The CaseFolding.txt file lists pairs, but the common logic for reading data
>-# sets only one value, so first we go through the table and set "return" 
>+# sets only one value, so first we go through the table and set "return"
> # offsets for those that are not already set.
> 
> for c in range(0x10ffff):
>   if other_case[c] != 0 and other_case[c + other_case[c]] == 0:
>-    other_case[c + other_case[c]] = -other_case[c] 
>+    other_case[c + other_case[c]] = -other_case[c]
> 
> # Now scan again and create equivalence sets.
> 
>@@ -344,25 +346,25 @@
>   o = c + other_case[c]
> 
>   # Trigger when this character's other case does not point back here. We
>-  # now have three characters that are case-equivalent. 
>- 
>+  # now have three characters that are case-equivalent.
>+
>   if other_case[o] != -other_case[c]:
>     t = o + other_case[o]
>-    
>-    # Scan the existing sets to see if any of the three characters are already 
>+
>+    # Scan the existing sets to see if any of the three characters are already
>     # part of a set. If so, unite the existing set with the new set.
>- 
>-    appended = 0 
>+
>+    appended = 0
>     for s in sets:
>-      found = 0 
>+      found = 0
>       for x in s:
>         if x == c or x == o or x == t:
>           found = 1
>-    
>+
>       # Add new characters to an existing set
>-       
>+
>       if found:
>-        found = 0 
>+        found = 0
>         for y in [c, o, t]:
>           for x in s:
>             if x == y:
>@@ -370,10 +372,10 @@
>           if not found:
>             s.append(y)
>         appended = 1
>-        
>+
>     # If we have not added to an existing set, create a new one.
> 
>-    if not appended:     
>+    if not appended:
>       sets.append([c, o, t])
> 
> # End of loop looking for caseless sets.
>@@ -384,7 +386,7 @@
> 
> offset = 1;
> for s in sets:
>-  for x in s:   
>+  for x in s:
>     caseless_offsets[x] = offset
>   offset += len(s) + 1
> 
>@@ -393,7 +395,7 @@
> 
> # Combine the tables
> 
>-table, records = combine_tables(script, category, break_props, 
>+table, records = combine_tables(script, category, break_props,
>   caseless_offsets, other_case)
> 
> record_size, record_struct = get_record_size_struct(records.keys())
>@@ -450,6 +452,20 @@
> print "const pcre_uint32 PRIV(ucd_caseless_sets)[] = {0};"
> print "#else"
> print
>+print "/* If the 32-bit library is run in non-UTF mode, character values"
>+print "greater than 0x10ffff may be encountered. For these we set up a"
>+print "special record. */"
>+print
>+print "#ifdef COMPILE_PCRE32"
>+print "const ucd_record PRIV(dummy_ucd_record)[] = {{"
>+print "  ucp_Common,    /* script */"
>+print "  ucp_Cn,        /* type unassigned */"
>+print "  ucp_gbOther,   /* grapheme break property */"
>+print "  0,             /* case set */"
>+print "  0,             /* other case */"
>+print "  }};"
>+print "#endif"
>+print
> print record_struct
> 
> # --- Added by PH: output the table of caseless character sets ---
>@@ -460,7 +476,7 @@
>   s = sorted(s)
>   for x in s:
>     print '  0x%04x,' % x,
>-  print '  NOTACHAR,'   
>+  print '  NOTACHAR,'
> print '};'
> print
>

Actions: View | Diff

Attachments on bug 614054: 475122