Lines 1-5
Link Here
|
1 |
require 'rexml/parseexception' |
1 |
require 'rexml/parseexception' |
2 |
require 'rexml/source' |
2 |
require 'rexml/source' |
|
|
3 |
require 'set' |
3 |
|
4 |
|
4 |
module REXML |
5 |
module REXML |
5 |
module Parsers |
6 |
module Parsers |
Lines 24-30
Link Here
|
24 |
# Nat Price gave me some good ideas for the API. |
25 |
# Nat Price gave me some good ideas for the API. |
25 |
class BaseParser |
26 |
class BaseParser |
26 |
NCNAME_STR= '[\w:][\-\w\d.]*' |
27 |
NCNAME_STR= '[\w:][\-\w\d.]*' |
27 |
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" |
28 |
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" |
|
|
29 |
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" |
28 |
|
30 |
|
29 |
NAMECHAR = '[\-\w\d\.:]' |
31 |
NAMECHAR = '[\-\w\d\.:]' |
30 |
NAME = "([\\w:]#{NAMECHAR}*)" |
32 |
NAME = "([\\w:]#{NAMECHAR}*)" |
Lines 35-41
Link Here
|
35 |
|
37 |
|
36 |
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um |
38 |
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um |
37 |
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um |
39 |
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um |
38 |
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um |
40 |
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um |
39 |
COMMENT_START = /\A<!--/u |
41 |
COMMENT_START = /\A<!--/u |
40 |
COMMENT_PATTERN = /<!--(.*?)-->/um |
42 |
COMMENT_PATTERN = /<!--(.*?)-->/um |
41 |
CDATA_START = /\A<!\[CDATA\[/u |
43 |
CDATA_START = /\A<!\[CDATA\[/u |
Lines 45-51
Link Here
|
45 |
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um |
47 |
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um |
46 |
INSTRUCTION_START = /\A<\?/u |
48 |
INSTRUCTION_START = /\A<\?/u |
47 |
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um |
49 |
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um |
48 |
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um |
50 |
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um |
49 |
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um |
51 |
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um |
50 |
|
52 |
|
51 |
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um |
53 |
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um |
Lines 133-138
Link Here
|
133 |
@tags = [] |
135 |
@tags = [] |
134 |
@stack = [] |
136 |
@stack = [] |
135 |
@entities = [] |
137 |
@entities = [] |
|
|
138 |
@nsstack = [] |
136 |
end |
139 |
end |
137 |
|
140 |
|
138 |
def position |
141 |
def position |
Lines 213-218
Link Here
|
213 |
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] |
216 |
return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ] |
214 |
when DOCTYPE_START |
217 |
when DOCTYPE_START |
215 |
md = @source.match( DOCTYPE_PATTERN, true ) |
218 |
md = @source.match( DOCTYPE_PATTERN, true ) |
|
|
219 |
@nsstack.unshift(curr_ns=Set.new) |
216 |
identity = md[1] |
220 |
identity = md[1] |
217 |
close = md[2] |
221 |
close = md[2] |
218 |
identity =~ IDENTITY |
222 |
identity =~ IDENTITY |
Lines 288-293
Link Here
|
288 |
val = attdef[3] |
292 |
val = attdef[3] |
289 |
val = attdef[4] if val == "#FIXED " |
293 |
val = attdef[4] if val == "#FIXED " |
290 |
pairs[attdef[0]] = val |
294 |
pairs[attdef[0]] = val |
|
|
295 |
if attdef[0] =~ /^xmlns:(.*)/ |
296 |
@nsstack[0] << $1 |
297 |
end |
291 |
end |
298 |
end |
292 |
end |
299 |
end |
293 |
return [ :attlistdecl, element, pairs, contents ] |
300 |
return [ :attlistdecl, element, pairs, contents ] |
Lines 312-317
Link Here
|
312 |
begin |
319 |
begin |
313 |
if @source.buffer[0] == ?< |
320 |
if @source.buffer[0] == ?< |
314 |
if @source.buffer[1] == ?/ |
321 |
if @source.buffer[1] == ?/ |
|
|
322 |
@nsstack.shift |
315 |
last_tag = @tags.pop |
323 |
last_tag = @tags.pop |
316 |
#md = @source.match_to_consume( '>', CLOSE_MATCH) |
324 |
#md = @source.match_to_consume( '>', CLOSE_MATCH) |
317 |
md = @source.match( CLOSE_MATCH, true ) |
325 |
md = @source.match( CLOSE_MATCH, true ) |
Lines 345-363
Link Here
|
345 |
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) |
353 |
raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES ) |
346 |
raise REXML::ParseException.new("malformed XML: missing tag start", @source) |
354 |
raise REXML::ParseException.new("malformed XML: missing tag start", @source) |
347 |
end |
355 |
end |
348 |
attrs = [] |
356 |
attributes = {} |
349 |
if md[2].size > 0 |
357 |
prefixes = Set.new |
350 |
attrs = md[2].scan( ATTRIBUTE_PATTERN ) |
358 |
prefixes << md[2] if md[2] |
|
|
359 |
@nsstack.unshift(curr_ns=Set.new) |
360 |
if md[4].size > 0 |
361 |
attrs = md[4].scan( ATTRIBUTE_PATTERN ) |
351 |
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 |
362 |
raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0 |
|
|
363 |
attrs.each { |a,b,c,d,e| |
364 |
if b == "xmlns" |
365 |
if c == "xml" |
366 |
if d != "http://www.w3.org/XML/1998/namespace" |
367 |
msg = "The 'xml' prefix must not be bound to any other namespace "+ |
368 |
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)" |
369 |
raise REXML::ParseException.new( msg, @source, self ) |
370 |
end |
371 |
elsif c == "xmlns" |
372 |
msg = "The 'xmlns' prefix must not be declared "+ |
373 |
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)" |
374 |
raise REXML::ParseException.new( msg, @source, self) |
375 |
end |
376 |
curr_ns << c |
377 |
elsif b |
378 |
prefixes << b unless b == "xml" |
379 |
end |
380 |
attributes[a] = e |
381 |
} |
352 |
end |
382 |
end |
353 |
|
383 |
|
354 |
if md[4] |
384 |
# Verify that all of the prefixes have been defined |
|
|
385 |
for prefix in prefixes |
386 |
unless @nsstack.find{|k| k.member?(prefix)} |
387 |
raise UndefinedNamespaceException.new(prefix,@source,self) |
388 |
end |
389 |
end |
390 |
|
391 |
if md[6] |
355 |
@closed = md[1] |
392 |
@closed = md[1] |
|
|
393 |
@nsstack.shift |
356 |
else |
394 |
else |
357 |
@tags.push( md[1] ) |
395 |
@tags.push( md[1] ) |
358 |
end |
396 |
end |
359 |
attributes = {} |
|
|
360 |
attrs.each { |a,b,c| attributes[a] = c } |
361 |
return [ :start_element, md[1], attributes ] |
397 |
return [ :start_element, md[1], attributes ] |
362 |
end |
398 |
end |
363 |
else |
399 |
else |
Lines 371-376
Link Here
|
371 |
# return PullEvent.new( :text, md[1], unnormalized ) |
407 |
# return PullEvent.new( :text, md[1], unnormalized ) |
372 |
return [ :text, md[1] ] |
408 |
return [ :text, md[1] ] |
373 |
end |
409 |
end |
|
|
410 |
rescue REXML::UndefinedNamespaceException |
411 |
raise |
374 |
rescue REXML::ParseException |
412 |
rescue REXML::ParseException |
375 |
raise |
413 |
raise |
376 |
rescue Exception, NameError => error |
414 |
rescue Exception, NameError => error |