@@ -124,11 +124,10 @@ class BaseParser
124124 }
125125
126126 module Private
127- INSTRUCTION_END = /#{ NAME } (\s +.*?)?\? >/um
128127 TAG_PATTERN = /((?>#{ QNAME_STR } ))\s */um
129128 CLOSE_PATTERN = /(#{ QNAME_STR } )\s *>/um
130129 ATTLISTDECL_END = /\s +#{ NAME } (?:#{ ATTDEF } )*\s *>/um
131- NAME_PATTERN = /\s * #{ NAME } /um
130+ NAME_PATTERN = /#{ NAME } /um
132131 GEDECL_PATTERN = "\\ s+#{ NAME } \\ s+#{ ENTITYDEF } \\ s*>"
133132 PEDECL_PATTERN = "\\ s+(%)\\ s+#{ NAME } \\ s+#{ PEDEF } \\ s*>"
134133 ENTITYDECL_PATTERN = /(?:#{ GEDECL_PATTERN } )|(?:#{ PEDECL_PATTERN } )/um
@@ -242,7 +241,7 @@ def pull_event
242241 if @document_status == nil
243242 start_position = @source . position
244243 if @source . match ( "<?" , true )
245- return process_instruction ( start_position )
244+ return process_instruction
246245 elsif @source . match ( "<!" , true )
247246 if @source . match ( "--" , true )
248247 md = @source . match ( /(.*?)-->/um , true )
@@ -442,7 +441,7 @@ def pull_event
442441 raise REXML ::ParseException . new ( "Declarations can only occur " +
443442 "in the doctype declaration." , @source )
444443 elsif @source . match ( "?" , true )
445- return process_instruction ( start_position )
444+ return process_instruction
446445 else
447446 # Get the next tag
448447 md = @source . match ( Private ::TAG_PATTERN , true )
@@ -588,14 +587,14 @@ def need_source_encoding_update?(xml_declaration_encoding)
# NOTE: this span is a unified-diff hunk, not plain Ruby. A line whose number
# is followed by "-" is the removed version, "+" is its replacement, and the
# remaining lines are unchanged context.
#
# Matches Private::NAME_PATTERN against @source and returns the matched name.
#
# base_error_message - String prefix used to build the exception message.
#
# Returns the matched text (the whole match, md[0], after this change; capture
# group 1 before it).
# Raises REXML::ParseException whose message ends in "invalid name" when
# @source still matches a non-whitespace character, and in "name is missing"
# otherwise.
588587 def parse_name ( base_error_message )
# NOTE(review): the second argument `true` presumably makes @source consume
# the matched text -- confirm against the source class.
589588 md = @source . match ( Private ::NAME_PATTERN , true )
590589 unless md
# Removed probe: allowed leading whitespace before the non-whitespace character.
591- if @source . match ( /\s * \ S /um )
# Added probe: no longer allows for leading whitespace; called without the
# `true` argument used above.
590+ if @source . match ( /\S /um )
592591 message = "#{ base_error_message } : invalid name"
593592 else
594593 message = "#{ base_error_message } : name is missing"
595594 end
596595 raise REXML ::ParseException . new ( message , @source )
597596 end
# Removed: returned capture group 1 of the match.
598- md [ 1 ]
# Added: returns the entire matched text.
597+ md [ 0 ]
599598 end
600599
601600 def parse_id ( base_error_message ,
@@ -664,18 +663,24 @@ def parse_id_invalid_details(accept_external_id:,
664663 end
665664 end
666665
# NOTE: this span is unified-diff output, not plain Ruby. "-" lines are the
# removed implementation, "+" lines the replacement, and the rest is unchanged
# context. The "@@ -690,7 +695,7 @@" line further down starts a second hunk,
# so some lines of the method between the two hunks are not shown here.
#
# Parses a processing instruction from @source. In the visible pull_event
# hunks it is called right after "<?" (or "?") has been matched.
#
# Returns [:xmldecl, version, encoding, standalone] when the instruction name
# is "xml", otherwise [:processing_instruction, name, content].
# Raises ParseException when the instruction is malformed or unclosed, or when
# an "xml" declaration is found while @document_status is already set.
#
# Removed version: took start_position, matched Private::INSTRUCTION_END in a
# single step, and on failure restored @source.position to start_position
# before raising.
667- def process_instruction ( start_position )
668- match_data = @source . match ( Private ::INSTRUCTION_END , true )
669- unless match_data
670- message = "Invalid processing instruction node"
671- @source . position = start_position
672- raise REXML ::ParseException . new ( message , @source )
# Added version: takes no arguments and parses the instruction in steps.
666+ def process_instruction
# The name is obtained through parse_name, called with this error-message prefix.
667+ name = parse_name ( "Malformed XML: Invalid processing instruction node" )
# If whitespace follows the name, everything up to the closing "?>" is the
# content (capture group 1, non-greedy).
668+ if @source . match ( /\s +/um , true )
669+ match_data = @source . match ( /(.*?)\? >/um , true )
670+ unless match_data
671+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
672+ end
673+ content = match_data [ 1 ]
674+ else
# No whitespace after the name: there is no content, and "?>" must match next.
675+ content = nil
676+ unless @source . match ( "?>" , true )
677+ raise ParseException . new ( "Malformed XML: Unclosed processing instruction" , @source )
678+ end
673679 end
674- if match_data [ 1 ] == "xml"
680+ if name == "xml"
# NOTE(review): @document_status being set presumably means other input has
# already been processed -- confirm where it is assigned.
675681 if @document_status
676682 raise ParseException . new ( "Malformed XML: XML declaration is not at the start" , @source )
677683 end
678- content = match_data [ 2 ]
# content may be nil here (the no-whitespace branch above).
# NOTE(review): this assumes VERSION and ENCODING are Regexp constants, for
# which match(nil) returns nil -- confirm their definitions.
679684 version = VERSION . match ( content )
680685 version = version [ 1 ] unless version . nil?
681686 encoding = ENCODING . match ( content )
@@ -690,7 +695,7 @@ def process_instruction(start_position)
# (Lines between the two hunks are elided; the assignment of `standalone`
# is not visible in this view.)
690695 standalone = standalone [ 1 ] unless standalone . nil?
691696 return [ :xmldecl , version , encoding , standalone ]
692697 end
693- [ :processing_instruction , match_data [ 1 ] , match_data [ 2 ] ]
698+ [ :processing_instruction , name , content ]
694699 end
695700
696701 def parse_attributes ( prefixes , curr_ns )
0 commit comments