@@ -140,7 +140,7 @@ pub struct UnescapeWriter<W> {
140
140
enum UnescapeState {
141
141
CharData ,
142
142
Begin ,
143
- Named ( uint , uint ) , // index into ENTITIES, and prefix len
143
+ Named ( uint , uint , uint ) , // index into ENTITIES, prefix len, last non-semi index
144
144
HexStart ( bool ) , // boolean indicates if x is lower or upper case
145
145
Hex ( u32 ) ,
146
146
DecStart ,
@@ -209,10 +209,14 @@ impl<W: Writer> UnescapeWriter<W> {
209
209
Begin => {
210
210
try!( self . inner . get_mut_ref ( ) . write_str ( "&" ) ) ;
211
211
}
212
- Named ( cursor, plen) => {
212
+ Named ( cursor, plen, lastcur ) => {
213
213
let ( name, chars, needs_semi) = ENTITIES [ cursor] ;
214
214
if !needs_semi && name. len ( ) == plen {
215
215
try!( self . inner . get_mut_ref ( ) . write_str ( chars) ) ;
216
+ } else if lastcur != -1 {
217
+ let ( lastname, chars, _) = ENTITIES [ lastcur] ;
218
+ try!( self . inner . get_mut_ref ( ) . write_str ( chars) ) ;
219
+ try!( self . inner . get_mut_ref ( ) . write_str ( name. slice ( lastname. len ( ) , plen) ) ) ;
216
220
} else {
217
221
try!( self . inner . get_mut_ref ( ) . write_str ( name. slice_to ( plen) ) ) ;
218
222
}
@@ -349,13 +353,13 @@ impl<W:Writer> Writer for UnescapeWriter<W> {
349
353
// with our character as a prefix.
350
354
// There's at least one entity that starts with every letter, so we don't
351
355
// have to worry about not finding one.
352
- self . state = Named ( base, 2 ) ; // plen is 2 to include &
356
+ self . state = Named ( base, 2 , - 1 ) ; // plen is 2 to include &
353
357
}
354
- ( Named ( cursor, plen) , ';' ) => {
358
+ ( Named ( cursor, plen, _) , ';' ) => {
359
+ it. next ( ) ; // consume ;
355
360
let ( name, chars, _) = ENTITIES [ cursor] ;
356
361
if name. len ( ) == plen {
357
362
// valid entity
358
- it. next ( ) ; // consume ;
359
363
try!( self . inner_write_str ( chars) ) ;
360
364
self . state = CharData ;
361
365
cdata = i+1 ;
@@ -365,25 +369,32 @@ impl<W:Writer> Writer for UnescapeWriter<W> {
365
369
cdata = i;
366
370
}
367
371
}
368
- ( Named ( cursor, plen) , 'a' ..'z' ) |
369
- ( Named ( cursor, plen) , 'A' ..'Z' ) |
370
- ( Named ( cursor, plen) , '0' ..'9' ) => {
372
+ ( Named ( cursor, plen, lastcur ) , 'a' ..'z' ) |
373
+ ( Named ( cursor, plen, lastcur ) , 'A' ..'Z' ) |
374
+ ( Named ( cursor, plen, lastcur ) , '0' ..'9' ) => {
371
375
let mut cursor = cursor;
372
- let ( name, _, _) = ENTITIES [ cursor] ;
376
+ it. next ( ) ; // consume character
377
+ let ( mut name, _, mut needs_semi) = ENTITIES [ cursor] ;
373
378
if name. len ( ) > plen && name[ plen] == b {
374
379
// existing cursor is still a match
375
380
} else {
376
381
// search forward to find the next entity with our prefix
377
382
let prefix = name. slice_to ( plen) ;
378
383
for ix in range ( cursor+1 , ENTITIES . len ( ) ) {
379
- let ( name , _, _ ) = ENTITIES [ ix] ;
380
- if !name . starts_with ( prefix) {
384
+ let ( name_ , _, needs_semi_ ) = ENTITIES [ ix] ;
385
+ if !name_ . starts_with ( prefix) {
381
386
// no match
382
387
cursor = -1 ;
383
388
break ;
384
389
}
385
- if name . len ( ) > plen && name [ plen] == b {
390
+ if name_ . len ( ) > plen && name_ [ plen] == b {
386
391
cursor = ix;
392
+ name = name_;
393
+ needs_semi = needs_semi_;
394
+ if name_. len ( ) == plen+1 {
395
+ name = name_;
396
+ needs_semi = needs_semi_;
397
+ }
387
398
break ;
388
399
}
389
400
}
@@ -394,8 +405,13 @@ impl<W:Writer> Writer for UnescapeWriter<W> {
394
405
self . state = CharData ;
395
406
cdata = i;
396
407
} else {
397
- it. next ( ) ; // consume character
398
- self . state = Named ( cursor, plen+1 ) ;
408
+ let plen = plen+1 ;
409
+ let lastcur = if !needs_semi && name. len ( ) == plen {
410
+ cursor
411
+ } else {
412
+ lastcur
413
+ } ;
414
+ self . state = Named ( cursor, plen, lastcur) ;
399
415
}
400
416
}
401
417
( HexStart ( _) , 'a' ..'f' ) |( HexStart ( _) , 'A' ..'F' ) |( HexStart ( _) , '0' ..'9' ) => {
0 commit comments