Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions src/main/java/org/codehaus/plexus/util/xml/XmlReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -523,11 +523,8 @@ else if ( bomEnc.equals( UTF_8 ) )
}
else if ( bomEnc.equals( UTF_16BE ) || bomEnc.equals( UTF_16LE ) )
{
if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) )
{
throw new IOException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ) );
}
if ( xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) )
if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc )
|| xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) )
{
throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ),
bomEnc, xmlGuessEnc, xmlEnc, is );
Expand Down
42 changes: 14 additions & 28 deletions src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import org.codehaus.plexus.util.xml.XmlReader;
import org.codehaus.plexus.util.xml.XmlStreamReader;
import org.codehaus.plexus.util.xml.XmlStreamReaderException;

//import java.util.Hashtable;

//TODO best handling of interning issues
// have isAllNewStringInterned ???
Expand Down Expand Up @@ -663,20 +662,6 @@ public void setInput( Reader in )
{
reset();
reader = in;

if ( reader instanceof XmlReader ) {
// encoding already detected
XmlReader xsr = (XmlReader) reader;
fileEncoding = xsr.getEncoding();
}
else if ( reader instanceof InputStreamReader )
{
InputStreamReader isr = (InputStreamReader) reader;
if ( isr.getEncoding() != null )
{
fileEncoding = isr.getEncoding().toUpperCase();
}
}
}

@Override
Expand All @@ -696,14 +681,26 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding )
}
else
{
reader = new XmlStreamReader( inputStream );
reader = new XmlStreamReader( inputStream, false );
}
}
catch ( UnsupportedEncodingException une )
{
throw new XmlPullParserException( "could not create reader for encoding " + inputEncoding + " : " + une,
this, une );
}
catch ( XmlStreamReaderException e )
{
if ( "UTF-8".equals( e.getBomEncoding() ) )
{
throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + e.getXmlEncoding() + " is incompatible", this, e );
}
if ( e.getBomEncoding() != null && e.getBomEncoding().startsWith( "UTF-16" ) )
{
throw new XmlPullParserException( "UTF-16 BOM in a " + e.getXmlEncoding() + " encoded file is incompatible", this, e );
}
throw new XmlPullParserException( "could not create reader : " + e, this, e );
}
catch ( IOException e )
{
throw new XmlPullParserException( "could not create reader : " + e, this, e );
Expand Down Expand Up @@ -3434,17 +3431,6 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd )
// TODO reconcile with setInput encodingName
inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart );

if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) )
{
throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}
else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" ))
{
throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible",
this, null );
}

lastParsedAttr = "encoding";

ch = more();
Expand Down
104 changes: 95 additions & 9 deletions src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@

import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

Expand Down Expand Up @@ -968,7 +971,7 @@ public void testXMLDeclVersionEncodingStandaloneNoSpace()
* @since 3.4.1
*/
@Test
public void testEncodingISO_8859_1setInputReader()
public void testEncodingISO_8859_1_newXmlReader()
throws IOException
{
try ( Reader reader =
Expand All @@ -994,7 +997,7 @@ public void testEncodingISO_8859_1setInputReader()
* @since 3.4.1
*/
@Test
public void testEncodingISO_8859_1_setInputStream()
public void testEncodingISO_8859_1_InputStream()
throws IOException
{
try ( InputStream input =
Expand All @@ -1012,12 +1015,6 @@ public void testEncodingISO_8859_1_setInputStream()
}
}

/**
 * Asserts that the parser currently reports the expected position.
 *
 * @param row expected value of <code>parser.getLineNumber()</code>
 * @param col expected value of <code>parser.getColumnNumber()</code>
 * @param parser parser whose current position is checked
 */
private static void assertPosition( int row, int col, MXParser parser )
{
    // the line is read and verified first; the column is only read once the line matched
    final int actualRow = parser.getLineNumber();
    assertEquals( "Current line", row, actualRow );

    final int actualCol = parser.getColumnNumber();
    assertEquals( "Current column", col, actualCol );
}

/**
* Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
*
Expand All @@ -1028,7 +1025,7 @@ private static void assertPosition( int row, int col, MXParser parser )
* @since 3.4.2
*/
@Test
public void testEncodingISO_8859_1setStringReader()
public void testEncodingISO_8859_1_StringReader()
throws IOException
{
String xmlFileContents;
Expand All @@ -1050,6 +1047,95 @@ public void testEncodingISO_8859_1setStringReader()
}
}

/**
 * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
 *
 * Another case of bug #163: the parser is fed a plain {@link InputStreamReader} that decodes the
 * <code>test-encoding-ISO-8859-1.xml</code> resource as UTF-8. Every token up to the end of the document
 * must be readable without an {@link XmlPullParserException}.
 *
 * @throws IOException if IO error.
 *
 * @since 3.5.2
 */
@Test
public void testEncodingISO_8859_1_newReader()
    throws IOException
{
    // NOTE: Files.newBufferedReader( path, StandardCharsets.UTF_8 ) is deliberately not used here: the decoder
    // it creates is not lenient and the reader would throw an exception, while the decoder created by
    // new InputStreamReader() is lenient.
    try ( Reader utf8Reader = new InputStreamReader(
              Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ),
              StandardCharsets.UTF_8 ) )
    {
        MXParser xpp = new MXParser();
        xpp.setInput( utf8Reader );

        // drain the token stream until the end of the document is reported
        int token;
        do
        {
            token = xpp.nextToken();
        }
        while ( token != XmlPullParser.END_DOCUMENT );

        assertTrue( true );
    }
    catch ( XmlPullParserException parseFailure )
    {
        fail( "should not raise exception: " + parseFailure );
    }
}

/**
 * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
 *
 * Another case of bug #163: the raw {@link InputStream} of the <code>test-encoding-ISO-8859-1.xml</code>
 * resource is handed to the parser together with an explicit UTF-8 encoding name. Every token up to the end
 * of the document must be readable without an {@link XmlPullParserException}.
 *
 * @throws IOException if IO error.
 *
 * @since 3.5.2
 */
@Test
public void testEncodingISO_8859_1_InputStream_encoded()
    throws IOException
{
    try ( InputStream rawXml =
              Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) )
    {
        MXParser xpp = new MXParser();
        String encodingName = StandardCharsets.UTF_8.name();
        xpp.setInput( rawXml, encodingName );

        // drain the token stream until the end of the document is reported
        int token;
        do
        {
            token = xpp.nextToken();
        }
        while ( token != XmlPullParser.END_DOCUMENT );

        assertTrue( true );
    }
    catch ( XmlPullParserException parseFailure )
    {
        fail( "should not raise exception: " + parseFailure );
    }
}

/**
 * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163
 *
 * Reads the <code>test-encoding-ISO-8859-1.xml</code> resource through an {@link XmlStreamReader} created
 * from its path and checks that every token up to the end of the document can be read without an
 * {@link XmlPullParserException}.
 *
 * @throws IOException if IO error.
 *
 * @since 3.4.1
 */
@Test
public void testEncodingUTF8_newXmlReader()
    throws IOException
{
    try ( Reader xmlReader =
              new XmlStreamReader( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) )
    {
        MXParser xpp = new MXParser();
        xpp.setInput( xmlReader );

        // drain the token stream until the end of the document is reported
        int token;
        do
        {
            token = xpp.nextToken();
        }
        while ( token != XmlPullParser.END_DOCUMENT );

        assertTrue( true );
    }
    catch ( XmlPullParserException parseFailure )
    {
        fail( "should not raise exception: " + parseFailure );
    }
}

/**
 * Asserts that the parser currently reports the expected position.
 *
 * @param row expected value of <code>parser.getLineNumber()</code>
 * @param col expected value of <code>parser.getColumnNumber()</code>
 * @param parser parser whose current position is checked
 */
private static void assertPosition( int row, int col, MXParser parser )
{
    // the line is read and verified first; the column is only read once the line matched
    final int actualRow = parser.getLineNumber();
    assertEquals( "Current line", row, actualRow );

    final int actualCol = parser.getColumnNumber();
    assertEquals( "Current column", col, actualCol );
}

/**
* <p>
* Test custom Entity not found.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.junit.Before;
import org.junit.Test;

Expand Down Expand Up @@ -212,17 +210,16 @@ public void testhst_bh_006()
public void testhst_lhs_007()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
try ( InputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ) )
{
parser.setInput( reader );
parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
fail( "UTF-8 BOM plus xml decl of ISO-8859-1 incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) );
assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of ISO-8859-1 is incompatible" ) );
}
}

Expand All @@ -239,17 +236,16 @@ public void testhst_lhs_007()
public void testhst_lhs_008()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
try ( InputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ) )
{
parser.setInput( reader );
parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-16 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) );
assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}

Expand All @@ -261,22 +257,24 @@ public void testhst_lhs_008()
* Version:
*
* @throws java.io.IOException if there is an I/O error
*
* NOTE: This test is SKIPPED as MXParser is unable to detect a UTF-16 BOM when chars are read as
* UTF-8.
*/
@Test
public void testhst_lhs_009()
throws IOException
{
try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
{
parser.setInput( reader );
try ( InputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ) )
{
parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-8 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
assertTrue( e.getMessage(), e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}

Expand Down
Loading