Thanks to Paul Findlay, we finally have a possible contender in the Java camp with Aalto.


This goes to show you how good library design and the D Programming Language come together to kick serious butt.
PS: I am looking for anyone to do comparisons with MSXML, RapidXML, etc. More native code help is needed. Send me email at scott aht dotnot daht org.
Popularity: 8%
Next up from Paul Findlay: Aalto. Aalto.java:
// requires jar files from http://www.cowtowncoder.com/hatchery/aalto/index.html
// (and maybe some command line switches as per the same page)
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import java.io.*;
/**
 * Throughput micro-benchmark for a StAX pull parser.
 *
 * <p>Loads one XML file into memory, then repeatedly pulls every event out of
 * it through an {@code XMLStreamReader} and prints the achieved rate in MB/s,
 * one line per round.
 *
 * <p>The parser is obtained through {@code XMLInputFactory.newInstance()}, so
 * the implementation that actually gets measured is whichever one is found at
 * run time. If the Aalto jar is missing (or misspelled) on the run-time
 * classpath, the JDK's built-in StAX parser is benchmarked instead.
 */
public class Aalto
{
    /** Number of timed rounds; one result line is printed per round. */
    private static final int ROUNDS = 10;

    /** Number of complete parses of the document inside each timed round. */
    private static final int ITERATIONS = 2000;

    /**
     * Reads the whole file into a byte array.
     *
     * @param file the file to load
     * @return the file's contents
     * @throws IOException if the file cannot be opened, is larger than a Java
     *         array can hold, or ends before the expected number of bytes
     *         has been read
     */
    public static byte[] getBytesFromFile(File file) throws IOException {
        InputStream is = new FileInputStream(file);
        try {
            long length = file.length();
            // A plain (int) cast would silently truncate the length of huge files.
            if (length > Integer.MAX_VALUE) {
                throw new IOException("File is too large to read into memory: " + file.getName());
            }
            byte[] bytes = new byte[(int) length];
            int offset = 0;
            int numRead;
            // read() may return fewer bytes than requested, so keep going until
            // the buffer is full or the stream reports end-of-file (-1).
            while (offset < bytes.length
                    && (numRead = is.read(bytes, offset, bytes.length - offset)) >= 0) {
                offset += numRead;
            }
            if (offset < bytes.length) {
                throw new IOException("Could not completely read file " + file.getName());
            }
            return bytes;
        } finally {
            // Always release the file handle, including on the error paths above.
            is.close();
        }
    }

    /**
     * Runs the benchmark against the XML file named by the first argument.
     *
     * @param args {@code args[0]} is the path of the XML file to parse
     * @throws Exception if the file cannot be read or the parser rejects it
     */
    public static void main (String args[]) throws Exception
    {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: java Aalto <xml-file>");
        }

        XMLInputFactory xmlif = XMLInputFactory.newInstance();
        // Switch off entity replacement, external entities and text coalescing.
        xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
        xmlif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);

        byte[] content = Aalto.getBytesFromFile(new File(args[0]));
        // One in-memory stream is reused for every parse; reset() rewinds it to
        // the start, so no file I/O happens inside the timed region.
        ByteArrayInputStream bais = new ByteArrayInputStream(content);

        for (int i = 0; i < ROUNDS; i++) {
            // nanoTime() is meant for measuring elapsed time; the wall clock can jump.
            long start = System.nanoTime();
            for (int j = 0; j < ITERATIONS; j++) {
                XMLStreamReader xr = xmlif.createXMLStreamReader(bais);
                while (xr.hasNext()) {
                    xr.next();
                }
                xr.close();
                bais.reset();
            }
            long stop = System.nanoTime();

            double timer = (stop - start) / 1.0e9;
            // Widen to double before multiplying: as an int product,
            // content.length * ITERATIONS overflows once the file exceeds ~1 MiB.
            double total = ((double) content.length * ITERATIONS) / (timer * (1024 * 1024));
            System.out.print(total);
            System.out.println(" MB/s");
        }
    }
}
How it was run:
echo "aalto"
javac -classpath aalto-0.9.jar Aalto.java
echo "hamlet.xml"
java -cp aalto-0.9.jar:stax2-3.0pr1.jar:. Aalto hamlet.xml
echo "soap_mid.xml"
java -cp alto-0.9.jar:stax2-3.0pr1.jar:. Aalto soap_mid.xml
Results:
stonecobra@jeff-home:~/xmlbench$ ./all
aalto
hamlet.xml
119.02434356083324 MB/s
149.60675553887623 MB/s
149.81687738654318 MB/s
149.4390819546354 MB/s
150.23889675946305 MB/s
150.36596659038446 MB/s
150.4507992936795 MB/s
151.09010863912005 MB/s
151.00455365121567 MB/s
151.13292249818468 MB/s
soap_mid.xml
41.88683525261311 MB/s
43.82856162166171 MB/s
43.896140352961176 MB/s
43.86607965078486 MB/s
43.552910290707864 MB/s
44.16855218759427 MB/s
44.16093954502489 MB/s
44.13811735404555 MB/s
44.267755915728124 MB/s
44.22191426307118 MB/s
Average for hamlet.xml: 147.22 MB/sec
Average for soap_mid.xml: 43.80 MB/sec
As noted on the website, Aalto does seem to be quite fast on the “fast path”. Impressive for a Java solution at this point.
Update: 2008-03-03 13:15 PST: Thanks to Paul Findlay for catching my misspelling of the aalto.jar in the java run command. The numbers posted above are actually for the default Java 6 StAX parser, and not Aalto. Re-running, I get:
stonecobra@jeff-home:~/xmlbench$ ./all
aalto
hamlet.xml
138.74820070137716 MB/s
148.31704212905834 MB/s
148.73064235808528 MB/s
148.73064235808528 MB/s
148.56492576492863 MB/s
148.85517261961868 MB/s
148.97991159108764 MB/s
149.18827510380245 MB/s
149.14655578749824 MB/s
149.23001776611466 MB/s
soap_mid.xml
79.94439040256923 MB/s
85.83643927646042 MB/s
86.3571861274804 MB/s
86.64922936768157 MB/s
85.72156950158394 MB/s
86.5906628050809 MB/s
87.09101673699332 MB/s
87.03185164410135 MB/s
87.06142413871369 MB/s
87.20958857734321 MB/s
Average for hamlet.xml: 147.85 MB/sec
Average for soap_mid.xml: 85.95 MB/sec
Much more impressive numbers from the Java camp. Graphs will be updated later today.
Popularity: 8%
Another benchmark from Paul Findlay, using Javolution. Here is Javolution.java:
// requires jar files from http://javolution.org/javolution-5.2.6-bin.zip
import javolution.xml.stream.XMLInputFactory;
import javolution.xml.stream.XMLStreamReader;
import java.io.*;
public class Javolution
{
public static byte[] getBytesFromFile(File file) throws IOException {
InputStream is = new FileInputStream(file);
long length = file.length();
byte[] bytes = new byte[(int)length];
int offset = 0;
int numRead = 0;
while (offset < bytes.length
&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
offset += numRead;
}
if (offset < bytes.length) {
throw new IOException(”Could not completely read file “+file.getName());
}
is.close();
return bytes;
}
public static void main (String args[]) throws Exception
{
int iterations = 2000;
XMLInputFactory factory = XMLInputFactory.newInstance();
byte[] content = Javolution.getBytesFromFile(new File(args[0]));
ByteArrayInputStream bais = new ByteArrayInputStream(content);
for (int i = 0; i < 10; i++) {
long start = System.currentTimeMillis();
for (int j = 0; j < iterations; j++) {
XMLStreamReader xr = factory.createXMLStreamReader(bais);
while (xr.hasNext()) {
xr.next();
}
xr.close();
bais.reset();
}
long stop = System.currentTimeMillis();
double timer = (stop - start) / 1000.0;
double total = (content.length * iterations) / (timer * (1024 * 1024));
System.out.print(total);
System.out.println(” MB/s”);
}
}
}
javac -classpath javolution.jar Javolution.java
echo "hamlet.xml"
java -cp javolution.jar:. Javolution hamlet.xml
echo "soap_mid.xml"
java -cp javolution.jar:. Javolution soap_mid.xml
stonecobra@jeff-home:~/xmlbench$ ./all
javolution
hamlet.xml
50.6551508686574 MB/s
51.165395577138696 MB/s
51.19486307315164 MB/s
51.19486307315164 MB/s
51.18503680384777 MB/s
51.23420590740574 MB/s
51.229284746527114 MB/s
51.23420590740574 MB/s
51.23420590740574 MB/s
51.229284746527114 MB/s
soap_mid.xml
44.98275478234452 MB/s
45.975555578724986 MB/s
46.000317996451415 MB/s
46.00857806432652 MB/s
45.99206089395699 MB/s
46.066481704465005 MB/s
46.08305238133712 MB/s
46.08305238133712 MB/s
46.09134219108371 MB/s
46.066481704465005 MB/s
Average for hamlet.xml: 51.16 MB/sec
Average for soap_mid.xml: 45.93 MB/sec
Most of the Java camp is starting to look the same.
Popularity: 8%
Thanks to Paul Findlay for submitting 3 new Java benchmarks, the first of which is for Woodstox. The file is Woodstox.java, listed here:
// requires jar files from http://woodstox.codehaus.org/Download#Download-Stable(3.2.4)
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import org.codehaus.stax2.XMLInputFactory2;
import java.io.*;
public class Woodstox
{
public static byte[] getBytesFromFile(File file) throws IOException {
InputStream is = new FileInputStream(file);
long length = file.length();
byte[] bytes = new byte[(int)length];
int offset = 0;
int numRead = 0;
while (offset < bytes.length
&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
offset += numRead;
}
if (offset < bytes.length) {
throw new IOException(”Could not completely read file “+file.getName());
}
is.close();
return bytes;
}
public static void main (String args[]) throws Exception
{
int iterations = 2000;
XMLInputFactory2 xmlif = (XMLInputFactory2) XMLInputFactory2.newInstance();
xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
xmlif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);
xmlif.configureForSpeed();
byte[] content = Woodstox.getBytesFromFile(new File(args[0]));
ByteArrayInputStream bais = new ByteArrayInputStream(content);
for (int i = 0; i < 10; i++) {
long start = System.currentTimeMillis();
for (int j = 0; j < iterations; j++) {
XMLStreamReader xr = xmlif.createXMLStreamReader(bais);
while (xr.hasNext()) {
xr.next();
}
xr.close();
bais.reset();
}
long stop = System.currentTimeMillis();
double timer = (stop - start) / 1000.0;
double total = (content.length * iterations) / (timer * (1024 * 1024));
System.out.print(total);
System.out.println(” MB/s”);
}
}
}
I built it and ran it with the following commands:
echo "Woodstox"
javac -classpath wstx-asl-3.2.4.jar:stax2-2.1.jar Woodstox.java
echo "hamlet.xml"
java -cp wstx-asl-3.2.4.jar:stax2-2.1.jar:. Woodstox hamlet.xml
echo "soap_mid.xml"
java -cp wstx-asl-3.2.4.jar:stax2-2.1.jar:. Woodstox soap_mid.xml
And the results:
stonecobra@jeff-home:~/xmlbench$ ./all
Woodstox
hamlet.xml
77.77020756723444 MB/s
79.63985120144747 MB/s
79.4618717961999 MB/s
79.77087698116867 MB/s
79.75894773382589 MB/s
80.0822948192333 MB/s
79.91430678694842 MB/s
80.26306749376882 MB/s
80.49322117357285 MB/s
80.49322117357285 MB/s
soap_mid.xml
47.38704850013582 MB/s
49.05643715110748 MB/s
49.38738844260493 MB/s
49.492325910804404 MB/s
49.77112883454436 MB/s
49.916573395720704 MB/s
50.121629741829885 MB/s
49.86799751658902 MB/s
50.13143636083631 MB/s
50.1608792561148 MB/s
Average for hamlet.xml: 79.76 MB/sec
Average for soap_mid.xml: 49.53 MB/sec
Popularity: 8%