-
-
Notifications
You must be signed in to change notification settings - Fork 152
Closed
Description
The new CSV parser feature SKIP_EMPTY_LINES seems to be buggy in the latest versions, 2.10.1 and 2.10.2.
At a certain point, the parser seems to skip some characters and resume reading in the middle of another line!
So, when I use it to read a file containing String and int columns, it fails with an InvalidFormatException:
com.fasterxml.jackson.databind.RuntimeJsonMappingException: Cannot deserialize value of type `int` from String "j3"": not a valid Integer value
at [Source: (InputStreamReader); line: 2001, column: 4001] (through reference chain: CsvFileReaderTest$Row["timestamp"])
at com.fasterxml.jackson.databind.MappingIterator._handleMappingException(MappingIterator.java:413)
at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:201)
at CsvFileReader.read(CsvFileReader.java:33)
at CsvFileReaderTest.test(CsvFileReaderTest.java:21)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)
Caused by: com.fasterxml.jackson.databind.exc.InvalidFormatException: Cannot deserialize value of type `int` from String "j3"": not a valid Integer value
at [Source: (InputStreamReader); line: 2001, column: 4001] (through reference chain: CsvFileReaderTest$Row["timestamp"])
at com.fasterxml.jackson.databind.exc.InvalidFormatException.from(InvalidFormatException.java:67)
at com.fasterxml.jackson.databind.DeserializationContext.weirdStringException(DeserializationContext.java:1676)
at com.fasterxml.jackson.databind.DeserializationContext.handleWeirdStringValue(DeserializationContext.java:932)
at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer._parseInteger(NumberDeserializers.java:522)
at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer.deserialize(NumberDeserializers.java:474)
at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer.deserialize(NumberDeserializers.java:452)
at com.fasterxml.jackson.databind.deser.impl.MethodProperty.deserializeAndSet(MethodProperty.java:129)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.vanillaDeserialize(BeanDeserializer.java:288)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:151)
at com.fasterxml.jackson.databind.MappingIterator.nextValue(MappingIterator.java:280)
at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:199)
... 24 more
Or, when this feature is combined with FAIL_ON_MISSING_COLUMNS to read a file containing multiple String columns, it fails with a CsvMappingException:
com.fasterxml.jackson.databind.RuntimeJsonMappingException: Not enough column values: expected 2, found 1
at [Source: (InputStreamReader); line: 2001, column: 4001]
at com.fasterxml.jackson.databind.MappingIterator._handleMappingException(MappingIterator.java:413)
at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:201)
at CsvFileReader.read(CsvFileReader.java:34)
at CsvFileReaderTest.test(CsvFileReaderTest.java:21)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)
Caused by: com.fasterxml.jackson.dataformat.csv.CsvMappingException: Not enough column values: expected 2, found 1
at [Source: (InputStreamReader); line: 2001, column: 4001]
at com.fasterxml.jackson.dataformat.csv.CsvMappingException.from(CsvMappingException.java:28)
at com.fasterxml.jackson.dataformat.csv.CsvParser._reportCsvMappingError(CsvParser.java:1246)
at com.fasterxml.jackson.dataformat.csv.CsvParser._handleMissingColumns(CsvParser.java:1023)
at com.fasterxml.jackson.dataformat.csv.CsvParser._handleNextEntry(CsvParser.java:856)
at com.fasterxml.jackson.dataformat.csv.CsvParser.nextFieldName(CsvParser.java:665)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.vanillaDeserialize(BeanDeserializer.java:295)
at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:151)
at com.fasterxml.jackson.databind.MappingIterator.nextValue(MappingIterator.java:280)
at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:199)
... 24 more
Code to reproduce the issue:
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
/**
 * Reads a CSV file row by row with {@code CsvParser.Feature.SKIP_EMPTY_LINES}
 * enabled and prints every mapped row to standard output.
 */
public class CsvFileReader {

    // Mapper with SKIP_EMPTY_LINES switched on; every read goes through it.
    private final CsvMapper csvMapper = new CsvMapper()
            .enable(CsvParser.Feature.SKIP_EMPTY_LINES);

    /**
     * Maps each row of the CSV file at {@code filePath} to an instance of
     * {@code type} and prints it, prefixed with a 1-based row counter.
     *
     * @param filePath path of the CSV file to read
     * @param type     bean class each row is mapped to; the CSV schema is
     *                 derived from it via {@code csvMapper.schemaFor(type)}
     * @param <T>      row type
     * @throws IOException if the file cannot be opened or read
     */
    public <T> void read(Path filePath, Class<T> type) throws IOException {
        ObjectReader objectReader = csvMapper
                .readerFor(type)
                .with(csvMapper.schemaFor(type));

        long lineCount = 0L;
        // Decode explicitly as UTF-8 instead of relying on the platform default
        // charset, and close the MappingIterator (it is Closeable) together with
        // the underlying streams.
        try (InputStream inputStream = Files.newInputStream(filePath);
             Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
             MappingIterator<T> iterator = objectReader.readValues(reader)) {
            while (iterator.hasNext()) {
                ++lineCount;
                T data = iterator.next();
                System.out.println(lineCount + " : " + data);
            }
        }
    }
}
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.Random;
import org.junit.Test;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
/**
 * Generates a temporary two-column CSV file (quoted epoch-second timestamp,
 * quoted random alphanumeric string) and reads it back through
 * {@code CsvFileReader}.
 */
public class CsvFileReaderTest {

    // Number of rows written to the generated CSV file.
    private static final long ROW_COUNT = 10_000L;

    // Length of the random alphanumeric value in the second column.
    private static final int RANDOM_LENGTH = 10;

    // Shared generator; avoids allocating a new Random for every row.
    private final Random random = new Random();

    /**
     * Writes the CSV file, reads it back as {@link Row} instances, and removes
     * the temporary file afterwards whether or not reading succeeded.
     */
    @Test
    public void test() throws IOException {
        Path filePath = Files.createTempFile("test", ".csv");
        try {
            generateCsvFile(filePath);
            CsvFileReader reader = new CsvFileReader();
            reader.read(filePath, Row.class);
        } finally {
            // Do not leave the generated file behind in the temp directory.
            Files.deleteIfExists(filePath);
        }
    }

    /**
     * Writes {@code ROW_COUNT} lines of the form {@code "<epochSecond>","<random>"},
     * each terminated by a single {@code \n}.
     */
    private void generateCsvFile(Path filePath) throws IOException {
        try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
            for (long lineCount = 0L; lineCount < ROW_COUNT; ++lineCount) {
                writer.write("\"" + Instant.now().getEpochSecond() + "\",\"" + randomString() + "\"\n");
            }
        }
    }

    /**
     * Returns a random string of {@code RANDOM_LENGTH} characters drawn from
     * {@code [0-9A-Za-z]}.
     */
    private String randomString() {
        return random
                .ints(48, 123) // code points '0' (48) .. 'z' (122)
                // keep digits and letters only: drop 58..64 and 91..96
                .filter(i -> (i <= 57 || i >= 65) && (i <= 90 || i >= 97))
                .limit(RANDOM_LENGTH)
                .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
                .toString();
    }

    /**
     * Row bean for the generated CSV; column order is fixed by
     * {@code @JsonPropertyOrder} to {@code timestamp, random}.
     */
    @JsonPropertyOrder({ "timestamp", "random" })
    public static class Row {

        private int timestamp;
        private String random;

        public int getTimestamp() {
            return timestamp;
        }

        public void setTimestamp(int timestamp) {
            this.timestamp = timestamp;
        }

        public String getRandom() {
            return random;
        }

        public void setRandom(String random) {
            this.random = random;
        }

        @Override
        public String toString() {
            return "Row{timestamp=" + timestamp + ", random='" + random + "'}";
        }
    }
}