Skip to content

CsvParser.Feature.SKIP_EMPTY_LINES results in a mapping error #174

@ybonillo

Description

@ybonillo

The new CSV parser feature SKIP_EMPTY_LINES seems to be buggy in the latest versions, 2.10.1 and 2.10.2.
At a certain point, it seems to skip some characters and resume reading in the middle of another line!

So, when I use it to read a file containing String and int columns, it fails with an InvalidFormatException :

com.fasterxml.jackson.databind.RuntimeJsonMappingException: Cannot deserialize value of type `int` from String "j3"": not a valid Integer value
 at [Source: (InputStreamReader); line: 2001, column: 4001] (through reference chain: CsvFileReaderTest$Row["timestamp"])

	at com.fasterxml.jackson.databind.MappingIterator._handleMappingException(MappingIterator.java:413)
	at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:201)
	at CsvFileReader.read(CsvFileReader.java:33)
	at CsvFileReaderTest.test(CsvFileReaderTest.java:21)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:497)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
	at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
	at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
	at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
	at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
	at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)
Caused by: com.fasterxml.jackson.databind.exc.InvalidFormatException: Cannot deserialize value of type `int` from String "j3"": not a valid Integer value
 at [Source: (InputStreamReader); line: 2001, column: 4001] (through reference chain: CsvFileReaderTest$Row["timestamp"])
	at com.fasterxml.jackson.databind.exc.InvalidFormatException.from(InvalidFormatException.java:67)
	at com.fasterxml.jackson.databind.DeserializationContext.weirdStringException(DeserializationContext.java:1676)
	at com.fasterxml.jackson.databind.DeserializationContext.handleWeirdStringValue(DeserializationContext.java:932)
	at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer._parseInteger(NumberDeserializers.java:522)
	at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer.deserialize(NumberDeserializers.java:474)
	at com.fasterxml.jackson.databind.deser.std.NumberDeserializers$IntegerDeserializer.deserialize(NumberDeserializers.java:452)
	at com.fasterxml.jackson.databind.deser.impl.MethodProperty.deserializeAndSet(MethodProperty.java:129)
	at com.fasterxml.jackson.databind.deser.BeanDeserializer.vanillaDeserialize(BeanDeserializer.java:288)
	at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:151)
	at com.fasterxml.jackson.databind.MappingIterator.nextValue(MappingIterator.java:280)
	at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:199)
	... 24 more

Alternatively, when this feature is combined with FAIL_ON_MISSING_COLUMNS to read a file containing multiple String columns, it fails with a CsvMappingException:

com.fasterxml.jackson.databind.RuntimeJsonMappingException: Not enough column values: expected 2, found 1
 at [Source: (InputStreamReader); line: 2001, column: 4001]

	at com.fasterxml.jackson.databind.MappingIterator._handleMappingException(MappingIterator.java:413)
	at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:201)
	at CsvFileReader.read(CsvFileReader.java:34)
	at CsvFileReaderTest.test(CsvFileReaderTest.java:21)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:497)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
	at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
	at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
	at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
	at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
	at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)
Caused by: com.fasterxml.jackson.dataformat.csv.CsvMappingException: Not enough column values: expected 2, found 1
 at [Source: (InputStreamReader); line: 2001, column: 4001]
	at com.fasterxml.jackson.dataformat.csv.CsvMappingException.from(CsvMappingException.java:28)
	at com.fasterxml.jackson.dataformat.csv.CsvParser._reportCsvMappingError(CsvParser.java:1246)
	at com.fasterxml.jackson.dataformat.csv.CsvParser._handleMissingColumns(CsvParser.java:1023)
	at com.fasterxml.jackson.dataformat.csv.CsvParser._handleNextEntry(CsvParser.java:856)
	at com.fasterxml.jackson.dataformat.csv.CsvParser.nextFieldName(CsvParser.java:665)
	at com.fasterxml.jackson.databind.deser.BeanDeserializer.vanillaDeserialize(BeanDeserializer.java:295)
	at com.fasterxml.jackson.databind.deser.BeanDeserializer.deserialize(BeanDeserializer.java:151)
	at com.fasterxml.jackson.databind.MappingIterator.nextValue(MappingIterator.java:280)
	at com.fasterxml.jackson.databind.MappingIterator.next(MappingIterator.java:199)
	... 24 more

Code to reproduce the issue :

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectReader;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;

/**
 * Minimal CSV reader used to reproduce the reported problem.
 *
 * <p>It maps every row of a CSV file onto an instance of the requested type using a
 * {@link CsvMapper} that has {@link CsvParser.Feature#SKIP_EMPTY_LINES} enabled, and prints
 * each mapped row to standard output together with a running row number.
 */
public class CsvFileReader {

    /** Mapper shared by all reads; the only non-default setting is SKIP_EMPTY_LINES. */
    private final CsvMapper csvMapper = new CsvMapper()
            .enable(CsvParser.Feature.SKIP_EMPTY_LINES);

    /**
     * Reads {@code filePath} as CSV, mapping each row to {@code type}, and prints every row.
     *
     * @param filePath path of the CSV file to read
     * @param type     target type for each row; the CSV schema is derived from it
     * @param <T>      row type
     * @throws IOException if the file cannot be opened or read
     */
    public <T> void read(Path filePath, Class<T> type) throws IOException {
        ObjectReader objectReader = csvMapper
                .readerFor(type)
                .with(csvMapper.schemaFor(type));
        long lineCount = 0L;
        // Decode explicitly as UTF-8 instead of relying on the platform-default charset,
        // so the reproduction behaves the same on every machine.
        try (InputStream inputStream = Files.newInputStream(filePath);
                Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
            MappingIterator<T> iterator = objectReader.readValues(reader);
            while (iterator.hasNext()) {
                ++lineCount;
                T data = iterator.next();
                System.out.println(lineCount + " : " + data);
            }
        }
    }
}
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.Random;

import org.junit.Test;

import com.fasterxml.jackson.annotation.JsonPropertyOrder;

/**
 * Reproduction test: generates a temporary CSV file of quoted {@code timestamp,random} rows
 * and reads it back through {@link CsvFileReader}, mapping each row to {@link Row}.
 */
public class CsvFileReaderTest {

    /** Number of rows written to the generated CSV file. */
    private static final long ROW_COUNT = 10_000L;

    /** Single random source reused for every generated string. */
    private final Random rng = new Random();

    /**
     * Generates the CSV file, reads it back, and always removes the temporary file afterwards.
     *
     * @throws IOException if the file cannot be created, written, read or deleted
     */
    @Test
    public void test() throws IOException {
        Path filePath = Files.createTempFile("test", ".csv");
        try {
            generateCsvFile(filePath);
            CsvFileReader reader = new CsvFileReader();
            reader.read(filePath, Row.class);
        } finally {
            // Do not leave a temp file behind on every run, whether the read succeeds or fails.
            Files.deleteIfExists(filePath);
        }
    }

    /**
     * Writes {@link #ROW_COUNT} lines of the form {@code "<epoch seconds>","<random string>"},
     * each terminated by a single {@code \n}.
     *
     * @param filePath file to write to
     * @throws IOException if writing fails
     */
    private void generateCsvFile(Path filePath) throws IOException {
        long lineCount = 0L;
        try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
            while (lineCount < ROW_COUNT) {
                writer.write("\"" + Instant.now().getEpochSecond() + "\",\"" + randomString() + "\"\n");
                ++lineCount;
            }
        }
    }

    /**
     * Builds a random 10-character string made only of ASCII digits and letters.
     *
     * @return the generated string
     */
    private String randomString() {
        return rng
                .ints(48, 123) // code points '0' (48) through 'z' (122)
                // keep only 0-9, A-Z and a-z: drop the punctuation ranges 58-64 and 91-96
                .filter(i -> (i <= 57 || i >= 65) && (i <= 90 || i >= 97))
                .limit(10)
                .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
                .toString();
    }

    /** One CSV row; columns are mapped in the order {@code timestamp}, {@code random}. */
    @JsonPropertyOrder({ "timestamp", "random" })
    public static class Row {

        private int timestamp;
        private String random;

        public int getTimestamp() {
            return timestamp;
        }

        public void setTimestamp(int timestamp) {
            this.timestamp = timestamp;
        }

        public String getRandom() {
            return random;
        }

        public void setRandom(String random) {
            this.random = random;
        }

        @Override
        public String toString() {
            return "Row{timestamp=" + timestamp + ", random='" + random + "'}";
        }
    }
}

#15 #153

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions