Skip to content

Commit

Permalink
Replace missing fields with empty string (#117)
Browse files Browse the repository at this point in the history
* add empty string option to InsufficientFieldsRowBehaviour enum

* handle empty string option inside csvfile reader

* add tests to handle insufficient rows with empty string option

* remove unneeded change

* change comment to better reflect the option

* remove unneeded change

* simplify adding empty strings to rows

* fix bad var name

* update readme
  • Loading branch information
popcornAC authored Feb 5, 2023
1 parent 8281219 commit 938452c
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ val tsvReader = csvReader {
| autoRenameDuplicateHeaders | `false` | Whether to auto rename duplicate headers or throw an exception. |
| ~~skipMissMatchedRow~~ | `false` | Deprecated. Replace with appropriate values in `excessFieldsRowBehaviour` and `insufficientFieldsRowBehaviour`, e.g. both set to `IGNORE`. ~~Whether to skip an invalid row. If `ignoreExcessCols` is true, only rows with less than the expected number of columns will be skipped.~~ |
| excessFieldsRowBehaviour | `ERROR` | Behaviour to use when a row has more fields (columns) than expected. `ERROR` (default), `IGNORE` (skip the row) or `TRIM` (remove the excess fields at the end of the row to match the expected number of fields). |
| insufficientFieldsRowBehaviour | `ERROR` | Behaviour to use when a row has fewer fields (columns) than expected. `ERROR` (default), `IGNORE` (skip the row). |
| insufficientFieldsRowBehaviour | `ERROR` | Behaviour to use when a row has fewer fields (columns) than expected. `ERROR` (default), `IGNORE` (skip the row) or `EMPTY_STRING` (fill each missing field with an empty string so the row matches the expected number of fields). |

### CSV Write examples

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ class CsvFileReader internal constructor(
} else if (numFieldsInRow != row.size) {
if (ctx.skipMissMatchedRow || ctx.insufficientFieldsRowBehaviour == InsufficientFieldsRowBehaviour.IGNORE) {
skipMismatchedRow(idx, row, numFieldsInRow)
} else if (ctx.insufficientFieldsRowBehaviour == InsufficientFieldsRowBehaviour.EMPTY_STRING) {
val numOfMissingFields = numFieldsInRow - row.size
row.plus(List(numOfMissingFields) { "" })
} else {
throw CSVFieldNumDifferentException(numFieldsInRow, row.size, idx + 1)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,11 @@ enum class InsufficientFieldsRowBehaviour {
/**
* Ignore the row and skip to the next row
*/
IGNORE
IGNORE,
/**
* Append an empty string for each missing field so the row reaches the expected number of fields
*/
EMPTY_STRING
}

enum class ExcessFieldsRowBehaviour {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class CsvReaderTest : WordSpec({
"Trim row when reading csv with greater num of fields on a subsequent row" {
val expected = listOf(listOf("a", "b"), listOf("c", "d"))
val actual =
csvReader{
csvReader {
excessFieldsRowBehaviour = ExcessFieldsRowBehaviour.TRIM
}.readAll(readTestDataFile("different-fields-num2.csv"))

Expand All @@ -196,7 +196,7 @@ class CsvReaderTest : WordSpec({
"it should be be possible to skip rows with both excess and insufficient fields" {
val expected = listOf(listOf("a", "b"))
val actual =
csvReader{
csvReader {
excessFieldsRowBehaviour = ExcessFieldsRowBehaviour.IGNORE
insufficientFieldsRowBehaviour = InsufficientFieldsRowBehaviour.IGNORE
}.readAll(readTestDataFile("varying-column-lengths.csv"))
Expand All @@ -206,10 +206,36 @@ class CsvReaderTest : WordSpec({
actual.size shouldBe 1
}
}
"it should be be possible to replace insufficient fields with strings and skip rows with excess fields" {
    // Reader under test: rows with too many fields are skipped, rows with too few
    // fields get an empty string for every missing field.
    val reader = csvReader {
        excessFieldsRowBehaviour = ExcessFieldsRowBehaviour.IGNORE
        insufficientFieldsRowBehaviour = InsufficientFieldsRowBehaviour.EMPTY_STRING
    }
    val rows = reader.readAll(readTestDataFile("varying-column-lengths.csv"))

    // The short row ("c") is expected to be padded to ("c", ""); any over-long row
    // in the fixture is expected to be absent from the result.
    val expectedRows = listOf(
        listOf("a", "b"),
        listOf("c", ""),
    )

    assertSoftly {
        rows shouldBe expectedRows
        rows.size shouldBe 2
    }
}
"it should be be possible to replace insufficient fields with strings and trim rows with excess fields" {
    // Reader under test: rows with too many fields are trimmed to the expected
    // width, rows with too few fields get an empty string for every missing field.
    val reader = csvReader {
        excessFieldsRowBehaviour = ExcessFieldsRowBehaviour.TRIM
        insufficientFieldsRowBehaviour = InsufficientFieldsRowBehaviour.EMPTY_STRING
    }
    val rows = reader.readAll(readTestDataFile("varying-column-lengths.csv"))

    // All three rows are expected to survive: one untouched, one padded with "",
    // and one cut down to two fields.
    val expectedRows = listOf(
        listOf("a", "b"),
        listOf("c", ""),
        listOf("d", "e"),
    )

    assertSoftly {
        rows shouldBe expectedRows
        rows.size shouldBe 3
    }
}
"it should be be possible to trim excess columns and skip insufficient row columns" {
val expected = listOf(listOf("a", "b"), listOf("d","e"))
val expected = listOf(listOf("a", "b"), listOf("d", "e"))
val actual =
csvReader{
csvReader {
excessFieldsRowBehaviour = ExcessFieldsRowBehaviour.TRIM
insufficientFieldsRowBehaviour = InsufficientFieldsRowBehaviour.IGNORE
}.readAll(readTestDataFile("varying-column-lengths.csv"))
Expand Down

0 comments on commit 938452c

Please sign in to comment.