From dd35f315532f0c494b96a92dc06420425d7ec60e Mon Sep 17 00:00:00 2001 From: Tuan Pham Date: Sat, 7 Sep 2024 17:13:00 +1000 Subject: [PATCH] Add string helper to validate coloured column --- .../fast/tests/DatasetComparerTest.scala | 27 +++++++++++++++++-- .../mrpowers/spark/fast/tests/StringExt.scala | 15 +++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 src/test/scala/com/github/mrpowers/spark/fast/tests/StringExt.scala diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala index f5bbc69..3832f2d 100644 --- a/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala @@ -3,6 +3,7 @@ package com.github.mrpowers.spark.fast.tests import org.apache.spark.sql.types._ import SparkSessionExt._ import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch +import com.github.mrpowers.spark.fast.tests.StringExt.StringOps import org.scalatest.freespec.AnyFreeSpec object Person { @@ -20,7 +21,26 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes import spark.implicits._ "provides a good README example" in { + val sourceDS = Seq( + Person("juan", 5), + Person("bob", 1), + Person("li", 49), + Person("alice", 5) + ).toDS + + val expectedDS = Seq( + Person("juan", 5), + Person("frank", 10), + Person("li", 49), + Person("lucy", 5) + ).toDS + + val e = intercept[DatasetContentMismatch] { + assertSmallDatasetEquality(sourceDS, expectedDS) + } + } + "Correctly mark unequal column" in { val sourceDS = Seq( Person("juan", 5), Person("bob", 1), @@ -39,10 +59,14 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes assertSmallDatasetEquality(sourceDS, expectedDS) } + val colourGroup = e.getMessage.extractColorGroup + val expectedColourGroup = colourGroup.get(Console.GREEN) + val actualColourGroup = colourGroup.get(Console.RED) + assert(expectedColourGroup.contains(Seq("[frank,10]", "lucy"))) + assert(actualColourGroup.contains(Seq("[bob,1]", "alice"))) } "works with really long columns" in { - val sourceDS = Seq( Person("juanisareallygoodguythatilikealotOK", 5), Person("bob", 1), @@ -60,7 +84,6 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes val e = intercept[DatasetContentMismatch] { assertSmallDatasetEquality(sourceDS, expectedDS) } - } "does nothing if the DataFrames have the same schemas and content" in { diff --git a/src/test/scala/com/github/mrpowers/spark/fast/tests/StringExt.scala b/src/test/scala/com/github/mrpowers/spark/fast/tests/StringExt.scala new file mode 100644 index 0000000..52dfdd9 --- /dev/null +++ b/src/test/scala/com/github/mrpowers/spark/fast/tests/StringExt.scala @@ -0,0 +1,15 @@ +package com.github.mrpowers.spark.fast.tests + +import scala.util.matching.Regex + +object StringExt { + val coloredStringPattern: Regex = "(\u001B\\[\\d{1,2}m)([\\s\\S]*?)(?=\u001B\\[\\d{1,2}m)".r + implicit class StringOps(s: String) { + def extractColorGroup: Map[String, Seq[String]] = coloredStringPattern + .findAllMatchIn(s) + .map(m => (m.group(1), m.group(2))) + .toSeq + .groupBy(_._1) + .mapValues(_.map(_._2)) + } +}