Skip to content

Commit

Permalink
Add string helper to validate coloured column
Browse files Browse the repository at this point in the history
  • Loading branch information
zeotuan committed Sep 7, 2024
1 parent bf1d866 commit dd35f31
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.github.mrpowers.spark.fast.tests
import org.apache.spark.sql.types._
import SparkSessionExt._
import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch
import com.github.mrpowers.spark.fast.tests.StringExt.StringOps
import org.scalatest.freespec.AnyFreeSpec

object Person {
Expand All @@ -20,7 +21,26 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
import spark.implicits._

// README example: rows 2 and 4 differ between the two datasets, so the
// equality assertion is expected to fail with DatasetContentMismatch.
"provides a good README example" in {
  val sourceDS = Seq(
    Person("juan", 5),
    Person("bob", 1),
    Person("li", 49),
    Person("alice", 5)
  ).toDS

  val expectedDS = Seq(
    Person("juan", 5),
    Person("frank", 10),
    Person("li", 49),
    Person("lucy", 5)
  ).toDS

  // The intercepted exception is not inspected here, so it is not bound to a name;
  // intercept itself fails the test if no DatasetContentMismatch is thrown.
  intercept[DatasetContentMismatch] {
    assertSmallDatasetEquality(sourceDS, expectedDS)
  }
}

"Correctly mark unequal column" in {
val sourceDS = Seq(
Person("juan", 5),
Person("bob", 1),
Expand All @@ -39,10 +59,14 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("[frank,10]", "lucy")))
assert(actualColourGroup.contains(Seq("[bob,1]", "alice")))
}

"works with really long columns" in {

val sourceDS = Seq(
Person("juanisareallygoodguythatilikealotOK", 5),
Person("bob", 1),
Expand All @@ -60,7 +84,6 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
val e = intercept[DatasetContentMismatch] {
assertSmallDatasetEquality(sourceDS, expectedDS)
}

}

"does nothing if the DataFrames have the same schemas and content" in {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.github.mrpowers.spark.fast.tests

import scala.util.matching.Regex

/** String helpers for pulling apart text that contains ANSI colour escape codes. */
object StringExt {

  /**
   * Matches one escape code of the form ESC `[` + one or two digits + `m`, followed
   * (as lazily as possible) by the text up to, but not including, the next such code.
   *
   * Group 1 is the escape code and group 2 is the text that follows it. Because of the
   * trailing look-ahead, text after the final escape code in a string is never matched.
   */
  val coloredStringPattern: Regex = "(\u001B\\[\\d{1,2}m)([\\s\\S]*?)(?=\u001B\\[\\d{1,2}m)".r

  implicit class StringOps(s: String) {

    /**
     * Groups the text segments of this string by the escape code that precedes them.
     *
     * @return a map from escape code (e.g. `Console.RED`) to the segments that follow that
     *         code, in order of appearance. Any two-digit code is a key, so a reset code
     *         that is directly followed by another code contributes an empty segment.
     */
    def extractColorGroup: Map[String, Seq[String]] =
      coloredStringPattern
        .findAllMatchIn(s)
        .map(m => (m.group(1), m.group(2)))
        // Materialize strictly: Iterator.toSeq is a lazy Stream on Scala 2.12.
        .toVector
        .groupBy(_._1)
        // A strict map instead of mapValues: mapValues is a lazy view on 2.12 and is
        // deprecated on 2.13, where it returns a MapView rather than a Map.
        .map { case (code, matches) => code -> matches.map(_._2) }
  }
}

0 comments on commit dd35f31

Please sign in to comment.