Skip to content

Commit

Permalink
Make it easier to write message color diff test
Browse files Browse the repository at this point in the history
  • Loading branch information
zeotuan committed Oct 20, 2024
1 parent e28726c commit 8b10fab
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package com.github.mrpowers.spark.fast.tests
import org.apache.spark.sql.types.{DoubleType, IntegerType, MetadataBuilder, LongType, StringType}
import SparkSessionExt._
import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch
import com.github.mrpowers.spark.fast.tests.StringExt.StringOps
import org.apache.spark.sql.functions.col
import com.github.mrpowers.spark.fast.tests.TestUtilsExt.ExceptionOps
import org.scalatest.freespec.AnyFreeSpec

class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with SparkSessionTestWrapper {
Expand Down Expand Up @@ -72,11 +72,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
assertSmallDataFrameEquality(expectedDF, sourceDF)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("uk", "[steve,10,aus]")))
assert(actualColourGroup.contains(Seq("france", "[mark,11,usa]")))
e.assertColorDiff(Seq("france", "[mark,11,usa]"), Seq("uk", "[steve,10,aus]"))
}

"works well for wide DataFrames" in {
Expand Down Expand Up @@ -361,11 +357,10 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
assertSmallDataFrameEquality(sourceDF, expectedDF)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("word", "StringType", "StructField(long,LongType,true,{})")))
assert(actualColourGroup.contains(Seq("float", "DoubleType", "MISSING")))
e.assertColorDiff(
Seq("float", "DoubleType", "MISSING"),
Seq("word", "StringType", "StructField(long,LongType,true,{})")
)
}

"can performed Dataset comparisons and ignore metadata" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package com.github.mrpowers.spark.fast.tests
import org.apache.spark.sql.types._
import SparkSessionExt._
import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch
import com.github.mrpowers.spark.fast.tests.StringExt.StringOps
import com.github.mrpowers.spark.fast.tests.TestUtilsExt.ExceptionOps
import org.apache.spark.sql.functions.col
import org.scalatest.freespec.AnyFreeSpec

Expand Down Expand Up @@ -60,11 +60,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("Person(frank,10)", "lucy")))
assert(actualColourGroup.contains(Seq("Person(bob,1)", "alice")))
e.assertColorDiff(Seq("Person(bob,1)", "alice"), Seq("Person(frank,10)", "lucy"))
}

"correctly mark unequal element for Dataset[String]" in {
Expand All @@ -77,11 +73,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("String(StructField(long,LongType2,true,{}))")))
assert(actualColourGroup.contains(Seq("String(StructField(long,LongType,true,{}))")))
e.assertColorDiff(Seq("String(StructField(long,LongType,true,{}))"), Seq("String(StructField(long,LongType2,true,{}))"))
}

"correctly mark unequal element for Dataset[Seq[String]]" in {
Expand All @@ -103,11 +95,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("banana2", "MISSING")))
assert(actualColourGroup.contains(Seq("banana", "cherry")))
e.assertColorDiff(Seq("banana", "cherry"), Seq("banana2", "MISSING"))
}

"correctly mark unequal element for Dataset[Array[String]]" in {
Expand All @@ -129,11 +117,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("banana2", "MISSING")))
assert(actualColourGroup.contains(Seq("banana", "cherry")))
e.assertColorDiff(Seq("banana", "cherry"), Seq("banana2", "MISSING"))
}

"correctly mark unequal element for Dataset[Map[String, String]]" in {
Expand All @@ -153,11 +137,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDS, expectedDS)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("(apple,banana1)")))
assert(actualColourGroup.contains(Seq("(apple,banana)")))
e.assertColorDiff(Seq("(apple,banana)"), Seq("(apple,banana1)"))
}

"works with really long columns" in {
Expand Down Expand Up @@ -588,11 +568,29 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertLargeDatasetEquality(sourceDF, expectedDF)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("word", "StringType", "StructField(long,LongType,true,{})")))
assert(actualColourGroup.contains(Seq("float", "DoubleType", "MISSING")))
e.assertColorDiff(Seq("float", "DoubleType", "MISSING"), Seq("word", "StringType", "StructField(long,LongType,true,{})"))
}

// Builds two Datasets holding the same four Person rows. The only visible difference is the
// `description` metadata attached to the `name` column ("name of the person" vs
// "name of the individual"). No exception is intercepted around the final call, so the
// large-dataset equality assertion is expected to succeed for this pair.
"can performed Dataset comparisons and ignore metadata" in {
val ds1 = Seq(
Person("juan", 5),
Person("bob", 1),
Person("li", 49),
Person("alice", 5)
).toDS
// Re-alias `name` onto itself purely to attach column metadata.
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
Person("juan", 5),
Person("bob", 1),
Person("li", 49),
Person("alice", 5)
).toDS
// Same rows as ds1; only the metadata description string differs.
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

assertLargeDatasetEquality(ds2, ds1)
}

"can performed Dataset comparisons and ignore metadata" in {
Expand Down Expand Up @@ -852,11 +850,29 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes
assertSmallDatasetEquality(sourceDF, expectedDF)
}

val colourGroup = e.getMessage.extractColorGroup
val expectedColourGroup = colourGroup.get(Console.GREEN)
val actualColourGroup = colourGroup.get(Console.RED)
assert(expectedColourGroup.contains(Seq("word", "StringType", "StructField(long,LongType,true,{})")))
assert(actualColourGroup.contains(Seq("float", "DoubleType", "MISSING")))
e.assertColorDiff(Seq("float", "DoubleType", "MISSING"), Seq("word", "StringType", "StructField(long,LongType,true,{})"))
}

// Builds two Datasets holding the same four Person rows. The only visible difference is the
// `description` metadata attached to the `name` column ("name of the person" vs
// "name of the individual"). No exception is intercepted around the final call, so the
// small-dataset equality assertion is expected to succeed for this pair.
"can performed Dataset comparisons and ignore metadata" in {
val ds1 = Seq(
Person("juan", 5),
Person("bob", 1),
Person("li", 49),
Person("alice", 5)
).toDS
// Re-alias `name` onto itself purely to attach column metadata.
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build()))
.as[Person]

val ds2 = Seq(
Person("juan", 5),
Person("bob", 1),
Person("li", 49),
Person("alice", 5)
).toDS
// Same rows as ds1; only the metadata description string differs.
.withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build()))
.as[Person]

assertSmallDatasetEquality(ds2, ds1)
}

"can performed Dataset comparisons and ignore metadata" in {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.github.mrpowers.spark.fast.tests

import scala.util.matching.Regex

/**
 * Test-only helpers for asserting on ANSI-coloured diff messages.
 *
 * Import `TestUtilsExt.ExceptionOps` (or `TestUtilsExt.StringOps`) to call `assertColorDiff` directly on a
 * caught exception (or on its message).
 */
object TestUtilsExt {

  /**
   * Matches one ANSI colour escape (ESC, `[`, one or two digits, `m`) followed by the shortest run of text that
   * precedes the next such escape. Group 1 is the escape code, group 2 is the text.
   *
   * The lookahead requires a following escape, so text after the last escape in a string is never matched.
   */
  val coloredStringPattern: Regex = "(\u001B\\[\\d{1,2}m)([\\s\\S]*?)(?=\u001B\\[\\d{1,2}m)".r

  implicit class StringOps(s: String) {

    /**
     * Groups the coloured segments of this string by their ANSI escape code.
     *
     * @return
     *   a map from escape code (e.g. `Console.RED`) to the segments that follow that code, in order of appearance
     */
    def extractColorGroup: Map[String, Seq[String]] =
      coloredStringPattern
        .findAllMatchIn(s)
        .map(m => m.group(1) -> m.group(2))
        .toList
        .groupBy { case (colorCode, _) => colorCode }
        // Strict `map` instead of `mapValues`: `Map.mapValues` is deprecated in 2.13 and lazy in 2.12.
        .map { case (colorCode, segments) => colorCode -> segments.map { case (_, text) => text } }

    /**
     * Asserts that the red segments of this string equal `actual` and the green segments equal `expected`.
     *
     * @param actual
     *   segments expected to be coloured with `Console.RED`, in order
     * @param expected
     *   segments expected to be coloured with `Console.GREEN`, in order
     * @throws java.lang.AssertionError
     *   if either colour group is absent or differs from the given sequence
     */
    def assertColorDiff(actual: Seq[String], expected: Seq[String]): Unit = {
      val colourGroup         = extractColorGroup
      val expectedColourGroup = colourGroup.get(Console.GREEN)
      val actualColourGroup   = colourGroup.get(Console.RED)
      // Messages are by-name in Predef.assert, so they are only rendered on failure.
      assert(
        expectedColourGroup.contains(expected),
        s"green (expected) segments did not match: wanted $expected but found ${expectedColourGroup.getOrElse(Seq.empty)}"
      )
      assert(
        actualColourGroup.contains(actual),
        s"red (actual) segments did not match: wanted $actual but found ${actualColourGroup.getOrElse(Seq.empty)}"
      )
    }
  }

  implicit class ExceptionOps(e: Exception) {

    /**
     * Runs [[StringOps.assertColorDiff]] against this exception's message.
     *
     * A null message is treated as an empty string, so the check fails with an `AssertionError` rather than a
     * `NullPointerException`.
     */
    def assertColorDiff(actual: Seq[String], expected: Seq[String]): Unit =
      Option(e.getMessage).getOrElse("").assertColorDiff(actual, expected)
  }
}

0 comments on commit 8b10fab

Please sign in to comment.