From a4d60d77ee1ceddb4e6e8a845f089fd03b6926ea Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 10 Apr 2026 17:22:33 -0700 Subject: [PATCH] fix spark-shell NPE without explicit -usejavacp --- bin/spark-shell | 7 ------- bin/spark-shell2.cmd | 11 ---------- .../scala/org/apache/spark/repl/Main.scala | 4 +++- .../org/apache/spark/repl/ReplSuite.scala | 21 +++++++++++++++++++ 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/bin/spark-shell b/bin/spark-shell index 8411158187260..5dc460ae903e8 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -37,13 +37,6 @@ export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options] Scala REPL options, Spark Classic only: -I <file> preload <file>, enforcing line-by-line interpretation" -# SPARK-4161: scala does not assume use of the java classpath, -# so we need to add the "-Dscala.usejavacp=true" flag manually. We -# do this specifically for the Spark shell because the scala REPL -# has its own class loader, and any additional classpath specified -# through spark.driver.extraClassPath is not automatically propagated. -SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true" - function main() { export SPARK_SCALA_SHELL=1 # In case of Spark Connect shell, the main class (and resource) is replaced in diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd index 549bf43bb6078..9a845aa0c9a38 100644 --- a/bin/spark-shell2.cmd +++ b/bin/spark-shell2.cmd @@ -28,16 +28,5 @@ set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^ Scala REPL options:^%LF%%LF%^ -I ^<file^> preload ^<file^>, enforcing line-by-line interpretation -rem SPARK-4161: scala does not assume use of the java classpath, -rem so we need to add the "-Dscala.usejavacp=true" flag manually. We -rem do this specifically for the Spark shell because the scala REPL -rem has its own class loader, and any additional classpath specified -rem through spark.driver.extraClassPath is not automatically propagated. 
-if "x%SPARK_SUBMIT_OPTS%"=="x" ( - set SPARK_SUBMIT_OPTS=-Dscala.usejavacp=true - goto run_shell -) -set SPARK_SUBMIT_OPTS="%SPARK_SUBMIT_OPTS% -Dscala.usejavacp=true" - :run_shell "%SPARK_HOME%\bin\spark-submit2.cmd" --class org.apache.spark.repl.Main --name "Spark shell" %* diff --git a/repl/src/main/scala/org/apache/spark/repl/Main.scala b/repl/src/main/scala/org/apache/spark/repl/Main.scala index 4ed3bd51d7521..f880a7b9814a9 100644 --- a/repl/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/src/main/scala/org/apache/spark/repl/Main.scala @@ -65,13 +65,15 @@ object Main extends Logging { // Visible for testing private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { interp = _interp - val jars = Utils + val userJars = Utils .getLocalUserJarsForShell(conf) // Remove file:///, file:// or file:/ scheme if exists for each jar .map { x => if (x.startsWith("file:")) new File(new URI(x)).getPath else x } .mkString(File.pathSeparator) + val jvmClasspath = sys.props.getOrElse("java.class.path", "") + val jars = Seq(userJars, jvmClasspath).filter(_.nonEmpty).mkString(File.pathSeparator) val interpArguments = List( "-Yrepl-class-based", "-Yrepl-outdir", diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 4471f93840e22..234b51129497c 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -72,6 +72,27 @@ class ReplSuite extends SparkFunSuite { def runInterpreterInPasteMode(master: String, input: String): String = runInterpreter(master, ":paste\n" + input + 4.toChar) // 4 is the ascii code of CTRL + D + test("SPARK-56447: spark-shell REPL initializes without explicit -classpath argument") { + // Regression test for SPARK-56447: doMain must include java.class.path in the REPL + // classpath even when the caller does not pass -classpath explicitly. 
Before the fix, + // the Scala compiler mirror failed to find `object scala` because the JVM classpath + // was not propagated to the REPL settings. + // spark.repl.local.jars is not set, so userJars is intentionally empty; + // the classpath must be derived from java.class.path alone. + Main.sparkContext = null + Main.sparkSession = null + Main.conf.set("spark.master", "local") + + val in = new BufferedReader(new StringReader("spark.version\n")) + val out = new StringWriter() + Main.doMain(Array.empty, new SparkILoop(in, new PrintWriter(out))) + + val output = out.toString + assertDoesNotContain("object scala in compiler mirror not found", output) + assertDoesNotContain("Failed to initialize compiler", output) + assertContains("res0: String =", output) + } + def assertContains(message: String, output: String): Unit = { val isContain = output.contains(message) assert(isContain,