From 27f15f6c7ef95bb32b869cce7d8e9a2dde62683b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Wed, 25 Dec 2024 18:58:50 +0100 Subject: [PATCH 1/7] Postpone the start phase of restart as much as possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The start succeeded too early and on fast machines collided with shutdown. - Shutdown Hook is really the last thing in the JVM capable of doing it. - All shutdown hooks have names now Signed-off-by: David Matějček --- .../ejb/embedded/EJBContainerImpl.java | 4 +- .../org/apache/catalina/startup/Catalina.java | 5 + .../enterprise/admin/launcher/GFLauncher.java | 2 +- .../enterprise/v3/admin/RestartServer.java | 12 +- .../enterprise/v3/admin/StartServerHook.java | 166 ++++++++++++++++++ 5 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java diff --git a/appserver/ejb/ejb-container/src/main/java/org/glassfish/ejb/embedded/EJBContainerImpl.java b/appserver/ejb/ejb-container/src/main/java/org/glassfish/ejb/embedded/EJBContainerImpl.java index c337c309ba9..b1d10066f05 100644 --- a/appserver/ejb/ejb-container/src/main/java/org/glassfish/ejb/embedded/EJBContainerImpl.java +++ b/appserver/ejb/ejb-container/src/main/java/org/glassfish/ejb/embedded/EJBContainerImpl.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Contributors to the Eclipse Foundation + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation * Copyright (c) 2009, 2020 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -272,7 +272,7 @@ private static class Cleanup implements Runnable { Cleanup(EJBContainerImpl container) { this.container = container; Runtime.getRuntime().addShutdownHook( - cleanupThread = new Thread(this, "EJBContainerImplCleanup")); + cleanupThread = new Thread(this, "GlassFish EJBContainerImpl Cleanup Shutdown Hook")); } void disable() { diff --git a/appserver/web/web-core/src/main/java/org/apache/catalina/startup/Catalina.java b/appserver/web/web-core/src/main/java/org/apache/catalina/startup/Catalina.java index d55c9ab2c02..22d17503f66 100644 --- a/appserver/web/web-core/src/main/java/org/apache/catalina/startup/Catalina.java +++ b/appserver/web/web-core/src/main/java/org/apache/catalina/startup/Catalina.java @@ -1,4 +1,5 @@ /* + * Copyright (c) 2024 Contributors to the Eclipse Foundation * Copyright (c) 1997-2018 Oracle and/or its affiliates. All rights reserved. * Copyright 2004 The Apache Software Foundation * @@ -656,6 +657,10 @@ protected void usage() { */ protected class CatalinaShutdownHook extends Thread { + CatalinaShutdownHook() { + super("GlassFish Catalina Shutdown Hook"); + } + public void run() { if (server != null) { diff --git a/nucleus/admin/launcher/src/main/java/com/sun/enterprise/admin/launcher/GFLauncher.java b/nucleus/admin/launcher/src/main/java/com/sun/enterprise/admin/launcher/GFLauncher.java index 906a5b5a365..d57a9da323c 100644 --- a/nucleus/admin/launcher/src/main/java/com/sun/enterprise/admin/launcher/GFLauncher.java +++ b/nucleus/admin/launcher/src/main/java/com/sun/enterprise/admin/launcher/GFLauncher.java @@ -870,7 +870,7 @@ private void setShutdownHook(final Process p) { final String msg = strings.get("serverStopped", callerParameters.getType()); processWhacker = new ProcessWhacker(p, msg); - runtime.addShutdownHook(new Thread(processWhacker)); + runtime.addShutdownHook(new Thread(processWhacker, "GlassFish Process Whacker Shutdown Hook")); } else { processWhacker.setProcess(p); } diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java index 1dbedaea7d2..27e53b71d32 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Contributors to the Eclipse Foundation + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation * Copyright (c) 2010, 2018 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -20,7 +20,6 @@ import com.sun.enterprise.module.ModulesRegistry; import com.sun.enterprise.module.bootstrap.StartupContext; import com.sun.enterprise.universal.i18n.LocalStringsImpl; -import com.sun.enterprise.universal.process.JavaClassRunner; import com.sun.enterprise.util.StringUtils; import jakarta.inject.Inject; @@ -98,7 +97,7 @@ protected final void doExecute(AdminCommandContext context) { // show up in the ServiceLocator. GlassFish gfKernel = glassfishProvider.get(); while (gfKernel == null) { - Thread.yield(); + Thread.onSpinWait(); gfKernel = glassfishProvider.get(); } @@ -134,13 +133,13 @@ private void init(AdminCommandContext context) { private void reincarnate() throws Exception { if (setupReincarnationWithAsadmin() || setupReincarnationWithOther()) { - doReincarnation(); + scheduleReincarnation(); } else { throw new IllegalStateException(strings.get("restart.server.noStartupInfo", props)); } } - private void doReincarnation() throws RDCException { + private void scheduleReincarnation() throws RDCException { try { final String[] sysProps; if (Boolean.parseBoolean(System.getenv("AS_SUPER_DEBUG"))) { @@ -150,8 +149,7 @@ private void doReincarnation() throws RDCException { sysProps = normalProps; } - JavaClassRunner runner = new JavaClassRunner(classpath, sysProps, classname, args); - runner.run(); + Runtime.getRuntime().addShutdownHook(new StartServerShutdownHook(classpath, sysProps, classname, args)); } catch (Exception e) { throw new RDCException(e); } diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java new file mode 100644 index 00000000000..49a09994086 --- /dev/null +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2024 Contributors to the Eclipse Foundation. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License v. 2.0, which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * This Source Code may also be made available under the following Secondary + * Licenses when the conditions for such availability set forth in the + * Eclipse Public License v. 2.0 are satisfied: GNU General Public License, + * version 2 with the GNU Classpath Exception, which is available at + * https://www.gnu.org/software/classpath/license.html. + * + * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 + */ + +package com.sun.enterprise.v3.admin; + +import com.sun.enterprise.util.OS; + +import java.io.File; +import java.lang.ProcessBuilder.Redirect; +import java.lang.System.Logger; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.lang.System.Logger.Level.DEBUG; +import static java.lang.System.Logger.Level.ERROR; +import static java.lang.System.Logger.Level.INFO; +import static java.lang.System.Logger.Level.TRACE; + + +/** + * Starts the server after it is completely turned off. + *

+ * Shutdown hooks are asynchronously executed - we need to wait for the end + * all GlassFish shutdown hooks, at least of those we know. + *

+ * All shutdown hooks are executed in parallel including this thread. + * As we don't have any direct reference to other hooks, but we need to + * shut GlassFish down before we can start it, we postpone our part + * of the job as much as possible. + */ +class StartServerShutdownHook extends Thread { + + private static final Logger LOG = System.getLogger(StartServerShutdownHook.class.getName()); + private ProcessBuilder builder; + + StartServerShutdownHook(String classpath, String[] sysProps, String classname, String[] args) { + super("GlassFish Restart Shutdown Hook"); + if (classname == null || classname.isBlank()) { + throw new IllegalArgumentException("classname was null"); + } + this.builder = prepareStartup(classpath, sysProps, classname, args); + } + + + @Override + public void run() { + try { + blockUntilGlassFishIsOff(); + startGlassFishInstance(); + } catch (Exception e) { + LOG.log(ERROR, "Failed to execute shutdown hook for server start. Restart failed.", e); + } + } + + + private void startGlassFishInstance() { + LOG.log(INFO, "Starting process {0} in directory {1}", this.builder.command(), this.builder.directory()); + Process process; + try { + process = this.builder.start(); + } catch (Exception e) { + throw new IllegalStateException("GlassFish instance startup failed.", e); + } + boolean fail; + try { + fail = process.waitFor(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + throw new IllegalStateException("Waiting for GlassFish instance restart was interrupted.", e); + } + if (fail) { + throw new IllegalStateException( + "GlassFish instance startup failed - the process stopped with error code " + process.exitValue()); + } + // Final note: at this point we must not make any output to logging, because + // log files are now used by the new process. + } + + + private void blockUntilGlassFishIsOff() { + Thread.onSpinWait(); + Thread[] threads = new Thread[Thread.activeCount() + 10]; + Thread.enumerate(threads); + LOG.log(DEBUG, () -> "Found threads: " + Arrays.toString(threads)); + List shutdownThreads = Stream.of(threads).filter(Objects::nonNull).filter(t -> t != this) + .filter(t -> t.getName().startsWith("GlassFish") && t.getName().endsWith("Shutdown Hook")) + .collect(Collectors.toList()); + while(true) { + if (isGlassFishTurnedOff(shutdownThreads)) { + break; + } + Thread.onSpinWait(); + } + } + + + private boolean isGlassFishTurnedOff(List shutdown) { + for (Thread thread : shutdown) { + if (thread.isAlive()) { + LOG.log(TRACE, "Waiting for the death of " + thread); + return false; + } + } + return true; + } + + + private static ProcessBuilder prepareStartup(String classpath, String[] sysprops, String classname, String[] args) { + Path javaExecutable = detectJavaExecutable(); + final List cmdline = new LinkedList<>(); + if (!OS.isWindows()) { + cmdline.add("nohup"); + } + cmdline.add(javaExecutable.toString()); + cmdline.add("-cp"); + cmdline.add(classpath); + if (sysprops != null) { + for (String sysprop : sysprops) { + cmdline.add(sysprop); + } + } + cmdline.add(classname); + if (args != null) { + for (String arg : args) { + cmdline.add(arg); + } + } + + ProcessBuilder builder = new ProcessBuilder(cmdline); + final File workDir = new File(System.getProperty("user.dir")); + // We start and abandon the process. + builder.redirectError(Redirect.DISCARD).redirectOutput(Redirect.DISCARD).redirectInput(Redirect.DISCARD); + builder.directory(workDir); + return builder; + } + + + private static Path detectJavaExecutable() { + final String javaName; + if (OS.isWindows()) { + javaName = "java.exe"; + } else { + javaName = "java"; + } + final Path javaroot = Path.of(System.getProperty("java.home")); + return javaroot.resolve("bin").resolve(javaName); + } +} From 31c2661cdb04798062b9e27cfea9ac8431311f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 01:25:23 +0100 Subject: [PATCH 2/7] Resolved conflict of debug ports on Linux, added logging for extreme cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - when current (old) JVM had enabled debugging, the new one sometimes failed to start. It is not possible to wait from the inside. - Stop the kernell after adding the last shutdown hook; shutdown hooks run in parallel, but we have to ensure that ours will be executed after all other non-daemon hooks finish. - export AS_RESTART_LOGFILES=true to get "old" and "new" files in the server's log directory. It is trivial workaround, because the standard logging system might get into a conflict with the new GF instance too. - The "super debug" is not helpful as it affects timing Signed-off-by: David Matějček --- nucleus/core/kernel/pom.xml | 9 +- .../enterprise/v3/admin/RestartServer.java | 34 +--- .../enterprise/v3/admin/StartServerHook.java | 192 ++++++++++++++---- 3 files changed, 163 insertions(+), 72 deletions(-) diff --git a/nucleus/core/kernel/pom.xml b/nucleus/core/kernel/pom.xml index 91d7e5da2f8..cba7b7592de 100755 --- a/nucleus/core/kernel/pom.xml +++ b/nucleus/core/kernel/pom.xml @@ -31,7 +31,7 @@ glassfish-jar Kernel Classes - + false @@ -169,6 +169,11 @@ osgi-resource-locator provided + + org.glassfish.main + glassfish-jul-extension + provided + org.glassfish.main @@ -321,7 +326,7 @@ - + fastest diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java index 27e53b71d32..6214fe54d4f 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java @@ -56,13 +56,11 @@ public class RestartServer { private String[] args; private String serverName = ""; private static final LocalStringsImpl strings = new LocalStringsImpl(RestartServer.class); - private static final String magicProperty = "-DAS_RESTART=" + ProcessHandle.current().pid(); - private static final String[] normalProps = { magicProperty }; + private static final String AS_RESTART_PID = "-DAS_RESTART=" + ProcessHandle.current().pid(); + private static final String[] normalProps = { AS_RESTART_PID }; private static final int RESTART_NORMAL = 10; private static final int RESTART_DEBUG_ON = 11; private static final int RESTART_DEBUG_OFF = 12; - private static final String[] debuggerProps = { magicProperty, "-Xdebug", - "-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=1323" }; protected final void setDebug(Boolean b) { debug = b; @@ -100,12 +98,14 @@ protected final void doExecute(AdminCommandContext context) { Thread.onSpinWait(); gfKernel = glassfishProvider.get(); } - - // else we just return a special int from System.exit() - gfKernel.stop(); if (!verbose) { - reincarnate(); + if (!setupReincarnationWithAsadmin() && !setupReincarnationWithOther()) { + throw new IllegalStateException(strings.get("restart.server.noStartupInfo", props)); + } + scheduleReincarnation(); } + // else we just return a special int from System.exit() + gfKernel.stop(); } catch (Exception e) { throw new Error(strings.get("restart.server.failure"), e); } @@ -131,25 +131,9 @@ private void init(AdminCommandContext context) { logger.info(strings.get("restart.server.init")); } - private void reincarnate() throws Exception { - if (setupReincarnationWithAsadmin() || setupReincarnationWithOther()) { - scheduleReincarnation(); - } else { - throw new IllegalStateException(strings.get("restart.server.noStartupInfo", props)); - } - } - private void scheduleReincarnation() throws RDCException { try { - final String[] sysProps; - if (Boolean.parseBoolean(System.getenv("AS_SUPER_DEBUG"))) { - // very very difficult to debug this stuff otherwise! - sysProps = debuggerProps; - } else { - sysProps = normalProps; - } - - Runtime.getRuntime().addShutdownHook(new StartServerShutdownHook(classpath, sysProps, classname, args)); + Runtime.getRuntime().addShutdownHook(new StartServerShutdownHook(classpath, normalProps, classname, args)); } catch (Exception e) { throw new RDCException(e); } diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java index 49a09994086..43ae119d848 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java @@ -19,21 +19,26 @@ import com.sun.enterprise.util.OS; import java.io.File; +import java.io.IOException; +import java.io.PrintStream; import java.lang.ProcessBuilder.Redirect; import java.lang.System.Logger; import java.nio.file.Path; +import java.time.Instant; +import java.util.ArrayList; import java.util.Arrays; -import java.util.LinkedList; import java.util.List; import java.util.Objects; -import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.logging.Handler; import java.util.stream.Collectors; import java.util.stream.Stream; -import static java.lang.System.Logger.Level.DEBUG; +import org.glassfish.main.jul.GlassFishLogManager; + import static java.lang.System.Logger.Level.ERROR; import static java.lang.System.Logger.Level.INFO; -import static java.lang.System.Logger.Level.TRACE; /** @@ -50,86 +55,147 @@ class StartServerShutdownHook extends Thread { private static final Logger LOG = System.getLogger(StartServerShutdownHook.class.getName()); - private ProcessBuilder builder; + private static final boolean LOG_RESTART = Boolean.parseBoolean(System.getenv("AS_RESTART_LOGFILES")); + private static final Path LOGDIR = new File(System.getProperty("com.sun.aas.instanceRoot"), "logs").toPath() + .toAbsolutePath(); + private static final Predicate FILTER_OTHER_HOOKS = t -> t.getName().startsWith("GlassFish") + && t.getName().endsWith("Shutdown Hook"); + + private final PrintStream logFile; + private final ProcessBuilder builder; + private final Instant startTime; StartServerShutdownHook(String classpath, String[] sysProps, String classname, String[] args) { super("GlassFish Restart Shutdown Hook"); + setDaemon(false); if (classname == null || classname.isBlank()) { throw new IllegalArgumentException("classname was null"); } - this.builder = prepareStartup(classpath, sysProps, classname, args); + this.startTime = Instant.now(); + this.logFile = getLogFileOld(startTime); + this.builder = prepareStartup(startTime, classpath, sysProps, classname, args); } @Override public void run() { try { - blockUntilGlassFishIsOff(); + // We do want to be the last thread alive. + Thread.onSpinWait(); + waitForThreads(FILTER_OTHER_HOOKS); + // We do want this in the server.log. + LOG.log(INFO, "Starting process {0} in directory {1}", this.builder.command(), this.builder.directory()); + stopLogging(); + // We must not make any output to logging, because + // log files are now used by the new process. + // The only exception is when startup failed. startGlassFishInstance(); } catch (Exception e) { LOG.log(ERROR, "Failed to execute shutdown hook for server start. Restart failed.", e); + log(e); + } finally { + if (logFile != null) { + logFile.close(); + } } } + private void stopLogging() { + GlassFishLogManager logManager = GlassFishLogManager.getLogManager(); + if (logManager == null) { + return; + } + logManager.getAllHandlers().forEach(Handler::close); + } + + private void startGlassFishInstance() { - LOG.log(INFO, "Starting process {0} in directory {1}", this.builder.command(), this.builder.directory()); - Process process; try { - process = this.builder.start(); + this.builder.start(); } catch (Exception e) { throw new IllegalStateException("GlassFish instance startup failed.", e); } - boolean fail; - try { - fail = process.waitFor(2, TimeUnit.SECONDS); - } catch (InterruptedException e) { - throw new IllegalStateException("Waiting for GlassFish instance restart was interrupted.", e); - } - if (fail) { - throw new IllegalStateException( - "GlassFish instance startup failed - the process stopped with error code " + process.exitValue()); - } - // Final note: at this point we must not make any output to logging, because - // log files are now used by the new process. } - private void blockUntilGlassFishIsOff() { - Thread.onSpinWait(); - Thread[] threads = new Thread[Thread.activeCount() + 10]; - Thread.enumerate(threads); - LOG.log(DEBUG, () -> "Found threads: " + Arrays.toString(threads)); - List shutdownThreads = Stream.of(threads).filter(Objects::nonNull).filter(t -> t != this) - .filter(t -> t.getName().startsWith("GlassFish") && t.getName().endsWith("Shutdown Hook")) - .collect(Collectors.toList()); + private void waitForThreads(Predicate filter) { + final Thread[] threads = getThreadsToDie(filter); + log(() -> "Waiting for shutdown of threads:\n" + toString(threads)); while(true) { - if (isGlassFishTurnedOff(shutdownThreads)) { + Thread thread = getFirstAlive(threads); + if (thread == null) { break; } - Thread.onSpinWait(); + try { + thread.join(); + log(() -> "Joined thread " + toString(thread)); + } catch (InterruptedException e) { + log(e); + continue; + } } } - private boolean isGlassFishTurnedOff(List shutdown) { + private Thread[] getThreadsToDie(Predicate filter) { + final Thread[] threads = new Thread[Thread.activeCount() + 10]; + Thread.enumerate(threads); + log(() -> "Found threads:\n" + toString(threads)); + return Stream.of(threads).filter(Objects::nonNull).filter(t -> t != this).filter(t -> !t.isDaemon()) + .filter(filter).toArray(Thread[]::new); + } + + + private Thread getFirstAlive(Thread[] shutdown) { for (Thread thread : shutdown) { if (thread.isAlive()) { - LOG.log(TRACE, "Waiting for the death of " + thread); - return false; + return thread; } } - return true; + return null; + } + + + private void log(Supplier message) { + if (logFile == null) { + return; + } + logFile.append(Instant.now().toString()).append(' ').append(message.get()).append('\n'); + logFile.flush(); } - private static ProcessBuilder prepareStartup(String classpath, String[] sysprops, String classname, String[] args) { - Path javaExecutable = detectJavaExecutable(); - final List cmdline = new LinkedList<>(); + private void log(Exception e) { + if (logFile == null) { + return; + } + logFile.append(Instant.now().toString()).append(' ').append(e.getMessage()).append('\n'); + e.printStackTrace(logFile); + logFile.append('\n').flush(); + } + + + private static PrintStream getLogFileOld(Instant startTime) { + if (!LOG_RESTART) { + return null; + } + try { + return new PrintStream(LOGDIR.resolve("restart-" + startTime + "-old.log").toFile()); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + + private static ProcessBuilder prepareStartup(Instant now, String classpath, String[] sysprops, String classname, + String[] args) { + final Path javaExecutable = detectJavaExecutable(); + final List cmdline = new ArrayList<>(); if (!OS.isWindows()) { cmdline.add("nohup"); } - cmdline.add(javaExecutable.toString()); + cmdline.add(javaExecutable.toFile().getAbsolutePath()); cmdline.add("-cp"); cmdline.add(classpath); if (sysprops != null) { @@ -144,11 +210,28 @@ private static ProcessBuilder prepareStartup(String classpath, String[] sysprops } } - ProcessBuilder builder = new ProcessBuilder(cmdline); - final File workDir = new File(System.getProperty("user.dir")); - // We start and abandon the process. - builder.redirectError(Redirect.DISCARD).redirectOutput(Redirect.DISCARD).redirectInput(Redirect.DISCARD); - builder.directory(workDir); + final List outerCommand; + if (OS.isWindows() || OS.isDarwin()) { + outerCommand = cmdline; + } else { + // To avoid conflict of the debug port used both by old and new JVM, + // we will force waiting for the end of the old JVM. + outerCommand = new ArrayList<>(); + outerCommand.add("bash"); + outerCommand.add("-c"); + outerCommand.add("tail --pid=" + ProcessHandle.current().pid() + " -f /dev/null && " + + cmdline.stream().collect(Collectors.joining(" "))); + } + + final ProcessBuilder builder = new ProcessBuilder(outerCommand); + builder.directory(new File(System.getProperty("user.dir"))); + if (LOG_RESTART) { + builder.redirectErrorStream(true); + builder.redirectOutput(LOGDIR.resolve("restart-" + now + "-new.log").toFile()); + } else { + builder.redirectError(Redirect.DISCARD); + builder.redirectOutput(Redirect.DISCARD); + } return builder; } @@ -163,4 +246,23 @@ private static Path detectJavaExecutable() { final Path javaroot = Path.of(System.getProperty("java.home")); return javaroot.resolve("bin").resolve(javaName); } + + + private static String toString(Thread[] threads) { + return Arrays.stream(threads).filter(Objects::nonNull).map(StartServerShutdownHook::toString) + .collect(Collectors.joining("\n")); + } + + + private static String toString(Thread thread) { + return new StringBuilder() + .append("Thread[") + .append("id=").append(thread.getId()) + .append(", name=").append(thread.getName()) + .append(", daemon=").append(thread.isDaemon()) + .append(", priority=").append(thread.getPriority()) + .append(", state=").append(thread.getState()) + .append(", classloader=").append(thread.getContextClassLoader()) + .append(']').toString(); + } } From e95d599b21c721f0f553d2a1a18306b97bdfbe52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 01:25:55 +0100 Subject: [PATCH 3/7] The rotationTimer should be a daemon thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Matějček --- .../org/glassfish/main/jul/handler/GlassFishLogHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nucleus/glassfish-jul-extension/src/main/java/org/glassfish/main/jul/handler/GlassFishLogHandler.java b/nucleus/glassfish-jul-extension/src/main/java/org/glassfish/main/jul/handler/GlassFishLogHandler.java index b948801fafd..70203322f65 100644 --- a/nucleus/glassfish-jul-extension/src/main/java/org/glassfish/main/jul/handler/GlassFishLogHandler.java +++ b/nucleus/glassfish-jul-extension/src/main/java/org/glassfish/main/jul/handler/GlassFishLogHandler.java @@ -96,7 +96,7 @@ public class GlassFishLogHandler extends Handler implements ExternallyManagedLog private GlassFishLogHandlerConfiguration configuration; - private final Timer rotationTimer = new Timer("log-rotation-timer-for-" + getClass().getSimpleName()); + private final Timer rotationTimer = new Timer("log-rotation-timer-for-" + getClass().getSimpleName(), true); private volatile GlassFishLogHandlerStatus status; private LoggingPump pump; From d9c27a5e04f6f9930555e6abb75e9380a0d101b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 01:26:44 +0100 Subject: [PATCH 4/7] Deleted JavaClassRunner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - its only usage was for the domain restart which was reimplemented Signed-off-by: David Matějček --- .../universal/process/JavaClassRunner.java | 103 ------------------ 1 file changed, 103 deletions(-) delete mode 100644 nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/JavaClassRunner.java diff --git a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/JavaClassRunner.java b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/JavaClassRunner.java deleted file mode 100644 index 4a48cf172ee..00000000000 --- a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/JavaClassRunner.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2022 Contributors to the Eclipse Foundation - * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved. - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License v. 2.0, which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * This Source Code may also be made available under the following Secondary - * Licenses when the conditions for such availability set forth in the - * Eclipse Public License v. 2.0 are satisfied: GNU General Public License, - * version 2 with the GNU Classpath Exception, which is available at - * https://www.gnu.org/software/classpath/license.html. - * - * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 - */ - -package com.sun.enterprise.universal.process; - -import com.sun.enterprise.util.OS; - -import java.io.File; -import java.io.IOException; -import java.lang.System.Logger; -import java.nio.file.Path; -import java.util.LinkedList; -import java.util.List; - -import static java.lang.System.Logger.Level.INFO; - -/** - * Very simple initial implementation - * If it is useful there are plenty of improvements that can be made... - * - * @author bnevins - */ -public class JavaClassRunner { - private static final Logger LOG = System.getLogger(JavaClassRunner.class.getName()); - private static final Path javaExecutable; - private final ProcessBuilder builder; - - static{ - final String javaName; - if (OS.isWindows()) { - javaName = "java.exe"; - } else { - javaName = "java"; - } - final Path javaroot = Path.of(System.getProperty("java.home")); - javaExecutable = javaroot.resolve("bin").resolve(javaName); - } - - public JavaClassRunner(String classpath, String[] sysprops, String classname, String[] args) throws IOException { - if (javaExecutable == null) { - throw new IOException("Can not find a jvm"); - } - - if (!isEmpty(classname)) { - throw new IllegalArgumentException("classname was null"); - } - - final List cmdline = new LinkedList<>(); - if (!OS.isWindows()) { - cmdline.add("nohup"); - } - cmdline.add(javaExecutable.toString()); - if (isEmpty(classpath)) { - cmdline.add("-cp"); - cmdline.add(classpath); - } - if (sysprops != null) { - for (String sysprop : sysprops) { - cmdline.add(sysprop); - } - } - cmdline.add(classname); - if (args != null) { - for (String arg : args) { - cmdline.add(arg); - } - } - - this.builder = new ProcessBuilder(cmdline); - final File workDir = new File(System.getProperty("user.dir")); - this.builder.directory(workDir); - this.builder.inheritIO(); - } - - - public void run() throws IOException { - LOG.log(INFO, "Starting process {0} in directory {1}", this.builder.command(), this.builder.directory()); - final Process process = this.builder.start(); - if (process.isAlive()) { - LOG.log(INFO, "Started process with PID={0} and command line={1}", process.pid(), process.info().commandLine()); - } else { - throw new IllegalStateException("Process stopped with error code " + process.exitValue()); - } - } - - private boolean isEmpty(String s) { - return s != null && !s.isEmpty(); - } -} From 77b4cf49cc5b1e5f133fd69a1e025c6a663f81c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 17:53:46 +0100 Subject: [PATCH 5/7] RestTestBase - improved confusing log on test error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - backup of the server.log cannot be done if the server is dead - Using System.Logger instead of JUL Signed-off-by: David Matějček --- .../main/admin/test/rest/RestTestBase.java | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/rest/RestTestBase.java b/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/rest/RestTestBase.java index ecf13316d41..2a2de98cc39 100644 --- a/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/rest/RestTestBase.java +++ b/appserver/tests/admin/tests/src/test/java/org/glassfish/main/admin/test/rest/RestTestBase.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2023 Contributors to the Eclipse Foundation. + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation. * Copyright (c) 2010, 2018 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -19,6 +19,7 @@ import com.sun.enterprise.util.io.FileUtils; +import jakarta.ws.rs.ProcessingException; import jakarta.ws.rs.client.Client; import jakarta.ws.rs.core.MultivaluedMap; import jakarta.ws.rs.core.Response; @@ -26,12 +27,12 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.lang.System.Logger; +import java.net.ConnectException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; import org.glassfish.admin.rest.client.utils.MarshallingUtils; import org.glassfish.main.admin.test.webapp.HelloServlet; @@ -48,6 +49,8 @@ import static com.sun.enterprise.util.io.FileUtils.ensureWritableDir; import static jakarta.ws.rs.core.MediaType.TEXT_PLAIN; +import static java.lang.System.Logger.Level.ERROR; +import static java.lang.System.Logger.Level.INFO; import static org.glassfish.main.itest.tools.GlassFishTestEnvironment.getTargetDirectory; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; @@ -55,7 +58,7 @@ public class RestTestBase { - private static final Logger LOG = Logger.getLogger(RestTestBase.class.getName()); + private static final Logger LOG = System.getLogger(RestTestBase.class.getName()); protected static final String CONTEXT_ROOT_MANAGEMENT = "/management"; @@ -98,6 +101,12 @@ public static void captureLogAndCloseClient(final TestInfo testInfo) throws Exce try (InputStream readEntity = response.readEntity(InputStream.class)) { FileUtils.copy(readEntity, reportFile); } + } catch (ProcessingException e) { + if (e.getCause() instanceof ConnectException) { + LOG.log(ERROR, "Unable to backup the server.log using REST, server is not listening on " + baseAdminUrl); + } else { + throw e; + } } } @@ -160,7 +169,7 @@ public void deleteCluster(String clusterName) { Map extraProperties = (Map) body.get("extraProperties"); if (extraProperties != null) { List> instanceList = (List>) extraProperties.get("instanceList"); - LOG.log(Level.INFO, "Found instances: {0}", instanceList); + LOG.log(INFO, "Found instances: {0}", instanceList); if (instanceList != null && !instanceList.isEmpty()) { for (Map instance : instanceList) { String instanceName = instance.get("name"); @@ -261,7 +270,7 @@ protected List getCommandResults(Response response) { protected Map getChildResources(Response response) { Map responseMap = MarshallingUtils.buildMapFromDocument(response.readEntity(String.class)); - LOG.log(Level.INFO, "responseMap: \n{0}", responseMap); + LOG.log(INFO, "responseMap: \n{0}", responseMap); Map extraProperties = (Map) responseMap.get("extraProperties"); if (extraProperties != null) { return extraProperties.get("childResources"); @@ -291,7 +300,7 @@ protected List> getProperties(Response response) { protected static File getEar(final String appName) { final EnterpriseArchive ear = ShrinkWrap.create(EnterpriseArchive.class).addAsModule(getWar(appName), "simple.war"); - LOG.info(ear.toString(true)); + LOG.log(INFO, ear.toString(true)); try { File tempFile = File.createTempFile(appName, ".ear"); ear.as(ZipExporter.class).exportTo(tempFile, true); @@ -303,7 +312,7 @@ protected static File getEar(final String appName) { protected static File getWar(final String appName) { final WebArchive war = ShrinkWrap.create(WebArchive.class).addPackage(HelloServlet.class.getPackage()); - LOG.info(war.toString(true)); + LOG.log(INFO, war.toString(true)); try { File tempFile = File.createTempFile(appName, ".war"); war.as(ZipExporter.class).exportTo(tempFile, true); From 21fe61c40b1a30d766b4d5491044ce340117dda9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 18:02:23 +0100 Subject: [PATCH 6/7] Improved effectivity of waiting for stop/start instance actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Original code caused local port exhaustion - Original code used busy spinning instead of signals Signed-off-by: David Matějček --- .../servermgmt/cli/DeleteDomainCommand.java | 4 +- .../servermgmt/cli/LocalServerCommand.java | 28 +-- .../servermgmt/cli/StartServerHelper.java | 20 +- .../servermgmt/cli/StopDomainCommand.java | 50 ++-- .../cli/cluster/StopLocalInstanceCommand.java | 39 ++- .../universal/process/ProcessUtils.java | 233 +++++++++++++----- 6 files changed, 225 insertions(+), 149 deletions(-) diff --git a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/DeleteDomainCommand.java b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/DeleteDomainCommand.java index d6746ef943c..9f0fe43ee33 100644 --- a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/DeleteDomainCommand.java +++ b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/DeleteDomainCommand.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Contributors to the Eclipse Foundation + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -81,7 +81,7 @@ protected int executeCommand() throws CommandException, CommandValidationExcepti private void checkRunning() throws CommandException { programOpts.setInteractive(false); // don't prompt for password - if (ProcessUtils.isListening(adminAddress) && isThisDAS(getDomainRootDir())) { + if (isThisDAS(getDomainRootDir()) && ProcessUtils.isListening(adminAddress)) { String msg = strings.get("domain.is.running", getDomainName(), getDomainRootDir()); throw new IllegalStateException(msg); } diff --git a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/LocalServerCommand.java b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/LocalServerCommand.java index 68e22fc0425..4652d4260d4 100644 --- a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/LocalServerCommand.java +++ b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/LocalServerCommand.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Contributors to the Eclipse Foundation + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -43,6 +43,7 @@ import org.glassfish.api.ActionReport.ExitCode; import org.glassfish.api.admin.CommandException; +import static com.sun.enterprise.admin.cli.CLIConstants.DEATH_TIMEOUT_MS; import static com.sun.enterprise.admin.cli.CLIConstants.DEFAULT_ADMIN_PORT; import static com.sun.enterprise.admin.cli.CLIConstants.DEFAULT_HOSTNAME; import static com.sun.enterprise.admin.cli.ProgramOptions.PasswordLocation.LOCAL_PASSWORD; @@ -331,22 +332,15 @@ protected final void waitForRestart(final int oldPid, final HostAndPort oldAdmin logger.log(Level.FINEST, "waitForRestart(oldPid={0}, oldAdminAddress={1}, newAdminAddress={2})", new Object[] {oldPid, oldAdminAddress, newAdminAddress}); - final Supplier signStop = () -> { - if (!ProcessUtils.isListening(oldAdminAddress)) { - return true; - } - int newPid = getServerPid(); - if (newPid < 0) { - // is listening, but not responding. - // Could be also another application, but then - // remote _restart-domain call should already fail. - return true; - } - // stopped and started again - return oldPid != newPid; - }; + final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS); final boolean printDots = !programOpts.isTerse(); - if (!ProcessUtils.waitFor(signStop, Duration.ofMillis(CLIConstants.DEATH_TIMEOUT_MS), printDots)) { + final boolean stopped; + if (isLocal()) { + stopped = ProcessUtils.waitWhileIsAlive(oldPid, timeout, printDots); + } else { + stopped = ProcessUtils.waitWhileListening(oldAdminAddress, timeout, printDots); + } + if (!stopped) { throw new CommandException(I18N.get("restartDomain.noGFStart")); } logger.log(CONFIG, "Server instance is stopped, now we wait for the start on {0}", newAdminAddress); @@ -383,7 +377,7 @@ protected final void waitForRestart(final int oldPid, final HostAndPort oldAdmin /** - * Get uptime from the server. + * @return uptime from the server. */ protected final long getUptime() throws CommandException { RemoteCLICommand cmd = new RemoteCLICommand("uptime", programOpts, env); diff --git a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StartServerHelper.java b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StartServerHelper.java index fa8df2aed17..13c39684b87 100644 --- a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StartServerHelper.java +++ b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StartServerHelper.java @@ -78,21 +78,21 @@ public StartServerHelper(boolean terse, ServerDirs serverDirs, GFLauncher launch boolean debug) { this.terse = terse; this.launcher = launcher; - info = launcher.getInfo(); + this.info = launcher.getInfo(); if (info.isDomain()) { - serverOrDomainName = info.getDomainName(); + this.serverOrDomainName = info.getDomainName(); } else { - serverOrDomainName = info.getInstanceName(); + this.serverOrDomainName = info.getInstanceName(); } - addresses = info.getAdminAddresses(); + this.addresses = info.getAdminAddresses(); this.serverDirs = serverDirs; - pidFile = serverDirs.getPidFile(); + this.pidFile = serverDirs.getPidFile(); this.masterPassword = masterPassword; // it will be < 0 if both --debug is false and debug-enabled=false in jvm-config - debugPort = launcher.getDebugPort(); + this.debugPort = launcher.getDebugPort(); } @@ -205,13 +205,7 @@ private void waitForParentToDie() throws CommandException { return; } LOG.log(DEBUG, "Waiting for death of the parent process with pid={0}", pid); - final Supplier parentDeathSign = () -> { - if (pid != null && ProcessUtils.isAlive(pid)) { - return false; - } - return !isListeningOnAnyEndpoint(); - }; - if (!ProcessUtils.waitFor(parentDeathSign, Duration.ofMillis(DEATH_TIMEOUT_MS), false)) { + if (!ProcessUtils.waitWhileIsAlive(pid, Duration.ofMillis(DEATH_TIMEOUT_MS), false)) { throw new CommandException(I18N.get("deathwait_timeout", DEATH_TIMEOUT_MS)); } LOG.log(DEBUG, "Parent process with PID={0} is dead and all admin endpoints are free.", pid); diff --git a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StopDomainCommand.java b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StopDomainCommand.java index d524248a075..3a238b2cc9f 100644 --- a/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StopDomainCommand.java +++ b/nucleus/admin/server-mgmt/src/main/java/com/sun/enterprise/admin/servermgmt/cli/StopDomainCommand.java @@ -28,7 +28,6 @@ import java.lang.System.Logger; import java.lang.System.Logger.Level; import java.time.Duration; -import java.util.function.Supplier; import org.glassfish.api.Param; import org.glassfish.api.admin.CommandException; @@ -143,8 +142,7 @@ protected int dasNotRunning() throws CommandException { if (isLocal()) { try { File prevPid = getServerDirs().getLastPidFile(); - File watchedPid = getServerDirs().getPidFile(); - ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); + ProcessUtils.kill(prevPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); } catch (KillNotPossibleException e) { throw new CommandException(e.getMessage(), e); } @@ -169,11 +167,26 @@ protected int dasNotRunning() throws CommandException { */ protected void doCommand() throws CommandException { // run the remote stop-domain command and throw away the output - RemoteCLICommand cmd = new RemoteCLICommand(getName(), programOpts, env); - File watchedPid = isLocal() ? getServerDirs().getPidFile() : null; + final RemoteCLICommand cmd = new RemoteCLICommand(getName(), programOpts, env); + final File watchedPid = isLocal() ? getServerDirs().getPidFile() : null; + final Long oldPid = ProcessUtils.loadPid(watchedPid); + final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS); + final boolean printDots = !programOpts.isTerse(); try { cmd.executeAndReturnOutput("stop-domain", "--force", force.toString()); - waitForDeath(watchedPid); + if (printDots) { + // use stdout because logger always appends a newline + System.out.print(Strings.get("StopDomain.WaitDASDeath") + " "); + } + final boolean dead; + if (isLocal()) { + dead = ProcessUtils.waitWhileIsAlive(oldPid, timeout, printDots); + } else { + dead = ProcessUtils.waitWhileListening(addr, timeout, printDots); + } + if (!dead) { + throw new CommandException(Strings.get("StopDomain.DASNotDead", timeout.toSeconds())); + } } catch (Exception e) { // The domain server may have died so fast we didn't have time to // get the (always successful!!) return data. This is NOT AN ERROR! @@ -181,7 +194,7 @@ protected void doCommand() throws CommandException { if (kill && isLocal()) { try { File prevPid = getServerDirs().getLastPidFile(); - ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); + ProcessUtils.kill(prevPid, timeout, printDots); return; } catch (Exception ex) { e.addSuppressed(ex); @@ -190,27 +203,4 @@ protected void doCommand() throws CommandException { throw e; } } - - /** - * Wait for the server to die. - * - * @param watchedPid - */ - private void waitForDeath(File watchedPid) throws CommandException { - if (!programOpts.isTerse()) { - // use stdout because logger always appends a newline - System.out.print(Strings.get("StopDomain.WaitDASDeath") + " "); - } - final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS); - final Supplier deathSign; - if (isLocal()) { - deathSign = () -> !ProcessUtils.isListening(addr) && !ProcessUtils.isAlive(watchedPid); - } else { - deathSign = () -> !ProcessUtils.isListening(addr); - } - final boolean dead = ProcessUtils.waitFor(deathSign, timeout, !programOpts.isTerse()); - if (!dead) { - throw new CommandException(Strings.get("StopDomain.DASNotDead", timeout.toSeconds())); - } - } } diff --git a/nucleus/cluster/cli/src/main/java/com/sun/enterprise/admin/cli/cluster/StopLocalInstanceCommand.java b/nucleus/cluster/cli/src/main/java/com/sun/enterprise/admin/cli/cluster/StopLocalInstanceCommand.java index 08a90f4b86f..d17712f53e4 100644 --- a/nucleus/cluster/cli/src/main/java/com/sun/enterprise/admin/cli/cluster/StopLocalInstanceCommand.java +++ b/nucleus/cluster/cli/src/main/java/com/sun/enterprise/admin/cli/cluster/StopLocalInstanceCommand.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Contributors to the Eclipse Foundation + * Copyright (c) 2022, 2024 Contributors to the Eclipse Foundation * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved. * * This program and the accompanying materials are made available under the @@ -24,7 +24,6 @@ import java.io.File; import java.time.Duration; -import java.util.function.Supplier; import java.util.logging.Level; import org.glassfish.api.Param; @@ -111,8 +110,7 @@ protected int instanceNotRunning() throws CommandException { if (kill) { try { File lastPid = getServerDirs().getLastPidFile(); - File watchedPid = getServerDirs().getPidFile(); - ProcessUtils.kill(lastPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); + ProcessUtils.kill(lastPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); } catch (KillNotPossibleException e) { logger.log(Level.SEVERE, e.getMessage(), e); return -1; @@ -150,10 +148,20 @@ protected void doCommand() throws CommandException { * most likely means we're talking to the wrong server. */ programOpts.setInteractive(false); - RemoteCLICommand cmd = new RemoteCLICommand("_stop-instance", programOpts, env); + + final Long pid = ProcessUtils.loadPid(getServerDirs().getPidFile()); + final boolean printDots = !programOpts.isTerse(); + final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS); + final RemoteCLICommand cmd = new RemoteCLICommand("_stop-instance", programOpts, env); try { cmd.executeAndReturnOutput("_stop-instance", "--force", force.toString()); - waitForDeath(); + if (printDots) { + System.out.print(Strings.get("StopInstance.waitForDeath") + " "); + } + final boolean dead = ProcessUtils.waitWhileIsAlive(pid, timeout, printDots); + if (!dead) { + throw new CommandException(Strings.get("StopInstance.instanceNotDead", DEATH_TIMEOUT_MS / 1000)); + } } catch (Exception e) { // The server may have died so fast we didn't have time to // get the (always successful!!) return data. This is NOT AN ERROR! @@ -161,8 +169,7 @@ protected void doCommand() throws CommandException { if (kill) { try { File prevPid = getServerDirs().getLastPidFile(); - File watchedPid = getServerDirs().getPidFile(); - ProcessUtils.kill(prevPid, watchedPid, Duration.ofMillis(DEATH_TIMEOUT_MS), !programOpts.isTerse()); + ProcessUtils.kill(prevPid, timeout, printDots); return; } catch (Exception ex) { e.addSuppressed(ex); @@ -171,20 +178,4 @@ protected void doCommand() throws CommandException { throw e; } } - - /** - * Wait for the server to die. - */ - private void waitForDeath() throws CommandException { - if (!programOpts.isTerse()) { - // use stdout because logger always appends a newline - System.out.print(Strings.get("StopInstance.waitForDeath") + " "); - } - final Duration timeout = Duration.ofMillis(DEATH_TIMEOUT_MS); - final Supplier deathSign = () -> !ProcessUtils.isAlive(getServerDirs().getPidFile()); - final boolean dead = ProcessUtils.waitFor(deathSign, timeout, !programOpts.isTerse()); - if (!dead) { - throw new CommandException(Strings.get("StopInstance.instanceNotDead", DEATH_TIMEOUT_MS / 1000)); - } - } } diff --git a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java index a70c18a6a49..26b40498e8e 100644 --- a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java +++ b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java @@ -28,17 +28,23 @@ import java.lang.System.Logger; import java.net.InetSocketAddress; import java.net.Socket; -import java.nio.file.Files; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.text.MessageFormat; import java.time.Duration; import java.time.Instant; import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import static com.sun.enterprise.util.StringUtils.ok; import static java.lang.System.Logger.Level.DEBUG; +import static java.lang.System.Logger.Level.ERROR; import static java.lang.System.Logger.Level.INFO; import static java.lang.System.Logger.Level.TRACE; +import static java.lang.System.Logger.Level.WARNING; import static java.nio.charset.StandardCharsets.ISO_8859_1; /** @@ -62,32 +68,31 @@ private ProcessUtils() { /** - * Look for name in the Path. If it is found and if it is - * executable then return a File object pointing to it. Otherwise return nu + * Blocks until the pid's handle exits or timeout comes first. * - * @param name the name of the file with no path - * @return the File object or null + * @param pid process identifier + * @param timeout + * @param printDots true to print dots to STDOUT while waiting. One dot per second. + * @return true if the handle was not found or exited before timeout. False otherwise. */ - public static File getExe(String name) { - for (String path : PATH) { - File f = new File(path + "/" + name); - - if (f.canExecute()) { - return SmartFile.sanitize(f); - } + public static boolean waitWhileIsAlive(final long pid, Duration timeout, boolean printDots) { + Optional handle = ProcessHandle.of(pid); + if (handle.isEmpty()) { + return true; + } + final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots); + try { + handle.get().onExit().get(timeout.toSeconds(), TimeUnit.SECONDS); + return true; + } catch (TimeoutException e) { + LOG.log(TRACE, "Timeout while waiting for exit of process with id " + pid + ". Returning false.", e); + return false; + } catch (InterruptedException | ExecutionException e) { + LOG.log(TRACE, "Exception while waiting for exit of process with id " + pid + ". Returning true.", e); + return true; + } finally { + DotPrinter.stopWaiting(dotPrinter); } - return null; - } - - - /** - * Saves current pid file to the file. - * - * @param pidFile - * @throws IOException - */ - public static void saveCurrentPid(final File pidFile) throws IOException { - FileUtils.writeStringToFile(Long.toString(ProcessHandle.current().pid()), pidFile, ISO_8859_1); } @@ -96,21 +101,18 @@ public static void saveCurrentPid(final File pidFile) throws IOException { * @return true if the pid file exists and the process with the pid inside is alive. */ public static boolean isAlive(final File pidFile) { - if (!pidFile.exists()) { - return false; - } - final long pid; - try { - pid = loadPid(pidFile); - } catch (Exception e) { - LOG.log(TRACE, "Could not load the pid file " + pidFile - + ", therefore we assume that the process stopped.", e); + final Long pid = loadPid(pidFile); + if (pid == null) { return false; } return isAlive(pid); } + /** + * @param pid + * @return true if the process with is alive. + */ public static boolean isAlive(final long pid) { Optional handle = ProcessHandle.of(pid); if (handle.isEmpty()) { @@ -130,15 +132,45 @@ public static boolean isAlive(final long pid) { /** - * @param pidFile existing file containing pid. - * @return pid from the file - * @throws IllegalArgumentException non-existing, empty or unparseable file + * Blocks until the endpoint closes the connection or timeout comes first. + * + * @param endpoint endpoint host and port to use. + * @param timeout + * @param printDots true to print dots to STDOUT while waiting. One dot per second. + * @return true if the connection was closed before timeout. False otherwise. */ - public static long loadPid(final File pidFile) throws IllegalArgumentException { - try { - return Long.parseLong(FileUtils.readSmallFile(pidFile, ISO_8859_1).trim()); - } catch (NumberFormatException | IOException e) { - throw new IllegalArgumentException("Could not parse the PID file: " + pidFile, e); + public static boolean waitWhileListening(HostAndPort endpoint, Duration timeout, boolean printDots) { + final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots); + try (Socket server = new Socket()) { + server.setSoTimeout((int) timeout.toMillis()); + // Max 5 seconds to connect. It is an extreme value for local endpoint. + try { + server.connect(new InetSocketAddress(endpoint.getHost(), endpoint.getPort()), SOCKET_TIMEOUT); + } catch (IOException e) { + LOG.log(TRACE, "Unable to connect - server is probably down.!", e); + return true; + } + try { + int result = server.getInputStream().read(); + if (result == -1) { + LOG.log(TRACE, "Input stream closed - server probably stopped!"); + return true; + } + LOG.log(ERROR, "We were able to read something: {0}. Returning false.", result); + return false; + } catch (SocketTimeoutException e) { + LOG.log(TRACE, "Timeout while reading. Returning false.", e); + return false; + } catch (SocketException e) { + LOG.log(TRACE, "Socket read failed. Returning true.", e); + return true; + } + } catch (Exception ex) { + LOG.log(WARNING, "An attempt to open a socket to " + endpoint + + " resulted in exception. Therefore we assume the server has stopped.", ex); + return true; + } finally { + DotPrinter.stopWaiting(dotPrinter); } } @@ -167,20 +199,18 @@ public static boolean isListening(HostAndPort endpoint) { * the process via {@link Info#commandLine()}. * * @param pidFile - used to load pid - * @param watchedPidFile - if this file vanish, we expect that the process stopped. * @param timeout - timeout to wait until to meet conditions meaning that the process stopped * @param printDots - print one dot per second when waiting. * @throws KillNotPossibleException It wasn't possible to send the kill signal to the process. * @throws KillTimeoutException Signal was sent, but process is still alive after the timeout. */ - public static void kill(File pidFile, - File watchedPidFile, Duration timeout, boolean printDots) throws KillNotPossibleException, KillTimeoutException { - LOG.log(DEBUG, "kill(pidFile={0}, watchedPidFile={1}, timeout={2}, printDots={3})", - pidFile, watchedPidFile, timeout, printDots); - if (!pidFile.exists()) { + public static void kill(File pidFile, Duration timeout, boolean printDots) + throws KillNotPossibleException, KillTimeoutException { + LOG.log(DEBUG, "kill(pidFile={0}, timeout={1}, printDots={2})", pidFile, timeout, printDots); + final Long pid = loadPid(pidFile); + if (pid == null) { return; } - final long pid = loadPid(pidFile); if (!isAlive(pid)) { LOG.log(INFO, "Process with pid {0} has already stopped.", pid); return; @@ -198,8 +228,7 @@ public static void kill(File pidFile, return; } // This is because File.exists() can cache file attributes - Supplier deathSign = () -> !isAlive(pid) || !Files.exists(watchedPidFile.toPath()); - if (!waitFor(deathSign, timeout, printDots)) { + if (!waitWhileIsAlive(pid, timeout, printDots)) { throw new KillTimeoutException(MessageFormat.format( "The process {0} was killed, but it is still alive after timeout {1} s.", pid, timeout.getSeconds())); } @@ -214,34 +243,71 @@ public static void kill(File pidFile, */ public static boolean waitFor(Supplier sign, Duration timeout, boolean printDots) { LOG.log(DEBUG, "waitFor(sign={0}, timeout={1}, printDots={2})", sign, timeout, printDots); + final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots); final Instant start = Instant.now(); try { final Instant deadline = start.plus(timeout); - Instant nextDot = start; while (Instant.now().isBefore(deadline)) { if (sign.get()) { return true; } - if (printDots) { - Instant now = Instant.now(); - if (now.isAfter(nextDot)) { - nextDot = now.plusSeconds(1L); - System.out.print("."); - System.out.flush(); - } - } - Thread.yield(); + Thread.onSpinWait(); } return false; } finally { - if (printDots) { - System.out.println(); - } + DotPrinter.stopWaiting(dotPrinter); LOG.log(INFO, "Waiting finished after {0} ms.", Duration.between(start, Instant.now()).toMillis()); } } + /** + * Look for name in the Path. If it is found and if it is + * executable then return a File object pointing to it. Otherwise return nu + * + * @param name the name of the file with no path + * @return the File object or null + */ + public static File getExe(String name) { + for (String path : PATH) { + File f = new File(path + "/" + name); + + if (f.canExecute()) { + return SmartFile.sanitize(f); + } + } + return null; + } + + + /** + * Saves current pid file to the file. + * + * @param pidFile + * @throws IOException + */ + public static void saveCurrentPid(final File pidFile) throws IOException { + FileUtils.writeStringToFile(Long.toString(ProcessHandle.current().pid()), pidFile, ISO_8859_1); + } + + + /** + * @param pidFile file containing pid. + * @return pid from the file or null if the file does not exist or is null + * @throws IllegalArgumentException for unparseable file + */ + public static Long loadPid(final File pidFile) throws IllegalArgumentException { + if (pidFile == null || !pidFile.exists()) { + return null; + } + try { + return Long.valueOf(FileUtils.readSmallFile(pidFile, ISO_8859_1).trim()); + } catch (NumberFormatException | IOException e) { + throw new IllegalArgumentException("Could not parse the PID file: " + pidFile, e); + } + } + + private static String[] getSystemPath() { String tempPaths; if (OS.isWindows()) { @@ -258,4 +324,45 @@ private static String[] getSystemPath() { } return new String[0]; } + + + private static class DotPrinter extends Thread { + + public DotPrinter() { + super("DotPrinter"); + setDaemon(true); + } + + + @Override + public void run() { + try { + while (true) { + Thread.sleep(1000L); + System.out.print("."); + System.out.flush(); + } + } catch (InterruptedException e) { + System.out.println(); + Thread.currentThread().interrupt(); + } + } + + + public static DotPrinter startWaiting(boolean printDots) { + if (printDots) { + DotPrinter dotPrinter = new DotPrinter(); + dotPrinter.start(); + return dotPrinter; + } + return null; + } + + + public static void stopWaiting(DotPrinter dotPrinter) { + if (dotPrinter != null) { + dotPrinter.interrupt(); + } + } + } } From 190fd0b9a62b5039e6ff51e6d3c93155c48df9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Mat=C4=9Bj=C4=8Dek?= Date: Sat, 28 Dec 2024 19:34:26 +0100 Subject: [PATCH 7/7] More robust waiting for the process death MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reverted usage of ProcessHandle.onExit.get as it doesn't work in containers which don't have strict reaper. The zombie project is considered as alive and get then hangs forever. - Added waitpid, however it is not installed everywhere - if it is missing, we sleep for 1 second instead. That should be enough so the operating system could do the cleanup. Signed-off-by: David Matějček --- .../universal/process/ProcessUtils.java | 47 +++++++++---------- .../enterprise/v3/admin/RestartServer.java | 2 +- .../enterprise/v3/admin/StartServerHook.java | 18 ++++--- 3 files changed, 30 insertions(+), 37 deletions(-) diff --git a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java index 26b40498e8e..f938e934110 100644 --- a/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java +++ b/nucleus/common/common-util/src/main/java/com/sun/enterprise/universal/process/ProcessUtils.java @@ -34,9 +34,6 @@ import java.time.Duration; import java.time.Instant; import java.util.Optional; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import static com.sun.enterprise.util.StringUtils.ok; @@ -76,23 +73,7 @@ private ProcessUtils() { * @return true if the handle was not found or exited before timeout. False otherwise. */ public static boolean waitWhileIsAlive(final long pid, Duration timeout, boolean printDots) { - Optional handle = ProcessHandle.of(pid); - if (handle.isEmpty()) { - return true; - } - final DotPrinter dotPrinter = DotPrinter.startWaiting(printDots); - try { - handle.get().onExit().get(timeout.toSeconds(), TimeUnit.SECONDS); - return true; - } catch (TimeoutException e) { - LOG.log(TRACE, "Timeout while waiting for exit of process with id " + pid + ". Returning false.", e); - return false; - } catch (InterruptedException | ExecutionException e) { - LOG.log(TRACE, "Exception while waiting for exit of process with id " + pid + ". Returning true.", e); - return true; - } finally { - DotPrinter.stopWaiting(dotPrinter); - } + return waitFor(() -> !isAlive(pid), timeout, printDots); } @@ -115,13 +96,28 @@ public static boolean isAlive(final File pidFile) { */ public static boolean isAlive(final long pid) { Optional handle = ProcessHandle.of(pid); - if (handle.isEmpty()) { - return false; - } - if (!handle.get().isAlive()) { + return handle.isPresent() ? isAlive(handle.get()) : false; + } + + + /** + * The {@link Process#isAlive()} returns true even for zombies so we implemented + * this method which considers zombies as dead. + * + * @param process + * @return true if the process with is alive. + */ + public static boolean isAlive(final ProcessHandle process) { + if (!process.isAlive()) { return false; } - Info info = handle.get().info(); + // This is a trick to avoid zombies on some systems (ie containers on Jenkins) + // Operating system there does the cleanup much later, so we can still access + // zombies to process their output despite for us would be better we would + // not see them any more. + // The ProcessHandle.onExit blocks forever for zombies in docker containers + // without proper process reaper. + final Info info = process.info(); if (info.commandLine().isEmpty() && !(OS.isWindowsForSure() && info.command().isPresent())) { LOG.log(TRACE, "Could not retrieve command line for the pid {0}," + " therefore we assume that the process stopped."); @@ -130,7 +126,6 @@ public static boolean isAlive(final long pid) { return true; } - /** * Blocks until the endpoint closes the connection or timeout comes first. * diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java index 6214fe54d4f..a0c663715ce 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/RestartServer.java @@ -104,7 +104,6 @@ protected final void doExecute(AdminCommandContext context) { } scheduleReincarnation(); } - // else we just return a special int from System.exit() gfKernel.stop(); } catch (Exception e) { throw new Error(strings.get("restart.server.failure"), e); @@ -116,6 +115,7 @@ protected final void doExecute(AdminCommandContext context) { } else { restartType = debug ? RESTART_DEBUG_ON : RESTART_DEBUG_OFF; } + // return a special int from System.exit() System.exit(restartType); } diff --git a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java index 43ae119d848..34f9a5c73fc 100644 --- a/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java +++ b/nucleus/core/kernel/src/main/java/com/sun/enterprise/v3/admin/StartServerHook.java @@ -188,7 +188,7 @@ private static PrintStream getLogFileOld(Instant startTime) { } - private static ProcessBuilder prepareStartup(Instant now, String classpath, String[] sysprops, String classname, + private static ProcessBuilder prepareStartup(Instant startTime, String classpath, String[] sysprops, String classname, String[] args) { final Path javaExecutable = detectJavaExecutable(); final List cmdline = new ArrayList<>(); @@ -217,21 +217,19 @@ private static ProcessBuilder prepareStartup(Instant now, String classpath, Stri // To avoid conflict of the debug port used both by old and new JVM, // we will force waiting for the end of the old JVM. outerCommand = new ArrayList<>(); + outerCommand.add("nohup"); outerCommand.add("bash"); outerCommand.add("-c"); - outerCommand.add("tail --pid=" + ProcessHandle.current().pid() + " -f /dev/null && " - + cmdline.stream().collect(Collectors.joining(" "))); + // waitpid is not everywhere. + outerCommand.add("(waitpid -e " + ProcessHandle.current().pid() + " || sleep 1) && '" + + cmdline.stream().collect(Collectors.joining("' '")) + + (LOG_RESTART ? "' > '" + LOGDIR.resolve("restart-" + startTime + "-new.log'") : "'")); } final ProcessBuilder builder = new ProcessBuilder(outerCommand); builder.directory(new File(System.getProperty("user.dir"))); - if (LOG_RESTART) { - builder.redirectErrorStream(true); - builder.redirectOutput(LOGDIR.resolve("restart-" + now + "-new.log").toFile()); - } else { - builder.redirectError(Redirect.DISCARD); - builder.redirectOutput(Redirect.DISCARD); - } + builder.redirectError(Redirect.DISCARD); + builder.redirectOutput(Redirect.DISCARD); return builder; }