426 lines
15 KiB
Java
426 lines
15 KiB
Java
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
package org.apache.catalina.valves;
|
|
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.Date;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Queue;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
|
import java.util.concurrent.Semaphore;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
import javax.servlet.ServletException;
|
|
|
|
import org.apache.catalina.LifecycleException;
|
|
import org.apache.catalina.connector.Request;
|
|
import org.apache.catalina.connector.Response;
|
|
import org.apache.juli.logging.Log;
|
|
import org.apache.juli.logging.LogFactory;
|
|
import org.apache.tomcat.util.res.StringManager;
|
|
|
|
/**
|
|
* This valve allows to detect requests that take a long time to process, which
|
|
* might indicate that the thread that is processing it is stuck.
|
|
*/
|
|
public class StuckThreadDetectionValve extends ValveBase {
|
|
|
|
/**
|
|
* Logger
|
|
*/
|
|
private static final Log log = LogFactory.getLog(StuckThreadDetectionValve.class);
|
|
|
|
/**
|
|
* The string manager for this package.
|
|
*/
|
|
private static final StringManager sm =
|
|
StringManager.getManager(Constants.Package);
|
|
|
|
/**
|
|
* Keeps count of the number of stuck threads detected
|
|
*/
|
|
private final AtomicInteger stuckCount = new AtomicInteger(0);
|
|
|
|
/**
|
|
* Keeps count of the number of stuck threads that have been interrupted
|
|
*/
|
|
private AtomicLong interruptedThreadsCount = new AtomicLong();
|
|
|
|
/**
|
|
* In seconds. Default 600 (10 minutes).
|
|
*/
|
|
private int threshold = 600;
|
|
|
|
/**
|
|
* In seconds. Default is -1 to disable interruption.
|
|
*/
|
|
private int interruptThreadThreshold;
|
|
|
|
/**
|
|
* The only references we keep to actual running Thread objects are in
|
|
* this Map (which is automatically cleaned in invoke()s finally clause).
|
|
* That way, Threads can be GC'ed, even though the Valve still thinks they
|
|
* are stuck (caused by a long monitor interval)
|
|
*/
|
|
private final Map<Long, MonitoredThread> activeThreads = new ConcurrentHashMap<>();
|
|
|
|
private final Queue<CompletedStuckThread> completedStuckThreadsQueue =
|
|
new ConcurrentLinkedQueue<>();
|
|
|
|
/**
|
|
* Specifies the threshold (in seconds) used when checking for stuck threads.
|
|
* If <=0, the detection is disabled. The default is 600 seconds.
|
|
*
|
|
* @param threshold
|
|
* The new threshold in seconds
|
|
*/
|
|
public void setThreshold(int threshold) {
|
|
this.threshold = threshold;
|
|
}
|
|
|
|
/**
|
|
* @see #setThreshold(int)
|
|
* @return The current threshold in seconds
|
|
*/
|
|
public int getThreshold() {
|
|
return threshold;
|
|
}
|
|
|
|
|
|
public int getInterruptThreadThreshold() {
|
|
return interruptThreadThreshold;
|
|
}
|
|
|
|
/**
|
|
* Specifies the threshold (in seconds) before stuck threads are interrupted.
|
|
* If <=0, the interruption is disabled. The default is -1.
|
|
* If >=0, the value must actually be >= threshold.
|
|
*
|
|
* @param interruptThreadThreshold
|
|
* The new thread interruption threshold in seconds
|
|
*/
|
|
public void setInterruptThreadThreshold(int interruptThreadThreshold) {
|
|
this.interruptThreadThreshold = interruptThreadThreshold;
|
|
}
|
|
|
|
/**
|
|
* Required to enable async support.
|
|
*/
|
|
public StuckThreadDetectionValve() {
|
|
super(true);
|
|
}
|
|
|
|
|
|
@Override
|
|
protected void initInternal() throws LifecycleException {
|
|
super.initInternal();
|
|
|
|
if (log.isDebugEnabled()) {
|
|
log.debug("Monitoring stuck threads with threshold = "
|
|
+ threshold
|
|
+ " sec");
|
|
}
|
|
}
|
|
|
|
private void notifyStuckThreadDetected(MonitoredThread monitoredThread,
|
|
long activeTime, int numStuckThreads) {
|
|
if (log.isWarnEnabled()) {
|
|
String msg = sm.getString(
|
|
"stuckThreadDetectionValve.notifyStuckThreadDetected",
|
|
monitoredThread.getThread().getName(),
|
|
Long.valueOf(activeTime),
|
|
monitoredThread.getStartTime(),
|
|
Integer.valueOf(numStuckThreads),
|
|
monitoredThread.getRequestUri(),
|
|
Integer.valueOf(threshold),
|
|
String.valueOf(monitoredThread.getThread().getId())
|
|
);
|
|
// msg += "\n" + getStackTraceAsString(trace);
|
|
Throwable th = new Throwable();
|
|
th.setStackTrace(monitoredThread.getThread().getStackTrace());
|
|
log.warn(msg, th);
|
|
}
|
|
}
|
|
|
|
private void notifyStuckThreadCompleted(CompletedStuckThread thread,
|
|
int numStuckThreads) {
|
|
if (log.isWarnEnabled()) {
|
|
String msg = sm.getString(
|
|
"stuckThreadDetectionValve.notifyStuckThreadCompleted",
|
|
thread.getName(),
|
|
Long.valueOf(thread.getTotalActiveTime()),
|
|
Integer.valueOf(numStuckThreads),
|
|
String.valueOf(thread.getId()));
|
|
// Since the "stuck thread notification" is warn, this should also
|
|
// be warn
|
|
log.warn(msg);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* {@inheritDoc}
|
|
*/
|
|
@Override
|
|
public void invoke(Request request, Response response)
|
|
throws IOException, ServletException {
|
|
|
|
if (threshold <= 0) {
|
|
// short-circuit if not monitoring stuck threads
|
|
getNext().invoke(request, response);
|
|
return;
|
|
}
|
|
|
|
// Save the thread/runnable
|
|
// Keeping a reference to the thread object here does not prevent
|
|
// GC'ing, as the reference is removed from the Map in the finally clause
|
|
|
|
Long key = Long.valueOf(Thread.currentThread().getId());
|
|
StringBuffer requestUrl = request.getRequestURL();
|
|
if(request.getQueryString()!=null) {
|
|
requestUrl.append("?");
|
|
requestUrl.append(request.getQueryString());
|
|
}
|
|
MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(),
|
|
requestUrl.toString(), interruptThreadThreshold > 0);
|
|
activeThreads.put(key, monitoredThread);
|
|
|
|
try {
|
|
getNext().invoke(request, response);
|
|
} finally {
|
|
activeThreads.remove(key);
|
|
if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) {
|
|
if(monitoredThread.wasInterrupted()) {
|
|
interruptedThreadsCount.incrementAndGet();
|
|
}
|
|
completedStuckThreadsQueue.add(
|
|
new CompletedStuckThread(monitoredThread.getThread(),
|
|
monitoredThread.getActiveTimeInMillis()));
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void backgroundProcess() {
|
|
super.backgroundProcess();
|
|
|
|
long thresholdInMillis = threshold * 1000L;
|
|
|
|
// Check monitored threads, being careful that the request might have
|
|
// completed by the time we examine it
|
|
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
|
long activeTime = monitoredThread.getActiveTimeInMillis();
|
|
|
|
if (activeTime >= thresholdInMillis && monitoredThread.markAsStuckIfStillRunning()) {
|
|
int numStuckThreads = stuckCount.incrementAndGet();
|
|
notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads);
|
|
}
|
|
if(interruptThreadThreshold > 0 && activeTime >= interruptThreadThreshold*1000L) {
|
|
monitoredThread.interruptIfStuck(interruptThreadThreshold);
|
|
}
|
|
}
|
|
// Check if any threads previously reported as stuck, have finished.
|
|
for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll();
|
|
completedStuckThread != null; completedStuckThread = completedStuckThreadsQueue.poll()) {
|
|
|
|
int numStuckThreads = stuckCount.decrementAndGet();
|
|
notifyStuckThreadCompleted(completedStuckThread, numStuckThreads);
|
|
}
|
|
}
|
|
|
|
public int getStuckThreadCount() {
|
|
return stuckCount.get();
|
|
}
|
|
|
|
public long[] getStuckThreadIds() {
|
|
List<Long> idList = new ArrayList<>();
|
|
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
|
if (monitoredThread.isMarkedAsStuck()) {
|
|
idList.add(Long.valueOf(monitoredThread.getThread().getId()));
|
|
}
|
|
}
|
|
|
|
long[] result = new long[idList.size()];
|
|
for (int i = 0; i < result.length; i++) {
|
|
result[i] = idList.get(i).longValue();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public String[] getStuckThreadNames() {
|
|
List<String> nameList = new ArrayList<>();
|
|
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
|
if (monitoredThread.isMarkedAsStuck()) {
|
|
nameList.add(monitoredThread.getThread().getName());
|
|
}
|
|
}
|
|
return nameList.toArray(new String[nameList.size()]);
|
|
}
|
|
|
|
public long getInterruptedThreadsCount() {
|
|
return interruptedThreadsCount.get();
|
|
}
|
|
|
|
|
|
private static class MonitoredThread {
|
|
|
|
/**
|
|
* Reference to the thread to get a stack trace from background task
|
|
*/
|
|
private final Thread thread;
|
|
private final String requestUri;
|
|
private final long start;
|
|
private final AtomicInteger state = new AtomicInteger(
|
|
MonitoredThreadState.RUNNING.ordinal());
|
|
/**
|
|
* Semaphore to synchronize the stuck thread with the background-process
|
|
* thread. It's not used if the interruption feature is not active.
|
|
*/
|
|
private final Semaphore interruptionSemaphore;
|
|
/**
|
|
* Set to true after the thread is interrupted. No need to make it
|
|
* volatile since it is accessed right after acquiring the semaphore.
|
|
*/
|
|
private boolean interrupted;
|
|
|
|
public MonitoredThread(Thread thread, String requestUri,
|
|
boolean interruptible) {
|
|
this.thread = thread;
|
|
this.requestUri = requestUri;
|
|
this.start = System.currentTimeMillis();
|
|
if (interruptible) {
|
|
interruptionSemaphore = new Semaphore(1);
|
|
} else {
|
|
interruptionSemaphore = null;
|
|
}
|
|
}
|
|
|
|
public Thread getThread() {
|
|
return this.thread;
|
|
}
|
|
|
|
public String getRequestUri() {
|
|
return requestUri;
|
|
}
|
|
|
|
public long getActiveTimeInMillis() {
|
|
return System.currentTimeMillis() - start;
|
|
}
|
|
|
|
public Date getStartTime() {
|
|
return new Date(start);
|
|
}
|
|
|
|
public boolean markAsStuckIfStillRunning() {
|
|
return this.state.compareAndSet(MonitoredThreadState.RUNNING.ordinal(),
|
|
MonitoredThreadState.STUCK.ordinal());
|
|
}
|
|
|
|
public MonitoredThreadState markAsDone() {
|
|
int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal());
|
|
MonitoredThreadState threadState = MonitoredThreadState.values()[val];
|
|
|
|
if (threadState == MonitoredThreadState.STUCK
|
|
&& interruptionSemaphore != null) {
|
|
try {
|
|
// use the semaphore to synchronize with the background thread
|
|
// which might try to interrupt this current thread.
|
|
// Otherwise, the current thread might be interrupted after
|
|
// going out from here, maybe already serving a new request
|
|
this.interruptionSemaphore.acquire();
|
|
} catch (InterruptedException e) {
|
|
log.debug(
|
|
"thread interrupted after the request is finished, ignoring",
|
|
e);
|
|
}
|
|
// no need to release the semaphore, it will be GCed
|
|
}
|
|
//else the request went through before being marked as stuck, no need
|
|
//to sync against the semaphore
|
|
return threadState;
|
|
}
|
|
|
|
boolean isMarkedAsStuck() {
|
|
return this.state.get() == MonitoredThreadState.STUCK.ordinal();
|
|
}
|
|
|
|
public boolean interruptIfStuck(long interruptThreadThreshold) {
|
|
if (!isMarkedAsStuck() || interruptionSemaphore == null
|
|
|| !this.interruptionSemaphore.tryAcquire()) {
|
|
// if the semaphore is already acquired, it means that the
|
|
// request thread got unstuck before we interrupted it
|
|
return false;
|
|
}
|
|
try {
|
|
if (log.isWarnEnabled()) {
|
|
String msg = sm.getString(
|
|
"stuckThreadDetectionValve.notifyStuckThreadInterrupted",
|
|
this.getThread().getName(),
|
|
Long.valueOf(getActiveTimeInMillis()),
|
|
this.getStartTime(), this.getRequestUri(),
|
|
Long.valueOf(interruptThreadThreshold),
|
|
String.valueOf(this.getThread().getId()));
|
|
Throwable th = new Throwable();
|
|
th.setStackTrace(this.getThread().getStackTrace());
|
|
log.warn(msg, th);
|
|
}
|
|
this.thread.interrupt();
|
|
} finally {
|
|
this.interrupted = true;
|
|
this.interruptionSemaphore.release();
|
|
}
|
|
return true;
|
|
}
|
|
|
|
public boolean wasInterrupted() {
|
|
return interrupted;
|
|
}
|
|
}
|
|
|
|
private static class CompletedStuckThread {
|
|
|
|
private final String threadName;
|
|
private final long threadId;
|
|
private final long totalActiveTime;
|
|
|
|
public CompletedStuckThread(Thread thread, long totalActiveTime) {
|
|
this.threadName = thread.getName();
|
|
this.threadId = thread.getId();
|
|
this.totalActiveTime = totalActiveTime;
|
|
}
|
|
|
|
public String getName() {
|
|
return this.threadName;
|
|
}
|
|
|
|
public long getId() {
|
|
return this.threadId;
|
|
}
|
|
|
|
public long getTotalActiveTime() {
|
|
return this.totalActiveTime;
|
|
}
|
|
}
|
|
|
|
private enum MonitoredThreadState {
|
|
RUNNING, STUCK, DONE;
|
|
}
|
|
}
|