init
This commit is contained in:
425
java/org/apache/catalina/valves/StuckThreadDetectionValve.java
Normal file
425
java/org/apache/catalina/valves/StuckThreadDetectionValve.java
Normal file
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.catalina.valves;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import org.apache.catalina.LifecycleException;
|
||||
import org.apache.catalina.connector.Request;
|
||||
import org.apache.catalina.connector.Response;
|
||||
import org.apache.juli.logging.Log;
|
||||
import org.apache.juli.logging.LogFactory;
|
||||
import org.apache.tomcat.util.res.StringManager;
|
||||
|
||||
/**
|
||||
* This valve allows to detect requests that take a long time to process, which
|
||||
* might indicate that the thread that is processing it is stuck.
|
||||
*/
|
||||
public class StuckThreadDetectionValve extends ValveBase {
|
||||
|
||||
/**
|
||||
* Logger
|
||||
*/
|
||||
private static final Log log = LogFactory.getLog(StuckThreadDetectionValve.class);
|
||||
|
||||
/**
|
||||
* The string manager for this package.
|
||||
*/
|
||||
private static final StringManager sm =
|
||||
StringManager.getManager(Constants.Package);
|
||||
|
||||
/**
|
||||
* Keeps count of the number of stuck threads detected
|
||||
*/
|
||||
private final AtomicInteger stuckCount = new AtomicInteger(0);
|
||||
|
||||
/**
|
||||
* Keeps count of the number of stuck threads that have been interrupted
|
||||
*/
|
||||
private AtomicLong interruptedThreadsCount = new AtomicLong();
|
||||
|
||||
/**
|
||||
* In seconds. Default 600 (10 minutes).
|
||||
*/
|
||||
private int threshold = 600;
|
||||
|
||||
/**
|
||||
* In seconds. Default is -1 to disable interruption.
|
||||
*/
|
||||
private int interruptThreadThreshold;
|
||||
|
||||
/**
|
||||
* The only references we keep to actual running Thread objects are in
|
||||
* this Map (which is automatically cleaned in invoke()s finally clause).
|
||||
* That way, Threads can be GC'ed, even though the Valve still thinks they
|
||||
* are stuck (caused by a long monitor interval)
|
||||
*/
|
||||
private final Map<Long, MonitoredThread> activeThreads = new ConcurrentHashMap<>();
|
||||
|
||||
private final Queue<CompletedStuckThread> completedStuckThreadsQueue =
|
||||
new ConcurrentLinkedQueue<>();
|
||||
|
||||
/**
|
||||
* Specifies the threshold (in seconds) used when checking for stuck threads.
|
||||
* If <=0, the detection is disabled. The default is 600 seconds.
|
||||
*
|
||||
* @param threshold
|
||||
* The new threshold in seconds
|
||||
*/
|
||||
public void setThreshold(int threshold) {
|
||||
this.threshold = threshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setThreshold(int)
|
||||
* @return The current threshold in seconds
|
||||
*/
|
||||
public int getThreshold() {
|
||||
return threshold;
|
||||
}
|
||||
|
||||
|
||||
public int getInterruptThreadThreshold() {
|
||||
return interruptThreadThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies the threshold (in seconds) before stuck threads are interrupted.
|
||||
* If <=0, the interruption is disabled. The default is -1.
|
||||
* If >=0, the value must actually be >= threshold.
|
||||
*
|
||||
* @param interruptThreadThreshold
|
||||
* The new thread interruption threshold in seconds
|
||||
*/
|
||||
public void setInterruptThreadThreshold(int interruptThreadThreshold) {
|
||||
this.interruptThreadThreshold = interruptThreadThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Required to enable async support.
|
||||
*/
|
||||
public StuckThreadDetectionValve() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void initInternal() throws LifecycleException {
|
||||
super.initInternal();
|
||||
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Monitoring stuck threads with threshold = "
|
||||
+ threshold
|
||||
+ " sec");
|
||||
}
|
||||
}
|
||||
|
||||
private void notifyStuckThreadDetected(MonitoredThread monitoredThread,
|
||||
long activeTime, int numStuckThreads) {
|
||||
if (log.isWarnEnabled()) {
|
||||
String msg = sm.getString(
|
||||
"stuckThreadDetectionValve.notifyStuckThreadDetected",
|
||||
monitoredThread.getThread().getName(),
|
||||
Long.valueOf(activeTime),
|
||||
monitoredThread.getStartTime(),
|
||||
Integer.valueOf(numStuckThreads),
|
||||
monitoredThread.getRequestUri(),
|
||||
Integer.valueOf(threshold),
|
||||
String.valueOf(monitoredThread.getThread().getId())
|
||||
);
|
||||
// msg += "\n" + getStackTraceAsString(trace);
|
||||
Throwable th = new Throwable();
|
||||
th.setStackTrace(monitoredThread.getThread().getStackTrace());
|
||||
log.warn(msg, th);
|
||||
}
|
||||
}
|
||||
|
||||
private void notifyStuckThreadCompleted(CompletedStuckThread thread,
|
||||
int numStuckThreads) {
|
||||
if (log.isWarnEnabled()) {
|
||||
String msg = sm.getString(
|
||||
"stuckThreadDetectionValve.notifyStuckThreadCompleted",
|
||||
thread.getName(),
|
||||
Long.valueOf(thread.getTotalActiveTime()),
|
||||
Integer.valueOf(numStuckThreads),
|
||||
String.valueOf(thread.getId()));
|
||||
// Since the "stuck thread notification" is warn, this should also
|
||||
// be warn
|
||||
log.warn(msg);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void invoke(Request request, Response response)
|
||||
throws IOException, ServletException {
|
||||
|
||||
if (threshold <= 0) {
|
||||
// short-circuit if not monitoring stuck threads
|
||||
getNext().invoke(request, response);
|
||||
return;
|
||||
}
|
||||
|
||||
// Save the thread/runnable
|
||||
// Keeping a reference to the thread object here does not prevent
|
||||
// GC'ing, as the reference is removed from the Map in the finally clause
|
||||
|
||||
Long key = Long.valueOf(Thread.currentThread().getId());
|
||||
StringBuffer requestUrl = request.getRequestURL();
|
||||
if(request.getQueryString()!=null) {
|
||||
requestUrl.append("?");
|
||||
requestUrl.append(request.getQueryString());
|
||||
}
|
||||
MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(),
|
||||
requestUrl.toString(), interruptThreadThreshold > 0);
|
||||
activeThreads.put(key, monitoredThread);
|
||||
|
||||
try {
|
||||
getNext().invoke(request, response);
|
||||
} finally {
|
||||
activeThreads.remove(key);
|
||||
if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) {
|
||||
if(monitoredThread.wasInterrupted()) {
|
||||
interruptedThreadsCount.incrementAndGet();
|
||||
}
|
||||
completedStuckThreadsQueue.add(
|
||||
new CompletedStuckThread(monitoredThread.getThread(),
|
||||
monitoredThread.getActiveTimeInMillis()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void backgroundProcess() {
|
||||
super.backgroundProcess();
|
||||
|
||||
long thresholdInMillis = threshold * 1000L;
|
||||
|
||||
// Check monitored threads, being careful that the request might have
|
||||
// completed by the time we examine it
|
||||
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
||||
long activeTime = monitoredThread.getActiveTimeInMillis();
|
||||
|
||||
if (activeTime >= thresholdInMillis && monitoredThread.markAsStuckIfStillRunning()) {
|
||||
int numStuckThreads = stuckCount.incrementAndGet();
|
||||
notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads);
|
||||
}
|
||||
if(interruptThreadThreshold > 0 && activeTime >= interruptThreadThreshold*1000L) {
|
||||
monitoredThread.interruptIfStuck(interruptThreadThreshold);
|
||||
}
|
||||
}
|
||||
// Check if any threads previously reported as stuck, have finished.
|
||||
for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll();
|
||||
completedStuckThread != null; completedStuckThread = completedStuckThreadsQueue.poll()) {
|
||||
|
||||
int numStuckThreads = stuckCount.decrementAndGet();
|
||||
notifyStuckThreadCompleted(completedStuckThread, numStuckThreads);
|
||||
}
|
||||
}
|
||||
|
||||
public int getStuckThreadCount() {
|
||||
return stuckCount.get();
|
||||
}
|
||||
|
||||
public long[] getStuckThreadIds() {
|
||||
List<Long> idList = new ArrayList<>();
|
||||
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
||||
if (monitoredThread.isMarkedAsStuck()) {
|
||||
idList.add(Long.valueOf(monitoredThread.getThread().getId()));
|
||||
}
|
||||
}
|
||||
|
||||
long[] result = new long[idList.size()];
|
||||
for (int i = 0; i < result.length; i++) {
|
||||
result[i] = idList.get(i).longValue();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public String[] getStuckThreadNames() {
|
||||
List<String> nameList = new ArrayList<>();
|
||||
for (MonitoredThread monitoredThread : activeThreads.values()) {
|
||||
if (monitoredThread.isMarkedAsStuck()) {
|
||||
nameList.add(monitoredThread.getThread().getName());
|
||||
}
|
||||
}
|
||||
return nameList.toArray(new String[nameList.size()]);
|
||||
}
|
||||
|
||||
public long getInterruptedThreadsCount() {
|
||||
return interruptedThreadsCount.get();
|
||||
}
|
||||
|
||||
|
||||
private static class MonitoredThread {
|
||||
|
||||
/**
|
||||
* Reference to the thread to get a stack trace from background task
|
||||
*/
|
||||
private final Thread thread;
|
||||
private final String requestUri;
|
||||
private final long start;
|
||||
private final AtomicInteger state = new AtomicInteger(
|
||||
MonitoredThreadState.RUNNING.ordinal());
|
||||
/**
|
||||
* Semaphore to synchronize the stuck thread with the background-process
|
||||
* thread. It's not used if the interruption feature is not active.
|
||||
*/
|
||||
private final Semaphore interruptionSemaphore;
|
||||
/**
|
||||
* Set to true after the thread is interrupted. No need to make it
|
||||
* volatile since it is accessed right after acquiring the semaphore.
|
||||
*/
|
||||
private boolean interrupted;
|
||||
|
||||
public MonitoredThread(Thread thread, String requestUri,
|
||||
boolean interruptible) {
|
||||
this.thread = thread;
|
||||
this.requestUri = requestUri;
|
||||
this.start = System.currentTimeMillis();
|
||||
if (interruptible) {
|
||||
interruptionSemaphore = new Semaphore(1);
|
||||
} else {
|
||||
interruptionSemaphore = null;
|
||||
}
|
||||
}
|
||||
|
||||
public Thread getThread() {
|
||||
return this.thread;
|
||||
}
|
||||
|
||||
public String getRequestUri() {
|
||||
return requestUri;
|
||||
}
|
||||
|
||||
public long getActiveTimeInMillis() {
|
||||
return System.currentTimeMillis() - start;
|
||||
}
|
||||
|
||||
public Date getStartTime() {
|
||||
return new Date(start);
|
||||
}
|
||||
|
||||
public boolean markAsStuckIfStillRunning() {
|
||||
return this.state.compareAndSet(MonitoredThreadState.RUNNING.ordinal(),
|
||||
MonitoredThreadState.STUCK.ordinal());
|
||||
}
|
||||
|
||||
public MonitoredThreadState markAsDone() {
|
||||
int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal());
|
||||
MonitoredThreadState threadState = MonitoredThreadState.values()[val];
|
||||
|
||||
if (threadState == MonitoredThreadState.STUCK
|
||||
&& interruptionSemaphore != null) {
|
||||
try {
|
||||
// use the semaphore to synchronize with the background thread
|
||||
// which might try to interrupt this current thread.
|
||||
// Otherwise, the current thread might be interrupted after
|
||||
// going out from here, maybe already serving a new request
|
||||
this.interruptionSemaphore.acquire();
|
||||
} catch (InterruptedException e) {
|
||||
log.debug(
|
||||
"thread interrupted after the request is finished, ignoring",
|
||||
e);
|
||||
}
|
||||
// no need to release the semaphore, it will be GCed
|
||||
}
|
||||
//else the request went through before being marked as stuck, no need
|
||||
//to sync against the semaphore
|
||||
return threadState;
|
||||
}
|
||||
|
||||
boolean isMarkedAsStuck() {
|
||||
return this.state.get() == MonitoredThreadState.STUCK.ordinal();
|
||||
}
|
||||
|
||||
public boolean interruptIfStuck(long interruptThreadThreshold) {
|
||||
if (!isMarkedAsStuck() || interruptionSemaphore == null
|
||||
|| !this.interruptionSemaphore.tryAcquire()) {
|
||||
// if the semaphore is already acquired, it means that the
|
||||
// request thread got unstuck before we interrupted it
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
if (log.isWarnEnabled()) {
|
||||
String msg = sm.getString(
|
||||
"stuckThreadDetectionValve.notifyStuckThreadInterrupted",
|
||||
this.getThread().getName(),
|
||||
Long.valueOf(getActiveTimeInMillis()),
|
||||
this.getStartTime(), this.getRequestUri(),
|
||||
Long.valueOf(interruptThreadThreshold),
|
||||
String.valueOf(this.getThread().getId()));
|
||||
Throwable th = new Throwable();
|
||||
th.setStackTrace(this.getThread().getStackTrace());
|
||||
log.warn(msg, th);
|
||||
}
|
||||
this.thread.interrupt();
|
||||
} finally {
|
||||
this.interrupted = true;
|
||||
this.interruptionSemaphore.release();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean wasInterrupted() {
|
||||
return interrupted;
|
||||
}
|
||||
}
|
||||
|
||||
private static class CompletedStuckThread {
|
||||
|
||||
private final String threadName;
|
||||
private final long threadId;
|
||||
private final long totalActiveTime;
|
||||
|
||||
public CompletedStuckThread(Thread thread, long totalActiveTime) {
|
||||
this.threadName = thread.getName();
|
||||
this.threadId = thread.getId();
|
||||
this.totalActiveTime = totalActiveTime;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return this.threadName;
|
||||
}
|
||||
|
||||
public long getId() {
|
||||
return this.threadId;
|
||||
}
|
||||
|
||||
public long getTotalActiveTime() {
|
||||
return this.totalActiveTime;
|
||||
}
|
||||
}
|
||||
|
||||
private enum MonitoredThreadState {
|
||||
RUNNING, STUCK, DONE;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user