问题描述
线程卡死指的是线程一直阻塞在某个方法的调用上,线程栈看起来是一动不动。已经处理过的线程卡死的案例:
线程卡死对业务系统的影响是非常巨大的:在线程逐步卡死的过程中,业务系统的处理能力逐步降低,响应延迟不断增大,最终带来的是糟糕的用户体验… …
分析思路
发现阻塞的线程
系统在运行的过程中,怎么确定哪些线程处于卡死状态呢?
要解决的是能够及时发现卡死的线程;
发现了卡死的线程,如何分析线程卡死的原因呢?
线程栈记录的是线程执行的过程,所以获取到了卡死线程的线程栈就能够确定线程卡死在了哪个方法调用上;
通过线程栈也可以知道逻辑执行的过程,以方便问题的排查。
通过分析和实践,总结了一篇发现线程卡死的方案,详情见:【一种检测线程阻塞的实现思路】
分析阻塞的思路
分析线程栈
通常情况下,通过线程栈可以分析出代码执行流程,结合业务代码可以定位一些问题;而有些情况通过线程栈是无法定位问题的,还需要更细致的debug。
一种定位的思路是像在IDE中打断点调试一样,拿到线程各个栈帧中的局部变量;通过一步步分析栈帧的执行过程和栈帧中的数据,就能更清晰地知道业务逻辑是如何执行的。
获取线程栈信息
根据线程tid,在arthas中通过:thread <tid>获取线程栈信息;通过线程栈可以知道每个线程栈帧的位置索引:depth。
获取局部变量表
/* One entry of a method's local variable table, as declared by JVMTI. */
typedef struct {
    jlocation start_location;
    jint length;
    char* name;
    char* signature; /* type signature of the variable */
    char* generic_signature;
    jint slot; /* slot of the variable in the frame; this is the `slot` passed to the GetLocal* accessors */
} jvmtiLocalVariableEntry;

/*
 * Fetches the local variable table of `method`: the number of entries is
 * stored through entry_count_ptr and the entries through table_ptr.
 */
jvmtiError GetLocalVariableTable(jvmtiEnv* env, jmethodID method, jint* entry_count_ptr, jvmtiLocalVariableEntry** table_ptr)
获取到栈帧中每个局部变量的slot。
获取栈帧数据
根据thread、depth、slot可以访问depth栈帧中slot位置的局部变量。JVMTI中读取(Get)和修改(Set)局部变量的相关方法如下,本文只用到Get系列:
- Get Local Variable - Object
- Get Local Instance
- Get Local Variable - Int
- Get Local Variable - Long
- Get Local Variable - Float
- Get Local Variable - Double
- Set Local Variable - Object
- Set Local Variable - Int
- Set Local Variable - Long
- Set Local Variable - Float
- Set Local Variable - Double
实现思路
arthas vmtool已经打通了java与JVMTI之间的调用,所以只需要我们在已有的基础上增加相应JVMTI接口的实现就可以了。
VmTool
在arthas.VmTool添加获取本地变量表、线程栈帧slot数据的逻辑。
/** Native lookup of a method's local variable table; see {@link #getLocalVariableTable}. */
private static synchronized native Object[] getLocalVariableTable0(Class<?> clazz, String methodName, String signature, boolean isStatic);

/** Native read of an {@code int}-category local; see {@link #getLocalInt}. */
private static synchronized native int getLocalInt0(Thread jthread, int depth, int slot);

/** Native read of a {@code long} local; see {@link #getLocalLong}. */
private static synchronized native long getLocalLong0(Thread jthread, int depth, int slot);

/** Native read of a {@code float} local; see {@link #getLocalFloat}. */
private static synchronized native float getLocalFloat0(Thread jthread, int depth, int slot);

/** Native read of a {@code double} local; see {@link #getLocalDouble}. */
private static synchronized native double getLocalDouble0(Thread jthread, int depth, int slot);

/** Native read of a reference local; see {@link #getLocalObject}. */
private static synchronized native Object getLocalObject0(Thread jthread, int depth, int slot);

/**
 * Returns the local variable table of one method.
 *
 * @param clazz      class declaring the method
 * @param methodName simple method name
 * @param signature  JVM method descriptor, e.g. {@code (ILjava/lang/String;)V}
 * @param isStatic   whether the method is static (selects the JNI lookup function)
 * @return one element per table entry; the native side fills each element with a map
 *         holding the keys {@code "slot"} and {@code "signature"}
 */
public Object[] getLocalVariableTable(Class<?> clazz, String methodName, String signature, boolean isStatic) {
    return getLocalVariableTable0(clazz, methodName, signature, isStatic);
}

/**
 * Reads a local variable whose JVM type is int (also used for boolean, byte, char and short).
 *
 * @param jthread thread owning the frame
 * @param depth   frame index, 0 being the topmost frame
 * @param slot    slot number taken from the local variable table
 * @return the slot's value
 */
public int getLocalInt(Thread jthread, int depth, int slot) {
    return getLocalInt0(jthread, depth, slot);
}

/**
 * Reads a {@code long} local variable.
 *
 * @param jthread thread owning the frame
 * @param depth   frame index, 0 being the topmost frame
 * @param slot    slot number taken from the local variable table
 * @return the slot's value
 */
public long getLocalLong(Thread jthread, int depth, int slot) {
    return getLocalLong0(jthread, depth, slot);
}

/**
 * Reads a {@code float} local variable.
 *
 * @param jthread thread owning the frame
 * @param depth   frame index, 0 being the topmost frame
 * @param slot    slot number taken from the local variable table
 * @return the slot's value
 */
public float getLocalFloat(Thread jthread, int depth, int slot) {
    return getLocalFloat0(jthread, depth, slot);
}

/**
 * Reads a {@code double} local variable.
 *
 * @param jthread thread owning the frame
 * @param depth   frame index, 0 being the topmost frame
 * @param slot    slot number taken from the local variable table
 * @return the slot's value
 */
public double getLocalDouble(Thread jthread, int depth, int slot) {
    return getLocalDouble0(jthread, depth, slot);
}

/**
 * Reads a reference-typed local variable (object or array).
 *
 * @param jthread thread owning the frame
 * @param depth   frame index, 0 being the topmost frame
 * @param slot    slot number taken from the local variable table
 * @return the referenced object; when the native read fails, a string of the form
 *         {@code "error_code : N"} is returned instead
 */
public Object getLocalObject(Thread jthread, int depth, int slot) {
    return getLocalObject0(jthread, depth, slot);
}
jni-library.cpp
增加VmTool中native方法实现逻辑。
init_agent
主要是添加can_access_local_variables的访问能力。
// Capability set prepared inside init_agent.
// NOTE(review): the call that hands this struct to JVMTI is not part of this
// excerpt - presumably AddCapabilities; confirm in the full file.
jvmtiCapabilities capabilitiess;
(void)memset(&capabilitiess,0,sizeof(capabilitiess)); // start with every capability bit cleared
capabilitiess.can_tag_objects = 1;
capabilitiess.can_access_local_variables = 1; // the capability added for the local-variable accessors
其他Native方法
不懂C/C++,仅仅完成了必要的功能实现。
extern "C" JNIEXPORT jobjectArray JNICALL Java_arthas_VmTool_getLocalVariableTable0 (JNIEnv *env, jclass thisClass, jclass clazz, jstring methodName, jstring sig, jboolean isStatic){ jsize len = env->GetStringLength(methodName); char *mName = (char*) malloc(sizeof(char) * (len + 1)); env->GetStringUTFRegion(methodName, 0, len, mName); //printf("JVMTI methodName : %s \n", mName); jsize len1 = env->GetStringLength(sig); char *sigStr = (char*) malloc(sizeof(char) * (len1 + 1)); env->GetStringUTFRegion(sig, 0, len1, sigStr); //printf("JVMTI sig : %s \n", sigStr); jmethodID methodId; if(!isStatic){ methodId = env->GetMethodID(clazz, mName, sigStr); }else { methodId = env->GetStaticMethodID(clazz, mName, sigStr); } jclass klass = env->FindClass("java/util/LinkedHashMap"); jint entry_count_ptr; jvmtiLocalVariableEntry *table_ptr; jvmtiError error = jvmti->GetLocalVariableTable(methodId, &entry_count_ptr, &table_ptr); if(error){ printf("JVMTI GetLocalVariableTable entry_count_ptr : %u,error : %u\n", entry_count_ptr,error); return env->NewObjectArray(0, klass, NULL);; } jobjectArray array = env->NewObjectArray(entry_count_ptr, klass, NULL); jmethodID jmethod = env->GetMethodID(klass, "put", "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"); jclass integer_kclass = env->FindClass("java/lang/Integer"); jmethodID jmethodID_valueOf = env->GetStaticMethodID(integer_kclass, "valueOf", "(I)Ljava/lang/Integer;"); for(int i = 0;i < entry_count_ptr;i++){ jvmtiLocalVariableEntry ji = table_ptr[i]; printf("JVMTI jvmtiLocalVariableEntry slot : %u , signature : %s\n", ji.slot, ji.signature); jobject jo = env->AllocObject(klass); jstring slot_key = env->NewStringUTF("slot"); jobject jo_int = env->CallStaticObjectMethod(integer_kclass, jmethodID_valueOf,ji.slot); env->CallObjectMethod(jo, jmethod,slot_key,jo_int); jstring signature_key = env->NewStringUTF("signature"); jstring signature_val = env->NewStringUTF(ji.signature); env->CallObjectMethod(jo, 
jmethod,signature_key,signature_val); env->SetObjectArrayElement(array, i, jo); } return array; } extern "C" JNIEXPORT jint JNICALL Java_arthas_VmTool_getLocalInt0 (JNIEnv *env, jclass thisClass, jobject jthread, jint depth, jint slot){ jint value_ptr; jvmtiError error = jvmti->GetLocalInt(jthread, depth, slot, &value_ptr); if (error) { printf("ERROR: JVMTI GetLocalInt failed!%u\n", error); } return value_ptr; } extern "C" JNIEXPORT jlong JNICALL Java_arthas_VmTool_getLocalLong0 (JNIEnv *env, jclass thisClass, jobject jthread, jint depth, jint slot){ jlong value_ptr; jvmtiError error = jvmti->GetLocalLong(jthread, depth, slot, &value_ptr); if (error) { printf("ERROR: JVMTI GetLocalLong failed!%u\n", error); } return value_ptr; } extern "C" JNIEXPORT jfloat JNICALL Java_arthas_VmTool_getLocalFloat0 (JNIEnv *env, jclass thisClass, jobject jthread, jint depth, jint slot){ jfloat value_ptr; jvmtiError error = jvmti->GetLocalFloat(jthread, depth, slot, &value_ptr); if (error) { printf("ERROR: JVMTI GetLocalFloat failed!%u\n", error); } return value_ptr; } extern "C" JNIEXPORT jdouble JNICALL Java_arthas_VmTool_getLocalDouble0 (JNIEnv *env, jclass thisClass, jobject jthread, jint depth, jint slot){ jdouble value_ptr; jvmtiError error = jvmti->GetLocalDouble(jthread, depth, slot, &value_ptr); if (error) { printf("ERROR: JVMTI GetLocalDouble failed!%u\n", error); } return value_ptr; } extern "C" JNIEXPORT jobject JNICALL Java_arthas_VmTool_getLocalObject0 (JNIEnv *env, jclass thisClass, jobject jthread, jint depth, jint slot){ jobject value_ptr; jvmtiError error = jvmti->GetLocalObject(jthread, depth, slot, &value_ptr); if (error) { printf("ERROR: JVMTI GetLocalObject failed!%u\n", error); char error_code[] = "error_code : "; char ret[50] = ""; strcat(ret,error_code); jint errorCode = error; char buf[6]; sprintf(buf, "%d", errorCode); strcat(ret,buf); value_ptr = env->NewStringUTF(ret); } return value_ptr; }
VmToolCommand
增加访问线程栈帧本地变量逻辑。
package arthas;

import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Reads the local variables of one stack frame of a live thread through {@link VmTool}.
 *
 * <p>The frame is identified by thread id and depth (0 is the topmost frame). The method
 * executing in that frame is resolved reflectively from the thread's stack trace, its local
 * variable table is fetched, and every table entry is read from the frame.
 *
 * <p>Limitation: frames are resolved with {@code Class.getDeclaredMethod}, so constructor
 * frames ({@code <init>}) cannot be inspected.
 */
public class VmToolCommand {

    /** Primitive types and {@code void}, keyed by Java source name ({@code "int"}, ...). */
    private static final Map<String, Type> typeMap = new ConcurrentHashMap<String, Type>();

    static {
        register("Z", boolean.class);
        register("B", byte.class);
        register("C", char.class);
        register("S", short.class);
        register("I", int.class);
        register("J", long.class);
        register("F", float.class);
        register("D", double.class);
        register("V", void.class);
        // Wrapper classes are deliberately absent: java.lang.Integer is described by
        // "Ljava/lang/Integer;", not "I", and Class.forName resolves it without help.
    }

    private final VmTool vmtool;

    /**
     * @param libPath path of the native library handed to {@link VmTool#getInstance}
     */
    public VmToolCommand(String libPath) {
        this.vmtool = VmTool.getInstance(libPath);
    }

    /**
     * Reads every local variable recorded for the method executing at {@code depth}.
     *
     * @param threadId       id of the thread to inspect
     * @param depth          frame index inside the thread's stack trace, 0 being the top
     * @param parameterTypes parameter type names of the frame's method ({@code "int"},
     *                       {@code "java.lang.String"}, {@code "[Ljava.lang.String;"});
     *                       may be omitted when the method name is not overloaded in its
     *                       class, otherwise omitting it selects the no-arg overload
     * @return on success one value per local-variable-table entry, in table order (empty
     *         when the table is unavailable); on failure a one-element array holding either
     *         a message string or the exception that stopped the lookup. Every call gets
     *         its own array, so earlier failures never leak into later results.
     */
    public Object[] accessLocalVariables(long threadId, int depth, String... parameterTypes) {
        Object[] failure = new Object[1];

        Thread jthread = getThreadById(threadId, failure);
        if (jthread == null) {
            return failure;
        }

        // One snapshot serves both the range check and the frame lookup.
        StackTraceElement[] stack = jthread.getStackTrace();
        if (depth < 0 || depth >= stack.length) {
            failure[0] = "the argument depth [" + depth + "] is illegal,thread [" + threadId
                    + "] depth in [0," + (stack.length - 1) + "]";
            return failure;
        }

        Object[] localVariableTable = getLocalVariableTable(stack[depth], failure, parameterTypes);
        if (localVariableTable == null) {
            return failure;
        }
        return getLocalVariableValues(jthread, depth, localVariableTable);
    }

    /** Reads each table entry with the accessor matching the first character of its signature. */
    private Object[] getLocalVariableValues(Thread jthread, int depth, Object[] localVariableTable) {
        Object[] values = new Object[localVariableTable.length];
        for (int i = 0; i < localVariableTable.length; i++) {
            Map<?, ?> entry = (Map<?, ?>) localVariableTable[i];
            int slot = (Integer) entry.get("slot");
            String signature = (String) entry.get("signature");

            Object value;
            switch (signature.charAt(0)) {
                case 'Z':
                    // Any non-zero int is true for the JVM.
                    value = vmtool.getLocalInt(jthread, depth, slot) != 0;
                    break;
                case 'B':
                    value = (byte) vmtool.getLocalInt(jthread, depth, slot);
                    break;
                case 'C':
                    value = (char) vmtool.getLocalInt(jthread, depth, slot);
                    break;
                case 'S':
                    value = (short) vmtool.getLocalInt(jthread, depth, slot);
                    break;
                case 'I':
                    value = vmtool.getLocalInt(jthread, depth, slot);
                    break;
                case 'J':
                    value = vmtool.getLocalLong(jthread, depth, slot);
                    break;
                case 'F':
                    value = vmtool.getLocalFloat(jthread, depth, slot);
                    break;
                case 'D':
                    value = vmtool.getLocalDouble(jthread, depth, slot);
                    break;
                case 'L':
                case '[':
                    value = vmtool.getLocalObject(jthread, depth, slot);
                    break;
                default:
                    value = null;
                    break;
            }
            values[i] = value;
        }
        return values;
    }

    /**
     * Fetches the local variable table of the method executing in {@code frame}.
     *
     * @return the table, or {@code null} with the reason stored in {@code failure[0]}
     */
    private Object[] getLocalVariableTable(StackTraceElement frame, Object[] failure,
            String... parameterTypes) {
        String className = frame.getClassName();
        String methodName = frame.getMethodName();
        // A negative line number alone only means "unknown"; isNativeMethod() is the
        // reliable marker for frames that have no bytecode locals.
        if (frame.isNativeMethod()) {
            failure[0] = className + "." + methodName + "(" + frame.getLineNumber()
                    + ") in native code!";
            return null;
        }

        Class<?> targetClass = forName(className, failure);
        if (targetClass == null) {
            return null;
        }
        Method targetMethod = resolveMethod(targetClass, methodName, failure, parameterTypes);
        if (targetMethod == null) {
            return null;
        }

        String signature = signature(targetMethod.getParameterTypes(), targetMethod.getReturnType());
        boolean isStatic = Modifier.isStatic(targetMethod.getModifiers());
        return vmtool.getLocalVariableTable(targetClass, methodName, signature, isStatic);
    }

    /**
     * Resolves the method of a frame. Without explicit parameter types the method is taken
     * by name when that name is unique in the class, else the no-arg overload is tried.
     */
    private static Method resolveMethod(Class<?> targetClass, String methodName, Object[] failure,
            String... parameterTypes) {
        if (parameterTypes == null || parameterTypes.length == 0) {
            Method unique = uniqueMethodNamed(targetClass, methodName);
            if (unique != null) {
                return unique;
            }
            return getDeclaredMethod(targetClass, methodName, failure);
        }

        Class<?>[] parameterClassTypes = new Class<?>[parameterTypes.length];
        for (int i = 0; i < parameterTypes.length; i++) {
            parameterClassTypes[i] = forName(parameterTypes[i], failure);
            if (parameterClassTypes[i] == null) {
                return null;
            }
        }
        return getDeclaredMethod(targetClass, methodName, failure, parameterClassTypes);
    }

    /** Returns the only declared method with the given name, or null if none or several exist. */
    private static Method uniqueMethodNamed(Class<?> targetClass, String methodName) {
        Method match = null;
        for (Method candidate : targetClass.getDeclaredMethods()) {
            if (candidate.getName().equals(methodName)) {
                if (match != null) {
                    return null;
                }
                match = candidate;
            }
        }
        return match;
    }

    /** Builds the JVM method descriptor, e.g. {@code (I[Ljava/lang/String;)V}. */
    private static String signature(Class<?>[] parameterTypes, Class<?> returnType) {
        StringBuilder signature = new StringBuilder("(");
        for (Class<?> parameterType : parameterTypes) {
            signature.append(typeToSignature(parameterType));
        }
        return signature.append(")").append(typeToSignature(returnType)).toString();
    }

    /** Converts one type to its JVM descriptor. */
    private static String typeToSignature(Class<?> clazz) {
        String name = clazz.getName();
        if (clazz.isPrimitive()) {
            return typeMap.get(name).signature;
        }
        // Class.getName() uses dots even inside array names ("[Ljava.lang.String;");
        // descriptors need slashes.
        String internalName = name.replace('.', '/');
        return clazz.isArray() ? internalName : "L" + internalName + ";";
    }

    /**
     * Finds the live thread with the given id.
     *
     * @return the thread, or {@code null} with a message stored in {@code failure[0]}
     */
    private static Thread getThreadById(long threadId, Object[] failure) {
        // Public API; no reflection on JDK-internal ThreadMXBean implementation methods.
        for (Thread thread : Thread.getAllStackTraces().keySet()) {
            if (thread.getId() == threadId) {
                return thread;
            }
        }
        failure[0] = "can not find thread for thread id : " + threadId;
        return null;
    }

    /**
     * Resolves a type name; primitive names ({@code "int"}, ...) are served from
     * {@link #typeMap} because {@code Class.forName} rejects them.
     *
     * @return the class, or {@code null} with the exception stored in {@code failure[0]}
     */
    private static Class<?> forName(String typeName, Object[] failure) {
        Type primitive = typeMap.get(typeName);
        if (primitive != null) {
            return primitive.clazz;
        }
        try {
            return Class.forName(typeName);
        } catch (ClassNotFoundException e) {
            failure[0] = e;
            return null;
        }
    }

    /**
     * {@code Class.getDeclaredMethod} that reports failures through {@code failure[0]}
     * instead of throwing.
     */
    private static Method getDeclaredMethod(Class<?> clazz, String methodName, Object[] failure,
            Class<?>... parameterTypes) {
        try {
            return clazz.getDeclaredMethod(methodName, parameterTypes);
        } catch (NoSuchMethodException e) {
            failure[0] = e;
        } catch (SecurityException e) {
            failure[0] = e;
        }
        return null;
    }

    private static void register(String signature, Class<?> clazz) {
        typeMap.put(clazz.getName(), new Type(signature, clazz));
    }
}

/** Pairs a class with its JVM descriptor. */
class Type {
    public final String signature;
    public final Class<?> clazz;

    public Type(String signature, Class<?> clazz) {
        this.signature = signature;
        this.clazz = clazz;
    }
}
测试
测试逻辑如下:
- 启动一个线程,实现A调用B,B调用C,在执行C的时候卡死;
- 获取阻塞线程某个栈帧的本地变量数据
Test
package test;

import java.io.File;
import java.security.CodeSource;
import java.util.concurrent.CountDownLatch;

import arthas.VmToolCommand;

/**
 * Demo driver: parks a worker thread inside {@code A.a() -> B.b() -> C.c()} and prints the
 * local variables of the {@code B.b()} frame.
 *
 * <p>An optional first argument overrides the path of the native library.
 */
public class Test {

    /** Latch the worker blocks on inside {@code C.c()}; released when the demo is done. */
    public static final CountDownLatch cdl = new CountDownLatch(1);

    private static final String DEFAULT_LIBRARY = "libArthasJniLibrary-x86.dll";

    public static void main(String[] args) throws Exception {
        Thread thread = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    new A().a();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        });
        thread.start();

        try {
            awaitParked(thread, 5000L);

            StackTraceElement[] stack = thread.getStackTrace();
            Exception trace = new Exception();
            trace.setStackTrace(stack);
            trace.printStackTrace();

            // The number of JDK frames above C.c() differs between JDK versions, so the
            // frame is located by name instead of by a fixed index.
            int depth = frameIndexOf(stack, B.class.getName(), "b");
            if (depth < 0) {
                System.out.println("Frame test.B.b not found in the worker's stack.");
                return;
            }

            String libPath = args.length > 0 ? args[0] : defaultLibraryPath();
            VmToolCommand vmToolCommand = new VmToolCommand(libPath);
            Object[] ret = vmToolCommand.accessLocalVariables(thread.getId(), depth);
            if (ret == null || ret.length == 0) {
                System.out.println("Class information does not include local variable information.");
            } else {
                for (Object obj : ret) {
                    System.out.println(obj);
                }
            }
        } finally {
            // Always release the worker; otherwise a failure above keeps the JVM alive.
            cdl.countDown();
        }
    }

    /** Waits until the thread is parked (state WAITING) or the timeout expires. */
    private static void awaitParked(Thread thread, long timeoutMillis) throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (thread.getState() != Thread.State.WAITING && System.currentTimeMillis() < deadline) {
            Thread.sleep(10L);
        }
    }

    /** Returns the index of the first frame of the given class and method, or -1. */
    private static int frameIndexOf(StackTraceElement[] stack, String className, String methodName) {
        for (int i = 0; i < stack.length; i++) {
            if (className.equals(stack[i].getClassName()) && methodName.equals(stack[i].getMethodName())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Looks for the native library next to the classes of {@link VmToolCommand}: inside the
     * classes directory, or beside the jar when the classes come from one.
     */
    private static String defaultLibraryPath() throws Exception {
        CodeSource codeSource = VmToolCommand.class.getProtectionDomain().getCodeSource();
        // toURI() decodes escapes such as %20 that URL.getPath() would leave in the path.
        File location = new File(codeSource.getLocation().toURI());
        File directory = location.isDirectory() ? location : location.getParentFile();
        return new File(directory, DEFAULT_LIBRARY).getPath();
    }
}
A
package test;

/** Outermost link of the demo call chain: {@code a()} only forwards to {@code B.b()}. */
public class A {
    /**
     * Calls {@code B.b()} on a fresh instance.
     *
     * @throws InterruptedException declared because {@code B.b()} declares it
     */
    public void a() throws InterruptedException {
        new B().b();
    }
}
B
package test;

/**
 * Middle link of the demo call chain. Its frame holds a local of every primitive kind plus
 * a String, an array and an object reference, so there is something of each type to read
 * back while {@code C.c()} is blocked.
 */
public class B {
    // Never read or written in this class.
    private String name;

    /**
     * Declares the sample locals, then calls {@code C.c()}.
     *
     * @throws InterruptedException declared because {@code C.c()} declares it
     */
    public void b() throws InterruptedException {
        boolean a = true;
        byte b = 1;
        short c = 2;
        char d = 'a';
        int e = 3;
        long f = 4;
        float g = 5.01F;
        double h = 6.01D;
        String str = "abcdefg";
        String[] arr = {"abc","def"};
        C cc = new C();
        cc.c();
    }
}
C
package test;

/** Innermost link of the demo call chain: this is where the worker thread blocks. */
public class C {
    /**
     * Blocks on {@code Test.cdl} until the latch is counted down.
     *
     * @throws InterruptedException declared, although the catch-all below handles anything
     *         {@code await()} throws
     */
    public void c() throws InterruptedException {
        int a = 1;
        try {
            Test.cdl.await();
        }catch(Exception e) {
            // Exceptions (including interruption) are printed, not rethrown.
            e.printStackTrace();
        }
    }
}
总结
总体思路是:
- 通过线程ID找到对应的线程对象;
- 通过指定的depth获取线程栈在depth处的类名、方法名;
- 通过指定的方法入参获取对应的Method对象,进而获取到方法的本地变量表;
- 通过线程对象、方法的本地变量表,最终获取到指定depth的栈帧数据