diff options
Diffstat (limited to 'doc/note/openshift/dbg-mem-issues.txt')
-rw-r--r-- | doc/note/openshift/dbg-mem-issues.txt | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/doc/note/openshift/dbg-mem-issues.txt b/doc/note/openshift/dbg-mem-issues.txt new file mode 100644 index 0000000..c730629 --- /dev/null +++ b/doc/note/openshift/dbg-mem-issues.txt @@ -0,0 +1,170 @@ + +How to hunt memory issues in production +======================================= + +true \ + && SVCNAME=foo-prod \ + && PID=42 \ + && OC= \ + && JMX= \ + && MemLeakTry1="lua -W MemLeakTry1.lua" \ + && dropPadding () { sed -E 's_ *; *_;_g'; } \ + && getPodName () { ${OC:?} get pods | egrep ston-[0-9] | cut -d' ' -f1; } \ + && true + + +${OC:?} exec -ti "$(${OC:?} get pods|egrep ston-1|cut -f1 -d' ')" -- pmap 9 > "pmap/${SVCNAME:?}"-pmap-$(date -u +%Y%m%d-%H%M%S).txt + +true `# Track pod memory` \ + && ${OC:?} exec -ti "$(${OC:?} get pods|grep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'true \ + && printf '\''h;PageSize;%s\n'\'' $(getconf PAGESIZE) \ + && printf '\''c;%-24s;%8s;%8s;%8s;%5s;%4s;%3s;%8s;%3s;%7s\n'\'' When nThrds size RSS SHR text lib data dt nFds \ + && while true; do true \ + && printf '\''r;%s;%8s;%8d;%8d;%5d;%4d;%3d;%8d;%3d;%7d\n'\'' \ + "$(date -Is)" \ + $(cat /proc/'${PID:?}'/stat|cut -d" " -f20) \ + $(cat /proc/'${PID:?}'/statm) \ + $(ls -1 /proc/'${PID:?}'/fd | wc -l) \ + && sleep $(expr 60 - \( $(date +%s) % 60 \)) || break; done' \ + | tee "mem/${SVCNAME:?}"-mem-$(date +%Y%m%d-%H%M%S%z).csv + +true `# log JMX stuff` \ + && grepUsed () { egrep 'used : ' | sed -r 's_^[^0-9]+ ([0-9]+) [^0-9]+$_\1_'; } \ + && grepPureNumberLine () { egrep $(printf '^[0-9]+\r?$') | sed -r 's_^(.*)\r$_\1_'; } \ + && (true \ + && printf 'c; When ; JvmMetaspace; jvmNonHeap; JvmClassCnt; JvmHeap\n' \ + && while true; do true \ + && metaSpcByts="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/name=Metaspace,type=MemoryPool/attributes/Usage/' | grepUsed)" \ + && jvmNonHeap="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=Memory/attributes/NonHeapMemoryUsage/' | grepUsed)" \ + && ldClassCnt="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=ClassLoading/attributes/LoadedClassCount/' | grepPureNumberLine)" \ + && jvmHeap="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=Memory/attributes/HeapMemoryUsage/' | grepUsed)" \ + && printf 'r;%s;%13d;%11d;%12d;%11d\n' "$(date -Is)" "${metaSpcByts:?}" "${jvmNonHeap:?}" "${ldClassCnt:?}" "${jvmHeap:?}" \ + && sleep $(expr 60 - \( $(date +%s) % 60 \)) || break \ + ; done) \ + | tee "jmx/houston-prod-jmx-Metaspace-used-$(date +%Y%m%d-%H%M%S).log" \ + && true + +(true `# Fetch logs` \ + && while true; do true \ + && printf '%s - Fetch logs\n' "$(date -Is)" \ + && ${OC:?} exec -i "$(getPodName)" -- sh -c 'cd /usr/local/vertx/logs && (tar -cz houston* || test $? -eq 1)' \ + > "logs/${SVCNAME:?}-log-$(date -u +%Y%m%d-%H%M%SZ).tgz" \ + && sleep $(expr 14400 - \( $(date +%s) % 14400 \)) || break; done \ + && true) + +true `# Merge logs` \ + && PREFX="houston-prod-log-" \ + && SUFFX=".tgz" \ + && for SRCTGZ in \ + ${PREFX:?}20231110-160510Z${SUFFX:?} \ + ${PREFX:?}20231110-181226Z${SUFFX:?} \ + ${PREFX:?}20231114-093133Z${SUFFX:?} \ + ${PREFX:?}20231114-120002Z${SUFFX:?} \ + ${PREFX:?}20231114-123040Z${SUFFX:?} \ + ${PREFX:?}20231114-160001Z${SUFFX:?} \ + ${PREFX:?}20231116-082933Z${SUFFX:?} \ + ${PREFX:?}20231116-120002Z${SUFFX:?} \ + ${PREFX:?}20231116-160002Z${SUFFX:?} \ + ${PREFX:?}20231117-081112Z${SUFFX:?} \ + ${PREFX:?}20231117-120001Z${SUFFX:?} \ + ${PREFX:?}20231117-164612Z${SUFFX:?} \ + ; do true \ + && echo "[INFO ] Create ${SRCTGZ%.*}.log" \ + && tar xf ../logs/${SRCTGZ:?} \ + && unzip houston.log.1.zip \ + && cat houston.log.1 houston.log > "${SRCTGZ%.*}.log" \ + && rm houston.log.1.zip houston.log.1 houston.log \ + ;done && true \ + && printf '%s' ' + local newLogFileMerger = require("AndisLogUtils").newLogFileMerger + local merger = newLogFileMerger{ + sources = { + io.open("houston-prod-log-20231110-160510Z.log", "r"), + io.open("houston-prod-log-20231110-181226Z.log", "r"), + io.open("houston-prod-log-20231114-093133Z.log", "r"), + io.open("houston-prod-log-20231114-120002Z.log", "r"), + io.open("houston-prod-log-20231114-123040Z.log", "r"), + io.open("houston-prod-log-20231114-160001Z.log", "r"), + io.open("houston-prod-log-20231116-082933Z.log", "r"), + io.open("houston-prod-log-20231116-120002Z.log", "r"), + io.open("houston-prod-log-20231116-160002Z.log", "r"), + io.open("houston-prod-log-20231117-081112Z.log", "r"), + io.open("houston-prod-log-20231117-120001Z.log", "r"), + io.open("houston-prod-log-20231117-164612Z.log", "r"), + }, + snk = { write = function( t, buf, b, c ) io.stdout:write(buf) io.stdout:write("\n") end, }, + }' | lua -W - | gzip -n > houston-log-merged-$(date -u +%Y%m%d-%H%M%S)Z.log.gz \ + && true + + +`# Create heap dump` +com.sun.management.dumpHeap("/usr/local/vertx/houston-storage-file/houston-___-heap-2023____-____Z.hprof", true) + +`# Inspect` +ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && ls -Ahl' + +true `# Get made heap dump` \ + && echo create checksum. \ + && ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && md5sum -b houston-*.hprof >> MD5SUM-$(date -u +%Y%m%d-%H%M%SZ)' \ + && echo checksum done. Begin dload. \ + && ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && tar c houston-*.hprof MD5SUM*' | (cd heapDump && tar x) \ + && echo dload done \ + && true + +true `# Probe` \ + && stage=prod \ + && logFile="houston-${stage:?}-ready-$(date -u +%Y%m%d-%H%M%SZ).log" \ + && printf 'c; when ;rspCode; connectSec; trsfSec; totlSec; curlExit\n' | tee -a "${logFile:?}" \ + && while true; do true \ + && printf 'r;%s;%7d;%11.3f;%8.3f;%8.3f;%9d\n' \ + $(date +%Y-%m-%dT%H:%M:%S%z) \ + $(curl -sSw "%{http_code} %{time_connect} %{time_starttransfer} %{time_total}" "${houstonServerInfoUrl:?}" -o /dev/null || ex=$? && echo " $ex") \ + | tee -a "${logFile:?}" \ + && sleep $(expr 60 - $(date +%s) % 60) || break \ + ;done \ + && true + +true \ + && ONE="houston-prod-pmap-20231102-163425.txt" \ + && TWO="houston-prod-pmap-20231103-074301.txt" \ + && diff -U0 "${ONE:?}" "${TWO:?}" | egrep '^\+' | sed -r 's_\+([^ ]+) .*$_\1_'|sort|uniq \ + && true + +(true \ + && for F in $(ls *pmap*.txt); do true \ + && printf "$F\n" \ + && DATE="$(date +%s -d "$(echo $F|sed -r 's_.*([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2}).*_\1-\2-\3T\4:\5:\6Z_')")" \ + && <"$F" ${MemLeakTry1:?} --date "${DATE:?}" > "${F%.*}.csv" \ + ;done) + + + + + + + +Zwischenfall auf INT 20231124 + + + +`# Create heap dump` +com.sun.management.dumpHeap("/usr/local/vertx/houston-storage-file/houston-___-heap-2023____-____Z.hprof", true) + +`# Inspect` +${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && ls -Ahl' + +true `# Get made heap dump` \ + && if test ! -d heapDump; then echo "Dir heapDump missing"; false ;fi \ + && echo create checksum. \ + && ${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && md5sum -b houston-*.hprof >> MD5SUM-$(date -u +%Y%m%d-%H%M%SZ)' \ + && echo checksum done. Begin dload. \ + && ${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && tar c houston-*.hprof MD5SUM*' | (cd heapDump && tar x) \ + && echo dload done \ + && true + + + + + + + |