summaryrefslogtreecommitdiff
path: root/doc/note/openshift
diff options
context:
space:
mode:
Diffstat (limited to 'doc/note/openshift')
-rw-r--r--doc/note/openshift/dbg-mem-issues.txt170
-rw-r--r--doc/note/openshift/openshift.txt14
2 files changed, 181 insertions, 3 deletions
diff --git a/doc/note/openshift/dbg-mem-issues.txt b/doc/note/openshift/dbg-mem-issues.txt
new file mode 100644
index 0000000..c730629
--- /dev/null
+++ b/doc/note/openshift/dbg-mem-issues.txt
@@ -0,0 +1,170 @@
+
+How to hunt memory issues in production
+=======================================
+
+true \
+ && SVCNAME=foo-prod \
+ && PID=42 \
+ && OC= \
+ && JMX= \
+ && MemLeakTry1="lua -W MemLeakTry1.lua" \
+ && dropPadding () { sed -E 's_ *; *_;_g'; } \
+ && getPodName () { ${OC:?} get pods | egrep ston-[0-9] | cut -d' ' -f1; } \
+ && true
+
+
+${OC:?} exec -ti "$(${OC:?} get pods|egrep ston-1|cut -f1 -d' ')" -- pmap 9 > "pmap/${SVCNAME:?}"-pmap-$(date -u +%Y%m%d-%H%M%S).txt
+
+true `# Track pod memory` \
+ && ${OC:?} exec -ti "$(${OC:?} get pods|grep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'true \
+ && printf '\''h;PageSize;%s\n'\'' $(getconf PAGESIZE) \
+ && printf '\''c;%-24s;%8s;%8s;%8s;%5s;%4s;%3s;%8s;%3s;%7s\n'\'' When nThrds size RSS SHR text lib data dt nFds \
+ && while true; do true \
+ && printf '\''r;%s;%8s;%8d;%8d;%5d;%4d;%3d;%8d;%3d;%7d\n'\'' \
+ "$(date -Is)" \
+ $(cat /proc/'${PID:?}'/stat|cut -d" " -f20) \
+ $(cat /proc/'${PID:?}'/statm) \
+ $(ls -1 /proc/'${PID:?}'/fd | wc -l) \
+ && sleep $(expr 60 - \( $(date +%s) % 60 \)) || break; done' \
+ | tee "mem/${SVCNAME:?}"-mem-$(date +%Y%m%d-%H%M%S%z).csv
+
+true `# log JMX stuff` \
+ && grepUsed () { egrep 'used : ' | sed -r 's_^[^0-9]+ ([0-9]+) [^0-9]+$_\1_'; } \
+ && grepPureNumberLine () { egrep $(printf '^[0-9]+\r?$') | sed -r 's_^(.*)\r$_\1_'; } \
+ && (true \
+ && printf 'c; When ; JvmMetaspace; jvmNonHeap; JvmClassCnt; JvmHeap\n' \
+ && while true; do true \
+ && metaSpcByts="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/name=Metaspace,type=MemoryPool/attributes/Usage/' | grepUsed)" \
+ && jvmNonHeap="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=Memory/attributes/NonHeapMemoryUsage/' | grepUsed)" \
+ && ldClassCnt="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=ClassLoading/attributes/LoadedClassCount/' | grepPureNumberLine)" \
+ && jvmHeap="$(curl -sS "${JMX:?}"'/servers/0/domains/java.lang/mbeans/type=Memory/attributes/HeapMemoryUsage/' | grepUsed)" \
+ && printf 'r;%s;%13d;%11d;%12d;%11d\n' "$(date -Is)" "${metaSpcByts:?}" "${jvmNonHeap:?}" "${ldClassCnt:?}" "${jvmHeap:?}" \
+ && sleep $(expr 60 - \( $(date +%s) % 60 \)) || break \
+ ; done) \
+ | tee "jmx/houston-prod-jmx-Metaspace-used-$(date +%Y%m%d-%H%M%S).log" \
+ && true
+
+(true `# Fetch logs` \
+ && while true; do true \
+ && printf '%s - Fetch logs\n' "$(date -Is)" \
+ && ${OC:?} exec -i "$(getPodName)" -- sh -c 'cd /usr/local/vertx/logs && (tar -cz houston* || test $? -eq 1)' \
+ > "logs/${SVCNAME:?}-log-$(date -u +%Y%m%d-%H%M%SZ).tgz" \
+ && sleep $(expr 14400 - \( $(date +%s) % 14400 \)) || break; done \
+ && true)
+
+true `# Merge logs` \
+ && PREFX="houston-prod-log-" \
+ && SUFFX=".tgz" \
+ && for SRCTGZ in \
+ ${PREFX:?}20231110-160510Z${SUFFX:?} \
+ ${PREFX:?}20231110-181226Z${SUFFX:?} \
+ ${PREFX:?}20231114-093133Z${SUFFX:?} \
+ ${PREFX:?}20231114-120002Z${SUFFX:?} \
+ ${PREFX:?}20231114-123040Z${SUFFX:?} \
+ ${PREFX:?}20231114-160001Z${SUFFX:?} \
+ ${PREFX:?}20231116-082933Z${SUFFX:?} \
+ ${PREFX:?}20231116-120002Z${SUFFX:?} \
+ ${PREFX:?}20231116-160002Z${SUFFX:?} \
+ ${PREFX:?}20231117-081112Z${SUFFX:?} \
+ ${PREFX:?}20231117-120001Z${SUFFX:?} \
+ ${PREFX:?}20231117-164612Z${SUFFX:?} \
+ ; do true \
+ && echo "[INFO ] Create ${SRCTGZ%.*}.log" \
+ && tar xf ../logs/${SRCTGZ:?} \
+ && unzip houston.log.1.zip \
+ && cat houston.log.1 houston.log > "${SRCTGZ%.*}.log" \
+ && rm houston.log.1.zip houston.log.1 houston.log \
+ ;done && true \
+ && printf '%s' '
+ local newLogFileMerger = require("AndisLogUtils").newLogFileMerger
+ local merger = newLogFileMerger{
+ sources = {
+ io.open("houston-prod-log-20231110-160510Z.log", "r"),
+ io.open("houston-prod-log-20231110-181226Z.log", "r"),
+ io.open("houston-prod-log-20231114-093133Z.log", "r"),
+ io.open("houston-prod-log-20231114-120002Z.log", "r"),
+ io.open("houston-prod-log-20231114-123040Z.log", "r"),
+ io.open("houston-prod-log-20231114-160001Z.log", "r"),
+ io.open("houston-prod-log-20231116-082933Z.log", "r"),
+ io.open("houston-prod-log-20231116-120002Z.log", "r"),
+ io.open("houston-prod-log-20231116-160002Z.log", "r"),
+ io.open("houston-prod-log-20231117-081112Z.log", "r"),
+ io.open("houston-prod-log-20231117-120001Z.log", "r"),
+ io.open("houston-prod-log-20231117-164612Z.log", "r"),
+ },
+ snk = { write = function( t, buf, b, c ) io.stdout:write(buf) io.stdout:write("\n") end, },
+ }' | lua -W - | gzip -n > houston-log-merged-$(date -u +%Y%m%d-%H%M%S)Z.log.gz \
+ && true
+
+
+`# Create heap dump`
+com.sun.management.dumpHeap("/usr/local/vertx/houston-storage-file/houston-___-heap-2023____-____Z.hprof", true)
+
+`# Inspect`
+ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && ls -Ahl'
+
+true `# Get made heap dump` \
+ && echo create checksum. \
+ && ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && md5sum -b houston-*.hprof >> MD5SUM-$(date -u +%Y%m%d-%H%M%SZ)' \
+ && echo checksum done. Begin dload. \
+ && ocprod exec -i "$(ocprod get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && tar c houston-*.hprof MD5SUM*' | (cd heapDump && tar x) \
+ && echo dload done \
+ && true
+
+true `# Probe` \
+ && stage=prod \
+ && logFile="houston-${stage:?}-ready-$(date -u +%Y%m%d-%H%M%SZ).log" \
+ && printf 'c; when ;rspCode; connectSec; trsfSec; totlSec; curlExit\n' | tee -a "${logFile:?}" \
+ && while true; do true \
+ && printf 'r;%s;%7d;%11.3f;%8.3f;%8.3f;%9d\n' \
+ $(date +%Y-%m-%dT%H:%M:%S%z) \
+ $(curl -sSw "%{http_code} %{time_connect} %{time_starttransfer} %{time_total}" "${houstonServerInfoUrl:?}" -o /dev/null || ex=$? && echo " $ex") \
+ | tee -a "${logFile:?}" \
+ && sleep $(expr 60 - $(date +%s) % 60) || break \
+ ;done \
+ && true
+
+true \
+ && ONE="houston-prod-pmap-20231102-163425.txt" \
+ && TWO="houston-prod-pmap-20231103-074301.txt" \
+ && diff -U0 "${ONE:?}" "${TWO:?}" | egrep '^\+' | sed -r 's_\+([^ ]+) .*$_\1_'|sort|uniq \
+ && true
+
+(true \
+ && for F in $(ls *pmap*.txt); do true \
+ && printf "$F\n" \
+ && DATE="$(date +%s -d "$(echo $F|sed -r 's_.*([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2}).*_\1-\2-\3T\4:\5:\6Z_')")" \
+ && <"$F" ${MemLeakTry1:?} --date "${DATE:?}" > "${F%.*}.csv" \
+ ;done)
+
+
+
+
+
+
+
+Zwischenfall auf INT 20231124
+
+
+
+`# Create heap dump`
+com.sun.management.dumpHeap("/usr/local/vertx/houston-storage-file/houston-___-heap-2023____-____Z.hprof", true)
+
+`# Inspect`
+${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && ls -Ahl'
+
+true `# Get made heap dump` \
+ && if test ! -d heapDump; then echo "Dir heapDump missing"; false ;fi \
+ && echo create checksum. \
+ && ${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && md5sum -b houston-*.hprof >> MD5SUM-$(date -u +%Y%m%d-%H%M%SZ)' \
+ && echo checksum done. Begin dload. \
+ && ${OC:?} exec -i "$(${OC:?} get pods|egrep ston-[1-9]|cut -f1 -d' ')" -- sh -c 'cd /usr/local/vertx/houston-storage-file && tar c houston-*.hprof MD5SUM*' | (cd heapDump && tar x) \
+ && echo dload done \
+ && true
+
+
+
+
+
+
+
diff --git a/doc/note/openshift/openshift.txt b/doc/note/openshift/openshift.txt
index e807da3..88e33ee 100644
--- a/doc/note/openshift/openshift.txt
+++ b/doc/note/openshift/openshift.txt
@@ -77,10 +77,18 @@ HINT: ALL files from Current dir (.) will get uploaded (when global rsync not av
## up/down scale from cli. input von thom (20230815)
- oc scale dc/preflux --replicas=1
+ oc scale dc/${SVCNAME:?} --replicas=1
-## TODO what was this for?
- oc get pvc
+
+## Kube Probe
+
+ echo && ocprod exec -ti "$(ocprod get pods|egrep ston-[0-9]|cut -f1 -d' ')" -- sh -c 'true \
+ && printf "c; When ; rsp_code; time_connect; time_redirect; time_starttransfer; time_total\n" \
+ && while true; do true \
+ && now=$(date -uIs) \
+ && curl -sS -o/dev/null -w "r; $(date -uIs); %{response_code}; %{time_connect}s; %{time_redirect}s; %{time_starttransfer}s; %{time_total}s\n" 127.0.0.1:7012/houston/server/info \
+ && sleep 5 || break \
+ ;done' | tee -a C:/work/tmp/houston-prod-inPod-probe.log