diff options
author | Andreas Fankhauser hiddenalpha.ch | 2023-10-19 21:58:55 +0200 |
---|---|---|
committer | Andreas Fankhauser hiddenalpha.ch | 2023-10-19 21:58:55 +0200 |
commit | acf2ebad30b50cfb7666d8e9945892c0d7e4b737 (patch) | |
tree | 3115881f7befe76e2635480e1f4f6498364c0ef7 | |
parent | cce297db6088882c2680dc7f64d785a7e1ee3774 (diff) | |
download | UnspecifiedGarbage-acf2ebad30b50cfb7666d8e9945892c0d7e4b737.zip UnspecifiedGarbage-acf2ebad30b50cfb7666d8e9945892c0d7e4b737.tar.gz |
(LogDig) Update rules. Fix some bugs.
-rw-r--r-- | src/main/lua/paisa-logs/DigHoustonLogs.lua | 329 | ||||
-rw-r--r-- | src/main/lua/paisa-logs/PaisaLogParser.lua | 14 |
2 files changed, 267 insertions, 76 deletions
diff --git a/src/main/lua/paisa-logs/DigHoustonLogs.lua b/src/main/lua/paisa-logs/DigHoustonLogs.lua index c587d71..d7da356 100644 --- a/src/main/lua/paisa-logs/DigHoustonLogs.lua +++ b/src/main/lua/paisa-logs/DigHoustonLogs.lua @@ -9,11 +9,12 @@ local PaisaLogParser = require("PaisaLogParser") local normalizeIsoDateTime = require("PaisaLogParser").normalizeIsoDateTime +local LOGDBG = function(msg)io.stderr:write(msg)end local main, onLogEntry, isWorthToPrint, loadFilters, initFilters -local function main() +function main() local that = { logPattern = "DATE STAGE SERVICE LEVEL FILE - MSG", -- Since 2021-09-24 on prod printRaw = true, @@ -30,112 +31,289 @@ local function main() end -local function loadFilters( that ) +function loadFilters( that ) assert(not that.filters) that.filters = { - { action = "drop", beforeDate = "2023-10-18 03:00:00.000" }, - { action = "drop", afterDate = "2023-10-18 15:00:00.000" }, + { action = "drop", beforeDate = "2023-10-18 03:00:00.000", }, + { action = "drop", afterDate = "2023-10-18 15:00:00.000", }, { action = "drop", level = "TRACE" }, { action = "drop", level = "DEBUG" }, { action = "drop", level = "INFO" }, + { action = "drop", level = "WARN" }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "ContextImpl", level = "ERROR", + msgEquals = "Unhandled exception\njava.lang.NullPointerException: No null handler accepted", + stackPattern = "^" + .."\tat java.util.Objects.requireNonNull.Objects.java:246. ~..:..\n" + .."\tat io.vertx.core.impl.future.FutureImpl.onComplete.FutureImpl.java:132. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + .."\tat io.vertx.core.impl.future.PromiseImpl.onComplete.PromiseImpl.java:23. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + .."\tat io.vertx.core.file.impl.FileSystemImpl.delete.FileSystemImpl.java:290. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + .."\tat org.swisspush.reststorage.FilePutter.FileCleanupManager.deleteFile.FilePutter.java:218. ~.rest.storage.[0-9.]+.jar:..\n" + .."\tat org.swisspush.reststorage.FilePutter.FileCleanupManager.lambda.cleanupFile.0.FilePutter.java:192. ~.rest.storage.[0-9.]+.jar:..\n" + .."\tat io.vertx.core.impl.future.FutureImpl.3.onSuccess.FutureImpl.java:141. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + }, + + -- Seen: 2023-10-18 prod + -- TODO open PR to add some logging so we have a chance to find submarine. + { action = "drop", file = "ContextImpl", level = "ERROR", + msgEquals = "Unhandled exception\njava.lang.IllegalStateException: Response head already sent", + stackPattern = "^" + .."\tat io.vertx.core.http.impl.Http1xServerResponse.checkHeadWritten.Http1xServerResponse.java:684. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + .."\tat io.vertx.core.http.impl.Http1xServerResponse.setStatusCode.Http1xServerResponse.java:153. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n" + .."\tat org.swisspush.gateleen.routing.Forwarder.lambda.getAsyncHttpClientResponseHandler.7.Forwarder.java:430. ~.gateleen.routing.[0-9.]+.jar:..\n" + .."\tat io.vertx.core.impl.future.FutureImpl.3.onFailure.FutureImpl.java:153. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n", + }, + + -- Reported: SDCISA-13717 + -- Seen: 2023-10-18 prod + { action = "drop", file = "LocalHttpServerResponse", level = "ERROR", + msgPattern = "^non%-proper HttpServerResponse occured\r?\n" + .."java.lang.IllegalStateException:" + .." You must set the Content%-Length header to be the total size of the message body BEFORE sending any data if you are not using" + .." HTTP chunked encoding.", }, + + -- Reported: <none> + -- Seen: 2023-10-18 prod + { action = "drop", file = "ContextImpl", level = "ERROR", + msgPattern = "Unhandled exception\n" + .."java.lang.IllegalStateException: You must set the Content%-Length header to be the total size of the message body BEFORE sending" + .." any data if you are not using HTTP chunked encoding.", }, + + -- Seen: 2023-10-18 + -- Opened nsync PR 49 as a first counter measure. + { action = "drop", file = "ContextImpl", level = "ERROR", msgEquals = "Unhandled exception\njava.lang.NullPointerException: null", + stackStartsWith = "\tat org.swisspush.nsync.multiget.MultiGetServer.lambda$tryLaunchOneRequest$2(MultiGetServer.java:107) ~[nsync-0.6.0.jar:?]" }, + + + -- Bunch of nonsense !ERROR!s which happen all the time as eddies go offline. + + -- Seen: 2023-10-18 + -- Happens all the time as gateleens error reporting is broken-by-desing. + { action = "drop", file = "Forwarder", level = "WARN", + msgPattern = "^..... ................................ Problem to request /from%-houston/[0-9]+/eagle/nsync/v1/push/trillian%-phonebooks" + .."%-affiliated%-planning%-area%-[0-9]+%-vehicles: io.netty.channel.ConnectTimeoutException: connection timed out:" + .." eddie[0-9]+.pnet.ch/[0-9]+:7012", }, + -- Seen: 2023-10-18 + -- Nearly same as above but on ERROR level instead. + { action = "drop", file = "Forwarder", level = "ERROR", + msgPattern = "^%%%w+ %x+ http://eddie%d+:7012/from.houston/%d+/eagle/nsync/v1/push/trillian.phonebooks.affiliated.planning.area.%d+.vehicles" + .." The timeout period of 30000ms has been exceeded while executing POST /from.houston/%d+/eagle/nsync/v1/push/" + .."trillian.phonebooks.affiliated.planning.area.%d+.vehicles for server eddie%d+:7012", }, + -- Seen: 2023-10-18 prod + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://localhost:9089/houston/vehicles/%d+/vehicle/backup/v1/executions/%d+/backup.zip The timeout period of 30000ms has been exceeded" + .." while executing PUT /houston/vehicles/%d+/vehicle/backup/v1/executions/%d+/backup.zip for server localhost:9089", }, + -- Seen: 2023-10-18 prod + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://localhost:9089/houston/vehicles/%d+/vehicle/backup/v1/executions/%d+/backup.zip Timeout$" }, + -- Seen: 2023-10-18 prod + -- I guess this happens if an eddie tries to put his "backup.zip" via shaky connection. + { action = "drop", file = "FilePutter", level = "ERROR", + msgEquals = "Put file failed:\nio.vertx.core.VertxException: Connection was closed", }, + -- Seen: 2023-10-18 prod + -- There are a whole bunch of related errors behind this filter which AFAICT all relate to shaky eddie connections. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+ http://eddie%d+:7012/from.houston/%d+/eagle/[^ ]+" + .." The timeout period of 30000ms has been exceeded while executing [DEGLOPSTU]+ /from.houston/%d+/eagle/[^ ]+ for server eddie%d+:7012$", }, + -- Seen: 2023-10-18 prod + { action = "drop", file = "Forwarder", level = "ERROR", + msgPattern = "^%%%w+ %x+ http://eddie%d+:7012/from.houston/%d+/eagle/[^ ]+ Connection was closed$", }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "ConnectionBase", level = "ERROR", msgEquals = "Connection reset by peer", }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "EventBusBridgeImpl", level = "ERROR", msgEquals = "SockJSSocket exception\nio.vertx.core.VertxException: Connection was closed", }, + + -- Seen: 2023-10-18 prod + -- Reported: TODO link existing issue here + { action = "drop", file = "HttpHeaderUtil", level = "ERROR", + msgPattern = "Keep%-Alive%} values do not match timeout=42 != timeout=120 for request /googleplex/.*", }, + + -- Seen: 2023-10-18 prod + -- Reported: <unknown> + { action = "drop", file = "Utils", level = "ERROR", + msgPattern = "^Exception occurred\nio.vertx.core.eventbus.ReplyException: Sync failed.\n" + .."{\n" + ..' "countIndexQueries" : 1,\n' + ..' "countSentBytes" : 119,\n' + ..' "countReceivedBytes" : 0,\n' + ..' "countMultiGetRequests" : 0,\n' + ..' "countPuts" : 0,\n' + ..' "countDeletes" : 0,\n' + ..' "durationSeconds" : 0.0,\n' + ..' "iterationDepth" : 0\n' + .."}", }, + + -- Seen: 2023-10-18 prod + -- Reported: <unknown> + { action = "drop", file = "ContextImpl", level = "ERROR", msgEquals = "Unhandled exception\njava.lang.UnsupportedOperationException: null", + stackPattern = "^" + .."\tat org.swisspush.gateleen.core.http.LocalHttpClientRequest.connection.LocalHttpClientRequest.java:754. ~.gateleen.core.[0-9.]+.jar:..\n" + .."\tat org.swisspush.gateleen.routing.Forwarder.1.lambda.handle.0.Forwarder.java:362. ~.gateleen.routing.[0-9.]+.jar:..\n" + .."\tat io.vertx.core.impl.AbstractContext.dispatch.AbstractContext.java:100. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n", + }, + + -- Seen: 2023-10-18 prod + -- Reported: <unknown> + { action = "drop", file = "Utils", level = "ERROR", + msgPattern = "^Exception occurred\nio.vertx.core.eventbus.ReplyException: Timed out after waiting 30000.ms. for a reply. address:" + .." __vertx.reply.[0-9]+, repliedAddress: nsync.reregister.sync/slarti.vehicle.setup.sync.[0-9]+", + }, + + -- Seen: 2023-10-18 prod + -- Reported: <unknown> + { action = "drop", file = "Utils", level = "ERROR", msgPattern = "^Exception occurred\n" + .."io.vertx.core.eventbus.ReplyException: Timed out after waiting 30000.ms. for a reply. address: __vertx.reply.[0-9]+, repliedAddress: nsync.register.sync" }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "HttpClientRequestImpl", level = "ERROR", + msgEquals = "Connection was closed\nio.vertx.core.VertxException: Connection was closed", }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "Forwarder", level = "ERROR", + msgPattern = "^..... ................................ http://bistr:8080/bistr/vending/accounting/v1/information/lastSessionEnd Connection was closed$", }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "Forwarder", level = "ERROR", + msgPattern = "..... ................................ http://bob:8080/bob/vending/transaction/v1/systems/%d+/dates/[0-9-]+/transactions/%d+/start" + .." The timeout period of 30000ms has been exceeded while executing PUT /bob/vending/transaction/v1/systems/%d+/dates/[0-9-]+/transactions/%d+/start" + .." for server bob:8080", }, + + -- Seen: 2023-10-18 prod + { action = "drop", file = "ContextImpl", level = "ERROR", msgEquals = "Unhandled exception\njava.lang.IllegalStateException: null", + stackStartsWith = "" + .."\tat io.vertx.core.http.impl.HttpClientResponseImpl.checkEnded(HttpClientResponseImpl.java:150) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.http.impl.HttpClientResponseImpl.endHandler(HttpClientResponseImpl.java:172) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat org.swisspush.gateleen.routing.Forwarder.lambda$getAsyncHttpClientResponseHandler$7(Forwarder.java:476) ~[gateleen-routing-1.3.25.jar:?]\n" + .."\tat io.vertx.core.impl.future.FutureImpl$3.onSuccess(FutureImpl.java:141) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.impl.future.FutureBase.emitSuccess(FutureBase.java:60) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.impl.future.FutureImpl.addListener(FutureImpl.java:196) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.impl.future.PromiseImpl.addListener(PromiseImpl.java:23) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.impl.future.FutureImpl.onComplete(FutureImpl.java:164) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.impl.future.PromiseImpl.onComplete(PromiseImpl.java:23) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.http.impl.HttpClientRequestBase.response(HttpClientRequestBase.java:240) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat io.vertx.core.http.HttpClientRequest.send(HttpClientRequest.java:330) ~[vertx-core-4.2.1.jar:4.2.1]\n" + .."\tat org.swisspush.gateleen.routing.Forwarder$1.lambda$handle$1(Forwarder.java:377) ~[gateleen-routing-1.3.25.jar:?]\n" + .."\tat org.swisspush.gateleen.core.http.BufferBridge.lambda$pump$0(BufferBridge.java:43) ~[gateleen-core-1.3.25.jar:?]\n" + .."\tat io.vertx.core.impl.AbstractContext.dispatch(AbstractContext.java:100) ~[vertx-core-4.2.1.jar:4.2.1]\n", + }, + + -- Seen: 2023-10-18 prod + -- TODO Push issue to my backlog to fix this. + { action = "drop", file = "ContextImpl", level = "ERROR", + msgEquals = "Unhandled exception\njava.lang.UnsupportedOperationException: Do override this method to mock expected behaviour.", + stackPattern = "^" + .."\tat org.swisspush.gateleen.core.http.FastFailHttpServerResponse.drainHandler.FastFailHttpServerResponse.java:41. ~.gateleen.core.[0-9.]+.jar:..\n" + .."\tat org.swisspush.gateleen.core.http.FastFailHttpServerResponse.drainHandler.FastFailHttpServerResponse.java:24. ~.gateleen.core.[0-9.]+.jar:..\n" + .."\tat org.swisspush.gateleen.logging.LoggingWriteStream.drainHandler.LoggingWriteStream.java:73. ~.gateleen.logging.[0-9.]+.jar:..\n" + .."\tat io.vertx.core.streams.impl.PumpImpl.stop.PumpImpl.java:95. ~.vertx.core.[0-9.]+.jar:[0-9.]+]\n" + .."\tat io.vertx.core.streams.impl.PumpImpl.stop.PumpImpl.java:39. ~.vertx.core.[0-9.]+.jar:[0-9.]+]\n" + .."\tat org.swisspush.gateleen.routing.Forwarder.lambda$getAsyncHttpClientResponseHandler.4.Forwarder.java:494. ~.gateleen.routing.[0-9.]+.jar:..\n" + .."\tat org.swisspush.gateleen.routing.Forwarder.lambda$getAsyncHttpClientResponseHandler.5.Forwarder.java:503. ~.gateleen.routing.[0-9.]+.jar:..\n" + .."\tat io.vertx.core.impl.AbstractContext.dispatch.AbstractContext.java:100. ~.vertx.core.[0-9.]+.jar:[0-9.]+.\n", + }, + + { action = "drop", file = "Forwarder", level = "ERROR", + msgPattern = "^..... ................................ http://thought:8080/thought/vehicleoperation/recording/v1/events The timeout period of 60000ms has been exceeded while executing PUT /thought/vehicleoperation/recording/v1/events for server thought:8080$", + }, + + -- WELL_KNOWN: I guess happens when vehicle looses connection. Seen 2023-10-18 prod. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://eddie%d+:7012/from.houston/%d+/eagle/vending/accounting/v1/users/%d+/years/%d+/months/%d%d/account Connection was closed$", }, + -- WELL_KNOWN: I guess happens when vehicle looses connection. Seen 2023-10-18 prod. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://eddie%d+:7012/from.houston/%d+/eagle/nsync/v1/push/trillian.phonebooks.affiliated.planning.area.%d+.vehicles Connection was closed$", }, + -- WELL_KNOWN: I guess happens when vehicle looses connection. Seen 2023-10-18 prod. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://eddie%d+:7012/from.houston/%d+/eagle/nsync/v1/query.index The timeout period of 30000ms has been exceeded while executing" + .." POST /from.houston/%d+/eagle/nsync/v1/query-index for server eddie%d+:7012$", }, + -- WELL_KNOWN: I guess happens when vehicle looses connection. Seen 2023-10-18 prod. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+" + .." http://eddie%d+:7012/from.houston/%d+/eagle/timetable/notification/v1/planningareas/%d+/notifications/%x+ Connection was closed$", }, + -- WELL_KNOWN: I guess happens when vehicle looses connection. Seen 2023-10-18 prod. + { action = "drop", file = "Forwarder", level = "ERROR", msgPattern = "^%%%w+ %x+ http://eddie%d+:7012/from.houston/%d+/eagle/nsync/v1/push/trillian.phonebooks.affiliated.planning.area.%d+.vehicles Connection reset by peer$", }, + + ---- TODO Thought timeout? Can happen. But how often is ok? + ---- HINT: Occurred 15 times in 6 hrs (avg 1x per 24min) (2021-09-17_12:00 to 2021-09-17_18:00) + ---- Seen 2022-06-20, 2022-08-30 prod + --{ action = "drop", file = "Forwarder", level = "ERROR", + -- msgPattern = "http://[aghilmostuwy]+:8080/[aghilmostuwy]+/vehicleoperation/recording/v1/.+ Timeout", }, ---- [SDCISA-9572] pag --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://[8acgilmnpsvwy]+:[78]080/[_aegilmopstwy]+/.+ Connection was closed", }, + -- msgPattern = "http://[8acgilmnpsvwy]+:[78]080/[_aegilmopstwy]+/.+ Connection was closed", }, --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "t.ch:7022/brox/from/vehicles/.+Connection refused: ", }, + -- msgPattern = "t.ch:7022/brox/from/vehicles/.+Connection refused: ", }, --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = " http://%w+.pnet.ch:7022/brox/info Connection refused: %w+.pnet.ch/[%d.]+:7022" }, + -- msgPattern = " http://%w+.pnet.ch:7022/brox/info Connection refused: %w+.pnet.ch/[%d.]+:7022" }, ---- TODO Analyze ---- Observed 20014 times within 6 hrs (~1/sec) (2021-09-17_12:00 to 2021-09-17_18:00) ---- HINT: Eddie connections issues also have around 20000 occurrences. Maybe related? ---- Seen: 2021-09-17 --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://eddie%d+:7012/from.houston/[^/]+/eagle/[^ ]+ Response already written. Not sure about the" + -- msgPattern = "http://eddie%d+:7012/from.houston/[^/]+/eagle/[^ ]+ Response already written. Not sure about the" -- .." state. Closing server connection for stability reason", }, - ---- TODO link or create issue - ---- HINT: Occurred 774 times within 6 hrs (~2x/min) (2021-09-17_12:00 to 2021-09-17_18:00) - ---- Seen: 2022-06-20 prod - --{ action = "drop", file = "Utils", level = "ERROR", - -- msgPat = "Exception occurred\n%(TIMEOUT,%-1%) Timed out after waiting 30000%(ms%) for a reply. address:" - -- .." __vertx.reply.+, repliedAddress: nsync%-register%-sync", }, - ---- TODO Analyze ---- HINT: Occurred 1538 times in 6 hrs (~ 1x per 15sec) (2021-09-17_12:00 to 2021-09-17_18:00) --{ action = "drop", file = "Forwarder", level = "WARN", - -- msgPat = "Failed to '[^ ]+ /from%-houston/%d+/eagle/.+'\n.+VertxException: Connection was closed", }, + -- msgPattern = "Failed to '[^ ]+ /from%-houston/%d+/eagle/.+'\n.+VertxException: Connection was closed", }, --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://eddie%d+:7012/from%-houston/%d+/eagle/.+ Connection was closed", }, + -- msgPattern = "http://eddie%d+:7012/from%-houston/%d+/eagle/.+ Connection was closed", }, ---- TODO Analyze ---- Seen: 2021-09-17, ..., 2022-06-20 --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://pag:8080/pag/user/information/v1/directory/sync/request Timeout", }, + -- msgPattern = "http://pag:8080/pag/user/information/v1/directory/sync/request Timeout", }, ---- Seen 2021-10-25, 2022-08-30 prod --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://[8acgilmnpsvwy]+:8080/[_aegilmopstwy]+/.+ Response already written. Not sure about the" + -- msgPattern = "http://[8acgilmnpsvwy]+:8080/[_aegilmopstwy]+/.+ Response already written. Not sure about the" -- .." state. Closing server connection for stability reason", }, ---- TODO Analyze. ---- Seen 2021-09-17, 2022-06-20 --{ action = "drop", file = "BisectClient", level = "WARN", - -- msgPat = "statusCode=503 received for POST /houston/routes/vehicles/%d+/eagle/nsync/v1/query%-index", }, + -- msgPattern = "statusCode=503 received for POST /houston/routes/vehicles/%d+/eagle/nsync/v1/query%-index", }, ---- Seen 2022-06-20 prod --{ action = "drop", file = "BisectClient", level = "WARN", - -- msgPat = "statusCode=504 received for POST /houston/routes/vehicles/%d+/eagle/nsync/v1/query%-index", }, + -- msgPattern = "statusCode=504 received for POST /houston/routes/vehicles/%d+/eagle/nsync/v1/query%-index", }, ---- TODO rm filter when fixed ---- Reported: SDCISA-9573 ---- Seen: 2022-08-30 prod, 2022-06-20, 2021-09-17 --{ action = "drop", file = "BisectClient", level = "WARN", - -- msgPat = "Index id=slarti%-vehicle%-setup%-sync%-%d+ rootPath=/houston/from/vehicles/%d+/vehicle/setup/v1 size=%d+ not %(nor no more%) ready. Aborting BisectClient", }, - - ---- TODO Thought timeout? Can happen. But how often is ok? - ---- HINT: Occurred 15 times in 6 hrs (avg 1x per 24min) (2021-09-17_12:00 to 2021-09-17_18:00) - ---- Seen 2022-06-20, 2022-08-30 prod - --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://[aghilmostuwy]+:8080/[aghilmostuwy]+/vehicleoperation/recording/v1/.+ Timeout", }, + -- msgPattern = "Index id=slarti%-vehicle%-setup%-sync%-%d+ rootPath=/houston/from/vehicles/%d+/vehicle/setup/v1 size=%d+ not %(nor no more%) ready. Aborting BisectClient", }, ---- Reported: SDCISA-9574 ---- TODO rm when resolved ---- Seen: 2021-09-17 2022-06-20, 2022-08-30 prod, --{ action = "drop", file = "Utils", level = "ERROR", - -- msgPat = "Exception occurred\n%(RECIPIENT_FAILURE,500%) Sync failed.\n{.+}", }, + -- msgPattern = "Exception occurred\n%(RECIPIENT_FAILURE,500%) Sync failed.\n{.+}", }, ---- TODO Analyze --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "http://preflux:8080/preflux/data/preflux/rollout/hosts/eddie%d+/instances/default/situation Timeout", }, + -- msgPattern = "http://preflux:8080/preflux/data/preflux/rollout/hosts/eddie%d+/instances/default/situation Timeout", }, ---- TODO Analyze. ---- Seen 2022-08-30 prod, 2022-06-20, 2021-09-17 --{ action = "drop", file = "RedisQues", level = "WARN", - -- msgPat = "Registration for queue .+ has changed to null", }, + -- msgPattern = "Registration for queue .+ has changed to null", }, ---- Reported: SDCISA-10973 ---- Seen: 2023-10-18 prod. --{ action = "drop", file = "HttpClientRequestImpl", level = "ERROR", - -- msgPat = "The timeout period of 30000ms has been exceeded while executing PUT /houston/vehicles/[0-9]+" + -- msgPattern = "The timeout period of 30000ms has been exceeded while executing PUT /houston/vehicles/[0-9]+" -- .."/vehicle/backup/v1/executions/[0-9]+/backup.zip for server localhost:9089", }, - ---- Reported: TODO - ---- Seen: 2023-10-18 prod. - --{ action = "drop", file = "Utils", level = "ERROR", - -- msgPat = "Exception occurred\nio.vertx.core.eventbus.ReplyException: Timed out after waiting 30000%(ms%) for" - -- .." a reply. address: __vertx.reply.[0-9]+, repliedAddress: nsync%-re", }, - - ---- Seen: 2023-10-18 prod - --{ action = "drop", file = "HttpHeaderUtil", level = "ERROR", - -- msgPat = "Keep%-Alive%} values do not match timeout=42 != timeout=120 for request /googleplex/internal/security/login_state", }, - --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = "[%a-z0-9]+ [a-z0-9]+ http://eddie.....:7012/from%-houston/[^/]+/eagle/nsync/v1/push/trillian" + -- msgPattern = "[%a-z0-9]+ [a-z0-9]+ http://eddie.....:7012/from%-houston/[^/]+/eagle/nsync/v1/push/trillian" -- .."%-phonebooks%-affiliated%-planning%-area%-[^-]+%-vehicles The timeout period of 30000ms has been" -- .." exceeded while executing POST /from%-houston/[0-9]+/eagle/nsync/v1/push/trillian%-phonebooks" -- .."%-affiliated%-planning%-area%-[^%-]+-vehicles for server eddie.....:7012", }, @@ -144,46 +322,64 @@ local function loadFilters( that ) ---- TODO rm when fixed ---- Seen 2022-08-30 prod, 2022-06-20 prod --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" + -- msgPattern = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" -- .." Connection reset by peer", }, --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" + -- msgPattern = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" -- .." Connection was closed", }, --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" + -- msgPattern = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/navigation/location/v1/position/collected" -- .." Response already written. Not sure about the state. Closing server connection for stability reason", }, ---- TODO analyze ---- Seen 2022-06-20 prod --{ action = "drop", file = "Forwarder", level = "ERROR", - -- msgPat = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/timetable/private/v1/trip/state/%w+.xml Connection was closed", }, + -- msgPattern = " http://vhfspa1.pnet.ch:7022/brox/from/vehicles/[^/]+/timetable/private/v1/trip/state/%w+.xml Connection was closed", }, } end -local function initFilters( that ) +function initFilters( that ) for iF = 1, #(that.filters) do local descr = that.filters[iF] - local beforeDate, afterDate = descr.beforeDate, descr.afterDate - local file, level, msgPat = descr.file, descr.level, descr.msgPat + local beforeDate = descr.beforeDate and normalizeIsoDateTime(descr.beforeDate) + local afterDate = descr.afterDate and normalizeIsoDateTime(descr.afterDate) + local file, level, msgPattern = descr.file, descr.level, descr.msgPattern + local rawPattern, stackPattern = descr.rawPattern, descr.stackPattern + local stackStartsWith = descr.stackStartsWith local filter = { action = descr.action, matches = false, } - filter.matches = function( that, log ) - if file and file ~= log.file then return false end - if level and level ~= log.level then return false end - local logDate = normalizeIsoDateTime(log.date) - if logDate < beforeDate then return false end - if logDate >= afterDate then return false end - if msgPat and not log.msg:find(msgPat) then return false end - return true + local hasAnyCondition = (beforeDate or afterDate or file or level or msgPattern or rawPattern or stackPattern or stackStartsWith); + if not hasAnyCondition then + filter.matches = function( that, log ) --[[LOGDBG("match unconditionally\n")]] return true end + else + filter.matches = function( that, log ) + local match, mismatch = true, false + if not log.date then log:debugPrint() end + if level and level ~= log.level then --[[LOGDBG("level mismatch: \"".. level .."\" != \"".. log.level .."\"\n")]] return mismatch end + if file and file ~= log.file then --[[LOGDBG("file mismatch: \"".. file .."\" != \"".. log.file .."\"\n")]] return mismatch end + local logDate = normalizeIsoDateTime(log.date) + local isBeforeDate = (not beforeDate or logDate < beforeDate); + local isAfterDate = (not afterDate or logDate >= afterDate); + if not isBeforeDate then --[[LOGDBG("not before: \"".. tostring(beforeDate) .."\", \"".. logDate .."\"\n")]] return mismatch end + if not isAfterDate then --[[LOGDBG("not after: \"".. tostring(afterDate) .."\", \"".. logDate .."\"\n")]] return mismatch end + if msgEquals and log.msg ~= msgEquals then return mismatch end + if stackStartsWith and log.stack and log.stack:sub(1, #stackStartsWith) ~= stackStartsWith then return mismatch end + if msgPattern and not log.msg:find(msgPattern) then --[[LOGDBG("match: msgPattern\n")]] return mismatch end + if stackPattern and log.stack and not log.stack:find(stackPattern) then return mismatch end + if rawPattern and not log.raw:find(rawPattern) then return mismatch end + --LOGDBG("DEFAULT match\n") + return match + end end that.filters[iF] = filter end end -local function onLogEntry( log, that ) - if isWorthToPrint(that, log) then +function onLogEntry( log, that ) + local isWorthIt = isWorthToPrint(that, log) + if isWorthIt then if that.printRaw then print(log.raw) else @@ -193,19 +389,8 @@ local function onLogEntry( log, that ) end -local function isWorthToPrint( that, log ) +function isWorthToPrint( that, log ) local pass, drop = true, false - -- Time range - local begDate, endDate = that.begDate, that.endDate - if begDate or endDate then - local date = normalizeIsoDateTime(log.date) - if begDate and date <= begDate then return drop end - if endDate and date > endDate then return drop end - end - -- log level - local skipLevels = that.skipLevels - if skipLevels and skipLevels[log.level:upper()] then return drop end - -- dynamic filters for iF = 1, #(that.filters) do local filter = that.filters[iF] if filter.matches(that, log) then diff --git a/src/main/lua/paisa-logs/PaisaLogParser.lua b/src/main/lua/paisa-logs/PaisaLogParser.lua index 8620e0c..f6ac0ce 100644 --- a/src/main/lua/paisa-logs/PaisaLogParser.lua +++ b/src/main/lua/paisa-logs/PaisaLogParser.lua @@ -357,8 +357,11 @@ end function LogParse:appendStacktraceLine() local log = self:getOrNewLogEntry() - log.stack = log.stack or ""; - log.stack = log.stack .."\n".. self.line; + if not log.stack then + log.stack = self.line + else + log.stack = log.stack .."\n".. self.line + end -- Also append to raw to have the complete entry there. log.raw = log.raw .."\n".. self.line; end @@ -376,7 +379,10 @@ function LogParse:publishLogEntry() error(msg); return end self.log = nil; -- Mark as consumed - self.cb_onLogEntry( log, self.cb_cls ) + -- Make sure log lines do NOT end in 0x0D + local msg = log.msg + if msg:byte(msg:len()) == 0x0D then log.msg = msg:sub(1, -2) end + self.cb_onLogEntry(log, self.cb_cls) end @@ -388,7 +394,7 @@ end function exports.normalizeIsoDateTime( str ) if str:find("%d%d%d%d%-%d%d%-%d%dT%d%d:%d%d:%d%d%.%d%d%d") then return str end - local y, mo, d, h, mi, s, ms = str:match("(%d%d%d)-(%d%d)-(%d%d)[ T_-](%d%d):(%d%d):(%d%d)[,.](%d%d%d)") + local y, mo, d, h, mi, s, ms = str:match("^(%d%d%d%d)-(%d%d)-(%d%d)[ T_-](%d%d):(%d%d):(%d%d)[,.](%d%d%d)$") return y .."-".. mo .."-".. d .."T".. h ..":".. mi ..":".. s ..".".. ms end |