Dirk Zimoch has proposed merging ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0.
Commit message:
fix several problems in logClient that lead to lost messages when the server (or firewall in between) disconnects
Requested reviews:
mdavidsaver (mdavidsaver)
For more details, see:
https://code.launchpad.net/~dirk.zimoch/epics-base/+git/epics-base/+merge/372925
Bugfix: don't send errlog to all logClients, only to iocLog
Improvements to minimize message loss when server disconnects:
* detect lost connection to server early
* do not discard buffer at disconnect
* flush buffer directly after reconnect
Other improvements:
* avoid delays on ioc startup and shutdown
* avoid repeated memmove
Relates to bug report https://bugs.launchpad.net/epics-base/+bug/1841608 and Tech-Talk thread https://epics.anl.gov/tech-talk/2019/msg01329.php.
--
Your team EPICS Core Developers is subscribed to branch epics-base:7.0.
diff --git a/modules/database/src/ioc/misc/dbCore.dbd b/modules/database/src/ioc/misc/dbCore.dbd
index 51f9c96..adde24e 100644
--- a/modules/database/src/ioc/misc/dbCore.dbd
+++ b/modules/database/src/ioc/misc/dbCore.dbd
@@ -33,3 +33,6 @@ variable(callbackParallelThreadsDefault,int)
# Real-time operation
variable(dbThreadRealtimeLock,int)
+
+# show logClient network activity
+variable(logClientDebug,int)
diff --git a/modules/libcom/src/log/iocLog.c b/modules/libcom/src/log/iocLog.c
index e62da20..8cb1349 100644
--- a/modules/libcom/src/log/iocLog.c
+++ b/modules/libcom/src/log/iocLog.c
@@ -18,8 +18,10 @@
#define epicsExportSharedSymbols
#include "envDefs.h"
+#include "errlog.h"
#include "logClient.h"
#include "iocLog.h"
+#include "epicsExit.h"
int iocLogDisable = 0;
@@ -75,6 +77,14 @@ void epicsShareAPI epicsShareAPI iocLogFlush (void)
}
/*
+ * logClientDestroy()
+ */
+static void iocLogClientDestroy (logClientId id)
+{
+ errlogRemoveListeners (logClientSendMessage, id);
+}
+
+/*
* iocLogClientInit()
*/
static logClientId iocLogClientInit (void)
@@ -89,6 +99,10 @@ static logClientId iocLogClientInit (void)
return NULL;
}
id = logClientCreate (addr, port);
+ if (id != NULL) {
+ errlogAddListener (logClientSendMessage, id);
+ epicsAtExit (iocLogClientDestroy, id);
+ }
return id;
}
diff --git a/modules/libcom/src/log/logClient.c b/modules/libcom/src/log/logClient.c
index 99ee671..7627034 100644
--- a/modules/libcom/src/log/logClient.c
+++ b/modules/libcom/src/log/logClient.c
@@ -21,11 +21,9 @@
#include <string.h>
#include <stdio.h>
-#define epicsExportSharedSymbols
#include "dbDefs.h"
#include "epicsEvent.h"
#include "iocLog.h"
-#include "errlog.h"
#include "epicsMutex.h"
#include "epicsThread.h"
#include "epicsTime.h"
@@ -33,9 +31,13 @@
#include "epicsAssert.h"
#include "epicsExit.h"
#include "epicsSignal.h"
+#include "epicsExport.h"
#include "logClient.h"
+int logClientDebug = 0;
+epicsExportAddress (int, logClientDebug);
+
typedef struct {
char msgBuf[0x4000];
struct sockaddr_in addr;
@@ -44,8 +46,10 @@ typedef struct {
SOCKET sock;
epicsThreadId restartThreadId;
epicsEventId stateChangeNotify;
+ epicsEventId shutdownNotify;
unsigned connectCount;
unsigned nextMsgIndex;
+ unsigned backlog;
unsigned connected;
unsigned shutdown;
unsigned shutdownConfirm;
@@ -53,7 +57,6 @@ typedef struct {
} logClient;
static const double LOG_RESTART_DELAY = 5.0; /* sec */
-static const double LOG_SERVER_CREATE_CONNECT_SYNC_TIMEOUT = 5.0; /* sec */
static const double LOG_SERVER_SHUTDOWN_TIMEOUT = 30.0; /* sec */
/*
@@ -66,10 +69,10 @@ static char* logClientPrefix = NULL;
*/
static void logClientClose ( logClient *pClient )
{
-# ifdef DEBUG
+ if (logClientDebug) {
fprintf (stderr, "log client: lingering for connection close...");
fflush (stderr);
-# endif
+ }
/*
* mutex on
@@ -84,8 +87,6 @@ static void logClientClose ( logClient *pClient )
pClient->sock = INVALID_SOCKET;
}
- pClient->nextMsgIndex = 0u;
- memset ( pClient->msgBuf, '\0', sizeof ( pClient->msgBuf ) );
pClient->connected = 0u;
/*
@@ -93,9 +94,8 @@ static void logClientClose ( logClient *pClient )
*/
epicsMutexUnlock (pClient->mutex);
-# ifdef DEBUG
+ if (logClientDebug)
fprintf (stderr, "done\n");
-# endif
}
/*
@@ -113,6 +113,7 @@ static void logClientDestroy (logClientId id)
epicsMutexMustLock ( pClient->mutex );
pClient->shutdown = 1u;
epicsMutexUnlock ( pClient->mutex );
+ epicsEventSignal ( pClient->shutdownNotify );
/* unblock log client thread blocking in send() or connect() */
interruptInfo =
@@ -154,13 +155,11 @@ static void logClientDestroy (logClientId id)
return;
}
- errlogRemoveListeners ( logClientSendMessage, (void *) pClient );
-
logClientClose ( pClient );
epicsMutexDestroy ( pClient->mutex );
-
epicsEventDestroy ( pClient->stateChangeNotify );
+ epicsEventDestroy ( pClient->shutdownNotify );
free ( pClient );
}
@@ -176,60 +175,59 @@ static void sendMessageChunk(logClient * pClient, const char * message) {
unsigned msgBufBytesLeft =
sizeof ( pClient->msgBuf ) - pClient->nextMsgIndex;
- if ( strSize > msgBufBytesLeft ) {
- int status;
-
- if ( ! pClient->connected ) {
- break;
- }
-
- if ( msgBufBytesLeft > 0u ) {
- memcpy ( & pClient->msgBuf[pClient->nextMsgIndex],
- message, msgBufBytesLeft );
- pClient->nextMsgIndex += msgBufBytesLeft;
- strSize -= msgBufBytesLeft;
- message += msgBufBytesLeft;
- }
-
- status = send ( pClient->sock, pClient->msgBuf,
- pClient->nextMsgIndex, 0 );
- if ( status > 0 ) {
- unsigned nSent = (unsigned) status;
- if ( nSent < pClient->nextMsgIndex ) {
- unsigned newNextMsgIndex = pClient->nextMsgIndex - nSent;
- memmove ( pClient->msgBuf, & pClient->msgBuf[nSent],
- newNextMsgIndex );
- pClient->nextMsgIndex = newNextMsgIndex;
- }
- else {
- pClient->nextMsgIndex = 0u;
- }
- }
- else {
- if ( ! pClient->shutdown ) {
- char sockErrBuf[64];
- if ( status ) {
- epicsSocketConvertErrnoToString ( sockErrBuf, sizeof ( sockErrBuf ) );
- }
- else {
- strcpy ( sockErrBuf, "server initiated disconnect" );
- }
- fprintf ( stderr, "log client: lost contact with log server at \"%s\" because \"%s\"\n",
- pClient->name, sockErrBuf );
- }
- logClientClose ( pClient );
- break;
- }
+ if ( msgBufBytesLeft < strSize && pClient->nextMsgIndex != 0u && pClient->connected)
+ {
+ /* buffer is full, thus flush it */
+ logClientFlush ( pClient );
+ msgBufBytesLeft = sizeof ( pClient->msgBuf ) - pClient->nextMsgIndex;
}
- else {
- memcpy ( & pClient->msgBuf[pClient->nextMsgIndex],
- message, strSize );
- pClient->nextMsgIndex += strSize;
+ if ( msgBufBytesLeft == 0u ) {
+ fprintf ( stderr, "log client: messages to \"%s\" are lost\n",
+ pClient->name );
break;
}
+ if ( msgBufBytesLeft > strSize) msgBufBytesLeft = strSize;
+ memcpy ( & pClient->msgBuf[pClient->nextMsgIndex],
+ message, msgBufBytesLeft );
+ pClient->nextMsgIndex += msgBufBytesLeft;
+ strSize -= msgBufBytesLeft;
+ message += msgBufBytesLeft;
}
}
+/*
+ * epicsSockCountUnsentBytes ()
+ * Should go to osd socket support
+ */
+#if defined (_WIN32) && WINVER >= _WIN32_WINNT_WIN10
+#include <mstcpip.h>
+#endif
+
+static int epicsSockCountUnsentBytes(SOCKET sock) {
+#if defined (_WIN32) && WINVER >= _WIN32_WINNT_WIN10
+/* Windows 10 Version 1703 / Server 2016 */
+/* https://docs.microsoft.com/en-us/windows/win32/api/mstcpip/ns-mstcpip-tcp_info_v0 */
+ DWORD infoVersion = 0, bytesReturned;
+ TCP_INFO_v0 tcpInfo;
+ int status;
+ if ((status = WSAIoctl(sock, SIO_TCP_INFO, &infoVersion, sizeof(infoVersion),
+ &tcpInfo, sizeof(tcpInfo), &bytesReturned, NULL, NULL)) == 0)
+ return tcpInfo.BytesInFlight;
+#elif defined (SO_NWRITE)
+/* macOS / iOS */
+/* https://www.unix.com/man-page/osx/2/setsockopt/ */
+ int unsent;
+ if (getsockopt(sock, SOL_SOCKET, SO_NWRITE, &unsent) == 0)
+ return unsent;
+#elif defined (TIOCOUTQ)
+/* Linux */
+/* https://linux.die.net/man/7/tcp */
+ int unsent;
+ if (ioctl(sock, TIOCOUTQ, &unsent) == 0)
+ return unsent;
+#endif
+ return 0;
+}
/*
* logClientSend ()
@@ -255,43 +253,42 @@ void epicsShareAPI logClientSend ( logClientId id, const char * message )
void epicsShareAPI logClientFlush ( logClientId id )
{
+ unsigned nSent;
+ int status = 0;
+
logClient * pClient = ( logClient * ) id;
- if ( ! pClient ) {
+ if ( ! pClient || ! pClient->connected ) {
return;
}
epicsMutexMustLock ( pClient->mutex );
- while ( pClient->nextMsgIndex && pClient->connected ) {
- int status = send ( pClient->sock, pClient->msgBuf,
- pClient->nextMsgIndex, 0 );
- if ( status > 0 ) {
- unsigned nSent = (unsigned) status;
- if ( nSent < pClient->nextMsgIndex ) {
- unsigned newNextMsgIndex = pClient->nextMsgIndex - nSent;
- memmove ( pClient->msgBuf, & pClient->msgBuf[nSent],
- newNextMsgIndex );
- pClient->nextMsgIndex = newNextMsgIndex;
- }
- else {
- pClient->nextMsgIndex = 0u;
- }
+ nSent = pClient->backlog;
+ while ( nSent < pClient->nextMsgIndex && pClient->connected ) {
+ status = send ( pClient->sock, pClient->msgBuf + nSent,
+ pClient->nextMsgIndex - nSent, 0 );
+ if ( status < 0 ) break;
+ nSent += status;
+ }
+
+ if ( status < 0 ) {
+ if ( ! pClient->shutdown ) {
+ char sockErrBuf[128];
+ epicsSocketConvertErrnoToString ( sockErrBuf, sizeof ( sockErrBuf ) );
+ fprintf ( stderr, "log client: lost contact with log server at \"%s\" because \"%s\"\n",
+ pClient->name, sockErrBuf );
}
- else {
- if ( ! pClient->shutdown ) {
- char sockErrBuf[64];
- if ( status ) {
- epicsSocketConvertErrnoToString ( sockErrBuf, sizeof ( sockErrBuf ) );
- }
- else {
- strcpy ( sockErrBuf, "server initiated disconnect" );
- }
- fprintf ( stderr, "log client: lost contact with log server at \"%s\" because \"%s\"\n",
- pClient->name, sockErrBuf );
- }
- logClientClose ( pClient );
- break;
+ pClient->backlog = 0;
+ logClientClose ( pClient );
+ }
+ else if ( nSent > 0 && pClient->nextMsgIndex > 0 ) {
+ pClient->backlog = epicsSockCountUnsentBytes ( pClient->sock );
+ nSent -= pClient->backlog;
+ if ( nSent > 0 ) {
+ memmove ( pClient->msgBuf, & pClient->msgBuf[nSent],
+ pClient->nextMsgIndex );
+ pClient->nextMsgIndex -= nSent;
}
}
epicsMutexUnlock ( pClient->mutex );
@@ -302,10 +299,10 @@ void epicsShareAPI logClientFlush ( logClientId id )
*/
static void logClientMakeSock (logClient *pClient)
{
-
-# ifdef DEBUG
+ if (logClientDebug) {
fprintf (stderr, "log client: creating socket...");
-# endif
+ fflush (stderr);
+ }
epicsMutexMustLock (pClient->mutex);
@@ -314,7 +311,7 @@ static void logClientMakeSock (logClient *pClient)
*/
pClient->sock = epicsSocketCreate ( AF_INET, SOCK_STREAM, 0 );
if ( pClient->sock == INVALID_SOCKET ) {
- char sockErrBuf[64];
+ char sockErrBuf[128];
epicsSocketConvertErrnoToString (
sockErrBuf, sizeof ( sockErrBuf ) );
fprintf ( stderr, "log client: no socket error %s\n",
@@ -323,10 +320,8 @@ static void logClientMakeSock (logClient *pClient)
epicsMutexUnlock (pClient->mutex);
-# ifdef DEBUG
+ if (logClientDebug)
fprintf (stderr, "done\n");
-# endif
-
}
/*
@@ -366,7 +361,7 @@ static void logClientConnect (logClient *pClient)
}
else {
if ( pClient->connFailStatus != errnoCpy && ! pClient->shutdown ) {
- char sockErrBuf[64];
+ char sockErrBuf[128];
epicsSocketConvertErrnoToString (
sockErrBuf, sizeof ( sockErrBuf ) );
fprintf (stderr,
@@ -392,7 +387,7 @@ static void logClientConnect (logClient *pClient)
optval = TRUE;
status = setsockopt (pClient->sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&optval, sizeof(optval));
if (status<0) {
- char sockErrBuf[64];
+ char sockErrBuf[128];
epicsSocketConvertErrnoToString (
sockErrBuf, sizeof ( sockErrBuf ) );
fprintf (stderr, "log client: unable to enable keepalive option because \"%s\"\n", sockErrBuf);
@@ -404,11 +399,11 @@ static void logClientConnect (logClient *pClient)
*/
status = shutdown (pClient->sock, SHUT_RD);
if (status < 0) {
- char sockErrBuf[64];
+ char sockErrBuf[128];
epicsSocketConvertErrnoToString (
sockErrBuf, sizeof ( sockErrBuf ) );
- fprintf (stderr, "%s:%d shutdown(%d,SHUT_RD) error was \"%s\"\n",
- __FILE__, __LINE__, pClient->sock, sockErrBuf);
+ fprintf (stderr, "%s:%d shutdown(sock,SHUT_RD) error was \"%s\"\n",
+ __FILE__, __LINE__, sockErrBuf);
/* not fatal (although it shouldn't happen) */
}
@@ -425,7 +420,7 @@ static void logClientConnect (logClient *pClient)
lingerval.l_linger = 60*5;
status = setsockopt (pClient->sock, SOL_SOCKET, SO_LINGER, (char *) &lingerval, sizeof(lingerval));
if (status<0) {
- char sockErrBuf[64];
+ char sockErrBuf[128];
epicsSocketConvertErrnoToString (
sockErrBuf, sizeof ( sockErrBuf ) );
fprintf (stderr, "log client: unable to set linger options because \"%s\"\n", sockErrBuf);
@@ -457,14 +452,10 @@ static void logClientRestart ( logClientId id )
epicsMutexUnlock ( pClient->mutex );
- if ( isConn ) {
- logClientFlush ( pClient );
- }
- else {
- logClientConnect ( pClient );
- }
-
- epicsThreadSleep ( LOG_RESTART_DELAY );
+ if ( ! isConn ) logClientConnect ( pClient );
+ logClientFlush ( pClient );
+
+ epicsEventWaitWithTimeout ( pClient->shutdownNotify, LOG_RESTART_DELAY);
epicsMutexMustLock ( pClient->mutex );
}
@@ -480,9 +471,7 @@ static void logClientRestart ( logClientId id )
logClientId epicsShareAPI logClientCreate (
struct in_addr server_addr, unsigned short server_port)
{
- epicsTimeStamp begin, current;
logClient *pClient;
- double diff;
pClient = calloc (1, sizeof (*pClient));
if (pClient==NULL) {
@@ -507,14 +496,22 @@ logClientId epicsShareAPI logClientCreate (
pClient->shutdownConfirm = 0;
epicsAtExit (logClientDestroy, (void*) pClient);
-
+
pClient->stateChangeNotify = epicsEventCreate (epicsEventEmpty);
if ( ! pClient->stateChangeNotify ) {
epicsMutexDestroy ( pClient->mutex );
free ( pClient );
return NULL;
}
-
+
+ pClient->shutdownNotify = epicsEventCreate (epicsEventEmpty);
+ if ( ! pClient->shutdownNotify ) {
+ epicsMutexDestroy ( pClient->mutex );
+ epicsEventDestroy ( pClient->stateChangeNotify );
+ free ( pClient );
+ return NULL;
+ }
+
pClient->restartThreadId = epicsThreadCreate (
"logRestart", epicsThreadPriorityLow,
epicsThreadGetStackSize(epicsThreadStackSmall),
@@ -522,35 +519,12 @@ logClientId epicsShareAPI logClientCreate (
if ( pClient->restartThreadId == NULL ) {
epicsMutexDestroy ( pClient->mutex );
epicsEventDestroy ( pClient->stateChangeNotify );
+ epicsEventDestroy ( pClient->shutdownNotify );
free (pClient);
fprintf(stderr, "log client: unable to start log client connection watch dog thread\n");
return NULL;
}
- /*
- * attempt to synchronize with circuit connect
- */
- epicsTimeGetCurrent ( & begin );
- epicsMutexMustLock ( pClient->mutex );
- do {
- epicsMutexUnlock ( pClient->mutex );
- epicsEventWaitWithTimeout (
- pClient->stateChangeNotify,
- LOG_SERVER_CREATE_CONNECT_SYNC_TIMEOUT / 10.0 );
- epicsTimeGetCurrent ( & current );
- diff = epicsTimeDiffInSeconds ( & current, & begin );
- epicsMutexMustLock ( pClient->mutex );
- }
- while ( ! pClient->connected && diff < LOG_SERVER_CREATE_CONNECT_SYNC_TIMEOUT );
- epicsMutexUnlock ( pClient->mutex );
-
- if ( ! pClient->connected ) {
- fprintf (stderr, "log client create: timed out synchronizing with circuit connect to \"%s\" after %.1f seconds\n",
- pClient->name, LOG_SERVER_CREATE_CONNECT_SYNC_TIMEOUT );
- }
-
- errlogAddListener ( logClientSendMessage, (void *) pClient );
-
return (void *) pClient;
}
@@ -568,14 +542,21 @@ void epicsShareAPI logClientShow (logClientId id, unsigned level)
printf ("log client: disconnected from log server at \"%s\"\n", pClient->name);
}
- if (level>1) {
- printf ("log client: sock=%s, connect cycles = %u\n",
+ if (logClientPrefix) {
+ printf ("log client: prefix is \"%s\"\n", logClientPrefix);
+ }
+
+ if (level>0) {
+ printf ("log client: sock %s, connect cycles = %u\n",
pClient->sock==INVALID_SOCKET?"INVALID":"OK",
pClient->connectCount);
}
-
- if (logClientPrefix) {
- printf ("log client: prefix is \"%s\"\n", logClientPrefix);
+ if (level>1) {
+ printf ("log client: %u bytes in buffer\n", pClient->nextMsgIndex);
+ if (pClient->nextMsgIndex)
+ printf("-------------------------\n"
+ "%.*s-------------------------\n",
+ (int)(pClient->nextMsgIndex), pClient->msgBuf);
}
}
- Replies:
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Andrew Johnson via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Andrew Johnson via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 mdavidsaver via Core-talk
- [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 noreply--- via Core-talk
- Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Andrew Johnson via Core-talk
- Navigate by Date:
- Prev:
[Merge] ~dirk.zimoch/epics-base:iocLogClientFixes into epics-base:7.0 Dirk Zimoch via Core-talk
- Next:
Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Index:
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
<2019>
2020
2021
2022
2023
2024
- Navigate by Thread:
- Prev:
[Merge] ~epics-core/epics-base/+git/Com:make-simple into epics-base:7.0 noreply--- via Core-talk
- Next:
Re: [Merge] ~dirk.zimoch/epics-base:iocLogClientFixesTry2 into epics-base:7.0 Dirk Zimoch via Core-talk
- Index:
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
<2019>
2020
2021
2022
2023
2024
|