Hello Chen,
I created entry 300 in the Mantis bug
tracking system. Feel free to add any comments there that you might have.
I don’t have a stack trace so I am
attempting to infer the cause based on quite a few different paths through the
code to this particular assert failure. After some analysis I can identify two
possible causes.
1) A string, a scalar DBR_STRING of type dbr_string_t, that somehow isn’t null-terminated is passing through the database
to the CA server.
2) The server is printing a diagnostic message and a record name, as
obtained with “RECORD_NAME ( paddr )” from the database API, is unusually
long (perhaps it is corrupt or alternatively it isn’t null-terminated).
I can easily detect (1) and fix it in the
scalar case. Thanks to C99 we also now have functions like vsnprintf and I can easily
make the code robust against (2).
I committed patches to R3.14 for both (1)
and (2).
It would be very interesting to see if
this fix resolves your problem. To test you have two options.
1) Attempt to patch R3.14.9 with the diffs provided below (that might
be somewhat susceptible to human error)
2) Wait for the next CVS snapshot to come around for the EPICS R3.14
branch, and fetch a complete copy of the latest R3.14.10 efforts (only for
testing purposes).
Thanks for any help that you can provide
with testing.
Jeff Hill
Index: camessage.c
===================================================================
RCS file:
/net/phoebus/epicsmgr/cvsroot/epics/base/src/rsrv/camessage.c,v
retrieving revision
1.105.2.10
diff -c -b -r1.105.2.10
camessage.c
*** camessage.c 17
Aug 2007 22:31:10 -0000 1.105.2.10
--- camessage.c 7
Sep 2007 17:03:09 -0000
***************
*** 24,29 ****
--- 24,30 ----
#include
"osiSock.h"
#include
"osiPoolStatus.h"
#include
"epicsEvent.h"
+ #include
"epicsStdio.h"
#include
"epicsThread.h"
#include
"epicsMutex.h"
#include
"epicsTime.h"
***************
*** 134,139 ****
--- 135,141 ----
va_list
args
)
{
+
static const ca_uint32_t maxDiagLen = 512;
struct channel_in_use *pciu;
caHdr
*pReqOut;
char
*pMsgString;
***************
*** 174,180 ****
* allocate plenty of space for a sprintf() buffer
*/
localStatus = cas_copy_in_header ( client,
!
CA_PROTO_ERROR, 512, 0, 0, cid, status,
( void * ) &pReqOut );
if ( localStatus != ECA_NORMAL ) {
errlogPrintf ( "caserver: Unable to deliver err msg \"%s\" to
client because \"%s\"\n",
--- 176,182 ----
* allocate plenty of space for a sprintf() buffer
*/
localStatus = cas_copy_in_header ( client,
!
CA_PROTO_ERROR, maxDiagLen, 0, 0, cid, status,
( void * ) &pReqOut );
if ( localStatus != ECA_NORMAL ) {
errlogPrintf ( "caserver: Unable to deliver err msg \"%s\" to
client because \"%s\"\n",
***************
*** 215,223 ****
/*
* add their context string into the protocol
*/
!
status = vsprintf ( pMsgString, pformat, args );
! if
( status >= 0 ) {
!
size += ( ( ca_uint32_t ) status ) + 1u;
}
cas_commit_msg ( client, size );
}
--- 217,236 ----
/*
* add their context string into the protocol
*/
!
localStatus = epicsVsnprintf ( pMsgString, maxDiagLen, pformat, args );
! if
( localStatus >= 1 ) {
!
unsigned diagLen = ( unsigned ) localStatus;
!
if ( diagLen < maxDiagLen ) {
!
size += (ca_uint32_t) (diagLen + 1u);
!
}
!
else {
!
errlogPrintf (
! "caserver:
vsend_err: epicsVsnprintf detected "
!
"error message truncation, pFormat = \"%s\"\n",
!
pformat );
!
size += maxDiagLen;
!
pMsgString [ maxDiagLen - 1 ] = '\0';
!
}
}
cas_commit_msg ( client, size );
}
***************
*** 495,501 ****
struct channel_in_use *pciu = pevext->pciu;
const int readAccess = asCheckGet ( pciu->asClientPVT );
int status;
-
int strcnt;
int v41;
SEND_LOCK ( pClient );
--- 508,513 ----
***************
*** 575,581 ****
}
}
else {
!
ca_uint32_t msgSize = pevext->size;
int cacStatus = caNetConvert (
pevext->msg.m_dataType, pPayload, pPayload,
TRUE /* host -> net format */, pevext->msg.m_count );
--- 587,593 ----
}
}
else {
!
ca_uint32_t payloadSize = pevext->size;
int cacStatus = caNetConvert (
pevext->msg.m_dataType, pPayload, pPayload,
TRUE /* host -> net format */, pevext->msg.m_count );
***************
*** 586,601 ****
*/
if ( pevext->msg.m_dataType == DBR_STRING
&& pevext->msg.m_count == 1 ) {
!
/* add 1 so that the string terminator will be shipped */
!
strcnt = strlen ( (char *) pPayload ) + 1;
!
msgSize = strcnt;
}
}
else {
!
memset ( pPayload, 0, msgSize );
cas_set_header_cid ( pClient, cacStatus );
}
!
cas_commit_msg ( pClient, msgSize );
}
/*
--- 598,622 ----
*/
if ( pevext->msg.m_dataType == DBR_STRING
&& pevext->msg.m_count == 1 ) {
!
char * pStr = (char *) pPayload;
!
size_t strcnt = strlen ( pStr );
!
if ( strcnt < payloadSize ) {
!
payloadSize = ( ca_uint32_t ) ( strcnt + 1u );
!
}
!
else {
!
pStr[payloadSize-1] = '\0';
!
errlogPrintf (
!
"caserver: read_reply: detected DBR_STRING w/o nill termination "
!
"in response from db_get_field, pPayload = \"%s\"\n",
!
pStr );
!
}
}
}
else {
!
memset ( pPayload, 0, payloadSize );
cas_set_header_cid ( pClient, cacStatus );
}
!
cas_commit_msg ( pClient, payloadSize );
}
/*
***************
*** 620,626 ****
ca_uint32_t payloadSize;
void *pPayload;
int status;
-
int strcnt;
int v41;
if
( ! pciu ) {
--- 641,646 ----
***************
*** 686,698 ****
* boundary
*/
if ( mp->m_dataType == DBR_STRING && mp->m_count == 1 ) {
!
/* add 1 so that the string terminator will be shipped */
!
strcnt = strlen ( (char *) pPayload ) + 1;
!
cas_commit_msg ( pClient, strcnt );
}
else {
!
cas_commit_msg ( pClient, payloadSize );
}
SEND_UNLOCK ( pClient );
--- 706,725 ----
* boundary
*/
if ( mp->m_dataType == DBR_STRING && mp->m_count == 1 ) {
!
char * pStr = (char *) pPayload;
!
size_t strcnt = strlen ( pStr );
!
if ( strcnt < payloadSize ) {
!
payloadSize = ( ca_uint32_t ) ( strcnt + 1u );
}
else {
!
pStr[payloadSize-1] = '\0';
!
errlogPrintf (
!
"caserver: read_action: detected DBR_STRING w/o nill termination "
!
"in response from db_get_field, pPayload = \"%s\"\n",
!
pStr );
!
}
}
+
cas_commit_msg ( pClient, payloadSize );
SEND_UNLOCK ( pClient );
Hi,
I've written an ioc (using TekVISA from Tektronix on a DPO7054 Oscilloscope,
Windows XP) and it starts okay at first. But when it runs for like several
minutes, I got an error like:
==============================================================
A call to "assert (size <= ntohs ( pMsg->m_postsize ))" failed
in ..\caserverio.c line 344.
EPICS Release EPICS R3.14.5 $R3-14-5$ $2004/02/04 20:45:33$.
Current time Fri Sep 07 2007 09:49:29.518463915 .
Please E-mail this message to the author or to [email protected]
Calling epicsThreadSuspendSelf()
==============================================================
then it gets stuck. I suppose that the cas_commit_msg function checks the
pMsg->m_postsize and finds out it's larger than size. But I don't know where
these variables come from.
I'm sorry, I'm new to EPICS and have little experience with C programming.
Could any of you give me a clue as to what's happening? Any input is
welcome.
--
Best Regards
Chen
----------------------------------------------------------------
Zhi-chu Chen |
Shanghai Synchrotron Radiation Facility
No. 2019 | Jialuo Rd. | Jiading | Shanghai | P.R. China
tel:
086 21 5955 3405 | zhichu.chen.googlepages.com
| www.sinap.ac.cn
----------------------------------------------------------------