Hej,
- not all systems support locale,
- not all systems have the locale configured the same way as the database
- isprint(0x80..0xFF) is not portable across platforms
- characters >= 0x80 may be UTF-8 or ISO-8859 or anything else,
so escape them by default on all platforms
- Add the option -8 to suppress the escaping ("8 bit clean")
- Tested under Cygwin, Mingw and Mac OS X
This is what I can offer, based on 3.14.12.3.
If someone integrates it, I will be happy; otherwise I can make
a bzr pull request in a couple of days or weeks.
/Torsten
diff --git a/src/catools/caget.c b/src/catools/caget.c
index a2b0e08..c304552 100644
--- a/src/catools/caget.c
+++ b/src/catools/caget.c
@@ -82,6 +82,7 @@ static void usage (void)
" Default: Print all values\n"
" -# <count>: Print first <count> elements of an array\n"
" -S: Print array of char as a string (long string)\n"
+ " -8: Print strings 8 bit clean, do not escape chars >= 0x80\n"
"Floating point type format:\n"
" Default: Use %%g format\n"
" -e <nr>: Use %%e format, with a precision of <nr> digits\n"
@@ -389,7 +390,7 @@ int main (int argc, char *argv[])
LINE_BUFFER(stdout); /* Configure stdout buffering */
- while ((opt = getopt(argc, argv, ":taicnhsSe:f:g:l:#:d:0:w:p:F:")) != -1) {
+ while ((opt = getopt(argc, argv, ":taicnhsS8e:f:g:l:#:d:0:w:p:F:")) != -1) {
switch (opt) {
case 'h': /* Print usage */
usage();
@@ -455,6 +456,9 @@ int main (int argc, char *argv[])
case 's': /* Select string dbr for floating type data */
floatAsString = 1;
break;
+ case '8': /* Do not escape chars >= 0x80 */
+ epicsSetPrint8BitUnescaped(1);
+ break;
case 'S': /* Treat char array as (long) string */
charArrAsStr = 1;
break;
diff --git a/src/libCom/misc/epicsString.c b/src/libCom/misc/epicsString.c
index 907bba9..31091b1 100644
--- a/src/libCom/misc/epicsString.c
+++ b/src/libCom/misc/epicsString.c
@@ -28,6 +28,7 @@
#include "cantProceed.h"
#include "epicsString.h"
+static int print8BitUnescaped = 0;
/* Deprecated, use epicsStrnRawFromEscaped() instead */
int dbTranslateEscape(char *to, const char *from)
{
@@ -35,6 +36,19 @@ int dbTranslateEscape(char *to, const char *from)
return epicsStrnRawFromEscaped(to, big_enough, from, big_enough);
}
+epicsShareFunc void epicsSetPrint8BitUnescaped(int epicsPrint8BitUnescaped)
+{
+ print8BitUnescaped = epicsPrint8BitUnescaped;
+}
+
+epicsShareFunc int epicsisprint(int c)
+{
+ if (c >= 0x80) return print8BitUnescaped;
+ if (c < 0) return print8BitUnescaped;
+
+ return isprint(c);
+}
+
int epicsStrnRawFromEscaped(char *to, size_t outsize, const char *from,
size_t inlen)
{
@@ -126,7 +140,7 @@ int epicsStrnEscapedFromRaw(char *outbuf, size_t outsize, const char *inbuf,
case '\'': len = epicsSnprintf(outpos, maxout, "\\'"); break;
case '\"': len = epicsSnprintf(outpos, maxout, "\\\""); break;
default:
- if (isprint((int)c))
+ if (epicsisprint((int)c))
len = epicsSnprintf(outpos, maxout, "%c", c);
else
len = epicsSnprintf(outpos, maxout, "\\%03o",
@@ -161,7 +175,7 @@ size_t epicsStrnEscapedFromRawSize(const char *inbuf, size_t inlen)
nout++;
break;
default:
- if (!isprint((int)c))
+ if (!epicsisprint((int)c))
nout += 3;
}
}
@@ -225,7 +239,7 @@ int epicsStrPrintEscaped(FILE *fp, const char *s, size_t len)
case '\'': nout += fprintf(fp, "\\'"); break;
case '\"': nout += fprintf(fp, "\\\""); break;
default:
- if (isprint((int)c))
+ if (epicsisprint((int)c))
nout += fprintf(fp, "%c", c);
else
nout += fprintf(fp, "\\%03o", (unsigned char)c);
diff --git a/src/libCom/misc/epicsString.h b/src/libCom/misc/epicsString.h
index 658f333..469a13d 100644
--- a/src/libCom/misc/epicsString.h
+++ b/src/libCom/misc/epicsString.h
@@ -24,6 +24,8 @@
extern "C" {
#endif
+epicsShareFunc int epicsisprint(int c);
+epicsShareFunc void epicsSetPrint8BitUnescaped(int epicsPrint8BitUnescaped);
epicsShareFunc int epicsStrnRawFromEscaped(char *outbuf, size_t outsize,
const char *inbuf, size_t inlen);
epicsShareFunc int epicsStrnEscapedFromRaw(char *outbuf, size_t outsize,
diff --git a/startup/EpicsHostArch.pl b/startup/EpicsHostArch.pl
index 3d75200..8a6024b 100755
--- a/startup/EpicsHostArch.pl
+++ b/startup/EpicsHostArch.pl
@@ -32,6 +32,7 @@ sub GetEpicsHostArch { # no args
} elsif ($arch =~ m/MSWin32-x86/) { return "win32-x86";
} elsif ($arch =~ m/MSWin32-x64/) { return "windows-x64";
} elsif ($arch =~ m/cygwin/) { return "cygwin-x86";
+ } elsif ($arch =~ m/msys/) { return "win32-x86-mingw";
} elsif ($arch =~ m/darwin/) {
my($kernel, $hostname, $release, $version, $cpu) = POSIX::uname();
if ($cpu =~ m/Power Macintosh/) { return "darwin-ppc"; }
On 05/11/14 10:25, Goetz Pfeiffer wrote:
Hello Everybody,
when using catools with strings that contain non ASCII characters, these
characters are always printed or read as octal constants, no matter what the
locale settings are.
Note: In the following text, all command examples and console outputs are
indented by two characters and preceded by a double colon (::). This
convention is taken from the reStructuredText format
( http://docutils.sourceforge.net/rst.html ).
In the following example we want to use a character of the ISO-8859-1
character set. Why not simply use Unicode UTF-8? The reason is that display
managers like DM2K and EDM do not support Unicode. If we want to display
non-ASCII characters in string fields of records with these display managers,
we must use a character set like ISO-8859-1 (also known as Latin 1).
Here is an example on a linux host with unicode UTF-8:
First we write the degree character '°' in ISO-8859-1 encoding to the
EGU field of a record::
> echo "°" | iconv -f UTF-8 -t ISO_8859-1 | xargs caput U49ID8R:AmsTempT1.EGU
When we now read the value::
> caget U49ID8R:AmsTempT1.EGU
we get::
U49ID8R:AmsTempT1.EGU \260
The '°' character is printed as an octal number "260". This is okay
since with
UTF-8 on our host system we couldn't display an ISO-8859-1 character.
This is our locale::
> locale
LANG=en_US.UTF-8
LC_CTYPE="en_US.UTF-8"
LC_NUMERIC="en_US.UTF-8"
LC_TIME="en_US.UTF-8"
LC_COLLATE="en_US.UTF-8"
LC_MONETARY="en_US.UTF-8"
LC_MESSAGES="en_US.UTF-8"
LC_PAPER="en_US.UTF-8"
LC_NAME="en_US.UTF-8"
LC_ADDRESS="en_US.UTF-8"
LC_TELEPHONE="en_US.UTF-8"
LC_MEASUREMENT="en_US.UTF-8"
LC_IDENTIFICATION="en_US.UTF-8"
LC_ALL=
Now we change the locale to ISO-8859-1::
> export LC_ALL=de_DE.iso88591
> locale
LANG=en_US.UTF-8
LC_CTYPE="de_DE.iso88591"
LC_NUMERIC="de_DE.iso88591"
LC_TIME="de_DE.iso88591"
LC_COLLATE="de_DE.iso88591"
LC_MONETARY="de_DE.iso88591"
LC_MESSAGES="de_DE.iso88591"
LC_PAPER="de_DE.iso88591"
LC_NAME="de_DE.iso88591"
LC_ADDRESS="de_DE.iso88591"
LC_TELEPHONE="de_DE.iso88591"
LC_MEASUREMENT="de_DE.iso88591"
LC_IDENTIFICATION="de_DE.iso88591"
LC_ALL=de_DE.iso88591
Now we call caget again::
U49ID8R:AmsTempT1.EGU \260
The character is still printed as an octal value although our locale
settings
(LC_ALL) define that this is a printable character. caget uses function
epicsStrnEscapedFromRaw() from libCom in EPICS base to convert a string to a
printable form. This function calls isprint() to determine which
characters are
printable. The way caget is written means that locale settings from the
environment are ignored.
Using locale settings from the environment in C is simple. The C program
must
have this include::
#include <locale.h>
And it has to call setlocale like this::
setlocale(LC_ALL, "");
Here is, as an example, my patch of caget.c in Epics base:
---------------------------------
--- caget.c.old 2014-11-05 09:31:48.010589013 +0100
+++ caget.c 2014-11-05 09:43:28.611042679 +0100
@@ -28,6 +28,7 @@
#include <stdio.h>
#include <string.h>
+#include <locale.h>
#include <epicsStdlib.h>
#include <epicsString.h>
@@ -59,6 +60,10 @@
" -w <sec>: Wait time, specifies CA timeout, default is %f
second(s)\n"
" -c: Asynchronous get (use ca_get_callback and wait for
completion)\n"
" -p <prio>: CA priority (0-%u, default 0=lowest)\n"
+ "Locale:\n"
+ " -L: use locale according to environment variables in order to\n"
+ " determine what characters are printable. Non printable
characters\n"
+ " are shown as 3 digit octal numbers preceded by a backslash\n"
"Format options:\n"
" Default output format is \"name value\"\n"
" -t: Terse mode - print only value, without name\n"
@@ -389,11 +394,14 @@
LINE_BUFFER(stdout); /* Configure stdout buffering */
- while ((opt = getopt(argc, argv, ":taicnhsSe:f:g:l:#:d:0:w:p:F:"))
!= -1) {
+ while ((opt = getopt(argc, argv, ":taicnhLsSe:f:g:l:#:d:0:w:p:F:"))
!= -1) {
switch (opt) {
case 'h': /* Print usage */
usage();
return 0;
+ case 'L': /* use environment locale settings */
+ setlocale(LC_ALL, "");
+ break;
case 't': /* Terse output mode */
complainIfNotPlainAndSet(&format, terse);
break;
---------------------------------
With these changes the new option "-L" causes caget to use locale
settings from
the environment. Here is an example how to use this::
> export LC_ALL=de_DE.iso88591
> caget -L U49ID8R:AmsTempT1.EGU
U49ID8R:AmsTempT1.EGU °
If the encoding of the terminal emulator (xterm, konsole etc.) is also
set to
ISO-8859-1 (Latin 1) the "°" character is now displayed correctly.
Maybe we could add support for locale settings from the environment to all
catools programs and possibly the IOC shell. I would propose an option "-L"
that enables this feature. What is your opinion?
Greetings,
Goetz Pfeiffer
- References:
- catools (caget/caput/camonitor) and locale settings from the environment Goetz Pfeiffer
- Navigate by Date:
- Prev:
Re: Permissible in C to call CA I/O functions from callback? J. Lewis Muir
- Next:
Asyn and stringin records freddie.akeroyd
- Index:
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
<2014>
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
- Navigate by Thread:
- Prev:
Re: xsubpp moved under Sscientif Linux 7.0 Wang Xiaoqiang (PSI)
- Next:
Record processing during iocInit? Jörn Wüstenfeld
- Index:
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
<2014>
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
|