/*
* file: utfcode.c
*
* (c) Peter Kleiweg 2000
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2,
* or (at your option) any later version.
*/
#define UTFcodeVERSION "1.0"
#ifdef __MSDOS__
#ifndef __COMPACT__
#error Memory model COMPACT required
#endif /* __COMPACT__ */
#include <dir.h>
#endif /* __MSDOS__ */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* State of the base64 packer in outbyte(): the states used are 0, 2 and 4. */
int obits = 0;
/* Non-zero: main() calls utf_7(); cleared by option -8. */
int utf7 = 1;
/* Non-zero: main() calls utf_8(); cleared by option -7. */
int utf8 = 1;
/* Number of values stored in lu. */
int nr;

/* Six-bit value that outoct() prints as one base64 character. */
unsigned octal;

/* The numbers given on the command line, allocated in main(). */
long unsigned *lu;

/* Name used in the error and usage messages, set by get_programname(). */
char *programname;
/* Spare block allocated in main() and freed just before an out-of-memory error. */
char *no_mem_buffer;
/* Message passed to errit() when an allocation fails. */
char out_of_memory [] = "Out of memory";

void utf_7 (void);
void utf_8 (void);
void outbyte (unsigned i);
void outoct (void);
void get_programname (char const *argv0);
void errit (char const *format, ...);
void syntax (void);
void *s_malloc (size_t size);
void *s_realloc (void *block, size_t size);
char *s_strdup (char const *s);
/*
 * Program entry point.
 *
 * Leading options "-7" and "-8" restrict the output to UTF-7 or UTF-8;
 * without them both encodings are printed.  Every remaining argument is
 * a number in a form strtoul() accepts with base 0 (decimal, 0octal,
 * 0xhex) or in the form U+hex.
 *
 * An argument that is not completely a number (empty, trailing
 * characters, a minus sign) is reported through errit() instead of
 * being silently encoded as 0.  Without any number the usage text is
 * printed by syntax().  Both errit() and syntax() exit with status 1.
 *
 * Returns 0 when all requested encodings have been written.
 */
int main (int argc, char *argv [])
{
    int
        i,
        uplus;
    char
        *arg,
        *end,
        first;

    /* spare block that s_malloc()/s_realloc() free before reporting failure */
    no_mem_buffer = (char *) malloc (1024);

    /* argv [0] may be missing when the program is started with an empty argv */
    get_programname ((argc > 0 && argv [0]) ? argv [0] : "");

    while (argc > 1) {
        if (! strcmp (argv [1], "-7")) {
            utf7 = 1;
            utf8 = 0;
        } else if (! strcmp (argv [1], "-8")) {
            utf7 = 0;
            utf8 = 1;
        } else
            break;
        argv++;
        argc--;
    }

    /* "< 2" also covers argc == 0, which would otherwise give nr == -1 */
    if (argc < 2)
        syntax ();

    nr = argc - 1;
    lu = (long unsigned *) s_malloc (nr * sizeof (long unsigned));
    for (i = 0; i < nr; i++) {
        arg = argv [i + 1];
        first = arg [0];
        uplus = (first == 'U' || first == 'u') && arg [1] == '+';
        if (uplus) {
            /* turn U+hhhh into 0xhhhh so that strtoul() reads it as hex */
            arg [0] = '0';
            arg [1] = 'x';
        }
        lu [i] = strtoul (arg, &end, 0);
        /*
         * Nothing converted, characters left over, or a sign that
         * strtoul() would quietly turn into a huge value: reject.
         */
        if (end == arg || *end != '\0' || strchr (arg, '-')) {
            if (uplus) {
                /* show the argument the way the user typed it */
                arg [0] = first;
                arg [1] = '+';
            }
            errit ("Invalid number: %s", arg);
        }
    }

    if (utf7)
        utf_7 ();
    if (utf8)
        utf_8 ();

    return 0;
}
/*
 * Feed one 16-bit unit to the base64 packer, most significant byte first.
 */
static void utf7_unit (unsigned unit)
{
    outbyte ((unit >> 8) & 0xFF);
    outbyte (unit & 0xFF);
}

/*
 * Write the nr values in lu to stdout as a single UTF-7 shifted
 * sequence: '+', the base64 characters, '-', and a newline.
 *
 * Values below 0x10000 are written as one 16-bit unit.  Values from
 * 0x10000 up to 0x10FFFF are written as a UTF-16 surrogate pair, which
 * is how UTF-7 represents them.  Anything larger is reported through
 * errit(), which exits; this check is done before any output is
 * written so that no partial sequence appears on stdout.
 */
void utf_7 (void)
{
    int
        i;
    long unsigned
        c;

    for (i = 0; i < nr; i++)
        if (lu [i] > 0x10FFFF)
            errit ("Too large for UTF-7: 0x%lX", lu [i]);

    /* start with an empty packer */
    obits = 0;

    fputc ('+', stdout);
    for (i = 0; i < nr; i++) {
        c = lu [i];
        if (c < 0x10000)
            utf7_unit ((unsigned) c);
        else {
            c -= 0x10000;
            utf7_unit ((unsigned) (0xD800 + (c >> 10)));    /* high surrogate */
            utf7_unit ((unsigned) (0xDC00 + (c & 0x3FF)));  /* low surrogate */
        }
    }

    /* bits still waiting in octal: write them, padded with zero bits */
    if (obits) {
        outoct ();
        obits = 0;
    }
    fputs ("-\n", stdout);
}
/*
 * Add the byte u to the base64 output.
 *
 * obits is the packer state (0, 2 or 4).  The upper part of u is joined
 * with what is waiting in octal and written with outoct(); the lower
 * part of u is kept in octal, shifted up to the top of the next six-bit
 * group.  From state 4 the lower part is a full group on its own and is
 * written at once, which brings the state back to 0.
 * Any other value of obits leaves everything untouched.
 */
void outbyte (unsigned u)
{
    int
        shift;

    if (obits != 0 && obits != 2 && obits != 4)
        return;

    /* this many low bits of u do not fit into the current group */
    shift = obits + 2;

    /* in state 0 nothing is waiting: whatever octal holds is stale */
    if (obits == 0)
        octal = 0;
    octal |= (u >> shift);
    outoct ();

    octal = (u & ((1u << shift) - 1u)) << (6 - shift);
    obits = shift;
    if (obits == 6) {
        outoct ();
        obits = 0;
    }
}
/*
 * Write the base64 character for the value in octal to stdout:
 * 0-25 give 'A'-'Z', 26-51 give 'a'-'z', 52-61 give '0'-'9',
 * 62 gives '+', and every other value gives '/'.
 */
void outoct ()
{
    static char const
        alphabet [] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            "abcdefghijklmnopqrstuvwxyz"
            "0123456789+/";
    char
        ch;

    ch = (octal < 64) ? alphabet [octal] : '/';
    fputc (ch, stdout);
}
/*
 * Write the nr values in lu to stdout as UTF-8, followed by a newline.
 *
 * Sequences of one to six bytes are produced, covering values below
 * 0x80000000.  A larger value is reported through errit(), which exits.
 */
void utf_8 ()
{
    /* limit [n - 1]: first value that no longer fits in n bytes */
    static long unsigned const
        limit [6] = {
            0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL, 0x80000000UL
        };
    /* lead [n - 1]: marker bits of the first byte of an n-byte sequence */
    static int const
        lead [6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
    int
        k,
        len,
        shift;
    long unsigned
        c;

    for (k = 0; k < nr; k++) {
        c = lu [k];

        /* shortest sequence length that can hold c, or 7 if there is none */
        len = 1;
        while (len <= 6 && c >= limit [len - 1])
            len++;

        if (len > 6)
            errit ("Too large for UTF-8: 0x%lX", c);
        else {
            /* first byte: marker plus the topmost bits of c */
            shift = 6 * (len - 1);
            fputc (lead [len - 1] | (c >> shift), stdout);
            /* continuation bytes: six bits each, highest group first */
            while (shift > 0) {
                shift -= 6;
                fputc (0x80 | ((c >> shift) & 0x3F), stdout);
            }
        }
    }
    fputc ('\n', stdout);
}
/*
 * Print an error message to stderr and exit with status 1.
 *
 * The message is "Error <programname>: " followed by the text built
 * from the printf-style format and its arguments, surrounded by blank
 * lines.  This function does not return.
 */
void errit (char const *format, ...)
{
    va_list
        list;

    fprintf (stderr, "\nError %s: ", programname);

    va_start (list, format);
    vfprintf (stderr, format, list);
    /* every va_start() needs a matching va_end() in the same function */
    va_end (list);

    fprintf (stderr, "\n\n");

    exit (1);
}
/*
 * Set programname to a private copy of the name of the program.
 *
 * argv0 is the first command line argument.  Under MS-DOS the file name
 * part is taken with fnsplit(); elsewhere everything after the last '/'
 * is used, or all of argv0 when it contains no '/'.
 *
 * The copy is made with malloc() rather than strdup(), which is not
 * part of ISO C before C23.  When argv0 is NULL, or when no memory is
 * available for the copy, programname is set to a fixed fallback name,
 * so it is never left NULL for the messages that print it.
 */
void get_programname (char const *argv0)
{
    static char
        fallback [] = "utfcode";
    char const
        *base;
    char
        *copy;
#ifdef __MSDOS__
    char
        name [MAXFILE];
#else /* unix */
    char const
        *slash;
#endif

    base = fallback;
#ifdef __MSDOS__
    if (argv0) {
        fnsplit (argv0, NULL, NULL, name, NULL);
        base = name;
    }
#else /* unix */
    if (argv0) {
        slash = strrchr (argv0, '/');
        base = slash ? slash + 1 : argv0;
    }
#endif

    copy = (char *) malloc (strlen (base) + 1);
    if (copy) {
        strcpy (copy, base);
        programname = copy;
    } else
        programname = fallback;
}
/*
 * malloc() that does not return on failure: when no memory is
 * available, the spare block no_mem_buffer is freed and the
 * out-of-memory message is given to errit(), which exits.
 */
void *s_malloc (size_t size)
{
    void
        *mem = malloc (size);

    if (mem)
        return mem;

    free (no_mem_buffer);
    errit (out_of_memory);
    return mem;
}
/*
 * realloc() that does not return on failure: when the block cannot be
 * resized, the spare block no_mem_buffer is freed and the
 * out-of-memory message is given to errit(), which exits.
 */
void *s_realloc (void *block, size_t size)
{
    void
        *mem = realloc (block, size);

    if (mem)
        return mem;

    free (no_mem_buffer);
    errit (out_of_memory);
    return mem;
}
/*
 * Return a copy of the string s in memory obtained from s_malloc().
 * A NULL pointer is copied as the empty string.
 */
char *s_strdup (char const *s)
{
    char const
        *src = s ? s : "";
    char
        *dup = (char *) s_malloc (strlen (src) + 1);

    strcpy (dup, src);
    return dup;
}
void syntax ()
{
fprintf (
stderr,
"\n"
"This is utfcode, version " UTFcodeVERSION "\n"
"\n"
"Usage: %s [-7|-8] [number...]\n"
"\n"
" -7: utf-7 only\n"
" -8: utf-8 only\n"
"\n",
programname
);
exit (1);
}