campo-sirio/pdf/pdcore/pc_chartabs.c
guy 3bf951f42c Patch level : 10.0
Files correlati     : pdflib
Ricompilazione Demo : [ ]
Commento            :
Aggiornata pdflib.dll alla versione 7.0.4


git-svn-id: svn://10.65.10.50/trunk@18580 c028cbd2-c16b-5b4b-a496-9718f37d4682
2009-03-23 08:55:58 +00:00

671 lines
16 KiB
C
Executable File

/*---------------------------------------------------------------------------*
| PDFlib - A library for generating PDF on the fly |
+---------------------------------------------------------------------------+
| Copyright (c) 1997-2006 Thomas Merz and PDFlib GmbH. All rights reserved. |
+---------------------------------------------------------------------------+
| |
| This software is subject to the PDFlib license. It is NOT in the |
| public domain. Extended versions and commercial licenses are |
| available, please check http://www.pdflib.com. |
| |
*---------------------------------------------------------------------------*/
/* $Id: pc_chartabs.c,v 1.3 2009-03-23 08:51:17 guy Exp $
*
* PDFlib routines for converting glyph or character names to Unicode
* and vice versa
*
*/
#define PC_CHARTABS_C
#include "pc_util.h"
#include "pc_chartabs.h"
#include "pc_ctype.h"
/* ---------------- general character search functions ------------------- */
/*
* Binary search for list of codes in a pdc_glyph_tab array sorted by glyphname
*/
int
pdc_glyphname2codelist(const char *glyphname, const pdc_glyph_tab *glyphtab,
int tabsize, pdc_ushort *codelist)
{
const char *s1, *s2;
int lo = 0;
int hi = glyphname ? tabsize : lo;
int i, j, cmp, nv = 0;
while (lo < hi)
{
i = (lo + hi) / 2;
s1 = glyphname;
s2 = glyphtab[i].name;
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
cmp = (*s1 - *s2);
if (cmp == 0)
{
j = i;
for (; i >= 1; i--)
{
s1 = glyphname;
s2 = glyphtab[i-1].name;
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
if (*s1 != *s2)
break;
}
for (; i < tabsize; i++)
{
if (i > j)
{
s1 = glyphname;
s2 = glyphtab[i].name;
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
if (*s1 != *s2)
break;
}
codelist[nv] = glyphtab[i].code;
nv++;
}
return nv;
}
if (cmp < 0)
hi = i;
else
lo = i + 1;
}
return nv;
}
/*
* Binary search for code in a pdc_glyph_tab array sorted by glyphname
*/
int
pdc_glyphname2code(const char *glyphname, const pdc_glyph_tab *glyphtab,
int tabsize)
{
const char *s1, *s2;
int lo = 0;
int hi = glyphname ? tabsize : lo;
int i, cmp;
while (lo < hi)
{
i = (lo + hi) / 2;
s1 = glyphname;
s2 = glyphtab[i].name;
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
cmp = (*s1 - *s2);
if (cmp == 0)
return (int) glyphtab[i].code;
if (cmp < 0)
hi = i;
else
lo = i + 1;
}
return -1;
}
/*
* Binary search for glyphname in a pdc_glyph_tab array sorted by code
*/
const char *
pdc_code2glyphname(pdc_ushort code, const pdc_glyph_tab *glyphtab, int tabsize)
{
int lo = 0;
int hi = tabsize;
while (lo < hi)
{
int i = (lo + hi) / 2;
if (code == glyphtab[i].code)
return glyphtab[i].name;
if (code < glyphtab[i].code)
hi = i;
else
lo = i + 1;
}
return NULL;
}
/*
* Binary search for list of codes in a pdc_code_map array sorted by source code
*/
int
pdc_code2codelist(pdc_core *pdc, pdc_ushort code,
const pdc_code_map *codemap, int tabsize,
pdc_ushort *codelist, int listsize)
{
int lo = 0;
int hi = tabsize;
int nv = 0;
while (lo < hi)
{
int i = (lo + hi) / 2;
if (codemap[i].src == code)
{
for (; i >= 1; i--)
{
if (codemap[i-1].src != code)
break;
}
for (; i < tabsize; i++)
{
if (codemap[i].src != code)
break;
if (nv >= listsize)
pdc_error(pdc, PDC_E_CONV_LIST_MEMOVERFLOW, 0, 0, 0, 0);
codelist[nv] = codemap[i].dst;
nv++;
}
return nv;
}
if (codemap[i].src > code)
hi = i;
else
lo = i + 1;
}
return nv;
}
/*
* Binary search for glyphname in a pdc_glyph_tab array sorted by glyphname
* to get the static pointer for the glyphname.
*/
const char *
pdc_glyphname2glyphname(const char *glyphname,
const pdc_glyph_tab *glyphtab, int tabsize)
{
const char *s1, *s2;
int lo = 0;
int hi = tabsize;
int cmp, i;
while (lo < hi)
{
i = (lo + hi) / 2;
s1 = glyphname;
s2 = glyphtab[i].name;
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
cmp = (*s1 - *s2);
if (cmp == 0)
return glyphtab[i].name;
if (cmp < 0)
hi = i;
else
lo = i + 1;
}
return NULL;
}
/* ---------------- special character search functions ------------------- */
/*
* Returns the Unicode value of a glyph name in Adobe Glyph List 1.2'.
* If the name is not contained in AGL, -1 will be returned.
*/
int
pdc_adobe2unicode(const char *glyphname)
{
return pdc_glyphname2code(glyphname, tab_agl2uni,
(sizeof (tab_agl2uni)) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns the name in AGL 1.2' or ZapfDingbats font,
* which corresponds to the supplied Unicode value.
* If the value doesn't have a corresponding glyph name,
* NULL will be returned.
* For control codes ".notdef" will be returned.
* But this is not compatibel with AGL 2.0!
*/
const char *
pdc_unicode2adobe(pdc_ushort uv)
{
const char *glyphname;
/* AGL 1.2' glyphname */
glyphname = pdc_code2glyphname(uv, tab_uni2agl,
(sizeof tab_uni2agl) / (sizeof (pdc_glyph_tab)));
if (glyphname != NULL)
return glyphname;
/* C0 and C1 control characters.
* They have never a graphical representation but are defined.
*/
if (uv < PDC_UNICODE_SPACE ||
(uv >= PDC_UNICODE_DELETE && uv < PDC_UNICODE_NBSP))
return glyph__notdef;
return NULL;
}
const char *
pdc_get_notdef_glyphname(void)
{
return (char *) glyph__notdef;
}
/*
* Returns the Unicode value of a ZapfDingbats glyph name.
* If the name is not contained in the ZapfDingbats list
* -1 will be returned.
*/
int
pdc_zadb2unicode(const char *glyphname)
{
return pdc_glyphname2code(glyphname, tab_zadb2uni,
(sizeof (tab_zadb2uni)) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns the glyph name in the ZapfDingbats font which corresponds
* to the supplied Unicode value. If the value doesn't have a
* corresponding glyph name NULL will be returned.
*/
const char *
pdc_unicode2zadb(pdc_ushort uv)
{
return pdc_code2glyphname(uv, tab_uni2zadb,
(sizeof tab_uni2zadb) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns the Unicode values of a glyph name in Adobe Glyph List 2.0
* which is not contained in AGL-1.2'.
*
* The function presupposes that uvlist is an array of PDC_MAX_UVLIST.
*
* Return value is the number of Unicodes.
*/
int
pdc_newadobe2unicodelist(const char *glyphname, pdc_ushort *uvlist)
{
return pdc_glyphname2codelist(glyphname, tab_diffagl2uni,
(sizeof tab_diffagl2uni) / (sizeof (pdc_glyph_tab)),
uvlist);
}
/*
* Returns the glyph name in Adobe Glyph List 2.0
* which is not contained in AGL-1.2' corresponding
* to the supplied Unicode value. Ambiguous Unicode
* values or glyph names are not supported!
* If the value doesn't have a corresponding glyph name
* NULL will be returned.
*/
const char *
pdc_unicode2newadobe(pdc_ushort uv)
{
return pdc_code2glyphname(uv, tab_uni2diffagl,
(sizeof tab_uni2diffagl) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns the glyph name in Adobe Glyph List 2.0
* which is not contained in AGL-1.2' and which matches
* the supplied glyph name.
* If no match is found NULL will be returned.
*/
const char *
pdc_get_newadobe_glyphname(const char *glyphname)
{
return pdc_glyphname2glyphname(glyphname, tab_diffagl2uni,
(sizeof tab_diffagl2uni) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns the alternative Unicode value of a double-mapped
* AGL-1.2 glyph name. If the name is not double-mapped,
* -1 will be returned.
*/
int
pdc_glyphname2altunicode(const char *glyphname)
{
return pdc_glyphname2code(glyphname, tab_double_mappping,
(sizeof (tab_double_mappping)) / (sizeof (pdc_glyph_tab)));
}
/*
* Returns true if a character name is contained in pc_standard_latin_charset.
* Otherwise false will be returned.
*/
pdc_bool
pdc_is_std_charname(const char *glyphname)
{
const char *s1, *s2;
int lo = 0;
int hi = ((sizeof pc_standard_latin_charset) / (sizeof (char *)));
int cmp, i;
if (glyphname)
{
while (lo < hi)
{
i = (lo + hi) / 2;
s1 = glyphname;
s2 = pc_standard_latin_charset[i];
for (; *s1; ++s1, ++s2)
{
if (*s1 != *s2)
break;
}
cmp = (*s1 - *s2);
if (cmp == 0)
return pdc_true;
if (cmp < 0)
hi = i;
else
lo = i + 1;
}
}
return pdc_false;
}
/* -------------- special character mapping for Type1 fonts --------------- */
/*
* Deletes a bit in a bit mask. The bit indicates that
* the respective glyph name of AGL 2.0 is not available
* in a PostScript font. The glyph name is used to avoid
* ambiguities (see comment in pc_chartabs.h)
*
*/
#define PDC_BIT_NBSP (1L<<0)
#define PDC_BIT_SHY (1L<<1)
#define PDC_BIT_MODMACRON (1L<<2)
#define PDC_BIT_CAPDELTA (1L<<3)
#define PDC_BIT_CAPOMEGA (1L<<4)
#define PDC_BIT_DIVSLASH (1L<<5)
#define PDC_BIT_BULLETOP (1L<<6)
#define PDC_BIT_SMALLMU (1L<<7)
void
pdc_delete_missingglyph_bit(pdc_ushort uv, pdc_ulong *bmask)
{
switch(uv)
{
case PDC_UNICODE_NBSP:
*bmask &= ~PDC_BIT_NBSP;
return;
case PDC_UNICODE_SHY:
*bmask &= ~PDC_BIT_SHY;
return;
case PDC_UNICODE_MODMACRON:
*bmask &= ~PDC_BIT_MODMACRON;
return;
case PDC_UNICODE_CAPDELTA:
*bmask &= ~PDC_BIT_CAPDELTA;
return;
case PDC_UNICODE_CAPOMEGA:
*bmask &= ~PDC_BIT_CAPOMEGA;
return;
case PDC_UNICODE_DIVSLASH:
*bmask &= ~PDC_BIT_DIVSLASH;
return;
case PDC_UNICODE_BULLETOP:
*bmask &= ~PDC_BIT_BULLETOP;
return;
case PDC_UNICODE_SMALLMU:
*bmask &= ~PDC_BIT_SMALLMU;
return;
default:
return;
}
}
/*
* Returnes an alternative Unicode value and/or glyph name for an
* AGL 2.0 glyph name which is not available in a PostScript font.
*
*/
pdc_ushort
pdc_get_alter_glyphname(pdc_ushort uv, pdc_ulong bmask, char **glyphname)
{
switch(uv)
{
case PDC_UNICODE_NBSP:
if (bmask & PDC_BIT_NBSP)
{
if (glyphname)
*glyphname = (char *) glyph_space;
return PDC_UNICODE_SPACE;
}
break;
case PDC_UNICODE_SHY:
if (bmask & PDC_BIT_SHY)
{
if (glyphname)
*glyphname = (char *) glyph_hyphen;
return PDC_UNICODE_HYPHEN;
}
break;
case PDC_UNICODE_MODMACRON:
if (bmask & PDC_BIT_MODMACRON)
{
if (glyphname)
*glyphname = (char *) glyph_macron;
return PDC_UNICODE_MACRON;
}
break;
case PDC_UNICODE_CAPDELTA:
if (bmask & PDC_BIT_CAPDELTA)
{
if (glyphname)
*glyphname = (char *) glyph_Delta;
return PDC_UNICODE_INCREMENT;
}
break;
case PDC_UNICODE_CAPOMEGA:
if (bmask & PDC_BIT_CAPOMEGA)
{
if (glyphname)
*glyphname = (char *) glyph_Omega;
return PDC_UNICODE_OHMSIGN;
}
break;
case PDC_UNICODE_DIVSLASH:
if (bmask & PDC_BIT_DIVSLASH)
{
if (glyphname)
*glyphname = (char *) glyph_fraction;
return PDC_UNICODE_FRACSLASH;
}
case PDC_UNICODE_BULLETOP:
if (bmask & PDC_BIT_BULLETOP)
{
if (glyphname)
*glyphname = (char *) glyph_periodcentered;
return PDC_UNICODE_MIDDLEDOT;
}
case PDC_UNICODE_SMALLMU:
if (bmask & PDC_BIT_SMALLMU)
{
if (glyphname)
*glyphname = (char *) glyph_mu;
return PDC_UNICODE_MICRO;
}
default:
if (glyphname)
{
if (*glyphname == NULL)
*glyphname = (char *) pdc_get_notdef_glyphname();
return 0;
}
}
return uv;
}
/*
* Returns the Unicode value for a given string Unicode expression:
*
* - Byte 1...255 -> U0001...U00FF
* - U+XXXXX
* - 0xXXXXX
* - HTML character reference without frame syntax &...;
*
* If no conversion is possible -1 will be returned.
*/
int
pdc_string2unicode(pdc_core *pdc, const char *text, int i_flags,
const pdc_keyconn *keyconn, pdc_bool verbose)
{
int iz = PDC_KEY_NOTFOUND, usv = -1;
pdc_bool seterr = pdc_false;
int flags = PDC_INT_UNSIGNED;
int i = 0;
(void) verbose;
/* single byte as Unicode value */
if (strlen(text) == 1)
{
char c = text[0];
usv = (pdc_byte) c;
}
else
{
/* keyword */
if (keyconn)
{
if (i_flags & PDC_INT_CASESENS)
iz = pdc_get_keycode(text, keyconn);
else
iz = pdc_get_keycode_ci(text, keyconn);
}
if (iz != PDC_KEY_NOTFOUND)
{
usv = iz;
}
else
{
/* Unicode value */
if (!pdc_strincmp(text, "U+", 2))
{
flags |= PDC_INT_HEXADEC;
i = 2;
}
if (!pdc_str2integer(&text[i], flags, &iz))
{
seterr = pdc_true;
}
else if (iz >= PDC_NUM_UNIVAL ||
(iz >= PDC_UNICODE_MINHIGHSUR &&
iz <= PDC_UNICODE_MAXLOWSUR))
{
seterr = pdc_true;
}
else
{
usv = iz;
}
}
}
if (seterr)
{
pdc_set_errmsg(pdc, PDC_E_CONV_ILLUTF32CHAR, &text[i], 0, 0, 0);
if (verbose)
pdc_error(pdc, -1, 0, 0, 0, 0);
}
return usv;
}
/*
* Returns true if Unicode character is a character relevant for line breaking
*
*/
pdc_bool
pdc_is_linebreaking_relchar(pdc_ushort uv)
{
switch (uv)
{
case PDC_UNICODE_HT:
case PDC_UNICODE_LF:
case PDC_UNICODE_VT:
case PDC_UNICODE_FF:
case PDC_UNICODE_CR:
case PDC_UNICODE_NEL:
case PDC_UNICODE_SHY:
case PDC_UNICODE_LS:
case PDC_UNICODE_PS:
return pdc_true;
}
return pdc_false;
}