campo-sirio/pdf/pdcore/pc_unicode.h
guy 3bf951f42c Patch level : 10.0
Files correlati     : pdflib
Ricompilazione Demo : [ ]
Commento            :
Aggiornata pdflib.dll alla versione 7.0.4


git-svn-id: svn://10.65.10.50/trunk@18580 c028cbd2-c16b-5b4b-a496-9718f37d4682
2009-03-23 08:55:58 +00:00

306 lines
10 KiB
C
Executable File

/*---------------------------------------------------------------------------*
| PDFlib - A library for generating PDF on the fly |
+---------------------------------------------------------------------------+
| Copyright (c) 1997-2006 Thomas Merz and PDFlib GmbH. All rights reserved. |
+---------------------------------------------------------------------------+
| |
| This software is subject to the PDFlib license. It is NOT in the |
| public domain. Extended versions and commercial licenses are |
| available, please check http://www.pdflib.com. |
| |
*---------------------------------------------------------------------------*/
/* $Id: pc_unicode.h,v 1.4 2009-03-23 08:51:17 guy Exp $
*
* Unicode glyph name conversion routines
*
*/
#ifndef PC_UNICODE_H
#define PC_UNICODE_H
/* sizes and upper limit of the Unicode code space */
#define PDC_NUM_BMPVAL              0x10000     /* code points in the BMP */
#define PDC_NUM_UNIVAL              0x110000    /* code points in total */
#define PDC_MAX_UNIVAL              0x10FFFF    /* highest code point */

/* control characters */
#define PDC_UNICODE_HT              0x0009      /* CHARACTER TABULATION */
#define PDC_UNICODE_LF              0x000A      /* LINE FEED */
#define PDC_UNICODE_VT              0x000B      /* LINE TABULATION */
#define PDC_UNICODE_FF              0x000C      /* FORM FEED */
#define PDC_UNICODE_CR              0x000D      /* CARRIAGE RETURN */
#define PDC_UNICODE_ETB             0x0017      /* END OF TRANSMISSION BLOCK */
#define PDC_UNICODE_ESC             0x001B      /* ESCAPE */

/* ASCII punctuation */
#define PDC_UNICODE_SPACE           0x0020      /* SPACE */
#define PDC_UNICODE_QUOTMARK        0x0022      /* QUOTATION MARK */
#define PDC_UNICODE_AMPERSAND       0x0026      /* AMPERSAND */
#define PDC_UNICODE_APOSTROPHE      0x0027      /* APOSTROPHE */
#define PDC_UNICODE_HYPHEN          0x002D      /* HYPHEN-MINUS */
#define PDC_UNICODE_PERIOD          0x002E      /* FULL STOP */
#define PDC_UNICODE_SEMICOLON       0x003B      /* SEMICOLON */
#define PDC_UNICODE_LESS_THAN       0x003C      /* LESS-THAN SIGN */
#define PDC_UNICODE_GREATER_THAN    0x003E      /* GREATER-THAN SIGN */
#define PDC_UNICODE_BACKSLASH       0x005C      /* REVERSE SOLIDUS */
#define PDC_UNICODE_LEFT_CURLY      0x007B      /* LEFT CURLY BRACKET */
#define PDC_UNICODE_RIGHT_CURLY     0x007D      /* RIGHT CURLY BRACKET */
#define PDC_UNICODE_DELETE          0x007F      /* DELETE */

/* Latin-1 supplement */
#define PDC_UNICODE_NEL             0x0085      /* NEXT LINE */
#define PDC_UNICODE_NBSP            0x00A0      /* NO-BREAK SPACE */
#define PDC_UNICODE_SHY             0x00AD      /* SOFT HYPHEN */
#define PDC_UNICODE_MACRON          0x00AF      /* MACRON */
#define PDC_UNICODE_MICRO           0x00B5      /* MICRO SIGN */
#define PDC_UNICODE_MIDDLEDOT       0x00B7      /* MIDDLE DOT */

/* modifier and Greek letters */
#define PDC_UNICODE_MODMACRON       0x02C9      /* MODIFIER LETTER MACRON */
#define PDC_UNICODE_CAPDELTA        0x0394      /* GREEK CAPITAL LETTER DELTA */
#define PDC_UNICODE_CAPOMEGA        0x03A9      /* GREEK CAPITAL LETTER OMEGA */
#define PDC_UNICODE_SMALLMU         0x03BC      /* GREEK SMALL LETTER MU */

/* general punctuation, currency, letterlike symbols, operators, CJK */
#define PDC_UNICODE_LS              0x2028      /* LINE SEPARATOR */
#define PDC_UNICODE_PS              0x2029      /* PARAGRAPH SEPARATOR */
#define PDC_UNICODE_NNBSP           0x202F      /* NARROW NO-BREAK SPACE */
#define PDC_UNICODE_FRACSLASH       0x2044      /* FRACTION SLASH */
#define PDC_UNICODE_MMSPACE         0x205F      /* MEDIUM MATHEMATICAL SPACE */
#define PDC_UNICODE_EURO            0x20AC      /* EURO SIGN */
#define PDC_UNICODE_OHMSIGN         0x2126      /* OHM SIGN */
#define PDC_UNICODE_INCREMENT       0x2206      /* INCREMENT */
#define PDC_UNICODE_DIVSLASH        0x2215      /* DIVISION SLASH */
#define PDC_UNICODE_BULLETOP        0x2219      /* BULLET OPERATOR */
#define PDC_UNICODE_IDEOSPACE       0x3000      /* IDEOGRAPHIC SPACE */
/* largest ASCII and largest Latin-1 code point */
#define PDC_UNICODE_MAXASCII        0x007F
#define PDC_UNICODE_MAXLATIN1       0x00FF

/* Japanese half-width (HW) characters: upper border of the range, and the
 * single stand-alone value (0x00A5 is U+00A5 YEN SIGN)
 */
#define PDC_UNICODE_MAXHW           0x007E
#define PDC_UNICODE_SINGHW          0x00A5

/* Unicode borders of the fullwidth forms of ASCII characters.
 * PDC_UNICODE_DIFFWASCII is the distance between the two ranges:
 * PDC_UNICODE_MINFWASCII - PDC_UNICODE_SPACE
 */
#define PDC_UNICODE_MINFWASCII      0xFF00
#define PDC_UNICODE_MAXFWASCII      0xFF5E
#define PDC_UNICODE_DIFFWASCII      0xFEE0

/* Unicode borders of the fullwidth forms of Symbol characters */
#define PDC_UNICODE_MINFWSYMBOL     0xFFE0
#define PDC_UNICODE_MAXFWSYMBOL     0xFFE6

/* Unicode borders of the Private Use Area (PUA) */
#define PDC_UNICODE_MINPUA          0xE000
#define PDC_UNICODE_MAXPUA          0xF8FF

/* first value of the PDFlib PUA (lies inside the PUA above) */
#define PDC_UNICODE_PDFPUA          0xF200

/* Unicode borders of the Corporate Use Subarea (CUS) as used by Adobe
 * Systems; it ends on the same value as the PUA
 */
#define PDC_UNICODE_MINCUS          0xF600
#define PDC_UNICODE_MAXCUS          0xF8FF
/* Unicode surrogate ranges: high surrogates, then low surrogates */
#define PDC_UNICODE_MINHIGHSUR      0xD800
#define PDC_UNICODE_MAXHIGHSUR      0xDBFF
#define PDC_UNICODE_MINLOWSUR       0xDC00
#define PDC_UNICODE_MAXLOWSUR       0xDFFF

/* Unicode borders of the space characters above Latin-1 (U+2000..U+200B) */
#define PDC_UNICODE_MINSPACE        0x2000
#define PDC_UNICODE_MAXSPACE        0x200B

/* Unicode borders of CJK compatibility forms and small form variants */
#define PDC_UNICODE_MINCJKFORMS     0xFE30
#define PDC_UNICODE_MIDCJKFORMS     0xFE48
#define PDC_UNICODE_MAXCJKFORMS     0xFE6F

/* replacement character */
#define PDC_UNICODE_REPLCHAR        0xFFFD

/* special character standing for CRLF; note the PDF_ (not PDC_) prefix */
#define PDF_UNICODE_CRLF            0xFDD0

/* not a character */
#define PDC_UNICODE_NOTCHAR         0xFFFF

/* Latin and Armenian ligatures: the two IJ ligatures, and the borders of
 * the ligature range
 */
#define PDC_UNICODE_CAPLIGATIJ      0x0132
#define PDC_UNICODE_SMALLLIGATIJ    0x0133
#define PDC_UNICODE_MINLIGAT        0xFB00
#define PDC_UNICODE_MAXLIGAT        0xFB17
/*
 * The Unicode byte order mark (BOM) and the single bytes of its encoded
 * forms:
 *
 *   UTF-16 / UTF-32 big endian:     PDF_BOM0 PDF_BOM1           (FE FF)
 *   UTF-16 / UTF-32 little endian:  PDF_BOM1 PDF_BOM0           (FF FE)
 *   UTF-8:                          PDF_BOM2 PDF_BOM3 PDF_BOM4  (EF BB BF)
 */
#define PDC_UNICODE_BOM 0xFEFF
#define PDF_BOM0 0xFE
#define PDF_BOM1 0xFF
#define PDF_BOM2 0xEF
#define PDF_BOM3 0xBB
#define PDF_BOM4 0xBF

/*
 * General notes for all macros below:
 * - s must not be NULL, and it must point to at least as many accessible
 *   bytes as the macro inspects or writes (2, 3 or 4); no length check is
 *   performed.
 * - s is evaluated more than once, so it must not have side effects.
 */

/*
 * check whether the string is UTF-16 unicode by looking for the BOM
 * in big-endian or little-endian format resp. (first 2 bytes).
 *
 * A UTF-32 little-endian BOM (FF FE 00 00) starts with the same two bytes
 * as the UTF-16 little-endian BOM, so pdc_is_utf16le_unicode() is also true
 * for such a string; test for UTF-32 first where both are possible.
 */
#define pdc_is_utf16be_unicode(s) \
    (((const pdc_byte *)(s))[0] == PDF_BOM0 && \
     ((const pdc_byte *)(s))[1] == PDF_BOM1)

#define pdc_is_utf16le_unicode(s) \
    (((const pdc_byte *)(s))[0] == PDF_BOM1 && \
     ((const pdc_byte *)(s))[1] == PDF_BOM0)

/*
 * check whether the string is UTF-32 unicode by looking for the BOM
 * in big-endian or little-endian format resp. (first 4 bytes).
 */
#define pdc_is_utf32be_unicode(s) \
    (((const pdc_byte *)(s))[0] == 0x00 && \
     ((const pdc_byte *)(s))[1] == 0x00 && \
     ((const pdc_byte *)(s))[2] == PDF_BOM0 && \
     ((const pdc_byte *)(s))[3] == PDF_BOM1)

#define pdc_is_utf32le_unicode(s) \
    (((const pdc_byte *)(s))[0] == PDF_BOM1 && \
     ((const pdc_byte *)(s))[1] == PDF_BOM0 && \
     ((const pdc_byte *)(s))[2] == 0x00 && \
     ((const pdc_byte *)(s))[3] == 0x00)

/*
 * check whether the string is UTF-8 unicode by looking for the BOM
 * (first 3 bytes).
 */
#define pdc_is_utf8_unicode(s) \
    (((const pdc_byte *)(s))[0] == PDF_BOM2 && \
     ((const pdc_byte *)(s))[1] == PDF_BOM3 && \
     ((const pdc_byte *)(s))[2] == PDF_BOM4)

/* the UTF-8 BOM as a string literal */
#define PDC_UTF8_STRING "\xEF\xBB\xBF"

/* same test as pdc_is_utf8_unicode(), kept under its second name */
#define pdc_is_utf8_bytecode(s) pdc_is_utf8_unicode(s)

/*
 * write the 3-byte UTF-8 BOM to the start of s.
 *
 * The expansion is a single parenthesized expression without a trailing
 * semicolon, so that the macro behaves like a function call: the caller
 * writes the terminating ';' and the macro may be used as the sole
 * statement of an if/else branch or inside a larger expression.
 */
#define pdc_copy_utf8_bom(s) \
    (((pdc_byte *)(s))[0] = PDF_BOM2, \
     ((pdc_byte *)(s))[1] = PDF_BOM3, \
     ((pdc_byte *)(s))[2] = PDF_BOM4)
/* UTF-8 aliases: the text format value, its keyword spelling, and a flag
 * fixed to pdc_false (NOTE(review): the flag's purpose is not visible in
 * this header - see its users)
 */
#define PDC_UTF8                pdc_utf8
#define PDC_UTF8_STRG           "utf8"
#define PDC_UTF8_FLAG           pdc_false

/* first and last character of an HTML-style "&...;" sequence; the values
 * equal PDC_UNICODE_AMPERSAND and PDC_UNICODE_SEMICOLON
 */
#define PDC_HTML_CTRLCHAR       '&'
#define PDC_HTML_DELIMITCHAR    ';'
/* outcome of a conversion step */
typedef enum
{
    conversionOK    = 0,    /* conversion successful */
    sourceExhausted = 1,    /* partial character in source, but hit end */
    targetExhausted = 2,    /* insufficient room in target for conversion */
    sourceIllegal   = 3     /* source sequence is illegal/malformed */
} pdc_convers_result;
/* strictness selector for a conversion: strict (0) or lenient (1) */
typedef enum
{
    strictConversion  = 0,
    lenientConversion = 1
} pdc_convers_flags;
/*
 * Bit flags for the "flags" argument of pdc_convert_string(),
 * pdc_strdup_ext(), the pdc_utfXX_to_utfYY() converters and
 * pdc_convert_name_ext().
 *
 * Every flag occupies its own bit (bits 0..20), so flags can be combined
 * with bitwise OR.
 */
#define PDC_CONV_FORCEUTF16     (1 << 0)    /* 0x000001 */
#define PDC_CONV_TRY7BYTES      (1 << 1)    /* 0x000002 */
#define PDC_CONV_TRYBYTES       (1 << 2)    /* 0x000004 */
#define PDC_CONV_WITHBOM        (1 << 3)    /* 0x000008 */
#define PDC_CONV_NOBOM          (1 << 4)    /* 0x000010 */
#define PDC_CONV_AUTOBOM        (1 << 5)    /* 0x000020 */
#define PDC_CONV_ANALYZE        (1 << 6)    /* 0x000040 */
#define PDC_CONV_TMPALLOC       (1 << 7)    /* 0x000080 */
#define PDC_CONV_HTMLCHAR       (1 << 8)    /* 0x000100 */
#define PDC_CONV_NEWALLOC       (1 << 9)    /* 0x000200 */
#define PDC_CONV_INFLATE        (1 << 10)   /* 0x000400 */
#define PDC_CONV_ESCSEQU        (1 << 11)   /* 0x000800 */
#define PDC_CONV_BSSEQU         (1 << 12)   /* 0x001000 */
#define PDC_CONV_EBCDIC         (1 << 13)   /* 0x002000 */
#define PDC_CONV_ENCERROR       (1 << 14)   /* 0x004000 */
#define PDC_CONV_KEEPLBCHAR     (1 << 15)   /* 0x008000 */
#define PDC_CONV_LOGGING        (1 << 16)   /* 0x010000 */
#define PDC_CONV_ISUTF8         (1 << 17)   /* 0x020000 */
#define PDC_CONV_ASCII          (1 << 18)   /* 0x040000 */
#define PDC_CONV_MAXSTRLEN      (1 << 19)   /* 0x080000 */
#define PDC_CONV_FILENAME       (1 << 20)   /* 0x100000 */
/*
 * Text formats.
 *
 * DON'T change the order or the numerical values.
 * The value 6 is not assigned in this enumeration.
 */
typedef enum
{
    pdc_auto    = 1,
    pdc_auto2   = 2,
    pdc_bytes   = 3,
    pdc_bytes2  = 4,
    pdc_utf8    = 5,    /* UTF-8 */
    pdc_utf16   = 7,    /* UTF-16 */
    pdc_utf16be = 8,    /* UTF-16 big endian */
    pdc_utf16le = 9,    /* UTF-16 little endian */
    pdc_utf32   = 10    /* UTF-32 */
} pdc_text_format;
/*
 * Keyword <-> pdc_text_format table, terminated by a {NULL, 0} entry.
 * It is only compiled where PC_UNICODE_C is defined before this header is
 * included. A copy for pdflib is kept in p_keyconn.h.
 *
 * NOTE(review): pdc_utf32 has no keyword entry here - confirm that this is
 * intended.
 */
#if defined(PC_UNICODE_C)
static const pdc_keyconn pdc_textformat_keylist[] =
{
    {"auto",    pdc_auto},
    {"auto2",   pdc_auto2},
    {"bytes",   pdc_bytes},
    {"bytes2",  pdc_bytes2},
    {"utf8",    pdc_utf8},
    {"utf16",   pdc_utf16},
    {"utf16be", pdc_utf16be},
    {"utf16le", pdc_utf16le},
    {NULL,      0}
};
#endif /* PC_UNICODE_C */
/*
 * Get a string for a text format value.
 * NOTE(review): presumably the keyword spelling of a pdc_text_format value;
 * the result for values without a keyword is not visible in this header -
 * see the definition.
 */
const char *
pdc_get_textformat(int textformat);
/*
 * Convert instring (inlen bytes; input format inutf, codepage, inev) and
 * hand the result back through outstring, outlen and oututf_p.
 * flags is a combination of the PDC_CONV_* bits.
 * NOTE(review): ownership of *outstring and the meaning of the int result
 * are not visible in this header - see the definition.
 */
int pdc_convert_string(
    pdc_core *pdc,
    pdc_text_format inutf,
    int codepage,
    pdc_encodingvector *inev,
    pdc_byte *instring,
    int inlen,
    pdc_text_format *oututf_p,
    pdc_encodingvector *outev,
    pdc_byte **outstring,
    int *outlen,
    int flags,
    pdc_bool verbose);

/*
 * Variant of pdc_convert_string() which additionally takes a glyph table
 * (glyphtab with tabsize entries) and a replacement character (replchar).
 * NOTE(review): how these extra arguments are used is not visible in this
 * header - see the definition.
 */
int pdc_convert_textstring(
    pdc_core *pdc,
    pdc_text_format inutf,
    int codepage,
    pdc_encodingvector *inev,
    const pdc_glyph_tab *glyphtab,
    int tabsize,
    int replchar,
    pdc_byte *instring,
    int inlen,
    pdc_text_format *oututf_p,
    pdc_encodingvector *outev,
    pdc_byte **outstring,
    int *outlen,
    int flags,
    pdc_bool verbose);
/*
 * Name conversion. flags is a combination of the PDC_CONV_* bits; the _ext
 * variant additionally takes an encoding and a codepage.
 * NOTE(review): whether the returned string is newly allocated, and who
 * frees it, is not visible in this header - see the definitions.
 */
char *pdc_convert_name(
    pdc_core *pdc,
    const char *name,
    int len,
    int flags);

char *pdc_convert_name_ext(
    pdc_core *pdc,
    const char *name,
    int len,
    pdc_encoding enc,
    int codepage,
    int flags);

/*
 * Conversion between UTF-8 and host bytes, in both directions.
 * NOTE(review): the effect of honorlang and the ownership of the returned
 * string are not visible in this header - see the definitions.
 */
char *pdc_utf8_to_hostbytes(
    pdc_core *pdc,
    pdc_bool honorlang,
    char *name);

char *pdc_hostbytes_to_utf8(
    pdc_core *pdc,
    pdc_bool honorlang,
    char *name);
/*
 * Conversions between UTF-8, UTF-16 and UTF-32 strings.
 *
 * Where present, flags is a combination of the PDC_CONV_* bits and format
 * names the requested output format.
 * NOTE(review): presumably *size receives the size of the returned string
 * and the result is allocated by the callee; neither is visible in this
 * header - see the definitions.
 */
char *pdc_utf16_to_utf8(
    pdc_core *pdc,
    const char *utf16string,
    int len,
    int flags,
    int *size);

char *pdc_utf8_to_utf16(
    pdc_core *pdc,
    const char *utf8string,
    const char *format,
    int flags,
    int *size);

char *pdc_utf16_to_utf32(
    pdc_core *pdc,
    const char *utf16string,
    int len,
    int *size);

char *pdc_utf32_to_utf8(
    pdc_core *pdc,
    const char *utf32string,
    int len,
    int flags,
    int *size);

char *pdc_utf32_to_utf16(
    pdc_core *pdc,
    const char *utf32string,
    int len,
    const char *format,
    int flags,
    int *size);
/*
 * Conversion of single characters between 16-bit and 32-bit values.
 *
 * NOTE(review): presumably pdc_char16_to_char32() reads the character at
 * ustext[*ic] (len units available), may advance *ic, and returns the
 * 32-bit value; pdc_char32_to_char16() presumably stores the 16-bit units
 * for usv in uvlist and returns their number - confirm both against the
 * definitions.
 */
int pdc_char16_to_char32(
    pdc_core *pdc,
    const pdc_ushort *ustext,
    int *ic,
    int len,
    pdc_bool verbose);

int pdc_char32_to_char16(
    pdc_core *pdc,
    int usv,
    pdc_ushort *uvlist,
    pdc_bool verbose);
#endif /* PC_UNICODE_H */