/*  eci.c - Extended Channel Interpretations

    libzint - the open source barcode library
    Copyright (C) 2009 - 2020 Robin Stuart <rstuart114@gmail.com>

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
    3. Neither the name of the project nor the names of its contributors
       may be used to endorse or promote products derived from this software
       without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.
 */
2019-12-19 03:37:55 +03:00
/* vim: set ts=4 sw=4 et : */
2016-08-16 14:43:41 +03:00
# include <string.h>
# include <stdio.h>
# include "eci.h"
2019-12-19 03:37:55 +03:00
# include "common.h"
2016-09-06 00:06:50 +03:00
# ifdef _MSC_VER
# include <malloc.h>
# endif
2016-08-16 14:43:41 +03:00
/* Convert Unicode to other character encodings */
2019-12-19 03:37:55 +03:00
INTERNAL int utf_to_eci ( const int eci , const unsigned char source [ ] , unsigned char dest [ ] , size_t * length ) {
2016-08-16 14:43:41 +03:00
int in_posn ;
int out_posn ;
int ext ;
int done ;
2018-01-21 14:50:49 +03:00
2016-08-16 14:43:41 +03:00
if ( eci = = 26 ) {
/* Unicode mode, do not process - just copy data across */
2019-09-01 22:23:15 +03:00
memcpy ( dest , source , * length ) ;
dest [ * length ] = ' \0 ' ;
2016-08-16 14:43:41 +03:00
return 0 ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
in_posn = 0 ;
out_posn = 0 ;
do {
/* Single byte (ASCII) character */
2017-09-10 18:03:09 +03:00
int bytelen = 1 ;
int glyph = ( int ) source [ in_posn ] ;
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( ( source [ in_posn ] > = 0x80 ) & & ( source [ in_posn ] < 0xc0 ) ) {
/* Something has gone wrong, abort */
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( ( source [ in_posn ] > = 0xc0 ) & & ( source [ in_posn ] < 0xe0 ) ) {
/* Two-byte character */
bytelen = 2 ;
glyph = ( source [ in_posn ] & 0x1f ) < < 6 ;
2020-04-04 18:53:29 +03:00
if ( ( int ) * length < ( in_posn + 2 ) ) {
2016-08-16 14:43:41 +03:00
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( source [ in_posn + 1 ] > 0xc0 ) {
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
glyph + = ( source [ in_posn + 1 ] & 0x3f ) ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( ( source [ in_posn ] > = 0xe0 ) & & ( source [ in_posn ] < 0xf0 ) ) {
/* Three-byte character */
bytelen = 3 ;
glyph = ( source [ in_posn ] & 0x0f ) < < 12 ;
2016-09-06 00:06:50 +03:00
2020-04-04 18:53:29 +03:00
if ( ( int ) * length < ( in_posn + 2 ) ) {
2016-08-16 14:43:41 +03:00
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2020-04-04 18:53:29 +03:00
if ( ( int ) * length < ( in_posn + 3 ) ) {
2016-08-16 14:43:41 +03:00
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( source [ in_posn + 1 ] > 0xc0 ) {
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( source [ in_posn + 2 ] > 0xc0 ) {
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
glyph + = ( source [ in_posn + 1 ] & 0x3f ) < < 6 ;
glyph + = ( source [ in_posn + 2 ] & 0x3f ) ;
}
2016-09-06 00:06:50 +03:00
2019-09-01 22:23:15 +03:00
if ( source [ in_posn ] > = 0xf0 | | glyph > 0x2122 ) {
/* Not in any ISO 8859 or Windows page */
2016-08-16 14:43:41 +03:00
return ZINT_ERROR_INVALID_DATA ;
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( glyph < 128 ) {
dest [ out_posn ] = glyph ;
} else {
done = 0 ;
for ( ext = 0 ; ext < 128 ; ext + + ) {
switch ( eci ) {
case 3 : // Latin-1
if ( glyph = = iso_8859_1 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 4 : // Latin-2
if ( glyph = = iso_8859_2 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 5 : // Latin-3
if ( glyph = = iso_8859_3 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 6 : // Latin-4
if ( glyph = = iso_8859_4 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 7 : // Latin/Cyrillic
if ( glyph = = iso_8859_5 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 8 : // Latin/Arabic
if ( glyph = = iso_8859_6 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 9 : // Latin/Greek
if ( glyph = = iso_8859_7 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 10 : // Latin/Hebrew
if ( glyph = = iso_8859_8 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 11 : // Latin-5
if ( glyph = = iso_8859_9 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 12 : // Latin-6
if ( glyph = = iso_8859_10 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 13 : // Latin/Thai
if ( glyph = = iso_8859_11 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 15 : // Latin-7
if ( glyph = = iso_8859_13 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 16 : // Latin-8
if ( glyph = = iso_8859_14 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 17 : // Latin-9
if ( glyph = = iso_8859_15 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 18 : // Latin-10
if ( glyph = = iso_8859_16 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 21 : // Windows-1250
if ( glyph = = windows_1250 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 22 : // Windows-1251
if ( glyph = = windows_1251 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 23 : // Windows-1252
if ( glyph = = windows_1252 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
case 24 : // Windows-1256
if ( glyph = = windows_1256 [ ext ] ) {
dest [ out_posn ] = ext + 128 ;
done = 1 ;
}
break ;
default :
break ;
}
2019-09-01 22:23:15 +03:00
if ( done ) {
break ;
}
2016-08-16 14:43:41 +03:00
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
if ( ! ( done ) ) {
return ZINT_ERROR_INVALID_DATA ;
}
}
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
in_posn + = bytelen ;
out_posn + + ;
2020-04-04 18:53:29 +03:00
} while ( in_posn < ( int ) * length ) ;
2016-08-16 14:43:41 +03:00
dest [ out_posn ] = ' \0 ' ;
* length = out_posn ;
return 0 ;
}
/* Find the lowest ECI mode which will encode a given set of Unicode text */
2019-12-19 03:37:55 +03:00
INTERNAL int get_best_eci ( unsigned char source [ ] , size_t length ) {
2016-08-16 14:43:41 +03:00
int eci = 3 ;
# ifndef _MSC_VER
unsigned char local_source [ length + 1 ] ;
# else
2016-08-22 19:58:32 +03:00
unsigned char * local_source = ( unsigned char * ) _alloca ( length + 1 ) ;
2016-08-16 14:43:41 +03:00
# endif
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
do {
if ( utf_to_eci ( eci , source , local_source , & length ) = = 0 ) {
return eci ;
2016-09-06 00:06:50 +03:00
}
2016-08-16 14:43:41 +03:00
eci + + ;
} while ( eci < 25 ) ;
2016-09-06 00:06:50 +03:00
2016-08-16 14:43:41 +03:00
return 26 ; // If all of these fail, use Unicode!
}