2005-04-17 02:20:36 +04:00
/*
* unicode . c
*
* PURPOSE
* Routines for converting between UTF - 8 and OSTA Compressed Unicode .
* Also handles filename mangling
*
* DESCRIPTION
* OSTA Compressed Unicode is explained in the OSTA UDF specification .
* http : //www.osta.org/
* UTF-8 is explained in IETF RFC 3629.
* https://www.rfc-editor.org/rfc/rfc3629
*
* COPYRIGHT
* This file is distributed under the terms of the GNU General Public
* License ( GPL ) . Copies of the GPL can be obtained from :
* ftp : //prep.ai.mit.edu/pub/gnu/GPL
* Each contributing author retains all rights to their own work .
*/
# include "udfdecl.h"
# include <linux/kernel.h>
# include <linux/string.h> /* for memset */
# include <linux/nls.h>
# include <linux/udf_fs.h>
# include "udf_sb.h"
/*
 * Forward declaration of the filename-mangling helper defined later in this
 * file. Arguments, in order: newName, udfName, udfLen, fidName, fidNameLen.
 */
static int udf_translate_to_linux ( uint8_t * , uint8_t * , int , uint8_t * , int ) ;
2007-07-21 15:37:18 +04:00
static int udf_char_to_ustr ( struct ustr * dest , const uint8_t * src , int strlen )
2005-04-17 02:20:36 +04:00
{
2007-07-19 12:47:43 +04:00
if ( ( ! dest ) | | ( ! src ) | | ( ! strlen ) | | ( strlen > UDF_NAME_LEN - 2 ) )
2005-04-17 02:20:36 +04:00
return 0 ;
2007-07-21 15:37:18 +04:00
2005-04-17 02:20:36 +04:00
memset ( dest , 0 , sizeof ( struct ustr ) ) ;
memcpy ( dest - > u_name , src , strlen ) ;
dest - > u_cmpID = 0x08 ;
dest - > u_len = strlen ;
2007-07-21 15:37:18 +04:00
2005-04-17 02:20:36 +04:00
return strlen ;
}
/*
* udf_build_ustr
*/
2007-07-21 15:37:18 +04:00
int udf_build_ustr ( struct ustr * dest , dstring * ptr , int size )
2005-04-17 02:20:36 +04:00
{
int usesize ;
2007-07-19 12:47:43 +04:00
if ( ( ! dest ) | | ( ! ptr ) | | ( ! size ) )
2005-04-17 02:20:36 +04:00
return - 1 ;
memset ( dest , 0 , sizeof ( struct ustr ) ) ;
2007-07-19 12:47:43 +04:00
usesize = ( size > UDF_NAME_LEN ) ? UDF_NAME_LEN : size ;
dest - > u_cmpID = ptr [ 0 ] ;
dest - > u_len = ptr [ size - 1 ] ;
memcpy ( dest - > u_name , ptr + 1 , usesize - 1 ) ;
2007-07-21 15:37:18 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
/*
* udf_build_ustr_exact
*/
2007-07-21 15:37:18 +04:00
static int udf_build_ustr_exact ( struct ustr * dest , dstring * ptr , int exactsize )
2005-04-17 02:20:36 +04:00
{
2007-07-19 12:47:43 +04:00
if ( ( ! dest ) | | ( ! ptr ) | | ( ! exactsize ) )
2005-04-17 02:20:36 +04:00
return - 1 ;
memset ( dest , 0 , sizeof ( struct ustr ) ) ;
2007-07-19 12:47:43 +04:00
dest - > u_cmpID = ptr [ 0 ] ;
dest - > u_len = exactsize - 1 ;
memcpy ( dest - > u_name , ptr + 1 , exactsize - 1 ) ;
2007-07-21 15:37:18 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
/*
* udf_ocu_to_utf8
*
* PURPOSE
* Convert OSTA Compressed Unicode to the UTF - 8 equivalent .
*
* DESCRIPTION
* This routine is only called by udf_filldir ( ) .
*
* PRE - CONDITIONS
* utf Pointer to UTF - 8 output buffer .
* ocu Pointer to OSTA Compressed Unicode input buffer
* of size UDF_NAME_LEN bytes .
* both of type " struct ustr * "
*
* POST - CONDITIONS
* < return > Zero on success .
*
* HISTORY
* November 12 , 1997 - Andrew E . Mileski
* Written , tested , and released .
*/
int udf_CS0toUTF8 ( struct ustr * utf_o , struct ustr * ocu_i )
{
uint8_t * ocu ;
uint32_t c ;
uint8_t cmp_id , ocu_len ;
int i ;
ocu = ocu_i - > u_name ;
ocu_len = ocu_i - > u_len ;
cmp_id = ocu_i - > u_cmpID ;
utf_o - > u_len = 0 ;
2007-07-19 12:47:43 +04:00
if ( ocu_len = = 0 ) {
2005-04-17 02:20:36 +04:00
memset ( utf_o , 0 , sizeof ( struct ustr ) ) ;
utf_o - > u_cmpID = 0 ;
utf_o - > u_len = 0 ;
return 0 ;
}
2007-07-19 12:47:43 +04:00
if ( ( cmp_id ! = 8 ) & & ( cmp_id ! = 16 ) ) {
printk ( KERN_ERR " udf: unknown compression code (%d) stri=%s \n " ,
cmp_id , ocu_i - > u_name ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
for ( i = 0 ; ( i < ocu_len ) & & ( utf_o - > u_len < = ( UDF_NAME_LEN - 3 ) ) ; ) {
2005-04-17 02:20:36 +04:00
/* Expand OSTA compressed Unicode to Unicode */
c = ocu [ i + + ] ;
if ( cmp_id = = 16 )
c = ( c < < 8 ) | ocu [ i + + ] ;
/* Compress Unicode to UTF-8 */
2007-07-21 15:37:18 +04:00
if ( c < 0x80U ) {
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) c ;
} else if ( c < 0x800U ) {
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) ( 0xc0 | ( c > > 6 ) ) ;
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) ( 0x80 | ( c & 0x3f ) ) ;
2007-07-19 12:47:43 +04:00
} else {
2007-07-21 15:37:18 +04:00
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) ( 0xe0 | ( c > > 12 ) ) ;
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) ( 0x80 | ( ( c > > 6 ) & 0x3f ) ) ;
utf_o - > u_name [ utf_o - > u_len + + ] = ( uint8_t ) ( 0x80 | ( c & 0x3f ) ) ;
2005-04-17 02:20:36 +04:00
}
}
2007-07-19 12:47:43 +04:00
utf_o - > u_cmpID = 8 ;
2005-04-17 02:20:36 +04:00
return utf_o - > u_len ;
}
/*
*
* udf_utf8_to_ocu
*
* PURPOSE
* Convert UTF - 8 to the OSTA Compressed Unicode equivalent .
*
* DESCRIPTION
* This routine is only called by udf_lookup ( ) .
*
* PRE - CONDITIONS
* ocu Pointer to OSTA Compressed Unicode output
* buffer of size UDF_NAME_LEN bytes .
* utf Pointer to UTF - 8 input buffer .
* utf_len Length of UTF - 8 input buffer in bytes .
*
* POST - CONDITIONS
* < return > Zero on success .
*
* HISTORY
* November 12 , 1997 - Andrew E . Mileski
* Written , tested , and released .
*/
2007-07-21 15:37:18 +04:00
static int udf_UTF8toCS0 ( dstring * ocu , struct ustr * utf , int length )
2005-04-17 02:20:36 +04:00
{
unsigned c , i , max_val , utf_char ;
int utf_cnt , u_len ;
memset ( ocu , 0 , sizeof ( dstring ) * length ) ;
ocu [ 0 ] = 8 ;
max_val = 0xffU ;
2007-07-21 15:37:18 +04:00
try_again :
2005-04-17 02:20:36 +04:00
u_len = 0U ;
utf_char = 0U ;
utf_cnt = 0U ;
2007-07-19 12:47:43 +04:00
for ( i = 0U ; i < utf - > u_len ; i + + ) {
2007-07-21 15:37:18 +04:00
c = ( uint8_t ) utf - > u_name [ i ] ;
2005-04-17 02:20:36 +04:00
/* Complete a multi-byte UTF-8 character */
2007-07-19 12:47:43 +04:00
if ( utf_cnt ) {
2005-04-17 02:20:36 +04:00
utf_char = ( utf_char < < 6 ) | ( c & 0x3fU ) ;
if ( - - utf_cnt )
continue ;
2007-07-19 12:47:43 +04:00
} else {
2005-04-17 02:20:36 +04:00
/* Check for a multi-byte UTF-8 character */
2007-07-19 12:47:43 +04:00
if ( c & 0x80U ) {
2005-04-17 02:20:36 +04:00
/* Start a multi-byte UTF-8 character */
2007-07-19 12:47:43 +04:00
if ( ( c & 0xe0U ) = = 0xc0U ) {
2005-04-17 02:20:36 +04:00
utf_char = c & 0x1fU ;
utf_cnt = 1 ;
2007-07-19 12:47:43 +04:00
} else if ( ( c & 0xf0U ) = = 0xe0U ) {
2005-04-17 02:20:36 +04:00
utf_char = c & 0x0fU ;
utf_cnt = 2 ;
2007-07-19 12:47:43 +04:00
} else if ( ( c & 0xf8U ) = = 0xf0U ) {
2005-04-17 02:20:36 +04:00
utf_char = c & 0x07U ;
utf_cnt = 3 ;
2007-07-19 12:47:43 +04:00
} else if ( ( c & 0xfcU ) = = 0xf8U ) {
2005-04-17 02:20:36 +04:00
utf_char = c & 0x03U ;
utf_cnt = 4 ;
2007-07-19 12:47:43 +04:00
} else if ( ( c & 0xfeU ) = = 0xfcU ) {
2005-04-17 02:20:36 +04:00
utf_char = c & 0x01U ;
utf_cnt = 5 ;
2007-07-21 15:37:18 +04:00
} else {
2005-04-17 02:20:36 +04:00
goto error_out ;
2007-07-21 15:37:18 +04:00
}
2005-04-17 02:20:36 +04:00
continue ;
2007-07-21 15:37:18 +04:00
} else {
2005-04-17 02:20:36 +04:00
/* Single byte UTF-8 character (most common) */
utf_char = c ;
2007-07-21 15:37:18 +04:00
}
2005-04-17 02:20:36 +04:00
}
/* Choose no compression if necessary */
2007-07-19 12:47:43 +04:00
if ( utf_char > max_val ) {
2007-07-21 15:37:18 +04:00
if ( max_val = = 0xffU ) {
2005-04-17 02:20:36 +04:00
max_val = 0xffffU ;
2007-07-21 15:37:18 +04:00
ocu [ 0 ] = ( uint8_t ) 0x10U ;
2005-04-17 02:20:36 +04:00
goto try_again ;
}
goto error_out ;
}
2007-07-19 12:47:43 +04:00
if ( max_val = = 0xffffU ) {
2007-07-21 15:37:18 +04:00
ocu [ + + u_len ] = ( uint8_t ) ( utf_char > > 8 ) ;
2005-04-17 02:20:36 +04:00
}
2007-07-21 15:37:18 +04:00
ocu [ + + u_len ] = ( uint8_t ) ( utf_char & 0xffU ) ;
2005-04-17 02:20:36 +04:00
}
2007-07-19 12:47:43 +04:00
if ( utf_cnt ) {
2007-07-21 15:37:18 +04:00
error_out :
2005-04-17 02:20:36 +04:00
ocu [ + + u_len ] = ' ? ' ;
printk ( KERN_DEBUG " udf: bad UTF-8 character \n " ) ;
}
2007-07-21 15:37:18 +04:00
ocu [ length - 1 ] = ( uint8_t ) u_len + 1 ;
2005-04-17 02:20:36 +04:00
return u_len + 1 ;
}
2007-07-19 12:47:43 +04:00
static int udf_CS0toNLS ( struct nls_table * nls , struct ustr * utf_o ,
struct ustr * ocu_i )
2005-04-17 02:20:36 +04:00
{
uint8_t * ocu ;
uint32_t c ;
uint8_t cmp_id , ocu_len ;
int i ;
ocu = ocu_i - > u_name ;
ocu_len = ocu_i - > u_len ;
cmp_id = ocu_i - > u_cmpID ;
utf_o - > u_len = 0 ;
2007-07-19 12:47:43 +04:00
if ( ocu_len = = 0 ) {
2005-04-17 02:20:36 +04:00
memset ( utf_o , 0 , sizeof ( struct ustr ) ) ;
utf_o - > u_cmpID = 0 ;
utf_o - > u_len = 0 ;
return 0 ;
}
2007-07-19 12:47:43 +04:00
if ( ( cmp_id ! = 8 ) & & ( cmp_id ! = 16 ) ) {
printk ( KERN_ERR " udf: unknown compression code (%d) stri=%s \n " ,
cmp_id , ocu_i - > u_name ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
for ( i = 0 ; ( i < ocu_len ) & & ( utf_o - > u_len < = ( UDF_NAME_LEN - 3 ) ) ; ) {
2005-04-17 02:20:36 +04:00
/* Expand OSTA compressed Unicode to Unicode */
c = ocu [ i + + ] ;
if ( cmp_id = = 16 )
c = ( c < < 8 ) | ocu [ i + + ] ;
2007-07-19 12:47:43 +04:00
utf_o - > u_len + = nls - > uni2char ( c , & utf_o - > u_name [ utf_o - > u_len ] ,
UDF_NAME_LEN - utf_o - > u_len ) ;
2005-04-17 02:20:36 +04:00
}
2007-07-19 12:47:43 +04:00
utf_o - > u_cmpID = 8 ;
2005-04-17 02:20:36 +04:00
return utf_o - > u_len ;
}
2007-07-21 15:37:18 +04:00
/*
 * udf_NLStoCS0
 *
 * Convert a name in the character set of an NLS table to OSTA Compressed
 * Unicode (CS0).
 *
 * nls		NLS table whose char2uni() performs the per-character
 *		conversion.
 * ocu		CS0 output buffer of "length" bytes.
 * uni		Input name; uni->u_len is its length in bytes.
 * length	Size of the output buffer in bytes.
 *
 * The conversion first tries one byte per character (compression ID 8) and
 * restarts with two bytes per character, high byte first (compression ID
 * 16), as soon as a character above 0xff is met.
 *
 * Returns the number of bytes used in ocu, counting the leading compression
 * ID byte; the same value is stored in ocu[length - 1]. Returns zero when
 * length is below 2 or the converted name does not fit in the buffer.
 */
static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
			int length)
{
	unsigned i, max_val;
	uint16_t uni_char;
	int len, u_len, max_len, need;

	if (length < 2)
		return 0;

	memset(ocu, 0, sizeof(dstring) * length);
	ocu[0] = 8;
	max_val = 0xffU;
	/* ocu[0] holds the ID and ocu[length - 1] the length byte. */
	max_len = length - 2;

try_again:
	u_len = 0;
	for (i = 0U; i < uni->u_len; i++) {
		/*
		 * len must be signed: char2uni() reports errors as negative
		 * values (kernel NLS convention), which an unsigned variable
		 * would turn into a huge step for i below.
		 */
		len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
		if (!len)
			continue;

		/* Invalid character: consume one byte and emit '?'. */
		if (len < 0) {
			len = 1;
			uni_char = '?';
		}

		if (uni_char > max_val) {
			max_val = 0xffffU;
			ocu[0] = (uint8_t)0x10U;
			goto try_again;
		}

		/* Refuse to write past the space reserved for name data. */
		need = (max_val == 0xffffU) ? 2 : 1;
		if (u_len + need > max_len)
			return 0;

		if (need == 2)
			ocu[++u_len] = (uint8_t)(uni_char >> 8);
		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);

		/* Skip the remaining bytes of a multi-byte input character. */
		i += len - 1;
	}

	ocu[length - 1] = (uint8_t)(u_len + 1);

	return u_len + 1;
}
2007-07-21 15:37:18 +04:00
int udf_get_filename ( struct super_block * sb , uint8_t * sname , uint8_t * dname ,
2007-07-19 12:47:43 +04:00
int flen )
2005-04-17 02:20:36 +04:00
{
struct ustr filename , unifilename ;
int len ;
2007-07-19 12:47:43 +04:00
if ( udf_build_ustr_exact ( & unifilename , sname , flen ) ) {
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
if ( UDF_QUERY_FLAG ( sb , UDF_FLAG_UTF8 ) ) {
if ( ! udf_CS0toUTF8 ( & filename , & unifilename ) ) {
2007-07-21 15:37:18 +04:00
udf_debug ( " Failed in udf_get_filename: sname = %s \n " , sname ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
} else if ( UDF_QUERY_FLAG ( sb , UDF_FLAG_NLS_MAP ) ) {
2007-07-21 15:37:18 +04:00
if ( ! udf_CS0toNLS ( UDF_SB ( sb ) - > s_nls_map , & filename , & unifilename ) ) {
udf_debug ( " Failed in udf_get_filename: sname = %s \n " , sname ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-21 15:37:18 +04:00
} else {
2005-04-17 02:20:36 +04:00
return 0 ;
2007-07-21 15:37:18 +04:00
}
2005-04-17 02:20:36 +04:00
2007-07-21 15:37:18 +04:00
len = udf_translate_to_linux ( dname , filename . u_name , filename . u_len ,
unifilename . u_name , unifilename . u_len ) ;
if ( len ) {
2005-04-17 02:20:36 +04:00
return len ;
}
2007-07-21 15:37:18 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-21 15:37:18 +04:00
int udf_put_filename ( struct super_block * sb , const uint8_t * sname ,
uint8_t * dname , int flen )
2005-04-17 02:20:36 +04:00
{
struct ustr unifilename ;
int namelen ;
2007-07-19 12:47:43 +04:00
if ( ! ( udf_char_to_ustr ( & unifilename , sname , flen ) ) ) {
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
if ( UDF_QUERY_FLAG ( sb , UDF_FLAG_UTF8 ) ) {
2007-07-21 15:37:18 +04:00
namelen = udf_UTF8toCS0 ( dname , & unifilename , UDF_NAME_LEN ) ;
if ( ! namelen ) {
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-19 12:47:43 +04:00
} else if ( UDF_QUERY_FLAG ( sb , UDF_FLAG_NLS_MAP ) ) {
2007-07-21 15:37:18 +04:00
namelen = udf_NLStoCS0 ( UDF_SB ( sb ) - > s_nls_map , dname , & unifilename , UDF_NAME_LEN ) ;
if ( ! namelen ) {
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-07-21 15:37:18 +04:00
} else {
2005-04-17 02:20:36 +04:00
return 0 ;
2007-07-21 15:37:18 +04:00
}
2005-04-17 02:20:36 +04:00
return namelen ;
}
/* Characters and sizes used by the filename mangling below. */
#define ILLEGAL_CHAR_MARK	'_'	/* replaces '/' and NUL in a name */
#define EXT_MARK		'.'	/* separates base name and extension */
#define CRC_MARK		'#'	/* introduces the 4-digit hex CRC */
#define EXT_SIZE		5	/* longest extension kept after the CRC */

/*
 * udf_translate_to_linux
 *
 * Copy a converted name into newName, making it usable as a Linux filename.
 *
 * newName	Output buffer. At most 256 bytes are written.
 * udfName	Converted name (UTF-8 or NLS bytes).
 * udfLen	Length of udfName in bytes.
 * fidName	Raw on-disk name; only used as input to the CRC.
 * fidNameLen	Length of fidName in bytes.
 *
 * Runs of '/' and NUL characters collapse into a single ILLEGAL_CHAR_MARK.
 * When the name was altered, is too long, or is "." or "..", it is made
 * unique by appending CRC_MARK and the CRC of fidName as four upper-case hex
 * digits, followed by the original extension (up to EXT_SIZE characters) if
 * there was one.
 *
 * Returns the number of bytes written to newName.
 */
static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen,
				  uint8_t *fidName, int fidNameLen)
{
	static const uint8_t hex_digits[] = "0123456789ABCDEF";
	uint8_t ext_buf[EXT_SIZE];
	uint8_t ch;
	unsigned short crc;
	int src = 0;		/* read position in udfName */
	int out = 0;		/* write position in newName */
	int ext_src = 0;	/* position of the extension dot in udfName */
	int ext_out = 0;	/* position of the extension dot in newName */
	int ext_len = 0;	/* number of extension characters saved */
	int has_ext = 0;
	int need_crc = 0;
	int k;

	if (udfName[0] == '.' &&
	    (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
		/* "." and ".." are copied verbatim and always get a CRC. */
		memcpy(newName, udfName, udfLen);
		out = udfLen;
		need_crc = 1;
	} else {
		while (src < udfLen) {
			ch = udfName[src];

			if (ch == '/' || ch == 0) {
				ch = ILLEGAL_CHAR_MARK;
				need_crc = 1;
				/* Swallow the rest of a run of illegal characters. */
				while (src + 1 < udfLen &&
				       (udfName[src + 1] == '/' ||
					udfName[src + 1] == 0))
					src++;
			}

			/* A dot close enough to the end starts the extension. */
			if (ch == EXT_MARK && (udfLen - src - 1) <= EXT_SIZE) {
				has_ext = (udfLen != src + 1);
				if (has_ext) {
					ext_src = src;
					ext_out = out;
				}
			}

			if (out < 256)
				newName[out++] = ch;
			else
				need_crc = 1;

			src++;
		}
	}

	if (!need_crc)
		return out;

	if (has_ext) {
		int max_base;

		/* Save the sanitised extension so it can follow the CRC. */
		k = 0;
		while (k < EXT_SIZE && ext_src + k + 1 < udfLen) {
			ch = udfName[ext_src + k + 1];

			if (ch == '/' || ch == 0) {
				ch = ILLEGAL_CHAR_MARK;
				while (ext_src + k + 2 < udfLen &&
				       k + 1 < EXT_SIZE &&
				       (udfName[ext_src + k + 2] == '/' ||
					udfName[ext_src + k + 2] == 0))
					k++;
			}

			ext_buf[ext_len++] = ch;
			k++;
		}

		/* Cut the base name at the dot, or earlier if it is too long. */
		max_base = 250 - ext_len;
		out = (out > max_base) ? max_base : ext_out;
	} else if (out > 250) {
		out = 250;
	}

	crc = udf_crc(fidName, fidNameLen, 0);

	newName[out++] = CRC_MARK;
	newName[out++] = hex_digits[(crc >> 12) & 0xf];
	newName[out++] = hex_digits[(crc >> 8) & 0xf];
	newName[out++] = hex_digits[(crc >> 4) & 0xf];
	newName[out++] = hex_digits[crc & 0xf];

	if (has_ext) {
		newName[out++] = EXT_MARK;
		memcpy(newName + out, ext_buf, ext_len);
		out += ext_len;
	}

	return out;
}