Add as a feature case-insensitive directories (the casefold feature)
using Unicode 12.1. Also, the usual largish number of cleanups and bug fixes. -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAlzSDXYACgkQ8vlZVpUN gaPQ3Qf/Sh0NqHbmbdW1J52oh4GqUKUhUezEac40yZcZBU4p3PFPZ5Ji83kAQV5r JgHx5YW4AYHs59UkRVq/er7wKEFJxAE8weUq90WYLE1Z/EjojDE8JHSsK00obKNN rJOm5qX/gy5C7PVUSWkSuAZQPMSGrmH5U5ie0nrI7bFWnr7T5CQkWarspUq53JBG RP910mPTT/otE7iTgUzjDeAMKfaSdtRhcJn/uTQ+2YZ1BJsHBHJHDnfQtd3CttHs ncTUaqPnhWqOKJV2Y9TDyAWYeSbn30cF0dpBM38N4u6YwaUwrBp/kPI0tes97SgY lZM4VEAW6iF+18uLSyv7D0Mpba9qQg== =9R7U -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "Add as a feature case-insensitive directories (the casefold feature) using Unicode 12.1. Also, the usual largish number of cleanups and bug fixes" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits) ext4: export /sys/fs/ext4/feature/casefold if Unicode support is present ext4: fix ext4_show_options for file systems w/o journal unicode: refactor the rule for regenerating utf8data.h docs: ext4.rst: document case-insensitive directories ext4: Support case-insensitive file name lookups ext4: include charset encoding information in the superblock MAINTAINERS: add Unicode subsystem entry unicode: update unicode database unicode version 12.1.0 unicode: introduce test module for normalized utf8 implementation unicode: implement higher level API for string handling unicode: reduce the size of utf8data[] unicode: introduce code for UTF-8 normalization unicode: introduce UTF-8 character database ext4: actually request zeroing of inode table after grow ext4: cond_resched in work-heavy group loops ext4: fix use-after-free race with debug_want_extra_isize ext4: avoid drop reference to iloc.bh twice ext4: ignore e_value_offs for xattrs with value-in-ea-inode ext4: protect journal inode's blocks using block_validity ext4: use BUG() instead of BUG_ON(1) ...
This commit is contained in:
commit
5abe37954e
@ -91,10 +91,48 @@ Currently Available
|
||||
* large block (up to pagesize) support
|
||||
* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
|
||||
the ordering)
|
||||
* Case-insensitive file name lookups
|
||||
|
||||
[1] Filesystems with a block size of 1k may see a limit imposed by the
|
||||
directory hash tree having a maximum depth of two.
|
||||
|
||||
case-insensitive file name lookups
|
||||
======================================================
|
||||
|
||||
The case-insensitive file name lookup feature is supported on a
|
||||
per-directory basis, allowing the user to mix case-insensitive and
|
||||
case-sensitive directories in the same filesystem. It is enabled by
|
||||
flipping the +F inode attribute of an empty directory. The
|
||||
case-insensitive string match operation is only defined when we know how
|
||||
text is encoded in a byte sequence. For that reason, in order to enable
|
||||
case-insensitive directories, the filesystem must have the
|
||||
casefold feature, which stores the filesystem-wide encoding
|
||||
model used. By default, the charset adopted is the latest version of
|
||||
Unicode (12.1.0, by the time of this writing), encoded in the UTF-8
|
||||
form. The comparison algorithm is implemented by normalizing the
|
||||
strings to the Canonical decomposition form, as defined by Unicode,
|
||||
followed by a byte per byte comparison.
|
||||
|
||||
The case-awareness is name-preserving on the disk, meaning that the file
|
||||
name provided by userspace is a byte-per-byte match to what is actually
|
||||
written on the disk. The Unicode normalization format used by the
|
||||
kernel is thus an internal representation, and not exposed to the
|
||||
userspace nor to the disk, with the important exception of disk hashes,
|
||||
used on large case-insensitive directories with DX feature. On DX
|
||||
directories, the hash must be calculated using the casefolded version of
|
||||
the filename, meaning that the normalization format used actually has an
|
||||
impact on where the directory entry is stored.
|
||||
|
||||
When we change from viewing filenames as opaque byte sequences to seeing
|
||||
them as encoded strings we need to address what happens when a program
|
||||
tries to create a file with an invalid name. The Unicode subsystem
|
||||
within the kernel leaves the decision of what to do in this case to the
|
||||
filesystem, which selects its preferred behavior by enabling/disabling
|
||||
the strict mode. When Ext4 encounters one of those strings and the
|
||||
filesystem did not require strict mode, it falls back to considering the
|
||||
entire string as an opaque byte sequence, which still allows the user to
|
||||
operate on that file, but the case-insensitive lookups won't work.
|
||||
|
||||
Options
|
||||
=======
|
||||
|
||||
|
@ -176,6 +176,7 @@ mkprep
|
||||
mkregtable
|
||||
mktables
|
||||
mktree
|
||||
mkutf8data
|
||||
modpost
|
||||
modules.builtin
|
||||
modules.order
|
||||
@ -254,6 +255,7 @@ vsyscall_32.lds
|
||||
wanxlfw.inc
|
||||
uImage
|
||||
unifdef
|
||||
utf8data.h
|
||||
wakeup.bin
|
||||
wakeup.elf
|
||||
wakeup.lds
|
||||
|
@ -15984,6 +15984,12 @@ F: drivers/uwb/
|
||||
F: include/linux/uwb.h
|
||||
F: include/linux/uwb/
|
||||
|
||||
UNICODE SUBSYSTEM:
|
||||
M: Gabriel Krisman Bertazi <krisman@collabora.com>
|
||||
L: linux-fsdevel@vger.kernel.org
|
||||
S: Supported
|
||||
F: fs/unicode/
|
||||
|
||||
UNICORE32 ARCHITECTURE:
|
||||
M: Guan Xuetao <gxt@pku.edu.cn>
|
||||
W: http://mprc.pku.edu.cn/~guanxuetao/linux
|
||||
|
@ -317,5 +317,6 @@ endif # NETWORK_FILESYSTEMS
|
||||
|
||||
source "fs/nls/Kconfig"
|
||||
source "fs/dlm/Kconfig"
|
||||
source "fs/unicode/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
@ -92,6 +92,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/
|
||||
obj-$(CONFIG_NFSD) += nfsd/
|
||||
obj-$(CONFIG_LOCKD) += lockd/
|
||||
obj-$(CONFIG_NLS) += nls/
|
||||
obj-$(CONFIG_UNICODE) += unicode/
|
||||
obj-$(CONFIG_SYSV_FS) += sysv/
|
||||
obj-$(CONFIG_CIFS) += cifs/
|
||||
obj-$(CONFIG_HPFS_FS) += hpfs/
|
||||
|
@ -137,6 +137,48 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
/*
 * Register every mapped block of reserved inode @ino as a system zone.
 *
 * Walks the inode's logical blocks (i_size rounded up to whole blocks),
 * maps each run with ext4_map_blocks() and hands the resulting physical
 * range to add_system_zone().
 *
 * Returns 0 on success, -EINVAL if @ino is outside
 * [EXT4_ROOT_INO, s_inodes_count], -EFSCORRUPTED if a mapped range fails
 * ext4_data_block_valid(), or the negative error returned by ext4_iget(),
 * ext4_map_blocks() or add_system_zone().
 */
static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
{
	struct inode *inode;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_map_blocks map;
	u32 i = 0, num;
	/*
	 * These must be signed: both carry negative errno values, and the
	 * "< 0" checks below can never fire on an unsigned type.
	 */
	int err = 0, n;

	if ((ino < EXT4_ROOT_INO) ||
	    (ino > le32_to_cpu(sbi->s_es->s_inodes_count)))
		return -EINVAL;
	inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
	while (i < num) {
		map.m_lblk = i;
		map.m_len = num - i;
		n = ext4_map_blocks(NULL, inode, &map, 0);
		if (n < 0) {
			err = n;
			break;
		}
		if (n == 0) {
			/* Nothing mapped at this logical block; try the next one. */
			i++;
		} else {
			if (!ext4_data_block_valid(sbi, map.m_pblk, n)) {
				ext4_error(sb, "blocks %llu-%llu from inode %u "
					   "overlap system zone", map.m_pblk,
					   map.m_pblk + map.m_len - 1, ino);
				err = -EFSCORRUPTED;
				break;
			}
			err = add_system_zone(sbi, map.m_pblk, n);
			if (err < 0)
				break;
			i += n;
		}
	}
	iput(inode);
	return err;
}
|
||||
|
||||
int ext4_setup_system_zone(struct super_block *sb)
|
||||
{
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
@ -155,6 +197,7 @@ int ext4_setup_system_zone(struct super_block *sb)
|
||||
return 0;
|
||||
|
||||
for (i=0; i < ngroups; i++) {
|
||||
cond_resched();
|
||||
if (ext4_bg_has_super(sb, i) &&
|
||||
((i < 5) || ((i % flex_size) == 0)))
|
||||
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
|
||||
@ -171,6 +214,12 @@ int ext4_setup_system_zone(struct super_block *sb)
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) {
|
||||
ret = ext4_protect_reserved_inode(sb,
|
||||
le32_to_cpu(sbi->s_es->s_journal_inum));
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (test_opt(sb, DEBUG))
|
||||
debug_print_tree(sbi);
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/unicode.h>
|
||||
#include "ext4.h"
|
||||
#include "xattr.h"
|
||||
|
||||
@ -660,3 +661,50 @@ const struct file_operations ext4_dir_operations = {
|
||||
.open = ext4_dir_open,
|
||||
.release = ext4_release_dir,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
|
||||
const char *str, const struct qstr *name)
|
||||
{
|
||||
struct qstr qstr = {.name = str, .len = len };
|
||||
|
||||
if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) {
|
||||
if (len != name->len)
|
||||
return -1;
|
||||
return !memcmp(str, name, len);
|
||||
}
|
||||
|
||||
return ext4_ci_compare(dentry->d_parent->d_inode, name, &qstr);
|
||||
}
|
||||
|
||||
static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
|
||||
{
|
||||
const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
|
||||
const struct unicode_map *um = sbi->s_encoding;
|
||||
unsigned char *norm;
|
||||
int len, ret = 0;
|
||||
|
||||
if (!IS_CASEFOLDED(dentry->d_inode))
|
||||
return 0;
|
||||
|
||||
norm = kmalloc(PATH_MAX, GFP_ATOMIC);
|
||||
if (!norm)
|
||||
return -ENOMEM;
|
||||
|
||||
len = utf8_casefold(um, str, norm, PATH_MAX);
|
||||
if (len < 0) {
|
||||
if (ext4_has_strict_mode(sbi))
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
str->hash = full_name_hash(dentry, norm, len);
|
||||
out:
|
||||
kfree(norm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct dentry_operations ext4_dentry_ops = {
|
||||
.d_hash = ext4_d_hash,
|
||||
.d_compare = ext4_d_compare,
|
||||
};
|
||||
#endif
|
||||
|
@ -399,10 +399,11 @@ struct flex_groups {
|
||||
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
|
||||
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
|
||||
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
|
||||
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded file */
|
||||
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
|
||||
|
||||
#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
|
||||
#define EXT4_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
|
||||
#define EXT4_FL_USER_VISIBLE 0x704BDFFF /* User visible flags */
|
||||
#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */
|
||||
|
||||
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
|
||||
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
|
||||
@ -417,10 +418,10 @@ struct flex_groups {
|
||||
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
|
||||
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
|
||||
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
|
||||
EXT4_PROJINHERIT_FL)
|
||||
EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL)
|
||||
|
||||
/* Flags that are appropriate for regular files (all but dir-specific ones). */
|
||||
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
|
||||
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL))
|
||||
|
||||
/* Flags that are appropriate for non-directories/regular files. */
|
||||
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
|
||||
@ -1313,7 +1314,9 @@ struct ext4_super_block {
|
||||
__u8 s_first_error_time_hi;
|
||||
__u8 s_last_error_time_hi;
|
||||
__u8 s_pad[2];
|
||||
__le32 s_reserved[96]; /* Padding to the end of the block */
|
||||
__le16 s_encoding; /* Filename charset encoding */
|
||||
__le16 s_encoding_flags; /* Filename charset encoding flags */
|
||||
__le32 s_reserved[95]; /* Padding to the end of the block */
|
||||
__le32 s_checksum; /* crc32c(superblock) */
|
||||
};
|
||||
|
||||
@ -1338,6 +1341,16 @@ struct ext4_super_block {
|
||||
/* Number of quota types we support */
|
||||
#define EXT4_MAXQUOTAS 3
|
||||
|
||||
#define EXT4_ENC_UTF8_12_1 1
|
||||
|
||||
/*
 * Flags for ext4_sb_info.s_encoding_flags.
 */
#define EXT4_ENC_STRICT_MODE_FL			(1 << 0)

/*
 * Evaluates to non-zero when EXT4_ENC_STRICT_MODE_FL is set in
 * @sbi->s_encoding_flags.  The argument is parenthesized so that any
 * pointer-valued expression (e.g. "&info" or a conditional) expands
 * correctly.
 */
#define ext4_has_strict_mode(sbi) \
	((sbi)->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
|
||||
|
||||
/*
|
||||
* fourth extended-fs super-block data in memory
|
||||
*/
|
||||
@ -1387,6 +1400,10 @@ struct ext4_sb_info {
|
||||
struct kobject s_kobj;
|
||||
struct completion s_kobj_unregister;
|
||||
struct super_block *s_sb;
|
||||
#ifdef CONFIG_UNICODE
|
||||
struct unicode_map *s_encoding;
|
||||
__u16 s_encoding_flags;
|
||||
#endif
|
||||
|
||||
/* Journaling */
|
||||
struct journal_s *s_journal;
|
||||
@ -1592,9 +1609,6 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
|
||||
#define EXT4_SB(sb) (sb)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Returns true if the inode is inode is encrypted
|
||||
*/
|
||||
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
|
||||
|
||||
/*
|
||||
@ -1663,6 +1677,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
|
||||
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
|
||||
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
|
||||
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
|
||||
#define EXT4_FEATURE_INCOMPAT_CASEFOLD 0x20000
|
||||
|
||||
extern void ext4_update_dynamic_rev(struct super_block *sb);
|
||||
|
||||
@ -1756,6 +1771,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED)
|
||||
EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR)
|
||||
EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA)
|
||||
EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
|
||||
EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
|
||||
|
||||
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
|
||||
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
|
||||
@ -1783,6 +1799,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
|
||||
EXT4_FEATURE_INCOMPAT_MMP | \
|
||||
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
|
||||
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
|
||||
EXT4_FEATURE_INCOMPAT_CASEFOLD | \
|
||||
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
|
||||
EXT4_FEATURE_INCOMPAT_LARGEDIR)
|
||||
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
|
||||
@ -2376,8 +2393,8 @@ extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
|
||||
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
|
||||
|
||||
/* hash.c */
|
||||
extern int ext4fs_dirhash(const char *name, int len, struct
|
||||
dx_hash_info *hinfo);
|
||||
extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
|
||||
struct dx_hash_info *hinfo);
|
||||
|
||||
/* ialloc.c */
|
||||
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
|
||||
@ -2973,6 +2990,10 @@ static inline void ext4_unlock_group(struct super_block *sb,
|
||||
/* dir.c */
|
||||
extern const struct file_operations ext4_dir_operations;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
extern const struct dentry_operations ext4_dentry_ops;
|
||||
#endif
|
||||
|
||||
/* file.c */
|
||||
extern const struct inode_operations ext4_file_inode_operations;
|
||||
extern const struct file_operations ext4_file_operations;
|
||||
@ -3065,6 +3086,10 @@ extern void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
|
||||
extern int ext4_handle_dirty_dirent_node(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *bh);
|
||||
extern int ext4_ci_compare(const struct inode *parent,
|
||||
const struct qstr *name,
|
||||
const struct qstr *entry);
|
||||
|
||||
#define S_SHIFT 12
|
||||
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
|
||||
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
|
||||
|
@ -711,7 +711,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
|
||||
* We don't need to check unwritten extent because
|
||||
* indirect-based file doesn't have it.
|
||||
*/
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
}
|
||||
} else if (retval == 0) {
|
||||
if (ext4_es_is_written(es)) {
|
||||
@ -780,7 +780,7 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
|
||||
}
|
||||
p = &(*p)->rb_right;
|
||||
} else {
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/unicode.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/bitops.h>
|
||||
#include "ext4.h"
|
||||
@ -196,7 +197,8 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
|
||||
* represented, and whether or not the returned hash is 32 bits or 64
|
||||
* bits. 32 bit hashes will return 0 for the minor hash.
|
||||
*/
|
||||
int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
|
||||
static int __ext4fs_dirhash(const char *name, int len,
|
||||
struct dx_hash_info *hinfo)
|
||||
{
|
||||
__u32 hash;
|
||||
__u32 minor_hash = 0;
|
||||
@ -268,3 +270,33 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
|
||||
hinfo->minor_hash = minor_hash;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Compute the directory hash of @name (@len bytes) for directory @dir,
 * storing the result through @hinfo.
 *
 * With CONFIG_UNICODE, a non-empty name in a casefolded directory is
 * first casefolded into a temporary PATH_MAX buffer and the folded bytes
 * are hashed.  If casefolding fails, or the directory is not casefolded,
 * the raw name bytes are hashed instead.
 *
 * Returns the result of __ext4fs_dirhash(), or -ENOMEM if the temporary
 * buffer cannot be allocated.
 */
int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
		   struct dx_hash_info *hinfo)
{
#ifdef CONFIG_UNICODE
	const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
	struct qstr orig_name = {.name = name, .len = len };
	unsigned char *folded;
	int folded_len, ret;

	if (len && IS_CASEFOLDED(dir)) {
		folded = kzalloc(PATH_MAX, GFP_KERNEL);
		if (!folded)
			return -ENOMEM;

		folded_len = utf8_casefold(um, &orig_name, folded, PATH_MAX);
		if (folded_len >= 0) {
			ret = __ext4fs_dirhash(folded, folded_len, hinfo);
			kfree(folded);
			return ret;
		}

		/* Casefolding failed: fall through and hash the raw bytes. */
		kfree(folded);
	}
#endif
	return __ext4fs_dirhash(name, len, hinfo);
}
|
||||
|
@ -455,7 +455,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
if (qstr) {
|
||||
hinfo.hash_version = DX_HASH_HALF_MD4;
|
||||
hinfo.seed = sbi->s_hash_seed;
|
||||
ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
|
||||
ext4fs_dirhash(parent, qstr->name, qstr->len, &hinfo);
|
||||
grp = hinfo.hash;
|
||||
} else
|
||||
grp = prandom_u32();
|
||||
|
@ -1407,7 +1407,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
|
||||
}
|
||||
}
|
||||
|
||||
ext4fs_dirhash(de->name, de->name_len, hinfo);
|
||||
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
|
||||
if ((hinfo->hash < start_hash) ||
|
||||
((hinfo->hash == start_hash) &&
|
||||
(hinfo->minor_hash < start_minor_hash)))
|
||||
|
@ -399,6 +399,10 @@ static int __check_block_validity(struct inode *inode, const char *func,
|
||||
unsigned int line,
|
||||
struct ext4_map_blocks *map)
|
||||
{
|
||||
if (ext4_has_feature_journal(inode->i_sb) &&
|
||||
(inode->i_ino ==
|
||||
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
|
||||
return 0;
|
||||
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
|
||||
map->m_len)) {
|
||||
ext4_error_inode(inode, func, line, map->m_pblk,
|
||||
@ -541,7 +545,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||
map->m_len = retval;
|
||||
retval = 0;
|
||||
} else {
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
}
|
||||
#ifdef ES_AGGRESSIVE_TEST
|
||||
ext4_map_blocks_es_recheck(handle, inode, map,
|
||||
@ -1876,7 +1880,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
|
||||
else if (ext4_es_is_unwritten(&es))
|
||||
map->m_flags |= EXT4_MAP_UNWRITTEN;
|
||||
else
|
||||
BUG_ON(1);
|
||||
BUG();
|
||||
|
||||
#ifdef ES_AGGRESSIVE_TEST
|
||||
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
|
||||
@ -4738,9 +4742,11 @@ void ext4_set_inode_flags(struct inode *inode)
|
||||
new_fl |= S_DAX;
|
||||
if (flags & EXT4_ENCRYPT_FL)
|
||||
new_fl |= S_ENCRYPTED;
|
||||
if (flags & EXT4_CASEFOLD_FL)
|
||||
new_fl |= S_CASEFOLD;
|
||||
inode_set_flags(inode, new_fl,
|
||||
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
|
||||
S_ENCRYPTED);
|
||||
S_ENCRYPTED|S_CASEFOLD);
|
||||
}
|
||||
|
||||
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
|
||||
|
@ -278,6 +278,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
|
||||
struct ext4_iloc iloc;
|
||||
unsigned int oldflags, mask, i;
|
||||
unsigned int jflag;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
/* Is it quota file? Do not allow user to mess with it */
|
||||
if (ext4_is_quota_file(inode))
|
||||
@ -322,6 +323,23 @@ static int ext4_ioctl_setflags(struct inode *inode,
|
||||
goto flags_out;
|
||||
}
|
||||
|
||||
if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) {
|
||||
if (!ext4_has_feature_casefold(sb)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto flags_out;
|
||||
}
|
||||
|
||||
if (!S_ISDIR(inode->i_mode)) {
|
||||
err = -ENOTDIR;
|
||||
goto flags_out;
|
||||
}
|
||||
|
||||
if (!ext4_empty_dir(inode)) {
|
||||
err = -ENOTEMPTY;
|
||||
goto flags_out;
|
||||
}
|
||||
}
|
||||
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
|
||||
if (IS_ERR(handle)) {
|
||||
err = PTR_ERR(handle);
|
||||
@ -978,7 +996,7 @@ mext_out:
|
||||
if (err == 0)
|
||||
err = err2;
|
||||
mnt_drop_write_file(filp);
|
||||
if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
|
||||
if (!err && (o_group < EXT4_SB(sb)->s_groups_count) &&
|
||||
ext4_has_group_desc_csum(sb) &&
|
||||
test_opt(sb, INIT_INODE_TABLE))
|
||||
err = ext4_register_li_request(sb, o_group);
|
||||
|
@ -1539,7 +1539,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
|
||||
ex->fe_len += 1 << order;
|
||||
}
|
||||
|
||||
if (ex->fe_start + ex->fe_len > (1 << (e4b->bd_blkbits + 3))) {
|
||||
if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
|
||||
/* Should never happen! (but apparently sometimes does?!?) */
|
||||
WARN_ON(1);
|
||||
ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
|
||||
@ -2490,6 +2490,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
|
||||
sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
|
||||
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
cond_resched();
|
||||
desc = ext4_get_group_desc(sb, i, NULL);
|
||||
if (desc == NULL) {
|
||||
ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
|
||||
@ -2705,6 +2706,7 @@ int ext4_mb_release(struct super_block *sb)
|
||||
|
||||
if (sbi->s_group_info) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
cond_resched();
|
||||
grinfo = ext4_get_group_info(sb, i);
|
||||
#ifdef DOUBLE_CHECK
|
||||
kfree(grinfo->bb_bitmap);
|
||||
|
107
fs/ext4/namei.c
107
fs/ext4/namei.c
@ -35,6 +35,7 @@
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/unicode.h>
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
|
||||
@ -629,7 +630,7 @@ static struct stats dx_show_leaf(struct inode *dir,
|
||||
}
|
||||
if (!fscrypt_has_encryption_key(dir)) {
|
||||
/* Directory is not encrypted */
|
||||
ext4fs_dirhash(de->name,
|
||||
ext4fs_dirhash(dir, de->name,
|
||||
de->name_len, &h);
|
||||
printk("%*.s:(U)%x.%u ", len,
|
||||
name, h.hash,
|
||||
@ -662,8 +663,8 @@ static struct stats dx_show_leaf(struct inode *dir,
|
||||
name = fname_crypto_str.name;
|
||||
len = fname_crypto_str.len;
|
||||
}
|
||||
ext4fs_dirhash(de->name, de->name_len,
|
||||
&h);
|
||||
ext4fs_dirhash(dir, de->name,
|
||||
de->name_len, &h);
|
||||
printk("%*.s:(E)%x.%u ", len, name,
|
||||
h.hash, (unsigned) ((char *) de
|
||||
- base));
|
||||
@ -673,7 +674,7 @@ static struct stats dx_show_leaf(struct inode *dir,
|
||||
#else
|
||||
int len = de->name_len;
|
||||
char *name = de->name;
|
||||
ext4fs_dirhash(de->name, de->name_len, &h);
|
||||
ext4fs_dirhash(dir, de->name, de->name_len, &h);
|
||||
printk("%*.s:%x.%u ", len, name, h.hash,
|
||||
(unsigned) ((char *) de - base));
|
||||
#endif
|
||||
@ -762,7 +763,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
|
||||
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
|
||||
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
|
||||
if (fname && fname_name(fname))
|
||||
ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
|
||||
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
|
||||
hash = hinfo->hash;
|
||||
|
||||
if (root->info.unused_flags & 1) {
|
||||
@ -1008,7 +1009,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
|
||||
/* silently ignore the rest of the block */
|
||||
break;
|
||||
}
|
||||
ext4fs_dirhash(de->name, de->name_len, hinfo);
|
||||
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
|
||||
if ((hinfo->hash < start_hash) ||
|
||||
((hinfo->hash == start_hash) &&
|
||||
(hinfo->minor_hash < start_minor_hash)))
|
||||
@ -1197,7 +1198,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
|
||||
|
||||
while ((char *) de < base + blocksize) {
|
||||
if (de->name_len && de->inode) {
|
||||
ext4fs_dirhash(de->name, de->name_len, &h);
|
||||
ext4fs_dirhash(dir, de->name, de->name_len, &h);
|
||||
map_tail--;
|
||||
map_tail->hash = h.hash;
|
||||
map_tail->offs = ((char *) de - base)>>2;
|
||||
@ -1252,15 +1253,52 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
|
||||
dx_set_count(entries, count + 1);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
/*
|
||||
* Test whether a case-insensitive directory entry matches the filename
|
||||
* being searched for.
|
||||
*
|
||||
* Returns: 0 if the directory entry matches, more than 0 if it
|
||||
* doesn't match or less than zero on error.
|
||||
*/
|
||||
int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
|
||||
const struct qstr *entry)
|
||||
{
|
||||
const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
|
||||
const struct unicode_map *um = sbi->s_encoding;
|
||||
int ret;
|
||||
|
||||
ret = utf8_strncasecmp(um, name, entry);
|
||||
if (ret < 0) {
|
||||
/* Handle invalid character sequence as either an error
|
||||
* or as an opaque byte sequence.
|
||||
*/
|
||||
if (ext4_has_strict_mode(sbi))
|
||||
return -EINVAL;
|
||||
|
||||
if (name->len != entry->len)
|
||||
return 1;
|
||||
|
||||
return !!memcmp(name->name, entry->name, name->len);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Test whether a directory entry matches the filename being searched for.
|
||||
*
|
||||
* Return: %true if the directory entry matches, otherwise %false.
|
||||
*/
|
||||
static inline bool ext4_match(const struct ext4_filename *fname,
|
||||
static inline bool ext4_match(const struct inode *parent,
|
||||
const struct ext4_filename *fname,
|
||||
const struct ext4_dir_entry_2 *de)
|
||||
{
|
||||
struct fscrypt_name f;
|
||||
#ifdef CONFIG_UNICODE
|
||||
const struct qstr entry = {.name = de->name, .len = de->name_len};
|
||||
#endif
|
||||
|
||||
if (!de->inode)
|
||||
return false;
|
||||
@ -1270,6 +1308,12 @@ static inline bool ext4_match(const struct ext4_filename *fname,
|
||||
#ifdef CONFIG_FS_ENCRYPTION
|
||||
f.crypto_buf = fname->crypto_buf;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent))
|
||||
return (ext4_ci_compare(parent, fname->usr_fname, &entry) == 0);
|
||||
#endif
|
||||
|
||||
return fscrypt_match_name(&f, de->name, de->name_len);
|
||||
}
|
||||
|
||||
@ -1290,7 +1334,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
|
||||
/* this code is executed quadratically often */
|
||||
/* do minimal checking `by hand' */
|
||||
if ((char *) de + de->name_len <= dlimit &&
|
||||
ext4_match(fname, de)) {
|
||||
ext4_match(dir, fname, de)) {
|
||||
/* found a match - just to be sure, do
|
||||
* a full check */
|
||||
if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
|
||||
@ -1588,6 +1632,17 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
|
||||
return ERR_PTR(-EPERM);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (!inode && IS_CASEFOLDED(dir)) {
|
||||
/* Eventually we want to call d_add_ci(dentry, NULL)
|
||||
* for negative dentries in the encoding case as
|
||||
* well. For now, prevent the negative dentry
|
||||
* from being cached.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
|
||||
@ -1798,7 +1853,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
|
||||
if (ext4_check_dir_entry(dir, NULL, de, bh,
|
||||
buf, buf_size, offset))
|
||||
return -EFSCORRUPTED;
|
||||
if (ext4_match(fname, de))
|
||||
if (ext4_match(dir, fname, de))
|
||||
return -EEXIST;
|
||||
nlen = EXT4_DIR_REC_LEN(de->name_len);
|
||||
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
|
||||
@ -1983,7 +2038,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
|
||||
if (fname->hinfo.hash_version <= DX_HASH_TEA)
|
||||
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
|
||||
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
|
||||
ext4fs_dirhash(fname_name(fname), fname_len(fname), &fname->hinfo);
|
||||
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), &fname->hinfo);
|
||||
|
||||
memset(frames, 0, sizeof(frames));
|
||||
frame = frames;
|
||||
@ -2036,6 +2091,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
||||
struct ext4_dir_entry_2 *de;
|
||||
struct ext4_dir_entry_tail *t;
|
||||
struct super_block *sb;
|
||||
struct ext4_sb_info *sbi;
|
||||
struct ext4_filename fname;
|
||||
int retval;
|
||||
int dx_fallback=0;
|
||||
@ -2047,10 +2103,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
||||
csum_size = sizeof(struct ext4_dir_entry_tail);
|
||||
|
||||
sb = dir->i_sb;
|
||||
sbi = EXT4_SB(sb);
|
||||
blocksize = sb->s_blocksize;
|
||||
if (!dentry->d_name.len)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
|
||||
utf8_validate(sbi->s_encoding, &dentry->d_name))
|
||||
return -EINVAL;
|
||||
#endif
|
||||
|
||||
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
|
||||
if (retval)
|
||||
return retval;
|
||||
@ -2975,6 +3038,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
ext4_update_dx_flag(dir);
|
||||
ext4_mark_inode_dirty(handle, dir);
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
/* VFS negative dentries are incompatible with Encoding and
|
||||
* Case-insensitiveness. Eventually we'll want to avoid
|
||||
* invalidating the dentries here, alongside with returning the
|
||||
* negative dentries at ext4_lookup(), when it is better
|
||||
* supported by the VFS for the CI case.
|
||||
*/
|
||||
if (IS_CASEFOLDED(dir))
|
||||
d_invalidate(dentry);
|
||||
#endif
|
||||
|
||||
end_rmdir:
|
||||
brelse(bh);
|
||||
if (handle)
|
||||
@ -3044,6 +3118,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
|
||||
inode->i_ctime = current_time(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
/* VFS negative dentries are incompatible with Encoding and
|
||||
* Case-insensitiveness. Eventually we'll want to avoid
|
||||
* invalidating the dentries here, alongside with returning the
|
||||
* negative dentries at ext4_lookup(), when it is better
|
||||
* supported by the VFS for the CI case.
|
||||
*/
|
||||
if (IS_CASEFOLDED(dir))
|
||||
d_invalidate(dentry);
|
||||
#endif
|
||||
|
||||
end_unlink:
|
||||
brelse(bh);
|
||||
if (handle)
|
||||
|
@ -126,9 +126,10 @@ int ext4_mpage_readpages(struct address_space *mapping,
|
||||
int fully_mapped = 1;
|
||||
unsigned first_hole = blocks_per_page;
|
||||
|
||||
prefetchw(&page->flags);
|
||||
if (pages) {
|
||||
page = lru_to_page(pages);
|
||||
|
||||
prefetchw(&page->flags);
|
||||
list_del(&page->lru);
|
||||
if (add_to_page_cache_lru(page, mapping, page->index,
|
||||
readahead_gfp_mask(mapping)))
|
||||
|
@ -874,6 +874,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
|
||||
if (unlikely(err)) {
|
||||
ext4_std_error(sb, err);
|
||||
iloc.bh = NULL;
|
||||
goto errout;
|
||||
}
|
||||
brelse(dind);
|
||||
|
151
fs/ext4/super.c
151
fs/ext4/super.c
@ -42,6 +42,7 @@
|
||||
#include <linux/cleancache.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/unicode.h>
|
||||
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/freezer.h>
|
||||
@ -1054,6 +1055,9 @@ static void ext4_put_super(struct super_block *sb)
|
||||
crypto_free_shash(sbi->s_chksum_driver);
|
||||
kfree(sbi->s_blockgroup_lock);
|
||||
fs_put_dax(sbi->s_daxdev);
|
||||
#ifdef CONFIG_UNICODE
|
||||
utf8_unload(sbi->s_encoding);
|
||||
#endif
|
||||
kfree(sbi);
|
||||
}
|
||||
|
||||
@ -1749,6 +1753,36 @@ static const struct mount_opts {
|
||||
{Opt_err, 0, 0}
|
||||
};
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
static const struct ext4_sb_encodings {
|
||||
__u16 magic;
|
||||
char *name;
|
||||
char *version;
|
||||
} ext4_sb_encoding_map[] = {
|
||||
{EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
|
||||
};
|
||||
|
||||
static int ext4_sb_read_encoding(const struct ext4_super_block *es,
|
||||
const struct ext4_sb_encodings **encoding,
|
||||
__u16 *flags)
|
||||
{
|
||||
__u16 magic = le16_to_cpu(es->s_encoding);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
|
||||
if (magic == ext4_sb_encoding_map[i].magic)
|
||||
break;
|
||||
|
||||
if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
|
||||
return -EINVAL;
|
||||
|
||||
*encoding = &ext4_sb_encoding_map[i];
|
||||
*flags = le16_to_cpu(es->s_encoding_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
|
||||
substring_t *args, unsigned long *journal_devnum,
|
||||
unsigned int *journal_ioprio, int is_remount)
|
||||
@ -2875,6 +2909,15 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_UNICODE
|
||||
if (ext4_has_feature_casefold(sb)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Filesystem with casefold feature cannot be "
|
||||
"mounted without CONFIG_UNICODE");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (readonly)
|
||||
return 1;
|
||||
|
||||
@ -3496,6 +3539,37 @@ int ext4_calculate_overhead(struct super_block *sb)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ext4_clamp_want_extra_isize(struct super_block *sb)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
|
||||
/* determine the minimum size of new large inodes, if present */
|
||||
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
|
||||
sbi->s_want_extra_isize == 0) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
if (ext4_has_feature_extra_isize(sb)) {
|
||||
if (sbi->s_want_extra_isize <
|
||||
le16_to_cpu(es->s_want_extra_isize))
|
||||
sbi->s_want_extra_isize =
|
||||
le16_to_cpu(es->s_want_extra_isize);
|
||||
if (sbi->s_want_extra_isize <
|
||||
le16_to_cpu(es->s_min_extra_isize))
|
||||
sbi->s_want_extra_isize =
|
||||
le16_to_cpu(es->s_min_extra_isize);
|
||||
}
|
||||
}
|
||||
/* Check if enough inode space is available */
|
||||
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
|
||||
sbi->s_inode_size) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
ext4_msg(sb, KERN_INFO,
|
||||
"required extra inode space not available");
|
||||
}
|
||||
}
|
||||
|
||||
static void ext4_set_resv_clusters(struct super_block *sb)
|
||||
{
|
||||
ext4_fsblk_t resv_clusters;
|
||||
@ -3722,6 +3796,43 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
&journal_ioprio, 0))
|
||||
goto failed_mount;
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) {
|
||||
const struct ext4_sb_encodings *encoding_info;
|
||||
struct unicode_map *encoding;
|
||||
__u16 encoding_flags;
|
||||
|
||||
if (ext4_has_feature_encrypt(sb)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Can't mount with encoding and encryption");
|
||||
goto failed_mount;
|
||||
}
|
||||
|
||||
if (ext4_sb_read_encoding(es, &encoding_info,
|
||||
&encoding_flags)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Encoding requested by superblock is unknown");
|
||||
goto failed_mount;
|
||||
}
|
||||
|
||||
encoding = utf8_load(encoding_info->version);
|
||||
if (IS_ERR(encoding)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"can't mount with superblock charset: %s-%s "
|
||||
"not supported by the kernel. flags: 0x%x.",
|
||||
encoding_info->name, encoding_info->version,
|
||||
encoding_flags);
|
||||
goto failed_mount;
|
||||
}
|
||||
ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
|
||||
"%s-%s with flags 0x%hx", encoding_info->name,
|
||||
encoding_info->version?:"\b", encoding_flags);
|
||||
|
||||
sbi->s_encoding = encoding;
|
||||
sbi->s_encoding_flags = encoding_flags;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
|
||||
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
|
||||
"with data=journal disables delayed "
|
||||
@ -4219,7 +4330,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
"data=, fs mounted w/o journal");
|
||||
goto failed_mount_wq;
|
||||
}
|
||||
sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
|
||||
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
|
||||
clear_opt(sb, JOURNAL_CHECKSUM);
|
||||
clear_opt(sb, DATA_FLAGS);
|
||||
sbi->s_journal = NULL;
|
||||
@ -4354,6 +4465,12 @@ no_journal:
|
||||
iput(root);
|
||||
goto failed_mount4;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
if (sbi->s_encoding)
|
||||
sb->s_d_op = &ext4_dentry_ops;
|
||||
#endif
|
||||
|
||||
sb->s_root = d_make_root(root);
|
||||
if (!sb->s_root) {
|
||||
ext4_msg(sb, KERN_ERR, "get root dentry failed");
|
||||
@ -4368,30 +4485,7 @@ no_journal:
|
||||
} else if (ret)
|
||||
goto failed_mount4a;
|
||||
|
||||
/* determine the minimum size of new large inodes, if present */
|
||||
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
|
||||
sbi->s_want_extra_isize == 0) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
if (ext4_has_feature_extra_isize(sb)) {
|
||||
if (sbi->s_want_extra_isize <
|
||||
le16_to_cpu(es->s_want_extra_isize))
|
||||
sbi->s_want_extra_isize =
|
||||
le16_to_cpu(es->s_want_extra_isize);
|
||||
if (sbi->s_want_extra_isize <
|
||||
le16_to_cpu(es->s_min_extra_isize))
|
||||
sbi->s_want_extra_isize =
|
||||
le16_to_cpu(es->s_min_extra_isize);
|
||||
}
|
||||
}
|
||||
/* Check if enough inode space is available */
|
||||
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
|
||||
sbi->s_inode_size) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
ext4_msg(sb, KERN_INFO, "required extra inode space not"
|
||||
"available");
|
||||
}
|
||||
ext4_clamp_want_extra_isize(sb);
|
||||
|
||||
ext4_set_resv_clusters(sb);
|
||||
|
||||
@ -4559,6 +4653,11 @@ failed_mount2:
|
||||
failed_mount:
|
||||
if (sbi->s_chksum_driver)
|
||||
crypto_free_shash(sbi->s_chksum_driver);
|
||||
|
||||
#ifdef CONFIG_UNICODE
|
||||
utf8_unload(sbi->s_encoding);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_QUOTA
|
||||
for (i = 0; i < EXT4_MAXQUOTAS; i++)
|
||||
kfree(sbi->s_qf_names[i]);
|
||||
@ -5175,6 +5274,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
goto restore_opts;
|
||||
}
|
||||
|
||||
ext4_clamp_want_extra_isize(sb);
|
||||
|
||||
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
|
||||
test_opt(sb, JOURNAL_CHECKSUM)) {
|
||||
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
|
||||
|
@ -238,6 +238,9 @@ EXT4_ATTR_FEATURE(meta_bg_resize);
|
||||
#ifdef CONFIG_FS_ENCRYPTION
|
||||
EXT4_ATTR_FEATURE(encryption);
|
||||
#endif
|
||||
#ifdef CONFIG_UNICODE
|
||||
EXT4_ATTR_FEATURE(casefold);
|
||||
#endif
|
||||
EXT4_ATTR_FEATURE(metadata_csum_seed);
|
||||
|
||||
static struct attribute *ext4_feat_attrs[] = {
|
||||
@ -246,6 +249,9 @@ static struct attribute *ext4_feat_attrs[] = {
|
||||
ATTR_LIST(meta_bg_resize),
|
||||
#ifdef CONFIG_FS_ENCRYPTION
|
||||
ATTR_LIST(encryption),
|
||||
#endif
|
||||
#ifdef CONFIG_UNICODE
|
||||
ATTR_LIST(casefold),
|
||||
#endif
|
||||
ATTR_LIST(metadata_csum_seed),
|
||||
NULL,
|
||||
|
@ -1696,7 +1696,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
|
||||
|
||||
/* No failures allowed past this point. */
|
||||
|
||||
if (!s->not_found && here->e_value_size && here->e_value_offs) {
|
||||
if (!s->not_found && here->e_value_size && !here->e_value_inum) {
|
||||
/* Remove the old value. */
|
||||
void *first_val = s->base + min_offs;
|
||||
size_t offs = le16_to_cpu(here->e_value_offs);
|
||||
|
@ -132,7 +132,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
|
||||
return;
|
||||
}
|
||||
spin_lock(&journal->j_list_lock);
|
||||
nblocks = jbd2_space_needed(journal);
|
||||
space_left = jbd2_log_space_left(journal);
|
||||
if (space_left < nblocks) {
|
||||
int chkpt = journal->j_checkpoint_transactions != NULL;
|
||||
|
@ -1350,6 +1350,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
|
||||
journal_superblock_t *sb = journal->j_superblock;
|
||||
int ret;
|
||||
|
||||
/* Buffer got discarded which means block device got invalidated */
|
||||
if (!buffer_mapped(bh))
|
||||
return -EIO;
|
||||
|
||||
trace_jbd2_write_superblock(journal, write_flags);
|
||||
if (!(journal->j_flags & JBD2_BARRIER))
|
||||
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
|
||||
|
2
fs/unicode/.gitignore
vendored
Normal file
2
fs/unicode/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
mkutf8data
|
||||
utf8data.h
|
13
fs/unicode/Kconfig
Normal file
13
fs/unicode/Kconfig
Normal file
@ -0,0 +1,13 @@
|
||||
#
|
||||
# UTF-8 normalization
|
||||
#
|
||||
config UNICODE
|
||||
bool "UTF-8 normalization and casefolding support"
|
||||
help
|
||||
Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding
|
||||
support.
|
||||
|
||||
config UNICODE_NORMALIZATION_SELFTEST
|
||||
tristate "Test UTF-8 normalization support"
|
||||
depends on UNICODE
|
||||
default n
|
38
fs/unicode/Makefile
Normal file
38
fs/unicode/Makefile
Normal file
@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
obj-$(CONFIG_UNICODE) += unicode.o
|
||||
obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o
|
||||
|
||||
unicode-y := utf8-norm.o utf8-core.o
|
||||
|
||||
$(obj)/utf8-norm.o: $(obj)/utf8data.h
|
||||
|
||||
# In the normal build, the checked-in utf8data.h is just shipped.
|
||||
#
|
||||
# To generate utf8data.h from UCD, put *.txt files in this directory
|
||||
# and pass REGENERATE_UTF8DATA=1 from the command line.
|
||||
ifdef REGENERATE_UTF8DATA
|
||||
|
||||
quiet_cmd_utf8data = GEN $@
|
||||
cmd_utf8data = $< \
|
||||
-a $(srctree)/$(src)/DerivedAge.txt \
|
||||
-c $(srctree)/$(src)/DerivedCombiningClass.txt \
|
||||
-p $(srctree)/$(src)/DerivedCoreProperties.txt \
|
||||
-d $(srctree)/$(src)/UnicodeData.txt \
|
||||
-f $(srctree)/$(src)/CaseFolding.txt \
|
||||
-n $(srctree)/$(src)/NormalizationCorrections.txt \
|
||||
-t $(srctree)/$(src)/NormalizationTest.txt \
|
||||
-o $@
|
||||
|
||||
$(obj)/utf8data.h: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
|
||||
$(call if_changed,utf8data)
|
||||
|
||||
else
|
||||
|
||||
$(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
|
||||
$(call if_changed,shipped)
|
||||
|
||||
endif
|
||||
|
||||
targets += utf8data.h
|
||||
hostprogs-y += mkutf8data
|
71
fs/unicode/README.utf8data
Normal file
71
fs/unicode/README.utf8data
Normal file
@ -0,0 +1,71 @@
|
||||
The utf8data.h file in this directory is generated from the Unicode
|
||||
Character Database for version 12.1.0 of the Unicode standard.
|
||||
|
||||
The full set of files can be found here:
|
||||
|
||||
http://www.unicode.org/Public/12.1.0/ucd/
|
||||
|
||||
Note!
|
||||
|
||||
The URLs listed below are not stable. That's because Unicode 12.1.0
|
||||
has not been officially released yet; it is scheduled to be released
|
||||
on May 8, 2019. We are taking Unicode 12.1.0 a few weeks early because it
|
||||
contains a new Japanese character which is required in order to
|
||||
specify Japanese dates after May 1, 2019, when Crown Prince Naruhito
|
||||
ascends to the Chrysanthemum Throne. (Isn't internationalization fun?
|
||||
The abdication of Emperor Akihito of Japan is requiring dozens of
|
||||
software packages to be updated with only a month's notice. :-)
|
||||
|
||||
We will update the URLs (and any needed changes to the checksums)
|
||||
after the final Unicode 12.1.0 is released.
|
||||
|
||||
Individual source links:
|
||||
|
||||
https://www.unicode.org/Public/12.1.0/ucd/CaseFolding-12.1.0d2.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/DerivedAge-12.1.0d3.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedCombiningClass-12.1.0d2.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/DerivedCoreProperties-12.1.0d2.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/NormalizationCorrections-12.1.0d1.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/NormalizationTest-12.1.0d3.txt
|
||||
https://www.unicode.org/Public/12.1.0/ucd/UnicodeData-12.1.0d2.txt
|
||||
|
||||
md5sums (verify by running "md5sum -c README.utf8data"):
|
||||
|
||||
900e76da1d822a160fd6b8c0b1d70094 CaseFolding.txt
|
||||
131256380bff4fea8ad4a851616f2f10 DerivedAge.txt
|
||||
e731a4089b30002144e107e3d6f8d1fa DerivedCombiningClass.txt
|
||||
a47c9fbd7ff92a9b261ba9831e68778a DerivedCoreProperties.txt
|
||||
fcab6dad15e440879d92f315978f93d3 NormalizationCorrections.txt
|
||||
f9ff1c55a60decf436100f791b44aa98 NormalizationTest.txt
|
||||
755f6af699f8c8d2d958da411f78f6c6 UnicodeData.txt
|
||||
|
||||
sha1sums (verify by running "sha1sum -c README.utf8data"):
|
||||
|
||||
dc9245f6803c4ac99555c361f5052e0b13eb779b CaseFolding.txt
|
||||
3281104f237184cdb5d869e86eb8573678ada7da DerivedAge.txt
|
||||
2f5f995ccb96e0fa84b15151b35d5e2681535175 DerivedCombiningClass.txt
|
||||
5b8698a3fcd5018e1987f296b02e2c17e696415e DerivedCoreProperties.txt
|
||||
cd83935fbc012345d8792d2c704f69497e753835 NormalizationCorrections.txt
|
||||
ea419aae505b337b0d99a83fa83fe58ddff7c19f NormalizationTest.txt
|
||||
dc973c0fc93d6f09d9ab9f70d1c9f89c447f0526 UnicodeData.txt
|
||||
|
||||
|
||||
To update to the newer version of the Unicode standard, the latest
|
||||
released version of the UCD can be found here:
|
||||
|
||||
http://www.unicode.org/Public/UCD/latest/
|
||||
|
||||
Then, build under fs/unicode/ with REGENERATE_UTF8DATA=1:
|
||||
|
||||
make REGENERATE_UTF8DATA=1 fs/unicode/
|
||||
|
||||
After sanity checking the newly generated utf8data.h file (the
|
||||
version generated from the 12.1.0 UCD should be 4,109 lines long, and
|
||||
have a total size of 324k) and/or comparing it with the older version
|
||||
of utf8data.h_shipped, rename it to utf8data.h_shipped.
|
||||
|
||||
If you are a kernel developer updating to a newer version of the
|
||||
Unicode Character Database, please update this README.utf8data file
|
||||
with the version of the UCD that was used, the md5sum and sha1sums of
|
||||
the *.txt files, before checking in the new versions of the utf8data.h
|
||||
and README.utf8data files.
|
3419
fs/unicode/mkutf8data.c
Normal file
3419
fs/unicode/mkutf8data.c
Normal file
File diff suppressed because it is too large
Load Diff
187
fs/unicode/utf8-core.c
Normal file
187
fs/unicode/utf8-core.c
Normal file
@ -0,0 +1,187 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/parser.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/unicode.h>
|
||||
|
||||
#include "utf8n.h"
|
||||
|
||||
int utf8_validate(const struct unicode_map *um, const struct qstr *str)
|
||||
{
|
||||
const struct utf8data *data = utf8nfdi(um->version);
|
||||
|
||||
if (utf8nlen(data, str->name, str->len) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8_validate);
|
||||
|
||||
int utf8_strncmp(const struct unicode_map *um,
|
||||
const struct qstr *s1, const struct qstr *s2)
|
||||
{
|
||||
const struct utf8data *data = utf8nfdi(um->version);
|
||||
struct utf8cursor cur1, cur2;
|
||||
int c1, c2;
|
||||
|
||||
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
c1 = utf8byte(&cur1);
|
||||
c2 = utf8byte(&cur2);
|
||||
|
||||
if (c1 < 0 || c2 < 0)
|
||||
return -EINVAL;
|
||||
if (c1 != c2)
|
||||
return 1;
|
||||
} while (c1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8_strncmp);
|
||||
|
||||
int utf8_strncasecmp(const struct unicode_map *um,
|
||||
const struct qstr *s1, const struct qstr *s2)
|
||||
{
|
||||
const struct utf8data *data = utf8nfdicf(um->version);
|
||||
struct utf8cursor cur1, cur2;
|
||||
int c1, c2;
|
||||
|
||||
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
c1 = utf8byte(&cur1);
|
||||
c2 = utf8byte(&cur2);
|
||||
|
||||
if (c1 < 0 || c2 < 0)
|
||||
return -EINVAL;
|
||||
if (c1 != c2)
|
||||
return 1;
|
||||
} while (c1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8_strncasecmp);
|
||||
|
||||
/*
 * Copy the byte stream utf8byte() produces for @str under @data into
 * @dest, writing at most @dlen bytes including the terminating zero.
 *
 * Returns the number of bytes written before the terminator, or -EINVAL
 * when the cursor cannot be initialised, utf8byte() returns -1, or the
 * output (with terminator) does not fit in @dlen bytes.  The error code is
 * checked before the store so a truncated -1 is never written to @dest.
 */
static int utf8_emit_stream(const struct utf8data *data,
			    const struct qstr *str,
			    unsigned char *dest, size_t dlen)
{
	struct utf8cursor cur;
	size_t nlen;

	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
		return -EINVAL;

	for (nlen = 0; nlen < dlen; nlen++) {
		int c = utf8byte(&cur);

		if (c == -1)
			return -EINVAL;

		dest[nlen] = c;
		if (!c)
			return nlen;
	}

	/* Ran out of room before reaching the terminator. */
	return -EINVAL;
}

/*
 * Write the form of @str produced by the utf8nfdicf() (casefolding) table
 * for um->version into @dest.  Returns the output length or -EINVAL.
 */
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
		  unsigned char *dest, size_t dlen)
{
	return utf8_emit_stream(utf8nfdicf(um->version), str, dest, dlen);
}
EXPORT_SYMBOL(utf8_casefold);

/*
 * Write the form of @str produced by the utf8nfdi() table for um->version
 * into @dest.  Returns the output length or -EINVAL.
 */
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
		   unsigned char *dest, size_t dlen)
{
	return utf8_emit_stream(utf8nfdi(um->version), str, dest, dlen);
}
EXPORT_SYMBOL(utf8_normalize);
|
||||
|
||||
static int utf8_parse_version(const char *version, unsigned int *maj,
|
||||
unsigned int *min, unsigned int *rev)
|
||||
{
|
||||
substring_t args[3];
|
||||
char version_string[12];
|
||||
const struct match_token token[] = {
|
||||
{1, "%d.%d.%d"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
strncpy(version_string, version, sizeof(version_string));
|
||||
|
||||
if (match_token(version_string, token, args) != 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (match_int(&args[0], maj) || match_int(&args[1], min) ||
|
||||
match_int(&args[2], rev))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct unicode_map *utf8_load(const char *version)
|
||||
{
|
||||
struct unicode_map *um = NULL;
|
||||
int unicode_version;
|
||||
|
||||
if (version) {
|
||||
unsigned int maj, min, rev;
|
||||
|
||||
if (utf8_parse_version(version, &maj, &min, &rev) < 0)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (!utf8version_is_supported(maj, min, rev))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
unicode_version = UNICODE_AGE(maj, min, rev);
|
||||
} else {
|
||||
unicode_version = utf8version_latest();
|
||||
printk(KERN_WARNING"UTF-8 version not specified. "
|
||||
"Assuming latest supported version (%d.%d.%d).",
|
||||
(unicode_version >> 16) & 0xff,
|
||||
(unicode_version >> 8) & 0xff,
|
||||
(unicode_version & 0xff));
|
||||
}
|
||||
|
||||
um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
|
||||
if (!um)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
um->charset = "UTF-8";
|
||||
um->version = unicode_version;
|
||||
|
||||
return um;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8_load);
|
||||
|
||||
/*
 * Release a map obtained from utf8_load().  The map is handed straight to
 * kfree().
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
799
fs/unicode/utf8-norm.c
Normal file
799
fs/unicode/utf8-norm.c
Normal file
@ -0,0 +1,799 @@
|
||||
/*
|
||||
* Copyright (c) 2014 SGI.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "utf8n.h"
|
||||
|
||||
/* Descriptor for one trie stored inside the utf8data[] byte array. */
struct utf8data {
	/* NOTE(review): presumably the newest unicode age this trie covers - confirm against utf8data.h */
	unsigned int maxage;
	/* Added to utf8data to find the first byte of the trie (see utf8nlookup()). */
	unsigned int offset;
};
|
||||
|
||||
#define __INCLUDED_FROM_UTF8NORM_C__
|
||||
#include "utf8data.h"
|
||||
#undef __INCLUDED_FROM_UTF8NORM_C__
|
||||
|
||||
/*
 * Report whether UNICODE_AGE(maj, min, rev) appears in utf8agetab[].
 *
 * The table is walked from its last entry towards the first and the walk
 * stops at the first zero entry.  Returns 1 on a match, 0 otherwise.
 */
int utf8version_is_supported(u8 maj, u8 min, u8 rev)
{
	const unsigned int wanted = UNICODE_AGE(maj, min, rev);
	int idx;

	for (idx = ARRAY_SIZE(utf8agetab) - 1;
	     idx >= 0 && utf8agetab[idx] != 0;
	     idx--) {
		if (utf8agetab[idx] == wanted)
			return 1;
	}

	return 0;
}
EXPORT_SYMBOL(utf8version_is_supported);
|
||||
|
||||
int utf8version_latest(void)
|
||||
{
|
||||
return utf8vers;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8version_latest);
|
||||
|
||||
/*
|
||||
* UTF-8 valid ranges.
|
||||
*
|
||||
* The UTF-8 encoding spreads the bits of a 32bit word over several
|
||||
* bytes. This table gives the ranges that can be held and how they'd
|
||||
* be represented.
|
||||
*
|
||||
* 0x00000000 0x0000007F: 0xxxxxxx
|
||||
* 0x00000000 0x000007FF: 110xxxxx 10xxxxxx
|
||||
* 0x00000000 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
|
||||
* 0x00000000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 0x00000000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 0x00000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
*
|
||||
* There is an additional requirement on UTF-8, in that only the
|
||||
* shortest representation of a 32bit value is to be used. A decoder
|
||||
* must not decode sequences that do not satisfy this requirement.
|
||||
* Thus the allowed ranges have a lower bound.
|
||||
*
|
||||
* 0x00000000 0x0000007F: 0xxxxxxx
|
||||
* 0x00000080 0x000007FF: 110xxxxx 10xxxxxx
|
||||
* 0x00000800 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
|
||||
* 0x00010000 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 0x00200000 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
* 0x04000000 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
*
|
||||
* Actual unicode characters are limited to the range 0x0 - 0x10FFFF,
|
||||
* 17 planes of 65536 values. This limits the sequences actually seen
|
||||
* even more, to just the following.
|
||||
*
|
||||
* 0 - 0x7F: 0 - 0x7F
|
||||
* 0x80 - 0x7FF: 0xC2 0x80 - 0xDF 0xBF
|
||||
* 0x800 - 0xFFFF: 0xE0 0xA0 0x80 - 0xEF 0xBF 0xBF
|
||||
* 0x10000 - 0x10FFFF: 0xF0 0x90 0x80 0x80 - 0xF4 0x8F 0xBF 0xBF
|
||||
*
|
||||
* Within those ranges the surrogates 0xD800 - 0xDFFF are not allowed.
|
||||
*
|
||||
* Note that the longest sequence seen with valid usage is 4 bytes,
|
||||
* the same as a single UTF-32 character. This makes the UTF-8
|
||||
* representation of Unicode strictly smaller than UTF-32.
|
||||
*
|
||||
* The shortest sequence requirement was introduced by:
|
||||
* Corrigendum #1: UTF-8 Shortest Form
|
||||
* It can be found here:
|
||||
* http://www.unicode.org/versions/corrigendum1.html
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Return the number of bytes used by the current UTF-8 sequence.
|
||||
* Assumes the input points to the first byte of a valid UTF-8
|
||||
* sequence.
|
||||
*/
|
||||
static inline int utf8clen(const char *s)
{
	/* Only the lead byte matters; read it unsigned so 0x80+ compares right. */
	const unsigned char lead = (unsigned char)*s;

	if (lead >= 0xF0)
		return 4;
	if (lead >= 0xE0)
		return 3;
	if (lead >= 0xC0)
		return 2;
	return 1;
}
|
||||
|
||||
/*
|
||||
* Decode a 3-byte UTF-8 sequence.
|
||||
*/
|
||||
static unsigned int
utf8decode3(const char *str)
{
	/* 1110xxxx 10xxxxxx 10xxxxxx: 4 + 6 + 6 payload bits. */
	const unsigned int top = str[0] & 0x0F;
	const unsigned int mid = str[1] & 0x3F;
	const unsigned int low = str[2] & 0x3F;

	return (top << 12) | (mid << 6) | low;
}
|
||||
|
||||
/*
|
||||
* Encode a 3-byte UTF-8 sequence.
|
||||
*/
|
||||
static int
utf8encode3(char *str, unsigned int val)
{
	/* Lead byte carries the bits above the low 12, tagged 1110xxxx. */
	str[0] = (val >> 12) | 0xE0;
	/* Two continuation bytes carry 6 bits each, tagged 10xxxxxx. */
	str[1] = ((val >> 6) & 0x3F) | 0x80;
	str[2] = (val & 0x3F) | 0x80;

	return 3;
}
|
||||
|
||||
/*
|
||||
* utf8trie_t
|
||||
*
|
||||
* A compact binary tree, used to decode UTF-8 characters.
|
||||
*
|
||||
* Internal nodes are one byte for the node itself, and up to three
|
||||
* bytes for an offset into the tree. The first byte contains the
|
||||
* following information:
|
||||
* NEXTBYTE - flag - advance to next byte if set
|
||||
* BITNUM - 3 bit field - the bit number to be tested
|
||||
* OFFLEN - 2 bit field - number of bytes in the offset
|
||||
* if offlen == 0 (non-branching node)
|
||||
* RIGHTPATH - 1 bit field - set if the following node is for the
|
||||
* right-hand path (tested bit is set)
|
||||
* TRIENODE - 1 bit field - set if the following node is an internal
|
||||
* node, otherwise it is a leaf node
|
||||
* if offlen != 0 (branching node)
|
||||
* LEFTNODE - 1 bit field - set if the left-hand node is internal
|
||||
* RIGHTNODE - 1 bit field - set if the right-hand node is internal
|
||||
*
|
||||
* Due to the way utf8 works, there cannot be branching nodes with
|
||||
* NEXTBYTE set, and moreover those nodes always have a righthand
|
||||
* descendant.
|
||||
*/
|
||||
typedef const unsigned char utf8trie_t;
|
||||
#define BITNUM 0x07
|
||||
#define NEXTBYTE 0x08
|
||||
#define OFFLEN 0x30
|
||||
#define OFFLEN_SHIFT 4
|
||||
#define RIGHTPATH 0x40
|
||||
#define TRIENODE 0x80
|
||||
#define RIGHTNODE 0x40
|
||||
#define LEFTNODE 0x80
|
||||
|
||||
/*
|
||||
* utf8leaf_t
|
||||
*
|
||||
* The leaves of the trie are embedded in the trie, and so the same
|
||||
* underlying datatype: unsigned char.
|
||||
*
|
||||
* leaf[0]: The unicode version, stored as a generation number that is
|
||||
* an index into utf8agetab[]. With this we can filter code
|
||||
* points based on the unicode version in which they were
|
||||
* defined. The CCC of a non-defined code point is 0.
|
||||
* leaf[1]: Canonical Combining Class. During normalization, we need
|
||||
* to do a stable sort into ascending order of all characters
|
||||
* with a non-zero CCC that occur between two characters with
|
||||
* a CCC of 0, or at the begin or end of a string.
|
||||
* The unicode standard guarantees that all CCC values are
|
||||
* between 0 and 254 inclusive, which leaves 255 available as
|
||||
* a special value.
|
||||
* Code points with CCC 0 are known as stoppers.
|
||||
* leaf[2]: Decomposition. If leaf[1] == 255, then leaf[2] is the
|
||||
* start of a NUL-terminated string that is the decomposition
|
||||
* of the character.
|
||||
* The CCC of a decomposable character is the same as the CCC
|
||||
* of the first character of its decomposition.
|
||||
* Some characters decompose as the empty string: these are
|
||||
* characters with the Default_Ignorable_Code_Point property.
|
||||
* These do affect normalization, as they all have CCC 0.
|
||||
*
|
||||
* The decompositions in the trie have been fully expanded, with the
|
||||
* exception of Hangul syllables, which are decomposed algorithmically.
|
||||
*
|
||||
* Casefolding, if applicable, is also done using decompositions.
|
||||
*
|
||||
* The trie is constructed in such a way that leaves exist for all
|
||||
* UTF-8 sequences that match the criteria from the "UTF-8 valid
|
||||
* ranges" comment above, and only for those sequences. Therefore a
|
||||
* lookup in the trie can be used to validate the UTF-8 input.
|
||||
*/
|
||||
typedef const unsigned char utf8leaf_t;
|
||||
|
||||
#define LEAF_GEN(LEAF) ((LEAF)[0])
|
||||
#define LEAF_CCC(LEAF) ((LEAF)[1])
|
||||
#define LEAF_STR(LEAF) ((const char *)((LEAF) + 2))
|
||||
|
||||
#define MINCCC (0)
|
||||
#define MAXCCC (254)
|
||||
#define STOPPER (0)
|
||||
#define DECOMPOSE (255)
|
||||
|
||||
/* Marker for hangul syllable decomposition. */
|
||||
#define HANGUL ((char)(255))
|
||||
/* Size of the synthesized leaf used for Hangul syllable decomposition. */
|
||||
#define UTF8HANGULLEAF (12)
|
||||
|
||||
/*
|
||||
* Hangul decomposition (algorithm from Section 3.12 of Unicode 6.3.0)
|
||||
*
|
||||
* AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;;
|
||||
* D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;;
|
||||
*
|
||||
* SBase = 0xAC00
|
||||
* LBase = 0x1100
|
||||
* VBase = 0x1161
|
||||
* TBase = 0x11A7
|
||||
* LCount = 19
|
||||
* VCount = 21
|
||||
* TCount = 28
|
||||
* NCount = 588 (VCount * TCount)
|
||||
* SCount = 11172 (LCount * NCount)
|
||||
*
|
||||
* Decomposition:
|
||||
* SIndex = s - SBase
|
||||
*
|
||||
* LV (Canonical/Full)
|
||||
* LIndex = SIndex / NCount
|
||||
* VIndex = (Sindex % NCount) / TCount
|
||||
* LPart = LBase + LIndex
|
||||
* VPart = VBase + VIndex
|
||||
*
|
||||
* LVT (Canonical)
|
||||
* LVIndex = (SIndex / TCount) * TCount
|
||||
* TIndex = (Sindex % TCount)
|
||||
* LVPart = SBase + LVIndex
|
||||
* TPart = TBase + TIndex
|
||||
*
|
||||
* LVT (Full)
|
||||
* LIndex = SIndex / NCount
|
||||
* VIndex = (Sindex % NCount) / TCount
|
||||
* TIndex = (Sindex % TCount)
|
||||
* LPart = LBase + LIndex
|
||||
* VPart = VBase + VIndex
|
||||
* if (TIndex == 0) {
|
||||
* d = <LPart, VPart>
|
||||
* } else {
|
||||
* TPart = TBase + TIndex
|
||||
* d = <LPart, VPart, TPart>
|
||||
* }
|
||||
*/
|
||||
|
||||
/* Constants */
|
||||
#define SB (0xAC00)
|
||||
#define LB (0x1100)
|
||||
#define VB (0x1161)
|
||||
#define TB (0x11A7)
|
||||
#define LC (19)
|
||||
#define VC (21)
|
||||
#define TC (28)
|
||||
#define NC (VC * TC)
|
||||
#define SC (LC * NC)
|
||||
|
||||
/* Algorithmic decomposition of hangul syllable. */
|
||||
static utf8leaf_t *
|
||||
utf8hangul(const char *str, unsigned char *hangul)
|
||||
{
|
||||
unsigned int si;
|
||||
unsigned int li;
|
||||
unsigned int vi;
|
||||
unsigned int ti;
|
||||
unsigned char *h;
|
||||
|
||||
/* Calculate the SI, LI, VI, and TI values. */
|
||||
si = utf8decode3(str) - SB;
|
||||
li = si / NC;
|
||||
vi = (si % NC) / TC;
|
||||
ti = si % TC;
|
||||
|
||||
/* Fill in base of leaf. */
|
||||
h = hangul;
|
||||
LEAF_GEN(h) = 2;
|
||||
LEAF_CCC(h) = DECOMPOSE;
|
||||
h += 2;
|
||||
|
||||
/* Add LPart, a 3-byte UTF-8 sequence. */
|
||||
h += utf8encode3((char *)h, li + LB);
|
||||
|
||||
/* Add VPart, a 3-byte UTF-8 sequence. */
|
||||
h += utf8encode3((char *)h, vi + VB);
|
||||
|
||||
/* Add TPart if required, also a 3-byte UTF-8 sequence. */
|
||||
if (ti)
|
||||
h += utf8encode3((char *)h, ti + TB);
|
||||
|
||||
/* Terminate string. */
|
||||
h[0] = '\0';
|
||||
|
||||
return hangul;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use trie to scan s, touching at most len bytes.
|
||||
* Returns the leaf if one exists, NULL otherwise.
|
||||
*
|
||||
* A non-NULL return guarantees that the UTF-8 sequence starting at s
|
||||
* is well-formed and corresponds to a known unicode code point. The
|
||||
* shorthand for this will be "is valid UTF-8 unicode".
|
||||
*/
|
||||
static utf8leaf_t *utf8nlookup(const struct utf8data *data,
|
||||
unsigned char *hangul, const char *s, size_t len)
|
||||
{
|
||||
utf8trie_t *trie = NULL;
|
||||
int offlen;
|
||||
int offset;
|
||||
int mask;
|
||||
int node;
|
||||
|
||||
if (!data)
|
||||
return NULL;
|
||||
if (len == 0)
|
||||
return NULL;
|
||||
|
||||
trie = utf8data + data->offset;
|
||||
node = 1;
|
||||
while (node) {
|
||||
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
|
||||
if (*trie & NEXTBYTE) {
|
||||
if (--len == 0)
|
||||
return NULL;
|
||||
s++;
|
||||
}
|
||||
mask = 1 << (*trie & BITNUM);
|
||||
if (*s & mask) {
|
||||
/* Right leg */
|
||||
if (offlen) {
|
||||
/* Right node at offset of trie */
|
||||
node = (*trie & RIGHTNODE);
|
||||
offset = trie[offlen];
|
||||
while (--offlen) {
|
||||
offset <<= 8;
|
||||
offset |= trie[offlen];
|
||||
}
|
||||
trie += offset;
|
||||
} else if (*trie & RIGHTPATH) {
|
||||
/* Right node after this node */
|
||||
node = (*trie & TRIENODE);
|
||||
trie++;
|
||||
} else {
|
||||
/* No right node. */
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
/* Left leg */
|
||||
if (offlen) {
|
||||
/* Left node after this node. */
|
||||
node = (*trie & LEFTNODE);
|
||||
trie += offlen + 1;
|
||||
} else if (*trie & RIGHTPATH) {
|
||||
/* No left node. */
|
||||
return NULL;
|
||||
} else {
|
||||
/* Left node after this node */
|
||||
node = (*trie & TRIENODE);
|
||||
trie++;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Hangul decomposition is done algorithmically. These are the
|
||||
* codepoints >= 0xAC00 and <= 0xD7A3. Their UTF-8 encoding is
|
||||
* always 3 bytes long, so s has been advanced twice, and the
|
||||
* start of the sequence is at s-2.
|
||||
*/
|
||||
if (LEAF_CCC(trie) == DECOMPOSE && LEAF_STR(trie)[0] == HANGUL)
|
||||
trie = utf8hangul(s - 2, hangul);
|
||||
return trie;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use trie to scan s.
|
||||
* Returns the leaf if one exists, NULL otherwise.
|
||||
*
|
||||
* Forwards to utf8nlookup().
|
||||
*/
|
||||
static utf8leaf_t *utf8lookup(const struct utf8data *data,
|
||||
unsigned char *hangul, const char *s)
|
||||
{
|
||||
return utf8nlookup(data, hangul, s, (size_t)-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum age of any character in s.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
* Return 0 if only non-assigned code points are used.
|
||||
*/
|
||||
int utf8agemax(const struct utf8data *data, const char *s)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
int age = 0;
|
||||
int leaf_age;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
|
||||
while (*s) {
|
||||
leaf = utf8lookup(data, hangul, s);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
|
||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
||||
if (leaf_age <= data->maxage && leaf_age > age)
|
||||
age = leaf_age;
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return age;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8agemax);
|
||||
|
||||
/*
|
||||
* Minimum age of any character in s.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
* Return 0 if non-assigned code points are used.
|
||||
*/
|
||||
int utf8agemin(const struct utf8data *data, const char *s)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
int age;
|
||||
int leaf_age;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
age = data->maxage;
|
||||
while (*s) {
|
||||
leaf = utf8lookup(data, hangul, s);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
||||
if (leaf_age <= data->maxage && leaf_age < age)
|
||||
age = leaf_age;
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return age;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8agemin);
|
||||
|
||||
/*
|
||||
* Maximum age of any character in s, touch at most len bytes.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
*/
|
||||
int utf8nagemax(const struct utf8data *data, const char *s, size_t len)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
int age = 0;
|
||||
int leaf_age;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
|
||||
while (len && *s) {
|
||||
leaf = utf8nlookup(data, hangul, s, len);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
||||
if (leaf_age <= data->maxage && leaf_age > age)
|
||||
age = leaf_age;
|
||||
len -= utf8clen(s);
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return age;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8nagemax);
|
||||
|
||||
/*
|
||||
* Minimum age of any character in s, touch at most len bytes.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
*/
|
||||
int utf8nagemin(const struct utf8data *data, const char *s, size_t len)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
int leaf_age;
|
||||
int age;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
age = data->maxage;
|
||||
while (len && *s) {
|
||||
leaf = utf8nlookup(data, hangul, s, len);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
leaf_age = utf8agetab[LEAF_GEN(leaf)];
|
||||
if (leaf_age <= data->maxage && leaf_age < age)
|
||||
age = leaf_age;
|
||||
len -= utf8clen(s);
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return age;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8nagemin);
|
||||
|
||||
/*
|
||||
* Length of the normalization of s.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
*
|
||||
* A string of Default_Ignorable_Code_Point has length 0.
|
||||
*/
|
||||
ssize_t utf8len(const struct utf8data *data, const char *s)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
size_t ret = 0;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
while (*s) {
|
||||
leaf = utf8lookup(data, hangul, s);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
|
||||
ret += utf8clen(s);
|
||||
else if (LEAF_CCC(leaf) == DECOMPOSE)
|
||||
ret += strlen(LEAF_STR(leaf));
|
||||
else
|
||||
ret += utf8clen(s);
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8len);
|
||||
|
||||
/*
|
||||
* Length of the normalization of s, touch at most len bytes.
|
||||
* Return -1 if s is not valid UTF-8 unicode.
|
||||
*/
|
||||
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
size_t ret = 0;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
|
||||
if (!data)
|
||||
return -1;
|
||||
while (len && *s) {
|
||||
leaf = utf8nlookup(data, hangul, s, len);
|
||||
if (!leaf)
|
||||
return -1;
|
||||
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage)
|
||||
ret += utf8clen(s);
|
||||
else if (LEAF_CCC(leaf) == DECOMPOSE)
|
||||
ret += strlen(LEAF_STR(leaf));
|
||||
else
|
||||
ret += utf8clen(s);
|
||||
len -= utf8clen(s);
|
||||
s += utf8clen(s);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8nlen);
|
||||
|
||||
/*
|
||||
* Set up an utf8cursor for use by utf8byte().
|
||||
*
|
||||
* u8c : pointer to cursor.
|
||||
* data : const struct utf8data to use for normalization.
|
||||
* s : string.
|
||||
* len : length of s.
|
||||
*
|
||||
* Returns -1 on error, 0 on success.
|
||||
*/
|
||||
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
||||
const char *s, size_t len)
|
||||
{
|
||||
if (!data)
|
||||
return -1;
|
||||
if (!s)
|
||||
return -1;
|
||||
u8c->data = data;
|
||||
u8c->s = s;
|
||||
u8c->p = NULL;
|
||||
u8c->ss = NULL;
|
||||
u8c->sp = NULL;
|
||||
u8c->len = len;
|
||||
u8c->slen = 0;
|
||||
u8c->ccc = STOPPER;
|
||||
u8c->nccc = STOPPER;
|
||||
/* Check we didn't clobber the maximum length. */
|
||||
if (u8c->len != len)
|
||||
return -1;
|
||||
/* The first byte of s may not be an utf8 continuation. */
|
||||
if (len > 0 && (*s & 0xC0) == 0x80)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(utf8ncursor);
|
||||
|
||||
/*
 * Set up an utf8cursor for use by utf8byte().
 *
 *   u8c  : pointer to cursor.
 *   data : const struct utf8data to use for normalization.
 *   s    : NUL-terminated string.
 *
 * Forwards to utf8ncursor() with the largest length the cursor can
 * hold, so only the terminating NUL ends the string.
 *
 * Returns -1 on error, 0 on success.
 */
int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
	       const char *s)
{
	const unsigned int unbounded = (unsigned int)-1;

	return utf8ncursor(u8c, data, s, unbounded);
}
|
||||
EXPORT_SYMBOL(utf8cursor);
|
||||
|
||||
/*
|
||||
* Get one byte from the normalized form of the string described by u8c.
|
||||
*
|
||||
* Returns the byte cast to an unsigned char on success, and -1 on failure.
|
||||
*
|
||||
* The cursor keeps track of the location in the string in u8c->s.
|
||||
* When a character is decomposed, the current location is stored in
|
||||
* u8c->p, and u8c->s is set to the start of the decomposition. Note
|
||||
* that bytes from a decomposition do not count against u8c->len.
|
||||
*
|
||||
* Characters are emitted if they match the current CCC in u8c->ccc.
|
||||
* Hitting end-of-string while u8c->ccc == STOPPER means we're done,
|
||||
* and the function returns 0 in that case.
|
||||
*
|
||||
* Sorting by CCC is done by repeatedly scanning the string. The
|
||||
* values of u8c->s and u8c->p are stored in u8c->ss and u8c->sp at
|
||||
* the start of the scan. The first pass finds the lowest CCC to be
|
||||
* emitted and stores it in u8c->nccc, the second pass emits the
|
||||
* characters with this CCC and finds the next lowest CCC. This limits
|
||||
* the number of passes to 1 + the number of different CCCs in the
|
||||
* sequence being scanned.
|
||||
*
|
||||
* Therefore:
|
||||
* u8c->p != NULL -> a decomposition is being scanned.
|
||||
* u8c->ss != NULL -> this is a repeating scan.
|
||||
* u8c->ccc == -1 -> this is the first scan of a repeating scan.
|
||||
*/
|
||||
int utf8byte(struct utf8cursor *u8c)
|
||||
{
|
||||
utf8leaf_t *leaf;
|
||||
int ccc;
|
||||
|
||||
for (;;) {
|
||||
/* Check for the end of a decomposed character. */
|
||||
if (u8c->p && *u8c->s == '\0') {
|
||||
u8c->s = u8c->p;
|
||||
u8c->p = NULL;
|
||||
}
|
||||
|
||||
/* Check for end-of-string. */
|
||||
if (!u8c->p && (u8c->len == 0 || *u8c->s == '\0')) {
|
||||
/* There is no next byte. */
|
||||
if (u8c->ccc == STOPPER)
|
||||
return 0;
|
||||
/* End-of-string during a scan counts as a stopper. */
|
||||
ccc = STOPPER;
|
||||
goto ccc_mismatch;
|
||||
} else if ((*u8c->s & 0xC0) == 0x80) {
|
||||
/* This is a continuation of the current character. */
|
||||
if (!u8c->p)
|
||||
u8c->len--;
|
||||
return (unsigned char)*u8c->s++;
|
||||
}
|
||||
|
||||
/* Look up the data for the current character. */
|
||||
if (u8c->p) {
|
||||
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
|
||||
} else {
|
||||
leaf = utf8nlookup(u8c->data, u8c->hangul,
|
||||
u8c->s, u8c->len);
|
||||
}
|
||||
|
||||
/* No leaf found implies that the input is a binary blob. */
|
||||
if (!leaf)
|
||||
return -1;
|
||||
|
||||
ccc = LEAF_CCC(leaf);
|
||||
/* Characters that are too new have CCC 0. */
|
||||
if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) {
|
||||
ccc = STOPPER;
|
||||
} else if (ccc == DECOMPOSE) {
|
||||
u8c->len -= utf8clen(u8c->s);
|
||||
u8c->p = u8c->s + utf8clen(u8c->s);
|
||||
u8c->s = LEAF_STR(leaf);
|
||||
/* Empty decomposition implies CCC 0. */
|
||||
if (*u8c->s == '\0') {
|
||||
if (u8c->ccc == STOPPER)
|
||||
continue;
|
||||
ccc = STOPPER;
|
||||
goto ccc_mismatch;
|
||||
}
|
||||
|
||||
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s);
|
||||
ccc = LEAF_CCC(leaf);
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is not a stopper, then see if it updates
|
||||
* the next canonical class to be emitted.
|
||||
*/
|
||||
if (ccc != STOPPER && u8c->ccc < ccc && ccc < u8c->nccc)
|
||||
u8c->nccc = ccc;
|
||||
|
||||
/*
|
||||
* Return the current byte if this is the current
|
||||
* combining class.
|
||||
*/
|
||||
if (ccc == u8c->ccc) {
|
||||
if (!u8c->p)
|
||||
u8c->len--;
|
||||
return (unsigned char)*u8c->s++;
|
||||
}
|
||||
|
||||
/* Current combining class mismatch. */
|
||||
ccc_mismatch:
|
||||
if (u8c->nccc == STOPPER) {
|
||||
/*
|
||||
* Scan forward for the first canonical class
|
||||
* to be emitted. Save the position from
|
||||
* which to restart.
|
||||
*/
|
||||
u8c->ccc = MINCCC - 1;
|
||||
u8c->nccc = ccc;
|
||||
u8c->sp = u8c->p;
|
||||
u8c->ss = u8c->s;
|
||||
u8c->slen = u8c->len;
|
||||
if (!u8c->p)
|
||||
u8c->len -= utf8clen(u8c->s);
|
||||
u8c->s += utf8clen(u8c->s);
|
||||
} else if (ccc != STOPPER) {
|
||||
/* Not a stopper, and not the ccc we're emitting. */
|
||||
if (!u8c->p)
|
||||
u8c->len -= utf8clen(u8c->s);
|
||||
u8c->s += utf8clen(u8c->s);
|
||||
} else if (u8c->nccc != MAXCCC + 1) {
|
||||
/* At a stopper, restart for next ccc. */
|
||||
u8c->ccc = u8c->nccc;
|
||||
u8c->nccc = MAXCCC + 1;
|
||||
u8c->s = u8c->ss;
|
||||
u8c->p = u8c->sp;
|
||||
u8c->len = u8c->slen;
|
||||
} else {
|
||||
/* All done, proceed from here. */
|
||||
u8c->ccc = STOPPER;
|
||||
u8c->nccc = STOPPER;
|
||||
u8c->sp = NULL;
|
||||
u8c->ss = NULL;
|
||||
u8c->slen = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(utf8byte);
|
||||
|
||||
const struct utf8data *utf8nfdi(unsigned int maxage)
|
||||
{
|
||||
int i = ARRAY_SIZE(utf8nfdidata) - 1;
|
||||
|
||||
while (maxage < utf8nfdidata[i].maxage)
|
||||
i--;
|
||||
if (maxage > utf8nfdidata[i].maxage)
|
||||
return NULL;
|
||||
return &utf8nfdidata[i];
|
||||
}
|
||||
EXPORT_SYMBOL(utf8nfdi);
|
||||
|
||||
const struct utf8data *utf8nfdicf(unsigned int maxage)
|
||||
{
|
||||
int i = ARRAY_SIZE(utf8nfdicfdata) - 1;
|
||||
|
||||
while (maxage < utf8nfdicfdata[i].maxage)
|
||||
i--;
|
||||
if (maxage > utf8nfdicfdata[i].maxage)
|
||||
return NULL;
|
||||
return &utf8nfdicfdata[i];
|
||||
}
|
||||
EXPORT_SYMBOL(utf8nfdicf);
|
320
fs/unicode/utf8-selftest.c
Normal file
320
fs/unicode/utf8-selftest.c
Normal file
@ -0,0 +1,320 @@
|
||||
/*
|
||||
* Kernel module for testing utf-8 support.
|
||||
*
|
||||
* Copyright 2017 Collabora Ltd.
|
||||
*
|
||||
* This software is licensed under the terms of the GNU General Public
|
||||
* License version 2, as published by the Free Software Foundation, and
|
||||
* may be copied, distributed, and modified under those terms.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/unicode.h>
|
||||
#include <linux/dcache.h>
|
||||
|
||||
#include "utf8n.h"
|
||||
|
||||
/* Counters updated by the test macros below; private to this file. */
static unsigned int failed_tests;
static unsigned int total_tests;

/* Tests will be based on this version. */
#define latest_maj 12
#define latest_min 1
#define latest_rev 0

/*
 * Record one test.  cond is evaluated once; when it is false the failure
 * is counted and logged together with the stringified condition.  fmt
 * must be a string literal: an empty literal means "no detail message",
 * otherwise fmt and the remaining arguments are logged as the detail.
 */
#define _test(cond, func, line, fmt, ...) do {				\
		total_tests++;						\
		if (!(cond)) {						\
			failed_tests++;					\
			pr_err("test %s:%d Failed: %s%s\n",		\
			       func, line, #cond,			\
			       ((fmt)[0] ? ":" : "."));			\
			if ((fmt)[0])					\
				pr_err(fmt, ##__VA_ARGS__);		\
		}							\
	} while (0)
/* Test with a printf-style detail message for the failure case. */
#define test_f(cond, fmt, ...) _test(cond, __func__, __LINE__, fmt, ##__VA_ARGS__)
/* Test without a detail message. */
#define test(cond) _test(cond, __func__, __LINE__, "")
|
||||
|
||||
/*
 * NFD test vectors: str is the input, dec the expected canonical
 * decomposition.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[10];
	unsigned char dec[10];
} nfdi_test_data[] = {
	/* Trivial sequence */
	{
		/* "aBba" decomposes to itself */
		.str = "aBba",
		.dec = "aBba",
	},
	/* Simple equivalent sequences */
	{
		/* 'VULGAR FRACTION ONE QUARTER' cannot decompose to
		   'NUMBER 1' + 'FRACTION SLASH' + 'NUMBER 4' on
		   canonical decomposition */
		.str = {0xc2, 0xbc, 0x00},
		.dec = {0xc2, 0xbc, 0x00},
	},
	{
		/* 'LATIN SMALL LETTER A WITH DIAERESIS' decomposes to
		   'LETTER A' + 'COMBINING DIAERESIS' */
		.str = {0xc3, 0xa4, 0x00},
		.dec = {0x61, 0xcc, 0x88, 0x00},
	},
	{
		/* 'LATIN SMALL LETTER LJ' can't decompose to
		   'LETTER L' + 'LETTER J' on canonical decomposition */
		.str = {0xC7, 0x89, 0x00},
		.dec = {0xC7, 0x89, 0x00},
	},
	{
		/* GREEK ANO TELEIA decomposes to MIDDLE DOT */
		.str = {0xCE, 0x87, 0x00},
		.dec = {0xC2, 0xB7, 0x00}
	},
	/* Canonical ordering */
	{
		/* A + 'COMBINING ACUTE ACCENT' + 'COMBINING OGONEK' decomposes
		   to A + 'COMBINING OGONEK' + 'COMBINING ACUTE ACCENT' */
		.str = {0x41, 0xcc, 0x81, 0xcc, 0xa8, 0x0},
		.dec = {0x41, 0xcc, 0xa8, 0xcc, 0x81, 0x0},
	},
	{
		/* 'LATIN SMALL LETTER A WITH DIAERESIS' + 'COMBINING OGONEK'
		   decomposes to
		   'LETTER A' + 'COMBINING OGONEK' + 'COMBINING DIAERESIS' */
		.str = {0xc3, 0xa4, 0xCC, 0xA8, 0x00},

		.dec = {0x61, 0xCC, 0xA8, 0xcc, 0x88, 0x00},
	},

};
|
||||
|
||||
/*
 * NFD + casefold test vectors: str is the input, ncf the expected
 * normalized and casefolded form.
 */
static const struct {
	/* UTF-8 strings in this vector _must_ be NUL-terminated. */
	unsigned char str[30];
	unsigned char ncf[30];
} nfdicf_test_data[] = {
	/* Trivial sequences */
	{
		/* "ABba" folds to lowercase */
		.str = {0x41, 0x42, 0x62, 0x61, 0x00},
		.ncf = {0x61, 0x62, 0x62, 0x61, 0x00},
	},
	{
		/* All ASCII folds to lower-case */
		.str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0.1",
		.ncf = "abcdefghijklmnopqrstuvwxyz0.1",
	},
	{
		/* LATIN SMALL LETTER SHARP S folds to
		   LATIN SMALL LETTER S + LATIN SMALL LETTER S */
		.str = {0xc3, 0x9f, 0x00},
		.ncf = {0x73, 0x73, 0x00},
	},
	{
		/* LATIN CAPITAL LETTER A WITH RING ABOVE folds to
		   LATIN SMALL LETTER A + COMBINING RING ABOVE */
		.str = {0xC3, 0x85, 0x00},
		.ncf = {0x61, 0xcc, 0x8a, 0x00},
	},
	/* Introduced by Unicode 8.0.0. */
	/* Cherokee letters are interesting test-cases because they fold
	   to upper-case.  Before 8.0.0, Cherokee lowercase were
	   undefined, thus, the folding from LC is not stable between
	   7.0.0 -> 8.0.0, but it is from UC. */
	{
		/* CHEROKEE SMALL LETTER A folds to CHEROKEE LETTER A */
		.str = {0xea, 0xad, 0xb0, 0x00},
		.ncf = {0xe1, 0x8e, 0xa0, 0x00},
	},
	{
		/* CHEROKEE SMALL LETTER YE folds to CHEROKEE LETTER YE */
		.str = {0xe1, 0x8f, 0xb8, 0x00},
		.ncf = {0xe1, 0x8f, 0xb0, 0x00},
	},
	{
		/* OLD HUNGARIAN CAPITAL LETTER AMB folds to
		   OLD HUNGARIAN SMALL LETTER AMB */
		.str = {0xf0, 0x90, 0xb2, 0x83, 0x00},
		.ncf = {0xf0, 0x90, 0xb3, 0x83, 0x00},
	},
	/* Introduced by Unicode 9.0.0. */
	{
		/* OSAGE CAPITAL LETTER CHA folds to
		   OSAGE SMALL LETTER CHA */
		.str = {0xf0, 0x90, 0x92, 0xb5, 0x00},
		.ncf = {0xf0, 0x90, 0x93, 0x9d, 0x00},
	},
	{
		/* LATIN CAPITAL LETTER SMALL CAPITAL I folds to
		   LATIN LETTER SMALL CAPITAL I */
		.str = {0xea, 0x9e, 0xae, 0x00},
		.ncf = {0xc9, 0xaa, 0x00},
	},
	/* Introduced by Unicode 11.0.0. */
	{
		/* GEORGIAN MTAVRULI CAPITAL LETTER AN (U+1C90) folds to
		   GEORGIAN LETTER AN (U+10D0) */
		.str = {0xe1, 0xb2, 0x90, 0x00},
		.ncf = {0xe1, 0x83, 0x90, 0x00},
	}
};
|
||||
|
||||
static void check_utf8_nfdi(void)
|
||||
{
|
||||
int i;
|
||||
struct utf8cursor u8c;
|
||||
const struct utf8data *data;
|
||||
|
||||
data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
|
||||
if (!data) {
|
||||
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
|
||||
__func__, latest_maj, latest_min, latest_rev);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
||||
int len = strlen(nfdi_test_data[i].str);
|
||||
int nlen = strlen(nfdi_test_data[i].dec);
|
||||
int j = 0;
|
||||
unsigned char c;
|
||||
|
||||
test((utf8len(data, nfdi_test_data[i].str) == nlen));
|
||||
test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen));
|
||||
|
||||
if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0)
|
||||
pr_err("can't create cursor\n");
|
||||
|
||||
while ((c = utf8byte(&u8c)) > 0) {
|
||||
test_f((c == nfdi_test_data[i].dec[j]),
|
||||
"Unexpected byte 0x%x should be 0x%x\n",
|
||||
c, nfdi_test_data[i].dec[j]);
|
||||
j++;
|
||||
}
|
||||
|
||||
test((j == nlen));
|
||||
}
|
||||
}
|
||||
|
||||
static void check_utf8_nfdicf(void)
|
||||
{
|
||||
int i;
|
||||
struct utf8cursor u8c;
|
||||
const struct utf8data *data;
|
||||
|
||||
data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
|
||||
if (!data) {
|
||||
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
|
||||
__func__, latest_maj, latest_min, latest_rev);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
|
||||
int len = strlen(nfdicf_test_data[i].str);
|
||||
int nlen = strlen(nfdicf_test_data[i].ncf);
|
||||
int j = 0;
|
||||
unsigned char c;
|
||||
|
||||
test((utf8len(data, nfdicf_test_data[i].str) == nlen));
|
||||
test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen));
|
||||
|
||||
if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0)
|
||||
pr_err("can't create cursor\n");
|
||||
|
||||
while ((c = utf8byte(&u8c)) > 0) {
|
||||
test_f((c == nfdicf_test_data[i].ncf[j]),
|
||||
"Unexpected byte 0x%x should be 0x%x\n",
|
||||
c, nfdicf_test_data[i].ncf[j]);
|
||||
j++;
|
||||
}
|
||||
|
||||
test((j == nlen));
|
||||
}
|
||||
}
|
||||
|
||||
static void check_utf8_comparisons(void)
|
||||
{
|
||||
int i;
|
||||
struct unicode_map *table = utf8_load("12.1.0");
|
||||
|
||||
if (IS_ERR(table)) {
|
||||
pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
|
||||
__func__, latest_maj, latest_min, latest_rev);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
|
||||
const struct qstr s1 = {.name = nfdi_test_data[i].str,
|
||||
.len = sizeof(nfdi_test_data[i].str)};
|
||||
const struct qstr s2 = {.name = nfdi_test_data[i].dec,
|
||||
.len = sizeof(nfdi_test_data[i].dec)};
|
||||
|
||||
test_f(!utf8_strncmp(table, &s1, &s2),
|
||||
"%s %s comparison mismatch\n", s1.name, s2.name);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
|
||||
const struct qstr s1 = {.name = nfdicf_test_data[i].str,
|
||||
.len = sizeof(nfdicf_test_data[i].str)};
|
||||
const struct qstr s2 = {.name = nfdicf_test_data[i].ncf,
|
||||
.len = sizeof(nfdicf_test_data[i].ncf)};
|
||||
|
||||
test_f(!utf8_strncasecmp(table, &s1, &s2),
|
||||
"%s %s comparison mismatch\n", s1.name, s2.name);
|
||||
}
|
||||
|
||||
utf8_unload(table);
|
||||
}
|
||||
|
||||
static void check_supported_versions(void)
|
||||
{
|
||||
/* Unicode 7.0.0 should be supported. */
|
||||
test(utf8version_is_supported(7, 0, 0));
|
||||
|
||||
/* Unicode 9.0.0 should be supported. */
|
||||
test(utf8version_is_supported(9, 0, 0));
|
||||
|
||||
/* Unicode 1x.0.0 (the latest version) should be supported. */
|
||||
test(utf8version_is_supported(latest_maj, latest_min, latest_rev));
|
||||
|
||||
/* Next versions don't exist. */
|
||||
test(!utf8version_is_supported(13, 0, 0));
|
||||
test(!utf8version_is_supported(0, 0, 0));
|
||||
test(!utf8version_is_supported(-1, -1, -1));
|
||||
}
|
||||
|
||||
/*
 * Module init: reset the counters, run every test group and log a
 * summary.  Always returns 0, whether or not tests failed.
 */
static int __init init_test_ucd(void)
{
	total_tests = 0;
	failed_tests = 0;

	check_supported_versions();
	check_utf8_nfdi();
	check_utf8_nfdicf();
	check_utf8_comparisons();

	if (failed_tests)
		pr_err("%u out of %u tests failed\n", failed_tests,
		       total_tests);
	else
		pr_info("All %u tests passed\n", total_tests);
	return 0;
}
|
||||
|
||||
/* Module exit: intentionally empty. */
static void __exit exit_test_ucd(void)
{
	/* Nothing to tear down. */
}
|
||||
|
||||
module_init(init_test_ucd);
|
||||
module_exit(exit_test_ucd);
|
||||
|
||||
MODULE_AUTHOR("Gabriel Krisman Bertazi <krisman@collabora.co.uk>");
|
||||
MODULE_LICENSE("GPL");
|
4109
fs/unicode/utf8data.h_shipped
Normal file
4109
fs/unicode/utf8data.h_shipped
Normal file
File diff suppressed because it is too large
Load Diff
117
fs/unicode/utf8n.h
Normal file
117
fs/unicode/utf8n.h
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2014 SGI.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it would be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef UTF8NORM_H
|
||||
#define UTF8NORM_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
 * Encoding a unicode version number as a single unsigned int:
 * the major number is shifted left by 16 bits, the minor number by 8
 * bits, and the revision is used as is.
 */
#define UNICODE_MAJ_SHIFT		(16)
#define UNICODE_MIN_SHIFT		(8)

#define UNICODE_AGE(MAJ, MIN, REV)				\
	(((unsigned int)(MAJ) << UNICODE_MAJ_SHIFT) |		\
	 ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) |		\
	 ((unsigned int)(REV)))
|
||||
|
||||
/* Highest unicode version supported by the data tables. */
|
||||
extern int utf8version_is_supported(u8 maj, u8 min, u8 rev);
|
||||
extern int utf8version_latest(void);
|
||||
|
||||
/*
|
||||
* Look for the correct const struct utf8data for a unicode version.
|
||||
* Returns NULL if the version requested is too new.
|
||||
*
|
||||
* Two normalization forms are supported: nfdi and nfdicf.
|
||||
*
|
||||
* nfdi:
|
||||
* - Apply unicode normalization form NFD.
|
||||
* - Remove any Default_Ignorable_Code_Point.
|
||||
*
|
||||
* nfdicf:
|
||||
* - Apply unicode normalization form NFD.
|
||||
* - Remove any Default_Ignorable_Code_Point.
|
||||
* - Apply a full casefold (C + F).
|
||||
*/
|
||||
extern const struct utf8data *utf8nfdi(unsigned int maxage);
|
||||
extern const struct utf8data *utf8nfdicf(unsigned int maxage);
|
||||
|
||||
/*
|
||||
* Determine the maximum age of any unicode character in the string.
|
||||
* Returns 0 if only unassigned code points are present.
|
||||
* Returns -1 if the input is not valid UTF-8.
|
||||
*/
|
||||
extern int utf8agemax(const struct utf8data *data, const char *s);
|
||||
extern int utf8nagemax(const struct utf8data *data, const char *s, size_t len);
|
||||
|
||||
/*
|
||||
* Determine the minimum age of any unicode character in the string.
|
||||
* Returns 0 if any unassigned code points are present.
|
||||
* Returns -1 if the input is not valid UTF-8.
|
||||
*/
|
||||
extern int utf8agemin(const struct utf8data *data, const char *s);
|
||||
extern int utf8nagemin(const struct utf8data *data, const char *s, size_t len);
|
||||
|
||||
/*
|
||||
* Determine the length of the normalized form of the string,
|
||||
* excluding any terminating NUL byte.
|
||||
* Returns 0 if only ignorable code points are present.
|
||||
* Returns -1 if the input is not valid UTF-8.
|
||||
*/
|
||||
extern ssize_t utf8len(const struct utf8data *data, const char *s);
|
||||
extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
|
||||
|
||||
/* Needed in struct utf8cursor below. */
|
||||
#define UTF8HANGULLEAF (12)
|
||||
|
||||
/*
|
||||
* Cursor structure used by the normalizer.
|
||||
*/
|
||||
struct utf8cursor {
|
||||
const struct utf8data *data;
|
||||
const char *s;
|
||||
const char *p;
|
||||
const char *ss;
|
||||
const char *sp;
|
||||
unsigned int len;
|
||||
unsigned int slen;
|
||||
short int ccc;
|
||||
short int nccc;
|
||||
unsigned char hangul[UTF8HANGULLEAF];
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize a utf8cursor to normalize a string.
|
||||
* Returns 0 on success.
|
||||
* Returns -1 on failure.
|
||||
*/
|
||||
extern int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data,
|
||||
const char *s);
|
||||
extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data,
|
||||
const char *s, size_t len);
|
||||
|
||||
/*
|
||||
* Get the next byte in the normalization.
|
||||
* Returns a value > 0 && < 256 on success.
|
||||
* Returns 0 when the end of the normalization is reached.
|
||||
* Returns -1 if the string being normalized is not valid UTF-8.
|
||||
*/
|
||||
extern int utf8byte(struct utf8cursor *u8c);
|
||||
|
||||
#endif /* UTF8NORM_H */
|
@ -1963,6 +1963,7 @@ struct super_operations {
|
||||
#define S_DAX 0 /* Make all the DAX code disappear */
|
||||
#endif
|
||||
#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
|
||||
#define S_CASEFOLD 32768 /* Casefolded file */
|
||||
|
||||
/*
|
||||
* Note that nosuid etc flags are inode-specific: setting some file-system
|
||||
@ -2003,6 +2004,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
|
||||
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
|
||||
#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
|
||||
#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED)
|
||||
#define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD)
|
||||
|
||||
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
|
||||
(inode)->i_rdev == WHITEOUT_DEV)
|
||||
|
30
include/linux/unicode.h
Normal file
30
include/linux/unicode.h
Normal file
@ -0,0 +1,30 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_UNICODE_H
|
||||
#define _LINUX_UNICODE_H
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/dcache.h>
|
||||
|
||||
/* Handle returned by utf8_load() and released with utf8_unload(). */
struct unicode_map {
	/* Character set name; presumably "UTF-8" here, TODO confirm. */
	const char *charset;
	/* Unicode version; presumably UNICODE_AGE-encoded, TODO confirm. */
	int version;
};
|
||||
|
||||
int utf8_validate(const struct unicode_map *um, const struct qstr *str);
|
||||
|
||||
int utf8_strncmp(const struct unicode_map *um,
|
||||
const struct qstr *s1, const struct qstr *s2);
|
||||
|
||||
int utf8_strncasecmp(const struct unicode_map *um,
|
||||
const struct qstr *s1, const struct qstr *s2);
|
||||
|
||||
int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
|
||||
unsigned char *dest, size_t dlen);
|
||||
|
||||
int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
|
||||
unsigned char *dest, size_t dlen);
|
||||
|
||||
struct unicode_map *utf8_load(const char *version);
|
||||
void utf8_unload(struct unicode_map *um);
|
||||
|
||||
#endif /* _LINUX_UNICODE_H */
|
Loading…
x
Reference in New Issue
Block a user