udf: Join functions for UTF8 and NLS conversions

There is not much sense in having separate functions for UTF8 and
NLS conversions, since UTF8 encoding is actually a special case
of NLS.

However, although UTF8 is also supported by the general NLS framework,
it would be good to have separate UTF8 character conversion functions
(char2uni and uni2char) locally in the UDF code, so that they can be
used even if NLS support is not enabled in the kernel configuration.

Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
Andrew Gabbasov 2016-01-15 02:44:20 -06:00 committed by Jan Kara
parent 525e2c56c3
commit 3e7fc2055c
1 changed files with 89 additions and 187 deletions

View File

@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
memcpy(dest->u_name, ptr + 1, exactsize - 1); memcpy(dest->u_name, ptr + 1, exactsize - 1);
} }
/* static int udf_uni2char_utf8(wchar_t uni,
* udf_CS0toUTF8 unsigned char *out,
* int boundlen)
* PURPOSE
* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
*
* PRE-CONDITIONS
* utf Pointer to UTF-8 output buffer.
* ocu Pointer to OSTA Compressed Unicode input buffer
* of size UDF_NAME_LEN bytes.
* both of type "struct ustr *"
*
* POST-CONDITIONS
* <return> >= 0 on success.
*
* HISTORY
* November 12, 1997 - Andrew E. Mileski
* Written, tested, and released.
*/
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
{ {
const uint8_t *ocu; int u_len = 0;
uint8_t cmp_id, ocu_len;
int i;
ocu_len = ocu_i->u_len; if (boundlen <= 0)
if (ocu_len == 0) { return -ENAMETOOLONG;
memset(utf_o, 0, sizeof(struct ustr));
return 0;
}
cmp_id = ocu_i->u_cmpID; if (uni < 0x80) {
if (cmp_id != 8 && cmp_id != 16) { out[u_len++] = (unsigned char)uni;
memset(utf_o, 0, sizeof(struct ustr)); } else if (uni < 0x800) {
pr_err("unknown compression code (%d) stri=%s\n", if (boundlen < 2)
cmp_id, ocu_i->u_name); return -ENAMETOOLONG;
return -EINVAL; out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
} out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
ocu = ocu_i->u_name;
utf_o->u_len = 0;
for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
/* Expand OSTA compressed Unicode to Unicode */
uint32_t c = ocu[i++];
if (cmp_id == 16)
c = (c << 8) | ocu[i++];
/* Compress Unicode to UTF-8 */
if (c < 0x80U)
utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
else if (c < 0x800U) {
if (utf_o->u_len > (UDF_NAME_LEN - 4))
break;
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0xc0 | (c >> 6));
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0x80 | (c & 0x3f));
} else { } else {
if (utf_o->u_len > (UDF_NAME_LEN - 5)) if (boundlen < 3)
break; return -ENAMETOOLONG;
utf_o->u_name[utf_o->u_len++] = out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
(uint8_t)(0xe0 | (c >> 12)); out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
utf_o->u_name[utf_o->u_len++] = out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
(uint8_t)(0x80 |
((c >> 6) & 0x3f));
utf_o->u_name[utf_o->u_len++] =
(uint8_t)(0x80 | (c & 0x3f));
} }
} return u_len;
utf_o->u_cmpID = 8;
return utf_o->u_len;
} }
/* static int udf_char2uni_utf8(const unsigned char *in,
* int boundlen,
* udf_UTF8toCS0 wchar_t *uni)
*
* PURPOSE
* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
*
* DESCRIPTION
* This routine is only called by udf_lookup().
*
* PRE-CONDITIONS
* ocu Pointer to OSTA Compressed Unicode output
* buffer of size UDF_NAME_LEN bytes.
* utf Pointer to UTF-8 input buffer.
* utf_len Length of UTF-8 input buffer in bytes.
*
* POST-CONDITIONS
* <return> Zero on success.
*
* HISTORY
* November 12, 1997 - Andrew E. Mileski
* Written, tested, and released.
*/
static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
{ {
unsigned c, i, max_val, utf_char; unsigned int utf_char;
int utf_cnt, u_len, u_ch; unsigned char c;
int utf_cnt, u_len;
memset(ocu, 0, sizeof(dstring) * length); utf_char = 0;
ocu[0] = 8; utf_cnt = 0;
max_val = 0xffU; for (u_len = 0; u_len < boundlen;) {
u_ch = 1; c = in[u_len++];
try_again:
u_len = 0U;
utf_char = 0U;
utf_cnt = 0U;
for (i = 0U; i < utf->u_len; i++) {
/* Name didn't fit? */
if (u_len + 1 + u_ch >= length)
return 0;
c = (uint8_t)utf->u_name[i];
/* Complete a multi-byte UTF-8 character */ /* Complete a multi-byte UTF-8 character */
if (utf_cnt) { if (utf_cnt) {
utf_char = (utf_char << 6) | (c & 0x3fU); utf_char = (utf_char << 6) | (c & 0x3f);
if (--utf_cnt) if (--utf_cnt)
continue; continue;
} else { } else {
/* Check for a multi-byte UTF-8 character */ /* Check for a multi-byte UTF-8 character */
if (c & 0x80U) { if (c & 0x80) {
/* Start a multi-byte UTF-8 character */ /* Start a multi-byte UTF-8 character */
if ((c & 0xe0U) == 0xc0U) { if ((c & 0xe0) == 0xc0) {
utf_char = c & 0x1fU; utf_char = c & 0x1f;
utf_cnt = 1; utf_cnt = 1;
} else if ((c & 0xf0U) == 0xe0U) { } else if ((c & 0xf0) == 0xe0) {
utf_char = c & 0x0fU; utf_char = c & 0x0f;
utf_cnt = 2; utf_cnt = 2;
} else if ((c & 0xf8U) == 0xf0U) { } else if ((c & 0xf8) == 0xf0) {
utf_char = c & 0x07U; utf_char = c & 0x07;
utf_cnt = 3; utf_cnt = 3;
} else if ((c & 0xfcU) == 0xf8U) { } else if ((c & 0xfc) == 0xf8) {
utf_char = c & 0x03U; utf_char = c & 0x03;
utf_cnt = 4; utf_cnt = 4;
} else if ((c & 0xfeU) == 0xfcU) { } else if ((c & 0xfe) == 0xfc) {
utf_char = c & 0x01U; utf_char = c & 0x01;
utf_cnt = 5; utf_cnt = 5;
} else { } else {
goto error_out; utf_cnt = -1;
break;
} }
continue; continue;
} else { } else {
@ -228,36 +149,19 @@ try_again:
utf_char = c; utf_char = c;
} }
} }
*uni = utf_char;
/* Choose no compression if necessary */ break;
if (utf_char > max_val) {
if (max_val == 0xffU) {
max_val = 0xffffU;
ocu[0] = (uint8_t)0x10U;
u_ch = 2;
goto try_again;
} }
goto error_out;
}
if (max_val == 0xffffU)
ocu[++u_len] = (uint8_t)(utf_char >> 8);
ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
}
if (utf_cnt) { if (utf_cnt) {
error_out: *uni = '?';
ocu[++u_len] = '?'; return -EINVAL;
printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n")); }
return u_len;
} }
ocu[length - 1] = (uint8_t)u_len + 1; static int udf_name_from_CS0(struct ustr *utf_o,
const struct ustr *ocu_i,
return u_len + 1; int (*conv_f)(wchar_t, unsigned char *, int))
}
static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
const struct ustr *ocu_i)
{ {
const uint8_t *ocu; const uint8_t *ocu;
uint8_t cmp_id, ocu_len; uint8_t cmp_id, ocu_len;
@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
if (cmp_id == 16) if (cmp_id == 16)
c = (c << 8) | ocu[i++]; c = (c << 8) | ocu[i++];
len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], len = conv_f(c, &utf_o->u_name[utf_o->u_len],
UDF_NAME_LEN - 2 - utf_o->u_len); UDF_NAME_LEN - 2 - utf_o->u_len);
/* Valid character? */ /* Valid character? */
if (len >= 0) if (len >= 0)
utf_o->u_len += len; utf_o->u_len += len;
else if (len == -ENAMETOOLONG)
break;
else else
utf_o->u_name[utf_o->u_len++] = '?'; utf_o->u_name[utf_o->u_len++] = '?';
} }
@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
return utf_o->u_len; return utf_o->u_len;
} }
static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
int length) int (*conv_f)(const unsigned char *, int, wchar_t *))
{ {
int len; int i, len;
unsigned i, max_val; unsigned int max_val;
uint16_t uni_char; wchar_t uni_char;
int u_len, u_ch; int u_len, u_ch;
memset(ocu, 0, sizeof(dstring) * length); memset(ocu, 0, sizeof(dstring) * length);
ocu[0] = 8; ocu[0] = 8;
max_val = 0xffU; max_val = 0xff;
u_ch = 1; u_ch = 1;
try_again: try_again:
u_len = 0U; u_len = 0;
for (i = 0U; i < uni->u_len; i++) { for (i = 0; i < uni->u_len; i++) {
/* Name didn't fit? */ /* Name didn't fit? */
if (u_len + 1 + u_ch >= length) if (u_len + 1 + u_ch >= length)
return 0; return 0;
len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
if (!len) if (!len)
continue; continue;
/* Invalid character, deal with it */ /* Invalid character, deal with it */
@ -328,15 +234,15 @@ try_again:
} }
if (uni_char > max_val) { if (uni_char > max_val) {
max_val = 0xffffU; max_val = 0xffff;
ocu[0] = (uint8_t)0x10U; ocu[0] = 0x10;
u_ch = 2; u_ch = 2;
goto try_again; goto try_again;
} }
if (max_val == 0xffffU) if (max_val == 0xffff)
ocu[++u_len] = (uint8_t)(uni_char >> 8); ocu[++u_len] = (uint8_t)(uni_char >> 8);
ocu[++u_len] = (uint8_t)(uni_char & 0xffU); ocu[++u_len] = (uint8_t)(uni_char & 0xff);
i += len - 1; i += len - 1;
} }
@ -344,10 +250,16 @@ try_again:
return u_len + 1; return u_len + 1;
} }
/*
 * udf_CS0toUTF8
 *
 * Convert the CS0 (OSTA Compressed Unicode) name in ocu_i to UTF-8 in utf_o.
 * Thin wrapper: hands udf_uni2char_utf8 to udf_name_from_CS0() as the
 * per-character converter and returns that function's result unchanged.
 */
int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
{
return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
}
int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
uint8_t *dname, int dlen) uint8_t *dname, int dlen)
{ {
struct ustr *filename, *unifilename; struct ustr *filename, *unifilename;
int (*conv_f)(wchar_t, unsigned char *, int);
int ret; int ret;
if (!slen) if (!slen)
@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
udf_build_ustr_exact(unifilename, sname, slen); udf_build_ustr_exact(unifilename, sname, slen);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
ret = udf_CS0toUTF8(filename, unifilename); conv_f = udf_uni2char_utf8;
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, conv_f = UDF_SB(sb)->s_nls_map->uni2char;
unifilename);
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n",
sname);
goto out2;
}
} else } else
BUG(); BUG();
ret = udf_name_from_CS0(filename, unifilename, conv_f);
if (ret < 0) {
udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
goto out2;
}
ret = udf_translate_to_linux(dname, dlen, ret = udf_translate_to_linux(dname, dlen,
filename->u_name, filename->u_len, filename->u_name, filename->u_len,
unifilename->u_name, unifilename->u_len); unifilename->u_name, unifilename->u_len);
@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
uint8_t *dname, int dlen) uint8_t *dname, int dlen)
{ {
struct ustr unifilename; struct ustr unifilename;
int namelen; int (*conv_f)(const unsigned char *, int, wchar_t *);
if (!udf_char_to_ustr(&unifilename, sname, slen)) if (!udf_char_to_ustr(&unifilename, sname, slen))
return 0; return 0;
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
namelen = udf_UTF8toCS0(dname, &unifilename, dlen); conv_f = udf_char2uni_utf8;
if (!namelen)
return 0;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, conv_f = UDF_SB(sb)->s_nls_map->char2uni;
&unifilename, dlen);
if (!namelen)
return 0;
} else } else
return 0; BUG();
return namelen; return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
} }
#define ILLEGAL_CHAR_MARK '_' #define ILLEGAL_CHAR_MARK '_'