udf: Join functions for UTF8 and NLS conversions

There is no much sense to have separate functions for UTF8 and NLS conversions, since UTF8 encoding is actually the special case of NLS. However, although UTF8 is also supported by general NLS framework, it would be good to have separate UTF8 character conversion functions (char2uni and uni2char) locally in UDF code, so that they could be used even if NLS support is not enabled in the kernel configuration. Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com> Signed-off-by: Jan Kara <jack@suse.cz>
2016-01-15 02:44:20 -06:00 · 2016-01-15 02:44:20 -06:00 · 3e7fc2055c
parent 525e2c56c3
commit 3e7fc2055c
1 changed files with 89 additions and 187 deletions
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
 	memcpy(dest->u_name, ptr + 1, exactsize - 1);
 }

-/*
- * udf_CS0toUTF8
- *
- * PURPOSE
- *	Convert OSTA Compressed Unicode to the UTF-8 equivalent.
- *
- * PRE-CONDITIONS
- *	utf			Pointer to UTF-8 output buffer.
- *	ocu			Pointer to OSTA Compressed Unicode input buffer
- *				of size UDF_NAME_LEN bytes.
- * 				both of type "struct ustr *"
- *
- * POST-CONDITIONS
- *	<return>		>= 0 on success.
- *
- * HISTORY
- *	November 12, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+static int udf_uni2char_utf8(wchar_t uni,
+			     unsigned char *out,
+			     int boundlen)
 {
-	const uint8_t *ocu;
-	uint8_t cmp_id, ocu_len;
-	int i;
+	int u_len = 0;

-	ocu_len = ocu_i->u_len;
-	if (ocu_len == 0) {
-		memset(utf_o, 0, sizeof(struct ustr));
-		return 0;
+	if (boundlen <= 0)
+		return -ENAMETOOLONG;
+
+	if (uni < 0x80) {
+		out[u_len++] = (unsigned char)uni;
+	} else if (uni < 0x800) {
+		if (boundlen < 2)
+			return -ENAMETOOLONG;
+		out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
+		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
+	} else {
+		if (boundlen < 3)
+			return -ENAMETOOLONG;
+		out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
+		out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
+		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
 	}
-
-	cmp_id = ocu_i->u_cmpID;
-	if (cmp_id != 8 && cmp_id != 16) {
-		memset(utf_o, 0, sizeof(struct ustr));
-		pr_err("unknown compression code (%d) stri=%s\n",
-		       cmp_id, ocu_i->u_name);
-		return -EINVAL;
-	}
-
-	ocu = ocu_i->u_name;
-	utf_o->u_len = 0;
-	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
-
-		/* Expand OSTA compressed Unicode to Unicode */
-		uint32_t c = ocu[i++];
-		if (cmp_id == 16)
-			c = (c << 8) | ocu[i++];
-
-		/* Compress Unicode to UTF-8 */
-		if (c < 0x80U)
-			utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
-		else if (c < 0x800U) {
-			if (utf_o->u_len > (UDF_NAME_LEN - 4))
-				break;
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0xc0 | (c >> 6));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 | (c & 0x3f));
-		} else {
-			if (utf_o->u_len > (UDF_NAME_LEN - 5))
-				break;
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0xe0 | (c >> 12));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 |
-							  ((c >> 6) & 0x3f));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 | (c & 0x3f));
-		}
-	}
-	utf_o->u_cmpID = 8;
-
-	return utf_o->u_len;
+	return u_len;
 }

-/*
- *
- * udf_UTF8toCS0
- *
- * PURPOSE
- *	Convert UTF-8 to the OSTA Compressed Unicode equivalent.
- *
- * DESCRIPTION
- *	This routine is only called by udf_lookup().
- *
- * PRE-CONDITIONS
- *	ocu			Pointer to OSTA Compressed Unicode output
- *				buffer of size UDF_NAME_LEN bytes.
- *	utf			Pointer to UTF-8 input buffer.
- *	utf_len			Length of UTF-8 input buffer in bytes.
- *
- * POST-CONDITIONS
- *	<return>		Zero on success.
- *
- * HISTORY
- *	November 12, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+static int udf_char2uni_utf8(const unsigned char *in,
+			     int boundlen,
+			     wchar_t *uni)
 {
-	unsigned c, i, max_val, utf_char;
-	int utf_cnt, u_len, u_ch;
+	unsigned int utf_char;
+	unsigned char c;
+	int utf_cnt, u_len;

-	memset(ocu, 0, sizeof(dstring) * length);
-	ocu[0] = 8;
-	max_val = 0xffU;
-	u_ch = 1;
-
-try_again:
-	u_len = 0U;
-	utf_char = 0U;
-	utf_cnt = 0U;
-	for (i = 0U; i < utf->u_len; i++) {
-		/* Name didn't fit? */
-		if (u_len + 1 + u_ch >= length)
-			return 0;
-
-		c = (uint8_t)utf->u_name[i];
+	utf_char = 0;
+	utf_cnt = 0;
+	for (u_len = 0; u_len < boundlen;) {
+		c = in[u_len++];

 		/* Complete a multi-byte UTF-8 character */
 		if (utf_cnt) {
-			utf_char = (utf_char << 6) | (c & 0x3fU);
+			utf_char = (utf_char << 6) | (c & 0x3f);
 			if (--utf_cnt)
 				continue;
 		} else {
 			/* Check for a multi-byte UTF-8 character */
-			if (c & 0x80U) {
+			if (c & 0x80) {
 				/* Start a multi-byte UTF-8 character */
-				if ((c & 0xe0U) == 0xc0U) {
-					utf_char = c & 0x1fU;
+				if ((c & 0xe0) == 0xc0) {
+					utf_char = c & 0x1f;
 					utf_cnt = 1;
-				} else if ((c & 0xf0U) == 0xe0U) {
-					utf_char = c & 0x0fU;
+				} else if ((c & 0xf0) == 0xe0) {
+					utf_char = c & 0x0f;
 					utf_cnt = 2;
-				} else if ((c & 0xf8U) == 0xf0U) {
-					utf_char = c & 0x07U;
+				} else if ((c & 0xf8) == 0xf0) {
+					utf_char = c & 0x07;
 					utf_cnt = 3;
-				} else if ((c & 0xfcU) == 0xf8U) {
-					utf_char = c & 0x03U;
+				} else if ((c & 0xfc) == 0xf8) {
+					utf_char = c & 0x03;
 					utf_cnt = 4;
-				} else if ((c & 0xfeU) == 0xfcU) {
-					utf_char = c & 0x01U;
+				} else if ((c & 0xfe) == 0xfc) {
+					utf_char = c & 0x01;
 					utf_cnt = 5;
 				} else {
-					goto error_out;
+					utf_cnt = -1;
+					break;
 				}
 				continue;
 			} else {
@ -228,36 +149,19 @@ try_again:
 				utf_char = c;
 			}
 		}
-
-		/* Choose no compression if necessary */
-		if (utf_char > max_val) {
-			if (max_val == 0xffU) {
-				max_val = 0xffffU;
-				ocu[0] = (uint8_t)0x10U;
-				u_ch = 2;
-				goto try_again;
-			}
-			goto error_out;
-		}
-
-		if (max_val == 0xffffU)
-			ocu[++u_len] = (uint8_t)(utf_char >> 8);
-		ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
+		*uni = utf_char;
+		break;
 	}
-
 	if (utf_cnt) {
-error_out:
-		ocu[++u_len] = '?';
-		printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
+		*uni = '?';
+		return -EINVAL;
 	}
-
-	ocu[length - 1] = (uint8_t)u_len + 1;
-
-	return u_len + 1;
+	return u_len;
 }

-static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
-			const struct ustr *ocu_i)
+static int udf_name_from_CS0(struct ustr *utf_o,
+			     const struct ustr *ocu_i,
+			     int (*conv_f)(wchar_t, unsigned char *, int))
 {
 	const uint8_t *ocu;
 	uint8_t cmp_id, ocu_len;
@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];

-		len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
-				    UDF_NAME_LEN - 2 - utf_o->u_len);
+		len = conv_f(c, &utf_o->u_name[utf_o->u_len],
+			     UDF_NAME_LEN - 2 - utf_o->u_len);
 		/* Valid character? */
 		if (len >= 0)
 			utf_o->u_len += len;
+		else if (len == -ENAMETOOLONG)
+			break;
 		else
 			utf_o->u_name[utf_o->u_len++] = '?';
 	}
@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 	return utf_o->u_len;
 }

-static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
-			int length)
+static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
+			   int (*conv_f)(const unsigned char *, int, wchar_t *))
 {
-	int len;
-	unsigned i, max_val;
-	uint16_t uni_char;
+	int i, len;
+	unsigned int max_val;
+	wchar_t uni_char;
 	int u_len, u_ch;

 	memset(ocu, 0, sizeof(dstring) * length);
 	ocu[0] = 8;
-	max_val = 0xffU;
+	max_val = 0xff;
 	u_ch = 1;

 try_again:
-	u_len = 0U;
-	for (i = 0U; i < uni->u_len; i++) {
+	u_len = 0;
+	for (i = 0; i < uni->u_len; i++) {
 		/* Name didn't fit? */
 		if (u_len + 1 + u_ch >= length)
 			return 0;
-		len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
+		len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
 		if (!len)
 			continue;
 		/* Invalid character, deal with it */
@ -328,15 +234,15 @@ try_again:
 		}

 		if (uni_char > max_val) {
-			max_val = 0xffffU;
-			ocu[0] = (uint8_t)0x10U;
+			max_val = 0xffff;
+			ocu[0] = 0x10;
 			u_ch = 2;
 			goto try_again;
 		}

-		if (max_val == 0xffffU)
+		if (max_val == 0xffff)
 			ocu[++u_len] = (uint8_t)(uni_char >> 8);
-		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+		ocu[++u_len] = (uint8_t)(uni_char & 0xff);
 		i += len - 1;
 	}

@ -344,10 +250,16 @@ try_again:
 	return u_len + 1;
 }

+int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+{
+	return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
+}
+
 int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
 		     uint8_t *dname, int dlen)
 {
 	struct ustr *filename, *unifilename;
+	int (*conv_f)(wchar_t, unsigned char *, int);
 	int ret;

 	if (!slen)
@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,

 	udf_build_ustr_exact(unifilename, sname, slen);
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
-		ret = udf_CS0toUTF8(filename, unifilename);
-		if (ret < 0) {
-			udf_debug("Failed in udf_get_filename: sname = %s\n",
-				  sname);
-			goto out2;
-		}
+		conv_f = udf_uni2char_utf8;
 	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
-		ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
-				   unifilename);
-		if (ret < 0) {
-			udf_debug("Failed in udf_get_filename: sname = %s\n",
-				  sname);
-			goto out2;
-		}
+		conv_f = UDF_SB(sb)->s_nls_map->uni2char;
 	} else
 		BUG();

+	ret = udf_name_from_CS0(filename, unifilename, conv_f);
+	if (ret < 0) {
+		udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
+		goto out2;
+	}
+
 	ret = udf_translate_to_linux(dname, dlen,
 				     filename->u_name, filename->u_len,
 				     unifilename->u_name, unifilename->u_len);
@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
 		     uint8_t *dname, int dlen)
 {
 	struct ustr unifilename;
-	int namelen;
+	int (*conv_f)(const unsigned char *, int, wchar_t *);

 	if (!udf_char_to_ustr(&unifilename, sname, slen))
 		return 0;

 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
-		namelen = udf_UTF8toCS0(dname, &unifilename, dlen);
-		if (!namelen)
-			return 0;
+		conv_f = udf_char2uni_utf8;
 	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
-		namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
-					&unifilename, dlen);
-		if (!namelen)
-			return 0;
+		conv_f = UDF_SB(sb)->s_nls_map->char2uni;
 	} else
-		return 0;
+		BUG();

-	return namelen;
+	return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
 }

 #define ILLEGAL_CHAR_MARK	'_'