2004-03-08 04:55:39 +01:00
|
|
|
/* idna.c Convert to or from IDN strings.
|
|
|
|
* Copyright (C) 2002, 2003, 2004 Simon Josefsson
|
|
|
|
*
|
|
|
|
* This file is part of GNU Libidn.
|
|
|
|
*
|
|
|
|
* GNU Libidn is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* GNU Libidn is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with GNU Libidn; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
# include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stringprep.h>
|
|
|
|
#include <punycode.h>
|
|
|
|
|
|
|
|
#include "idna.h"
|
|
|
|
|
|
|
|
/* Non-zero when code point C is one of the four characters that IDNA
   requires to be recognized as a label-separating dot: U+002E (full
   stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop)
   or U+FF61 (halfwidth ideographic full stop).  C is evaluated more
   than once, so it must not have side effects.  */
#define DOTP(c) \
  ((c) == 0xFF61 || (c) == 0xFF0E || (c) == 0x3002 || (c) == 0x002E)
|
|
|
|
|
|
|
|
/* Core functions */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_ascii_4i
|
|
|
|
* @in: input array with unicode code points.
|
|
|
|
* @inlen: length of input array with unicode code points.
|
|
|
|
* @out: output zero terminated string that must have room for at
|
|
|
|
* least 63 characters plus the terminating zero.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* The ToASCII operation takes a sequence of Unicode code points that make
|
|
|
|
* up one label and transforms it into a sequence of code points in the
|
|
|
|
* ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
|
|
|
|
* resulting sequence are equivalent labels.
|
|
|
|
*
|
|
|
|
* It is important to note that the ToASCII operation can fail. ToASCII
|
|
|
|
* fails if any step of it fails. If any step of the ToASCII operation
|
|
|
|
* fails on any label in a domain name, that domain name MUST NOT be used
|
|
|
|
* as an internationalized domain name. The method for deadling with this
|
|
|
|
* failure is application-specific.
|
|
|
|
*
|
|
|
|
* The inputs to ToASCII are a sequence of code points, the AllowUnassigned
|
|
|
|
* flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
|
|
|
|
* sequence of ASCII code points or a failure condition.
|
|
|
|
*
|
|
|
|
* ToASCII never alters a sequence of code points that are all in the ASCII
|
|
|
|
* range to begin with (although it could fail). Applying the ToASCII
|
|
|
|
* operation multiple times has exactly the same effect as applying it just
|
|
|
|
* once.
|
|
|
|
*
|
|
|
|
* Return value: Returns 0 on success, or an error code.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
|
|
|
|
{
|
|
|
|
size_t len, outlen;
|
|
|
|
uint32_t *src; /* XXX don't need to copy data? */
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ToASCII consists of the following steps:
|
|
|
|
*
|
|
|
|
* 1. If all code points in the sequence are in the ASCII range (0..7F)
|
|
|
|
* then skip to step 3.
|
|
|
|
*/
|
|
|
|
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
int inasciirange;
|
|
|
|
|
|
|
|
inasciirange = 1;
|
|
|
|
for (i = 0; i < inlen; i++)
|
|
|
|
if (in[i] > 0x7F)
|
|
|
|
inasciirange = 0;
|
|
|
|
if (inasciirange)
|
|
|
|
{
|
|
|
|
src = malloc (sizeof (in[0]) * (inlen + 1));
|
|
|
|
if (src == NULL)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
|
|
|
|
memcpy (src, in, sizeof (in[0]) * inlen);
|
|
|
|
src[inlen] = 0;
|
|
|
|
|
|
|
|
goto step3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 2. Perform the steps specified in [NAMEPREP] and fail if there is
|
|
|
|
* an error. The AllowUnassigned flag is used in [NAMEPREP].
|
|
|
|
*/
|
|
|
|
|
|
|
|
{
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
|
|
|
|
if (p == NULL)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
|
|
|
|
len = strlen (p);
|
|
|
|
do
|
|
|
|
{
|
2004-05-21 17:42:30 +02:00
|
|
|
char *newp;
|
|
|
|
|
2004-03-08 04:55:39 +01:00
|
|
|
len = 2 * len + 10; /* XXX better guess? */
|
2004-05-21 17:42:30 +02:00
|
|
|
newp = realloc (p, len);
|
|
|
|
if (newp == NULL)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (p);
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
}
|
|
|
|
p = newp;
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
if (flags & IDNA_ALLOW_UNASSIGNED)
|
|
|
|
rc = stringprep_nameprep (p, len);
|
|
|
|
else
|
|
|
|
rc = stringprep_nameprep_no_unassigned (p, len);
|
|
|
|
}
|
|
|
|
while (rc == STRINGPREP_TOO_SMALL_BUFFER);
|
|
|
|
|
|
|
|
if (rc != STRINGPREP_OK)
|
|
|
|
{
|
|
|
|
free (p);
|
|
|
|
return IDNA_STRINGPREP_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
src = stringprep_utf8_to_ucs4 (p, -1, NULL);
|
|
|
|
|
|
|
|
free (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
step3:
|
|
|
|
/*
|
|
|
|
* 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
|
|
|
|
*
|
|
|
|
* (a) Verify the absence of non-LDH ASCII code points; that is,
|
|
|
|
* the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
|
|
|
|
*
|
|
|
|
* (b) Verify the absence of leading and trailing hyphen-minus;
|
|
|
|
* that is, the absence of U+002D at the beginning and end of
|
|
|
|
* the sequence.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (flags & IDNA_USE_STD3_ASCII_RULES)
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
|
|
|
|
for (i = 0; src[i]; i++)
|
|
|
|
if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
|
|
|
|
(src[i] >= 0x3A && src[i] <= 0x40) ||
|
|
|
|
(src[i] >= 0x5B && src[i] <= 0x60) ||
|
|
|
|
(src[i] >= 0x7B && src[i] <= 0x7F))
|
|
|
|
{
|
|
|
|
free (src);
|
|
|
|
return IDNA_CONTAINS_NON_LDH;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
|
|
|
|
{
|
|
|
|
free (src);
|
|
|
|
return IDNA_CONTAINS_MINUS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 4. If all code points in the sequence are in the ASCII range
|
|
|
|
* (0..7F), then skip to step 8.
|
|
|
|
*/
|
|
|
|
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
int inasciirange;
|
|
|
|
|
|
|
|
inasciirange = 1;
|
|
|
|
for (i = 0; src[i]; i++)
|
|
|
|
{
|
|
|
|
if (src[i] > 0x7F)
|
|
|
|
inasciirange = 0;
|
|
|
|
/* copy string to output buffer if we are about to skip to step8 */
|
|
|
|
if (i < 64)
|
|
|
|
out[i] = src[i];
|
|
|
|
}
|
|
|
|
if (i < 64)
|
|
|
|
out[i] = '\0';
|
|
|
|
if (inasciirange)
|
|
|
|
goto step8;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 5. Verify that the sequence does NOT begin with the ACE prefix.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
int match;
|
|
|
|
|
|
|
|
match = 1;
|
|
|
|
for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
|
|
|
|
if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
|
|
|
|
match = 0;
|
|
|
|
if (match)
|
|
|
|
{
|
|
|
|
free (src);
|
|
|
|
return IDNA_CONTAINS_ACE_PREFIX;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
|
|
|
|
* and fail if there is an error.
|
|
|
|
*/
|
|
|
|
for (len = 0; src[len]; len++)
|
|
|
|
;
|
|
|
|
src[len] = '\0';
|
|
|
|
outlen = 63 - strlen (IDNA_ACE_PREFIX);
|
|
|
|
rc = punycode_encode (len, src, NULL,
|
|
|
|
&outlen, &out[strlen (IDNA_ACE_PREFIX)]);
|
|
|
|
if (rc != PUNYCODE_SUCCESS)
|
|
|
|
{
|
|
|
|
free (src);
|
|
|
|
return IDNA_PUNYCODE_ERROR;
|
|
|
|
}
|
|
|
|
out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 7. Prepend the ACE prefix.
|
|
|
|
*/
|
|
|
|
|
|
|
|
memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 8. Verify that the number of code points is in the range 1 to 63
|
|
|
|
* inclusive (0 is excluded).
|
|
|
|
*/
|
|
|
|
|
|
|
|
step8:
|
|
|
|
free (src);
|
|
|
|
if (strlen (out) < 1 || strlen (out) > 63)
|
|
|
|
return IDNA_INVALID_LENGTH;
|
|
|
|
|
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ToUnicode(). May realloc() utf8in. */
|
|
|
|
static int
|
|
|
|
idna_to_unicode_internal (char *utf8in,
|
|
|
|
uint32_t * out, size_t * outlen, int flags)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
char tmpout[64];
|
|
|
|
size_t utf8len = strlen (utf8in) + 1;
|
|
|
|
size_t addlen = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ToUnicode consists of the following steps:
|
|
|
|
*
|
|
|
|
* 1. If the sequence contains any code points outside the ASCII range
|
|
|
|
* (0..7F) then proceed to step 2, otherwise skip to step 3.
|
|
|
|
*/
|
|
|
|
|
|
|
|
{
|
|
|
|
size_t i;
|
|
|
|
int inasciirange;
|
|
|
|
|
|
|
|
inasciirange = 1;
|
|
|
|
for (i = 0; utf8in[i]; i++)
|
|
|
|
if (utf8in[i] & ~0x7F)
|
|
|
|
inasciirange = 0;
|
|
|
|
if (inasciirange)
|
|
|
|
goto step3;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* 2. Perform the steps specified in [NAMEPREP] and fail if there is an
|
|
|
|
* error. (If step 3 of ToASCII is also performed here, it will not
|
|
|
|
* affect the overall behavior of ToUnicode, but it is not
|
|
|
|
* necessary.) The AllowUnassigned flag is used in [NAMEPREP].
|
|
|
|
*/
|
|
|
|
do
|
|
|
|
{
|
2004-05-07 05:57:57 +02:00
|
|
|
char *newp = realloc (utf8in, utf8len + addlen);
|
|
|
|
if (newp == NULL)
|
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
}
|
|
|
|
utf8in = newp;
|
2004-03-08 04:55:39 +01:00
|
|
|
if (flags & IDNA_ALLOW_UNASSIGNED)
|
|
|
|
rc = stringprep_nameprep (utf8in, utf8len + addlen);
|
|
|
|
else
|
|
|
|
rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
|
|
|
|
addlen += 1;
|
|
|
|
}
|
|
|
|
while (rc == STRINGPREP_TOO_SMALL_BUFFER);
|
|
|
|
|
|
|
|
if (rc != STRINGPREP_OK)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return IDNA_STRINGPREP_ERROR;
|
|
|
|
}
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
/* 3. Verify that the sequence begins with the ACE prefix, and save a
|
|
|
|
* copy of the sequence.
|
|
|
|
*/
|
|
|
|
|
|
|
|
step3:
|
|
|
|
if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return IDNA_NO_ACE_PREFIX;
|
|
|
|
}
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
/* 4. Remove the ACE prefix.
|
|
|
|
*/
|
|
|
|
|
|
|
|
memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
|
|
|
|
strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
|
|
|
|
|
|
|
|
/* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
|
|
|
|
* and fail if there is an error. Save a copy of the result of
|
|
|
|
* this step.
|
|
|
|
*/
|
|
|
|
|
|
|
|
(*outlen)--; /* reserve one for the zero */
|
|
|
|
|
|
|
|
rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
|
|
|
|
if (rc != PUNYCODE_SUCCESS)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return IDNA_PUNYCODE_ERROR;
|
|
|
|
}
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
out[*outlen] = 0; /* add zero */
|
|
|
|
|
|
|
|
/* 6. Apply ToASCII.
|
|
|
|
*/
|
|
|
|
|
|
|
|
rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
|
|
|
|
if (rc != IDNA_SUCCESS)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return rc;
|
|
|
|
}
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
/* 7. Verify that the result of step 6 matches the saved copy from
|
|
|
|
* step 3, using a case-insensitive ASCII comparison.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
|
2004-05-07 05:57:57 +02:00
|
|
|
{
|
|
|
|
free (utf8in);
|
|
|
|
return IDNA_ROUNDTRIP_VERIFY_ERROR;
|
|
|
|
}
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
/* 8. Return the saved copy from step 5.
|
|
|
|
*/
|
|
|
|
|
2004-05-07 05:57:57 +02:00
|
|
|
free (utf8in);
|
2004-03-08 04:55:39 +01:00
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_44i
|
|
|
|
* @in: input array with unicode code points.
|
|
|
|
* @inlen: length of input array with unicode code points.
|
|
|
|
* @out: output array with unicode code points.
|
|
|
|
* @outlen: on input, maximum size of output array with unicode code points,
|
|
|
|
* on exit, actual size of output array with unicode code points.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* The ToUnicode operation takes a sequence of Unicode code points
|
|
|
|
* that make up one label and returns a sequence of Unicode code
|
|
|
|
* points. If the input sequence is a label in ACE form, then the
|
|
|
|
* result is an equivalent internationalized label that is not in ACE
|
|
|
|
* form, otherwise the original sequence is returned unaltered.
|
|
|
|
*
|
|
|
|
* ToUnicode never fails. If any step fails, then the original input
|
|
|
|
* sequence is returned immediately in that step.
|
|
|
|
*
|
|
|
|
* The Punycode decoder can never output more code points than it
|
|
|
|
* inputs, but Nameprep can, and therefore ToUnicode can. Note that
|
|
|
|
* the number of octets needed to represent a sequence of code points
|
|
|
|
* depends on the particular character encoding used.
|
|
|
|
*
|
|
|
|
* The inputs to ToUnicode are a sequence of code points, the
|
|
|
|
* AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
|
|
|
|
* ToUnicode is always a sequence of Unicode code points.
|
|
|
|
*
|
|
|
|
* Return value: Returns error condition, but it must only be used for
|
|
|
|
* debugging purposes. The output buffer is always
|
|
|
|
* guaranteed to contain the correct data according to
|
|
|
|
* the specification (sans malloc induced errors). NB!
|
|
|
|
* This means that you normally ignore the return code
|
|
|
|
* from this function, as checking it means breaking the
|
|
|
|
* standard.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
idna_to_unicode_44i (const uint32_t * in, size_t inlen,
|
|
|
|
uint32_t * out, size_t * outlen, int flags)
|
|
|
|
{
|
|
|
|
int rc;
|
|
|
|
size_t outlensave = *outlen;
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
|
|
|
|
if (p == NULL)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_internal (p, out, outlen, flags);
|
|
|
|
if (rc != IDNA_SUCCESS)
|
|
|
|
{
|
|
|
|
memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
|
|
|
|
inlen : outlensave));
|
|
|
|
*outlen = inlen;
|
|
|
|
}
|
|
|
|
|
2004-05-07 05:57:57 +02:00
|
|
|
/* p is freed in idna_to_unicode_internal. */
|
2004-03-08 04:55:39 +01:00
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Wrappers that handle several labels */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_ascii_4z:
|
|
|
|
* @input: zero terminated input Unicode string.
|
|
|
|
* @output: pointer to newly allocated output string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert UCS-4 domain name to ASCII string. The domain name may
|
|
|
|
* contain several labels, separated by dots. The output buffer must
|
|
|
|
* be deallocated by the caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
|
|
|
|
{
|
|
|
|
const uint32_t *start = input;
|
|
|
|
const uint32_t *end = input;
|
|
|
|
char buf[64];
|
|
|
|
char *out = NULL;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
/* 1) Whenever dots are used as label separators, the following
|
|
|
|
characters MUST be recognized as dots: U+002E (full stop),
|
|
|
|
U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
|
|
|
|
U+FF61 (halfwidth ideographic full stop). */
|
|
|
|
|
2004-03-14 10:17:48 +01:00
|
|
|
if (input[0] == 0)
|
2004-03-08 04:55:39 +01:00
|
|
|
{
|
|
|
|
/* Handle implicit zero-length root label. */
|
|
|
|
*output = malloc (1);
|
|
|
|
if (!*output)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
2004-03-14 10:17:48 +01:00
|
|
|
strcpy (*output, "");
|
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DOTP (input[0]) && input[1] == 0)
|
|
|
|
{
|
|
|
|
/* Handle explicit zero-length root label. */
|
|
|
|
*output = malloc (2);
|
|
|
|
if (!*output)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
strcpy (*output, ".");
|
2004-03-08 04:55:39 +01:00
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
*output = NULL;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
end = start;
|
|
|
|
|
|
|
|
for (; *end && !DOTP (*end); end++)
|
|
|
|
;
|
|
|
|
|
|
|
|
if (*end == '\0' && start == end)
|
|
|
|
{
|
|
|
|
/* Handle explicit zero-length root label. */
|
|
|
|
buf[0] = '\0';
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
rc = idna_to_ascii_4i (start, end - start, buf, flags);
|
|
|
|
if (rc != IDNA_SUCCESS)
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (out)
|
|
|
|
{
|
2004-05-07 05:57:57 +02:00
|
|
|
char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
|
|
|
|
if (!newp)
|
|
|
|
{
|
|
|
|
free (out);
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
}
|
|
|
|
out = newp;
|
2004-03-08 04:55:39 +01:00
|
|
|
strcat (out, ".");
|
|
|
|
strcat (out, buf);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
out = (char *) malloc (strlen (buf) + 1);
|
|
|
|
if (!out)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
strcpy (out, buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
start = end + 1;
|
|
|
|
}
|
|
|
|
while (*end);
|
|
|
|
|
|
|
|
*output = out;
|
|
|
|
|
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_ascii_8z:
|
|
|
|
* @input: zero terminated input UTF-8 string.
|
|
|
|
* @output: pointer to newly allocated output string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert UTF-8 domain name to ASCII string. The domain name may
|
|
|
|
* contain several labels, separated by dots. The output buffer must
|
|
|
|
* be deallocated by the caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_ascii_8z (const char *input, char **output, int flags)
|
|
|
|
{
|
|
|
|
uint32_t *ucs4;
|
|
|
|
size_t ucs4len;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
|
|
|
|
if (!ucs4)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_ascii_4z (ucs4, output, flags);
|
|
|
|
|
|
|
|
free (ucs4);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_ascii_lz:
|
|
|
|
* @input: zero terminated input UTF-8 string.
|
|
|
|
* @output: pointer to newly allocated output string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert domain name in the locale's encoding to ASCII string. The
|
|
|
|
* domain name may contain several labels, separated by dots. The
|
|
|
|
* output buffer must be deallocated by the caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_ascii_lz (const char *input, char **output, int flags)
|
|
|
|
{
|
|
|
|
char *utf8;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
utf8 = stringprep_locale_to_utf8 (input);
|
|
|
|
if (!utf8)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_ascii_8z (utf8, output, flags);
|
|
|
|
|
|
|
|
free (utf8);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_4z4z:
|
|
|
|
* @input: zero-terminated Unicode string.
|
|
|
|
* @output: pointer to newly allocated output Unicode string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert possibly ACE encoded domain name in UCS-4 format into a
|
|
|
|
* UCS-4 string. The domain name may contain several labels,
|
|
|
|
* separated by dots. The output buffer must be deallocated by the
|
|
|
|
* caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
|
|
|
|
{
|
|
|
|
const uint32_t *start = input;
|
|
|
|
const uint32_t *end = input;
|
|
|
|
uint32_t *buf;
|
|
|
|
size_t buflen;
|
|
|
|
uint32_t *out = NULL;
|
|
|
|
size_t outlen = 0;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
*output = NULL;
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
end = start;
|
|
|
|
|
|
|
|
for (; *end && !DOTP (*end); end++)
|
|
|
|
;
|
|
|
|
|
|
|
|
buflen = end - start;
|
|
|
|
buf = malloc (sizeof (buf[0]) * (buflen + 1));
|
|
|
|
if (!buf)
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
|
|
|
|
/* don't check rc as per specification! */
|
|
|
|
|
|
|
|
if (out)
|
|
|
|
{
|
2004-05-07 05:57:57 +02:00
|
|
|
uint32_t *newp = realloc (out,
|
|
|
|
sizeof (out[0])
|
|
|
|
* (outlen + 1 + buflen + 1));
|
|
|
|
if (!newp)
|
|
|
|
{
|
|
|
|
free (buf);
|
|
|
|
free (out);
|
|
|
|
return IDNA_MALLOC_ERROR;
|
|
|
|
}
|
|
|
|
out = newp;
|
2004-03-08 04:55:39 +01:00
|
|
|
out[outlen++] = 0x002E; /* '.' (full stop) */
|
|
|
|
memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
|
|
|
|
outlen += buflen;
|
|
|
|
out[outlen] = 0x0;
|
|
|
|
free (buf);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
out = buf;
|
|
|
|
outlen = buflen;
|
|
|
|
out[outlen] = 0x0;
|
|
|
|
}
|
|
|
|
|
|
|
|
start = end + 1;
|
|
|
|
}
|
|
|
|
while (*end);
|
|
|
|
|
|
|
|
*output = out;
|
|
|
|
|
|
|
|
return IDNA_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_8z4z:
|
|
|
|
* @input: zero-terminated UTF-8 string.
|
|
|
|
* @output: pointer to newly allocated output Unicode string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert possibly ACE encoded domain name in UTF-8 format into a
|
|
|
|
* UCS-4 string. The domain name may contain several labels,
|
|
|
|
* separated by dots. The output buffer must be deallocated by the
|
|
|
|
* caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
|
|
|
|
{
|
|
|
|
uint32_t *ucs4;
|
|
|
|
size_t ucs4len;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
|
|
|
|
if (!ucs4)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_4z4z (ucs4, output, flags);
|
|
|
|
free (ucs4);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_8z8z:
|
|
|
|
* @input: zero-terminated UTF-8 string.
|
|
|
|
* @output: pointer to newly allocated output UTF-8 string.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert possibly ACE encoded domain name in UTF-8 format into a
|
|
|
|
* UTF-8 string. The domain name may contain several labels,
|
|
|
|
* separated by dots. The output buffer must be deallocated by the
|
|
|
|
* caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_unicode_8z8z (const char *input, char **output, int flags)
|
|
|
|
{
|
|
|
|
uint32_t *ucs4;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_8z4z (input, &ucs4, flags);
|
|
|
|
*output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
|
|
|
|
free (ucs4);
|
|
|
|
|
|
|
|
if (!*output)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_8zlz:
|
|
|
|
* @input: zero-terminated UTF-8 string.
|
|
|
|
* @output: pointer to newly allocated output string encoded in the
|
|
|
|
* current locale's character set.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert possibly ACE encoded domain name in UTF-8 format into a
|
|
|
|
* string encoded in the current locale's character set. The domain
|
|
|
|
* name may contain several labels, separated by dots. The output
|
|
|
|
* buffer must be deallocated by the caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_unicode_8zlz (const char *input, char **output, int flags)
|
|
|
|
{
|
|
|
|
char *utf8;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_8z8z (input, &utf8, flags);
|
|
|
|
*output = stringprep_utf8_to_locale (utf8);
|
|
|
|
free (utf8);
|
|
|
|
|
|
|
|
if (!*output)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* idna_to_unicode_lzlz:
|
|
|
|
* @input: zero-terminated string encoded in the current locale's
|
|
|
|
* character set.
|
|
|
|
* @output: pointer to newly allocated output string encoded in the
|
|
|
|
* current locale's character set.
|
|
|
|
* @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
|
|
|
|
*
|
|
|
|
* Convert possibly ACE encoded domain name in the locale's character
|
|
|
|
* set into a string encoded in the current locale's character set.
|
|
|
|
* The domain name may contain several labels, separated by dots. The
|
|
|
|
* output buffer must be deallocated by the caller.
|
|
|
|
*
|
|
|
|
* Return value: Returns IDNA_SUCCESS on success, or error code.
|
|
|
|
**/
|
|
|
|
int
|
|
|
|
idna_to_unicode_lzlz (const char *input, char **output, int flags)
|
|
|
|
{
|
|
|
|
char *utf8;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
utf8 = stringprep_locale_to_utf8 (input);
|
|
|
|
if (!utf8)
|
|
|
|
return IDNA_ICONV_ERROR;
|
|
|
|
|
|
|
|
rc = idna_to_unicode_8zlz (utf8, output, flags);
|
|
|
|
free (utf8);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* IDNA_ACE_PREFIX
|
|
|
|
*
|
|
|
|
* The IANA allocated prefix to use for IDNA. "xn--"
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Idna_rc:
|
|
|
|
* @IDNA_SUCCESS: Successful operation. This value is guaranteed to
|
|
|
|
* always be zero, the remaining ones are only guaranteed to hold
|
|
|
|
* non-zero values, for logical comparison purposes.
|
|
|
|
* @IDNA_STRINGPREP_ERROR: Error during string preparation.
|
|
|
|
* @IDNA_PUNYCODE_ERROR: Error during punycode operation.
|
|
|
|
* @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
|
|
|
|
* the string contains non-LDH ASCII characters.
|
|
|
|
* @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
|
|
|
|
* the string contains a leading or trailing hyphen-minus (U+002D).
|
|
|
|
* @IDNA_INVALID_LENGTH: The final output string is not within the
|
|
|
|
* (inclusive) range 1 to 63 characters.
|
|
|
|
* @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
|
|
|
|
* (for ToUnicode).
|
|
|
|
* @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
|
|
|
|
* string does not equal the input.
|
|
|
|
* @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
|
|
|
|
* ToASCII).
|
|
|
|
* @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
|
|
|
|
* @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
|
|
|
|
* fatal error).
|
2004-05-04 21:25:38 +02:00
|
|
|
* @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
|
|
|
|
* internally in libc).
|
2004-03-08 04:55:39 +01:00
|
|
|
*
|
|
|
|
* Enumerated return codes of idna_to_ascii_4i(),
|
|
|
|
* idna_to_unicode_44i() functions (and functions derived from those
|
|
|
|
* functions). The value 0 is guaranteed to always correspond to
|
|
|
|
* success.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Idna_flags:
|
|
|
|
* @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
|
|
|
|
* Unicode code points.
|
|
|
|
* @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
|
|
|
|
* rules (i.e., normal host name rules).
|
|
|
|
*
|
|
|
|
* Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
|
|
|
|
*/
|