2010-12-03 05:34:57 +01:00
|
|
|
/* go-rune.c -- rune functions for Go.
|
|
|
|
|
|
|
|
Copyright 2009, 2010 The Go Authors. All rights reserved.
|
|
|
|
Use of this source code is governed by a BSD-style
|
|
|
|
license that can be found in the LICENSE file. */
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
2012-11-01 04:02:13 +01:00
|
|
|
#include "runtime.h"
|
2010-12-03 05:34:57 +01:00
|
|
|
#include "go-string.h"
|
|
|
|
|
|
|
|
/* Get a character from the UTF-8 string STR, of length LEN. Store
|
|
|
|
the Unicode character, if any, in *RUNE. Return the number of
|
|
|
|
characters used from STR. */
|
|
|
|
|
|
|
|
int
|
2012-11-06 19:28:21 +01:00
|
|
|
__go_get_rune (const unsigned char *str, size_t len, int32 *rune)
|
2010-12-03 05:34:57 +01:00
|
|
|
{
|
2012-10-23 06:31:11 +02:00
|
|
|
int c, c1, c2, c3, l;
|
2010-12-03 05:34:57 +01:00
|
|
|
|
|
|
|
/* Default to the "replacement character". */
|
|
|
|
*rune = 0xfffd;
|
|
|
|
|
|
|
|
if (len <= 0)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
c = *str;
|
|
|
|
if (c <= 0x7f)
|
|
|
|
{
|
|
|
|
*rune = c;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len <= 1)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
c1 = str[1];
|
|
|
|
if ((c & 0xe0) == 0xc0
|
|
|
|
&& (c1 & 0xc0) == 0x80)
|
|
|
|
{
|
2012-10-23 06:31:11 +02:00
|
|
|
l = (((c & 0x1f) << 6) + (c1 & 0x3f));
|
|
|
|
if (l <= 0x7f)
|
|
|
|
return 1;
|
|
|
|
*rune = l;
|
2010-12-03 05:34:57 +01:00
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len <= 2)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
c2 = str[2];
|
|
|
|
if ((c & 0xf0) == 0xe0
|
|
|
|
&& (c1 & 0xc0) == 0x80
|
|
|
|
&& (c2 & 0xc0) == 0x80)
|
|
|
|
{
|
2012-10-23 06:31:11 +02:00
|
|
|
l = (((c & 0xf) << 12)
|
|
|
|
+ ((c1 & 0x3f) << 6)
|
|
|
|
+ (c2 & 0x3f));
|
2012-09-22 09:18:45 +02:00
|
|
|
|
2012-10-23 06:31:11 +02:00
|
|
|
if (l <= 0x7ff)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (l >= 0xd800 && l < 0xe000)
|
2012-09-22 09:18:45 +02:00
|
|
|
{
|
|
|
|
/* Invalid surrogate half; return replace character. */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2012-10-23 06:31:11 +02:00
|
|
|
*rune = l;
|
|
|
|
|
2010-12-03 05:34:57 +01:00
|
|
|
return 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len <= 3)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
c3 = str[3];
|
|
|
|
if ((c & 0xf8) == 0xf0
|
|
|
|
&& (c1 & 0xc0) == 0x80
|
|
|
|
&& (c2 & 0xc0) == 0x80
|
|
|
|
&& (c3 & 0xc0) == 0x80)
|
|
|
|
{
|
2012-10-23 06:31:11 +02:00
|
|
|
l = (((c & 0x7) << 18)
|
|
|
|
+ ((c1 & 0x3f) << 12)
|
|
|
|
+ ((c2 & 0x3f) << 6)
|
|
|
|
+ (c3 & 0x3f));
|
|
|
|
|
|
|
|
if (l <= 0xffff || l > 0x10ffff)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
*rune = l;
|
2010-12-03 05:34:57 +01:00
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Invalid encoding. Return 1 so that we advance. */
|
|
|
|
return 1;
|
|
|
|
}
|