Use unicode.Mark for combining unicode range

This commit is contained in:
Zachary Yedidia 2020-05-20 18:01:10 -04:00
parent ff0683d6d0
commit 5c8a2332d9
2 changed files with 8 additions and 30 deletions

View file

@ -16,17 +16,6 @@ import (
// For rendering, micro will display the combining characters. It's not perfect // For rendering, micro will display the combining characters. It's not perfect
// but it's pretty good. // but it's pretty good.
// combining character range table
var combining = &unicode.RangeTable{
R16: []unicode.Range16{
{0x0300, 0x036f, 1}, // combining diacritical marks
{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
{0xfe20, 0xfe2f, 1}, // combining half marks
},
}
// DecodeCharacter returns the next character from an array of bytes // DecodeCharacter returns the next character from an array of bytes
// A character is a rune along with any accompanying combining runes // A character is a rune along with any accompanying combining runes
func DecodeCharacter(b []byte) (rune, []rune, int) { func DecodeCharacter(b []byte) (rune, []rune, int) {
@ -35,7 +24,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
c, s := utf8.DecodeRune(b) c, s := utf8.DecodeRune(b)
var combc []rune var combc []rune
for unicode.In(c, combining) { for unicode.In(c, unicode.Mark) {
combc = append(combc, c) combc = append(combc, c)
size += s size += s
@ -54,7 +43,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
c, s := utf8.DecodeRuneInString(str) c, s := utf8.DecodeRuneInString(str)
var combc []rune var combc []rune
for unicode.In(c, combining) { for unicode.In(c, unicode.Mark) {
combc = append(combc, c) combc = append(combc, c)
size += s size += s
@ -72,7 +61,7 @@ func CharacterCount(b []byte) int {
for len(b) > 0 { for len(b) > 0 {
r, size := utf8.DecodeRune(b) r, size := utf8.DecodeRune(b)
if !unicode.In(r, combining) { if !unicode.In(r, unicode.Mark) {
s++ s++
} }
@ -88,7 +77,7 @@ func CharacterCountInString(str string) int {
s := 0 s := 0
for _, r := range str { for _, r := range str {
if !unicode.In(r, combining) { if !unicode.In(r, unicode.Mark) {
s++ s++
} }
} }

View file

@ -5,17 +5,6 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// combining character range table
var combining = &unicode.RangeTable{
R16: []unicode.Range16{
{0x0300, 0x036f, 1}, // combining diacritical marks
{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
{0xfe20, 0xfe2f, 1}, // combining half marks
},
}
// DecodeCharacter returns the next character from an array of bytes // DecodeCharacter returns the next character from an array of bytes
// A character is a rune along with any accompanying combining runes // A character is a rune along with any accompanying combining runes
func DecodeCharacter(b []byte) (rune, []rune, int) { func DecodeCharacter(b []byte) (rune, []rune, int) {
@ -24,7 +13,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
c, s := utf8.DecodeRune(b) c, s := utf8.DecodeRune(b)
var combc []rune var combc []rune
for unicode.In(c, combining) { for unicode.In(c, unicode.Mark) {
combc = append(combc, c) combc = append(combc, c)
size += s size += s
@ -43,7 +32,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
c, s := utf8.DecodeRuneInString(str) c, s := utf8.DecodeRuneInString(str)
var combc []rune var combc []rune
for unicode.In(c, combining) { for unicode.In(c, unicode.Mark) {
combc = append(combc, c) combc = append(combc, c)
size += s size += s
@ -61,7 +50,7 @@ func CharacterCount(b []byte) int {
for len(b) > 0 { for len(b) > 0 {
r, size := utf8.DecodeRune(b) r, size := utf8.DecodeRune(b)
if !unicode.In(r, combining) { if !unicode.In(r, unicode.Mark) {
s++ s++
} }
@ -77,7 +66,7 @@ func CharacterCountInString(str string) int {
s := 0 s := 0
for _, r := range str { for _, r := range str {
if !unicode.In(r, combining) { if !unicode.In(r, unicode.Mark) {
s++ s++
} }
} }