Use unicode.Mark for combining unicode range
This commit is contained in:
parent
ff0683d6d0
commit
5c8a2332d9
2 changed files with 8 additions and 30 deletions
|
@ -16,17 +16,6 @@ import (
|
||||||
// For rendering, micro will display the combining characters. It's not perfect
|
// For rendering, micro will display the combining characters. It's not perfect
|
||||||
// but it's pretty good.
|
// but it's pretty good.
|
||||||
|
|
||||||
// combining character range table
|
|
||||||
var combining = &unicode.RangeTable{
|
|
||||||
R16: []unicode.Range16{
|
|
||||||
{0x0300, 0x036f, 1}, // combining diacritical marks
|
|
||||||
{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
|
|
||||||
{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
|
|
||||||
{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
|
|
||||||
{0xfe20, 0xfe2f, 1}, // combining half marks
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// DecodeCharacter returns the next character from an array of bytes
|
// DecodeCharacter returns the next character from an array of bytes
|
||||||
// A character is a rune along with any accompanying combining runes
|
// A character is a rune along with any accompanying combining runes
|
||||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||||
|
@ -35,7 +24,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||||
c, s := utf8.DecodeRune(b)
|
c, s := utf8.DecodeRune(b)
|
||||||
|
|
||||||
var combc []rune
|
var combc []rune
|
||||||
for unicode.In(c, combining) {
|
for unicode.In(c, unicode.Mark) {
|
||||||
combc = append(combc, c)
|
combc = append(combc, c)
|
||||||
size += s
|
size += s
|
||||||
|
|
||||||
|
@ -54,7 +43,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
|
||||||
c, s := utf8.DecodeRuneInString(str)
|
c, s := utf8.DecodeRuneInString(str)
|
||||||
|
|
||||||
var combc []rune
|
var combc []rune
|
||||||
for unicode.In(c, combining) {
|
for unicode.In(c, unicode.Mark) {
|
||||||
combc = append(combc, c)
|
combc = append(combc, c)
|
||||||
size += s
|
size += s
|
||||||
|
|
||||||
|
@ -72,7 +61,7 @@ func CharacterCount(b []byte) int {
|
||||||
|
|
||||||
for len(b) > 0 {
|
for len(b) > 0 {
|
||||||
r, size := utf8.DecodeRune(b)
|
r, size := utf8.DecodeRune(b)
|
||||||
if !unicode.In(r, combining) {
|
if !unicode.In(r, unicode.Mark) {
|
||||||
s++
|
s++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +77,7 @@ func CharacterCountInString(str string) int {
|
||||||
s := 0
|
s := 0
|
||||||
|
|
||||||
for _, r := range str {
|
for _, r := range str {
|
||||||
if !unicode.In(r, combining) {
|
if !unicode.In(r, unicode.Mark) {
|
||||||
s++
|
s++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,17 +5,6 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// combining character range table
|
|
||||||
var combining = &unicode.RangeTable{
|
|
||||||
R16: []unicode.Range16{
|
|
||||||
{0x0300, 0x036f, 1}, // combining diacritical marks
|
|
||||||
{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
|
|
||||||
{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
|
|
||||||
{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
|
|
||||||
{0xfe20, 0xfe2f, 1}, // combining half marks
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// DecodeCharacter returns the next character from an array of bytes
|
// DecodeCharacter returns the next character from an array of bytes
|
||||||
// A character is a rune along with any accompanying combining runes
|
// A character is a rune along with any accompanying combining runes
|
||||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||||
|
@ -24,7 +13,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||||
c, s := utf8.DecodeRune(b)
|
c, s := utf8.DecodeRune(b)
|
||||||
|
|
||||||
var combc []rune
|
var combc []rune
|
||||||
for unicode.In(c, combining) {
|
for unicode.In(c, unicode.Mark) {
|
||||||
combc = append(combc, c)
|
combc = append(combc, c)
|
||||||
size += s
|
size += s
|
||||||
|
|
||||||
|
@ -43,7 +32,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
|
||||||
c, s := utf8.DecodeRuneInString(str)
|
c, s := utf8.DecodeRuneInString(str)
|
||||||
|
|
||||||
var combc []rune
|
var combc []rune
|
||||||
for unicode.In(c, combining) {
|
for unicode.In(c, unicode.Mark) {
|
||||||
combc = append(combc, c)
|
combc = append(combc, c)
|
||||||
size += s
|
size += s
|
||||||
|
|
||||||
|
@ -61,7 +50,7 @@ func CharacterCount(b []byte) int {
|
||||||
|
|
||||||
for len(b) > 0 {
|
for len(b) > 0 {
|
||||||
r, size := utf8.DecodeRune(b)
|
r, size := utf8.DecodeRune(b)
|
||||||
if !unicode.In(r, combining) {
|
if !unicode.In(r, unicode.Mark) {
|
||||||
s++
|
s++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +66,7 @@ func CharacterCountInString(str string) int {
|
||||||
s := 0
|
s := 0
|
||||||
|
|
||||||
for _, r := range str {
|
for _, r := range str {
|
||||||
if !unicode.In(r, combining) {
|
if !unicode.In(r, unicode.Mark) {
|
||||||
s++
|
s++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue