Make determining whether a code point represents a combining mark faster (#1719)
This commit is contained in:
parent
efb38b8636
commit
5ce26cca71
3 changed files with 29 additions and 9 deletions
2
Makefile
2
Makefile
|
@ -79,7 +79,7 @@ bench-compare:
|
|||
for i in 1 2 3; do \
|
||||
go test -bench=. ./internal/...; \
|
||||
done > benchmark_results
|
||||
benchstat benchmark_results_baseline benchmark_results
|
||||
benchstat -alpha 0.15 benchmark_results_baseline benchmark_results
|
||||
|
||||
clean:
|
||||
rm -f micro
|
||||
|
|
|
@ -16,6 +16,16 @@ import (
|
|||
// For rendering, micro will display the combining characters. It's not perfect
|
||||
// but it's pretty good.
|
||||
|
||||
// minMark is the lowest code point listed in unicode.Mark. The R16 ranges of
// a RangeTable are sorted, so the first range's Lo bounds the whole table
// from below and any smaller rune can be rejected with one comparison.
var minMark = rune(unicode.Mark.R16[0].Lo)

// isMark reports whether r is in the Unicode Mark category (M), i.e. whether
// it is a combining character.
func isMark(r rune) bool {
	// Fast path: runes below the first Mark range can never match, so the
	// table search is skipped for them.
	if r < minMark {
		return false
	}
	// Only one table is consulted, so query it directly with unicode.Is
	// instead of going through the variadic unicode.In.
	return unicode.Is(unicode.Mark, r)
}
|
||||
|
||||
// DecodeCharacter returns the next character from an array of bytes
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||
|
@ -24,7 +34,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
|
|||
c, s := utf8.DecodeRune(b)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, unicode.Mark) {
|
||||
for isMark(c) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
|
@ -43,7 +53,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
|
|||
c, s := utf8.DecodeRuneInString(str)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, unicode.Mark) {
|
||||
for isMark(c) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
|
@ -61,7 +71,7 @@ func CharacterCount(b []byte) int {
|
|||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
if !unicode.In(r, unicode.Mark) {
|
||||
if !isMark(r) {
|
||||
s++
|
||||
}
|
||||
|
||||
|
@ -77,7 +87,7 @@ func CharacterCountInString(str string) int {
|
|||
s := 0
|
||||
|
||||
for _, r := range str {
|
||||
if !unicode.In(r, unicode.Mark) {
|
||||
if !isMark(r) {
|
||||
s++
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,16 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// minMark is the lowest code point listed in unicode.Mark. The R16 ranges of
// a RangeTable are sorted, so the first range's Lo bounds the whole table
// from below and any smaller rune can be rejected with one comparison.
var minMark = rune(unicode.Mark.R16[0].Lo)

// isMark reports whether r is in the Unicode Mark category (M), i.e. whether
// it is a combining character.
func isMark(r rune) bool {
	// Fast path: runes below the first Mark range can never match, so the
	// table search is skipped for them.
	if r < minMark {
		return false
	}
	// Only one table is consulted, so query it directly with unicode.Is
	// instead of going through the variadic unicode.In.
	return unicode.Is(unicode.Mark, r)
}
|
||||
|
||||
// DecodeCharacter returns the next character from an array of bytes
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||
|
@ -13,7 +23,7 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
|
|||
c, s := utf8.DecodeRune(b)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, unicode.Mark) {
|
||||
for isMark(c) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
|
@ -32,7 +42,7 @@ func DecodeCharacterInString(str string) (rune, []rune, int) {
|
|||
c, s := utf8.DecodeRuneInString(str)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, unicode.Mark) {
|
||||
for isMark(c) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
|
@ -50,7 +60,7 @@ func CharacterCount(b []byte) int {
|
|||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
if !unicode.In(r, unicode.Mark) {
|
||||
if !isMark(r) {
|
||||
s++
|
||||
}
|
||||
|
||||
|
@ -66,7 +76,7 @@ func CharacterCountInString(str string) int {
|
|||
s := 0
|
||||
|
||||
for _, r := range str {
|
||||
if !unicode.In(r, unicode.Mark) {
|
||||
if !isMark(r) {
|
||||
s++
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue