micro/pkg/highlight/parser.go

457 lines
10 KiB
Go
Raw Normal View History

package highlight
import (
"bytes"
"errors"
"fmt"
"regexp"
"gopkg.in/yaml.v2"
)
2017-03-28 02:35:28 +03:00
// A Group represents a syntax group.
// Every distinct group name met while parsing is assigned its own Group
// value, which is recorded in the Groups map.
type Group uint8
2017-03-20 22:14:04 +03:00
2017-03-28 02:35:28 +03:00
var (
	// Groups maps the name of every group defined so far to its Group
	// value. Look a group up via its string name.
	Groups map[string]Group

	// numGroups is the Group value handed out most recently; it is
	// incremented each time a previously unseen group name is registered.
	numGroups Group
)
// String returns the group name attached to the specific group
func (g Group) String() string {
for k, v := range Groups {
2017-03-28 02:35:28 +03:00
if v == g {
2017-03-20 22:14:04 +03:00
return k
}
}
return ""
}
// Def is the complete syntax definition of one language.
// The embedded Header carries the filetype together with the regexes used
// to detect it from a file's name or header (the first line of the file),
// and rules holds the rules that define how to highlight the file.
type Def struct {
	*Header

	rules *rules
}
// Header holds what is needed to detect a filetype: the filetype name plus
// the compiled detection regexes. A nil regex means that detection method
// is not configured for the filetype.
type Header struct {
	FileType       string
	FileNameRegex  *regexp.Regexp // matched against the file name
	HeaderRegex    *regexp.Regexp // matched against the first line of the file
	SignatureRegex *regexp.Regexp // matched against a line of the file
}
// HeaderYaml mirrors the header portion of a yaml syntax file so that it
// can be unmarshalled directly: the filetype name and, under the "detect"
// key, the uncompiled detection regexes.
type HeaderYaml struct {
	FileType string `yaml:"filetype"`

	Detect struct {
		FNameRegexStr     string `yaml:"filename"`
		HeaderRegexStr    string `yaml:"header"`
		SignatureRegexStr string `yaml:"signature"`
	} `yaml:"detect"`
}
// File is a syntax file whose yaml has been decoded but whose rules have
// not been parsed yet. FileType is the value of its "filetype" key and
// yamlSrc is the whole decoded yaml document.
type File struct {
	FileType string

	yamlSrc map[interface{}]interface{}
}
// A Pattern is one simple syntax rule
// It has a group that the rule belongs to, as well as
// the regular expression to match the pattern
2017-03-28 02:35:28 +03:00
type pattern struct {
group Group
regex *regexp.Regexp
}
2017-03-28 02:35:28 +03:00
// rules defines which patterns and regions can be used to highlight
// a filetype
2017-03-28 02:35:28 +03:00
type rules struct {
regions []*region
patterns []*pattern
includes []string
}
2017-03-28 02:35:28 +03:00
// A region is a highlighted region (such as a multiline comment, or a string)
// It belongs to a group, and has start and end regular expressions
2017-03-28 02:35:28 +03:00
// A region also has rules of its own that only apply when matching inside the
// region and also rules from the above region do not match inside this region
// Note that a region may contain more regions
2017-03-28 02:35:28 +03:00
type region struct {
group Group
limitGroup Group
parent *region
start *regexp.Regexp
end *regexp.Regexp
skip *regexp.Regexp
rules *rules
}
2017-03-20 22:14:04 +03:00
func init() {
2017-03-28 02:35:28 +03:00
Groups = make(map[string]Group)
2017-03-20 22:14:04 +03:00
}
// MakeHeader parses the contents of a header (.hdr) file into a Header.
// Header files make parsing more efficient when you only want to compute
// on the headers of syntax files: a yaml file might take ~400us to parse
// while a header file only takes ~20us.
//
// The data is split on newlines and must yield at least four lines, in this
// order: filetype, file name regex, header regex, signature regex. An empty
// regex line leaves the matching Header field nil. The first regex that
// fails to compile aborts parsing and its error is returned.
func MakeHeader(data []byte) (*Header, error) {
	fields := bytes.Split(data, []byte{'\n'})
	if len(fields) < 4 {
		return nil, errors.New("Header file has incorrect format")
	}

	hdr := &Header{FileType: string(fields[0])}

	// Line i+1 of the file feeds the i-th destination below.
	destinations := []**regexp.Regexp{
		&hdr.FileNameRegex,
		&hdr.HeaderRegex,
		&hdr.SignatureRegex,
	}
	for i, dst := range destinations {
		expr := string(fields[i+1])
		if expr == "" {
			continue
		}
		re, err := regexp.Compile(expr)
		if err != nil {
			return nil, err
		}
		*dst = re
	}

	return hdr, nil
}
// MakeHeaderYaml takes the yaml spec of a syntax file and parses its header.
// Only the filetype and the detection regexes are read. An empty regex
// string leaves the matching Header field nil, and the first regex that
// fails to compile aborts parsing and its error is returned.
func MakeHeaderYaml(data []byte) (*Header, error) {
	var spec HeaderYaml
	if err := yaml.Unmarshal(data, &spec); err != nil {
		return nil, err
	}

	hdr := &Header{FileType: spec.FileType}

	sources := []struct {
		expr string
		dst  **regexp.Regexp
	}{
		{spec.Detect.FNameRegexStr, &hdr.FileNameRegex},
		{spec.Detect.HeaderRegexStr, &hdr.HeaderRegex},
		{spec.Detect.SignatureRegexStr, &hdr.SignatureRegex},
	}
	for _, src := range sources {
		if src.expr == "" {
			continue
		}
		re, err := regexp.Compile(src.expr)
		if err != nil {
			return nil, err
		}
		*src.dst = re
	}

	return hdr, nil
}
// MatchFileName reports whether filename matches the stored file name regex.
// It reports false when no file name regex is stored.
func (header *Header) MatchFileName(filename string) bool {
	re := header.FileNameRegex
	return re != nil && re.MatchString(filename)
}
// MatchFileHeader reports whether firstLine matches the stored header regex.
// It reports false when no header regex is stored.
func (header *Header) MatchFileHeader(firstLine []byte) bool {
	re := header.HeaderRegex
	return re != nil && re.Match(firstLine)
}
// HasFileSignature reports whether a signature regex is stored in the header.
func (header *Header) HasFileSignature() bool {
	sig := header.SignatureRegex
	return sig != nil
}
// MatchFileSignature reports whether line matches the stored signature regex.
// It reports false when no signature regex is stored.
func (header *Header) MatchFileSignature(line []byte) bool {
	re := header.SignatureRegex
	return re != nil && re.Match(line)
}
func ParseFile(input []byte) (f *File, err error) {
// This is just so if we have an error, we can exit cleanly and return the parse error to the user
defer func() {
2017-06-21 21:26:05 +03:00
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
if !ok {
err = fmt.Errorf("pkg: %v", r)
}
}
}()
var rules map[interface{}]interface{}
if err = yaml.Unmarshal(input, &rules); err != nil {
return nil, err
}
f = new(File)
f.yamlSrc = rules
for k, v := range rules {
if k == "filetype" {
filetype := v.(string)
if filetype == "" {
return nil, errors.New("empty filetype")
}
f.FileType = filetype
break
}
}
if f.FileType == "" {
return nil, errors.New("missing filetype")
}
return f, err
}
// ParseDef parses an input syntax file into a highlight Def
func ParseDef(f *File, header *Header) (s *Def, err error) {
// This is just so if we have an error, we can exit cleanly and return the parse error to the user
defer func() {
2017-06-21 21:26:05 +03:00
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
if !ok {
err = fmt.Errorf("pkg: %v", r)
}
}
}()
Fix crash when syntax file has no `rules` (#3213) If a syntax file aaa.yaml contains no `rules` directive, then after `set filetype aaa` micro crashes with d.rules nil pointer dereference in HasIncludes(): Micro encountered an error: runtime.errorString runtime error: invalid memory address or nil pointer dereference runtime/panic.go:221 (0x44c527) runtime/panic.go:220 (0x44c4f7) github.com/zyedidia/micro/v2/pkg/highlight/parser.go:239 (0x820919) github.com/zyedidia/micro/v2/internal/buffer/buffer.go:830 (0x82b818) github.com/zyedidia/micro/v2/internal/buffer/settings.go:33 (0x83b665) github.com/zyedidia/micro/v2/internal/action/command.go:578 (0x87d75f) github.com/zyedidia/micro/v2/internal/action/command.go:598 (0x87da79) github.com/zyedidia/micro/v2/internal/action/command.go:634 (0x87de54) github.com/zyedidia/micro/v2/internal/action/command.go:1030 (0x880f68) github.com/zyedidia/micro/v2/internal/action/actions.go:1545 (0x870d72) github.com/zyedidia/micro/v2/internal/info/infobuffer.go:152 (0x8421b4) github.com/zyedidia/micro/v2/internal/action/infopane.go:208 (0x8854cc) github.com/zyedidia/micro/v2/internal/action/infopane.go:54 (0x8844d6) github.com/zyedidia/micro/v2/internal/action/infopane.go:131 (0x884d42) github.com/zyedidia/micro/v2/internal/action/infopane.go:95 (0x8849ff) github.com/zyedidia/micro/v2/cmd/micro/micro.go:481 (0x8bfb86) github.com/zyedidia/micro/v2/cmd/micro/micro.go:397 (0x8bf63e) runtime/proc.go:255 (0x438867) runtime/asm_amd64.s:1581 (0x467a81)
2024-03-25 21:35:57 +03:00
src := f.yamlSrc
s = new(Def)
s.Header = header
Fix crash when syntax file has no `rules` (#3213) If a syntax file aaa.yaml contains no `rules` directive, then after `set filetype aaa` micro crashes with d.rules nil pointer dereference in HasIncludes(): Micro encountered an error: runtime.errorString runtime error: invalid memory address or nil pointer dereference runtime/panic.go:221 (0x44c527) runtime/panic.go:220 (0x44c4f7) github.com/zyedidia/micro/v2/pkg/highlight/parser.go:239 (0x820919) github.com/zyedidia/micro/v2/internal/buffer/buffer.go:830 (0x82b818) github.com/zyedidia/micro/v2/internal/buffer/settings.go:33 (0x83b665) github.com/zyedidia/micro/v2/internal/action/command.go:578 (0x87d75f) github.com/zyedidia/micro/v2/internal/action/command.go:598 (0x87da79) github.com/zyedidia/micro/v2/internal/action/command.go:634 (0x87de54) github.com/zyedidia/micro/v2/internal/action/command.go:1030 (0x880f68) github.com/zyedidia/micro/v2/internal/action/actions.go:1545 (0x870d72) github.com/zyedidia/micro/v2/internal/info/infobuffer.go:152 (0x8421b4) github.com/zyedidia/micro/v2/internal/action/infopane.go:208 (0x8854cc) github.com/zyedidia/micro/v2/internal/action/infopane.go:54 (0x8844d6) github.com/zyedidia/micro/v2/internal/action/infopane.go:131 (0x884d42) github.com/zyedidia/micro/v2/internal/action/infopane.go:95 (0x8849ff) github.com/zyedidia/micro/v2/cmd/micro/micro.go:481 (0x8bfb86) github.com/zyedidia/micro/v2/cmd/micro/micro.go:397 (0x8bf63e) runtime/proc.go:255 (0x438867) runtime/asm_amd64.s:1581 (0x467a81)
2024-03-25 21:35:57 +03:00
for k, v := range src {
if k == "rules" {
inputRules := v.([]interface{})
rules, err := parseRules(inputRules, nil)
if err != nil {
return nil, err
}
s.rules = rules
}
}
Fix crash when syntax file has no `rules` (#3213) If a syntax file aaa.yaml contains no `rules` directive, then after `set filetype aaa` micro crashes with d.rules nil pointer dereference in HasIncludes(): Micro encountered an error: runtime.errorString runtime error: invalid memory address or nil pointer dereference runtime/panic.go:221 (0x44c527) runtime/panic.go:220 (0x44c4f7) github.com/zyedidia/micro/v2/pkg/highlight/parser.go:239 (0x820919) github.com/zyedidia/micro/v2/internal/buffer/buffer.go:830 (0x82b818) github.com/zyedidia/micro/v2/internal/buffer/settings.go:33 (0x83b665) github.com/zyedidia/micro/v2/internal/action/command.go:578 (0x87d75f) github.com/zyedidia/micro/v2/internal/action/command.go:598 (0x87da79) github.com/zyedidia/micro/v2/internal/action/command.go:634 (0x87de54) github.com/zyedidia/micro/v2/internal/action/command.go:1030 (0x880f68) github.com/zyedidia/micro/v2/internal/action/actions.go:1545 (0x870d72) github.com/zyedidia/micro/v2/internal/info/infobuffer.go:152 (0x8421b4) github.com/zyedidia/micro/v2/internal/action/infopane.go:208 (0x8854cc) github.com/zyedidia/micro/v2/internal/action/infopane.go:54 (0x8844d6) github.com/zyedidia/micro/v2/internal/action/infopane.go:131 (0x884d42) github.com/zyedidia/micro/v2/internal/action/infopane.go:95 (0x8849ff) github.com/zyedidia/micro/v2/cmd/micro/micro.go:481 (0x8bfb86) github.com/zyedidia/micro/v2/cmd/micro/micro.go:397 (0x8bf63e) runtime/proc.go:255 (0x438867) runtime/asm_amd64.s:1581 (0x467a81)
2024-03-25 21:35:57 +03:00
if s.rules == nil {
// allow empty rules
s.rules = new(rules)
}
return s, err
}
2019-12-29 05:57:03 +03:00
// HasIncludes reports whether this syntax def has any include statements,
// either at its top level or inside any of its regions.
func HasIncludes(d *Def) bool {
	if len(d.rules.includes) > 0 {
		return true
	}
	for _, child := range d.rules.regions {
		if hasIncludesInRegion(child) {
			return true
		}
	}
	return false
}
// hasIncludesInRegion reports whether region, or any region nested inside
// it at any depth, has an include statement.
func hasIncludesInRegion(region *region) bool {
	if len(region.rules.includes) > 0 {
		return true
	}
	for _, nested := range region.rules.regions {
		if hasIncludesInRegion(nested) {
			return true
		}
	}
	return false
}
// GetIncludes returns a list of filetypes that are included by this syntax
// def: its own top-level includes followed by those of its regions.
//
// The result is a fresh slice; it never shares storage with the def.
func GetIncludes(d *Def) []string {
	// Start from a copy. Appending to d.rules.includes directly could write
	// into spare capacity of the def's own backing array and would hand the
	// caller a slice aliasing the def's internal state.
	includes := append([]string(nil), d.rules.includes...)
	for _, r := range d.rules.regions {
		includes = append(includes, getIncludesInRegion(r)...)
	}
	return includes
}
// getIncludesInRegion returns the filetypes included by region followed by
// those included by the regions nested inside it, at any depth.
//
// The result is a fresh slice; it never shares storage with the region.
func getIncludesInRegion(region *region) []string {
	// Start from a copy. Appending to region.rules.includes directly could
	// write into spare capacity of the region's own backing array and would
	// return a slice aliasing the region's internal state.
	includes := append([]string(nil), region.rules.includes...)
	for _, r := range region.rules.regions {
		includes = append(includes, getIncludesInRegion(r)...)
	}
	return includes
}
2017-03-28 02:35:28 +03:00
// ResolveIncludes sorts out the rules of def that include other filetypes,
// looking the included filetypes up in files.
// You should call this after parsing all the Defs.
func ResolveIncludes(def *Def, files []*File) {
	resolveIncludesInDef(files, def)
}
// resolveIncludesInDef appends, for every filetype included at the top level
// of d, the patterns and regions of the matching file in files. It then
// resolves the includes of every top-level region and clears that region's
// parent.
//
// An included file that fails to parse is skipped. There is no error return
// to report it through, and using its result would dereference a nil Def.
func resolveIncludesInDef(files []*File, d *Def) {
	for _, lang := range d.rules.includes {
		for _, searchFile := range files {
			if lang != searchFile.FileType {
				continue
			}
			searchDef, err := ParseDef(searchFile, nil)
			if err != nil || searchDef == nil || searchDef.rules == nil {
				// Best effort: leave this include unresolved.
				continue
			}
			d.rules.patterns = append(d.rules.patterns, searchDef.rules.patterns...)
			d.rules.regions = append(d.rules.regions, searchDef.rules.regions...)
		}
	}

	// This also covers the regions that were just pulled in by includes.
	for _, r := range d.rules.regions {
		resolveIncludesInRegion(files, r)
		r.parent = nil
	}
}
// resolveIncludesInRegion appends, for every filetype included by region,
// the patterns and regions of the matching file in files. It then resolves
// the includes of every nested region and sets that region's parent to
// region.
//
// An included file that fails to parse is skipped. There is no error return
// to report it through, and using its result would dereference a nil Def.
//
// NOTE(review): included regions are resolved recursively, so a file whose
// region includes itself (directly or via another file) looks like it would
// recurse without bound. Confirm syntax files cannot do this.
func resolveIncludesInRegion(files []*File, region *region) {
	for _, lang := range region.rules.includes {
		for _, searchFile := range files {
			if lang != searchFile.FileType {
				continue
			}
			searchDef, err := ParseDef(searchFile, nil)
			if err != nil || searchDef == nil || searchDef.rules == nil {
				// Best effort: leave this include unresolved.
				continue
			}
			region.rules.patterns = append(region.rules.patterns, searchDef.rules.patterns...)
			region.rules.regions = append(region.rules.regions, searchDef.rules.regions...)
		}
	}

	// This also covers the regions that were just pulled in by includes.
	for _, r := range region.rules.regions {
		resolveIncludesInRegion(files, r)
		r.parent = region
	}
}
2017-06-21 21:37:30 +03:00
func parseRules(input []interface{}, curRegion *region) (ru *rules, err error) {
2017-06-21 21:26:05 +03:00
defer func() {
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
if !ok {
err = fmt.Errorf("pkg: %v", r)
}
}
}()
2017-06-21 21:37:30 +03:00
ru = new(rules)
for _, v := range input {
rule := v.(map[interface{}]interface{})
for k, val := range rule {
group := k
switch object := val.(type) {
case string:
if k == "include" {
2017-06-21 21:37:30 +03:00
ru.includes = append(ru.includes, object)
} else {
// Pattern
r, err := regexp.Compile(object)
if err != nil {
return nil, err
}
2017-03-20 22:14:04 +03:00
groupStr := group.(string)
if _, ok := Groups[groupStr]; !ok {
2017-03-20 22:14:04 +03:00
numGroups++
Groups[groupStr] = numGroups
2017-03-20 22:14:04 +03:00
}
groupNum := Groups[groupStr]
2017-06-21 21:37:30 +03:00
ru.patterns = append(ru.patterns, &pattern{groupNum, r})
}
case map[interface{}]interface{}:
2017-03-28 02:35:28 +03:00
// region
region, err := parseRegion(group.(string), object, curRegion)
if err != nil {
return nil, err
}
2017-06-21 21:37:30 +03:00
ru.regions = append(ru.regions, region)
default:
return nil, fmt.Errorf("Bad type %T", object)
}
}
}
2017-06-21 21:37:30 +03:00
return ru, nil
}
2017-06-21 21:37:30 +03:00
func parseRegion(group string, regionInfo map[interface{}]interface{}, prevRegion *region) (r *region, err error) {
2017-06-21 21:26:05 +03:00
defer func() {
if r := recover(); r != nil {
var ok bool
err, ok = r.(error)
if !ok {
err = fmt.Errorf("pkg: %v", r)
}
}
}()
2017-06-21 21:37:30 +03:00
r = new(region)
if _, ok := Groups[group]; !ok {
2017-03-20 22:14:04 +03:00
numGroups++
Groups[group] = numGroups
2017-03-20 22:14:04 +03:00
}
groupNum := Groups[group]
2017-06-21 21:37:30 +03:00
r.group = groupNum
r.parent = prevRegion
2017-06-21 21:37:30 +03:00
r.start, err = regexp.Compile(regionInfo["start"].(string))
if err != nil {
return nil, err
}
2017-06-21 21:37:30 +03:00
r.end, err = regexp.Compile(regionInfo["end"].(string))
if err != nil {
return nil, err
}
// skip is optional
if _, ok := regionInfo["skip"]; ok {
2017-06-21 21:37:30 +03:00
r.skip, err = regexp.Compile(regionInfo["skip"].(string))
if err != nil {
return nil, err
}
}
// limit-color is optional
if _, ok := regionInfo["limit-group"]; ok {
groupStr := regionInfo["limit-group"].(string)
if _, ok := Groups[groupStr]; !ok {
numGroups++
Groups[groupStr] = numGroups
}
groupNum := Groups[groupStr]
2017-06-21 21:37:30 +03:00
r.limitGroup = groupNum
if err != nil {
return nil, err
}
} else {
2017-06-21 21:37:30 +03:00
r.limitGroup = r.group
}
2017-06-21 21:37:30 +03:00
r.rules, err = parseRules(regionInfo["rules"].([]interface{}), r)
if err != nil {
return nil, err
}
2017-06-21 21:37:30 +03:00
return r, nil
}