rewrite in go

This commit is contained in:
raf 2023-10-27 10:01:55 +03:00
commit 05642cb93b
No known key found for this signature in database
GPG key ID: 02D1DD3FA08B6B29
743 changed files with 334055 additions and 55 deletions

View file

@ -0,0 +1,42 @@
package characters
var invalidAsciiTable = [256]bool{
0x00: true,
0x01: true,
0x02: true,
0x03: true,
0x04: true,
0x05: true,
0x06: true,
0x07: true,
0x08: true,
// 0x09 TAB
// 0x0A LF
0x0B: true,
0x0C: true,
// 0x0D CR
0x0E: true,
0x0F: true,
0x10: true,
0x11: true,
0x12: true,
0x13: true,
0x14: true,
0x15: true,
0x16: true,
0x17: true,
0x18: true,
0x19: true,
0x1A: true,
0x1B: true,
0x1C: true,
0x1D: true,
0x1E: true,
0x1F: true,
// 0x20 - 0x7E Printable ASCII characters
0x7F: true,
}
func InvalidAscii(b byte) bool {
return invalidAsciiTable[b]
}

View file

@ -0,0 +1,199 @@
package characters
import (
"unicode/utf8"
)
type utf8Err struct {
Index int
Size int
}
func (u utf8Err) Zero() bool {
return u.Size == 0
}
// Verified that a given string is only made of valid UTF-8 characters allowed
// by the TOML spec:
//
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters other than tab (U+0000
// to U+0008, U+000A to U+001F, U+007F).
//
// It is a copy of the Go 1.17 utf8.Valid implementation, tweaked to exit early
// when a character is not allowed.
//
// The returned utf8Err is Zero() if the string is valid, or contains the byte
// index and size of the invalid character.
//
// quotation mark => already checked
// backslash => already checked
// 0-0x8 => invalid
// 0x9 => tab, ok
// 0xA - 0x1F => invalid
// 0x7F => invalid
func Utf8TomlValidAlreadyEscaped(p []byte) (err utf8Err) {
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
offset := 0
for len(p) >= 8 {
// Combining two 32 bit loads allows the same code to be used
// for 32 and 64 bit platforms.
// The compiler can generate a 32bit load for first32 and second32
// on many platforms. See test/codegen/memcombine.go.
first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
if (first32|second32)&0x80808080 != 0 {
// Found a non ASCII byte (>= RuneSelf).
break
}
for i, b := range p[:8] {
if InvalidAscii(b) {
err.Index = offset + i
err.Size = 1
return
}
}
p = p[8:]
offset += 8
}
n := len(p)
for i := 0; i < n; {
pi := p[i]
if pi < utf8.RuneSelf {
if InvalidAscii(pi) {
err.Index = offset + i
err.Size = 1
return
}
i++
continue
}
x := first[pi]
if x == xx {
// Illegal starter byte.
err.Index = offset + i
err.Size = 1
return
}
size := int(x & 7)
if i+size > n {
// Short or invalid.
err.Index = offset + i
err.Size = n - i
return
}
accept := acceptRanges[x>>4]
if c := p[i+1]; c < accept.lo || accept.hi < c {
err.Index = offset + i
err.Size = 2
return
} else if size == 2 {
} else if c := p[i+2]; c < locb || hicb < c {
err.Index = offset + i
err.Size = 3
return
} else if size == 3 {
} else if c := p[i+3]; c < locb || hicb < c {
err.Index = offset + i
err.Size = 4
return
}
i += size
}
return
}
// Return the size of the next rune if valid, 0 otherwise.
func Utf8ValidNext(p []byte) int {
c := p[0]
if c < utf8.RuneSelf {
if InvalidAscii(c) {
return 0
}
return 1
}
x := first[c]
if x == xx {
// Illegal starter byte.
return 0
}
size := int(x & 7)
if size > len(p) {
// Short or invalid.
return 0
}
accept := acceptRanges[x>>4]
if c := p[1]; c < accept.lo || accept.hi < c {
return 0
} else if size == 2 {
} else if c := p[2]; c < locb || hicb < c {
return 0
} else if size == 3 {
} else if c := p[3]; c < locb || hicb < c {
return 0
}
return size
}
// acceptRange gives the range of valid values for the second byte in a UTF-8
// sequence.
type acceptRange struct {
lo uint8 // lowest value for second byte.
hi uint8 // highest value for second byte.
}
// acceptRanges has size 16 to avoid bounds checks in the code that uses it.
var acceptRanges = [16]acceptRange{
0: {locb, hicb},
1: {0xA0, hicb},
2: {locb, 0x9F},
3: {0x90, hicb},
4: {locb, 0x8F},
}
// first is information about the first byte in a UTF-8 sequence.
var first = [256]uint8{
// 1 2 3 4 5 6 7 8 9 A B C D E F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
// 1 2 3 4 5 6 7 8 9 A B C D E F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
}
const (
// The default lowest and highest continuation byte.
locb = 0b10000000
hicb = 0b10111111
// These names of these constants are chosen to give nice alignment in the
// table below. The first nibble is an index into acceptRanges or F for
// special one-byte cases. The second nibble is the Rune length or the
// Status for the special one-byte case.
xx = 0xF1 // invalid: size 1
as = 0xF0 // ASCII: size 1
s1 = 0x02 // accept 0, size 2
s2 = 0x13 // accept 1, size 3
s3 = 0x03 // accept 0, size 3
s4 = 0x23 // accept 2, size 3
s5 = 0x34 // accept 3, size 4
s6 = 0x04 // accept 0, size 4
s7 = 0x44 // accept 4, size 4
)

View file

@ -0,0 +1,65 @@
package danger
import (
"fmt"
"reflect"
"unsafe"
)
const maxInt = uintptr(int(^uint(0) >> 1))
func SubsliceOffset(data []byte, subslice []byte) int {
datap := (*reflect.SliceHeader)(unsafe.Pointer(&data))
hlp := (*reflect.SliceHeader)(unsafe.Pointer(&subslice))
if hlp.Data < datap.Data {
panic(fmt.Errorf("subslice address (%d) is before data address (%d)", hlp.Data, datap.Data))
}
offset := hlp.Data - datap.Data
if offset > maxInt {
panic(fmt.Errorf("slice offset larger than int (%d)", offset))
}
intoffset := int(offset)
if intoffset > datap.Len {
panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", intoffset, datap.Len))
}
if intoffset+hlp.Len > datap.Len {
panic(fmt.Errorf("slice ends (%d+%d) is farther than data length (%d)", intoffset, hlp.Len, datap.Len))
}
return intoffset
}
func BytesRange(start []byte, end []byte) []byte {
if start == nil || end == nil {
panic("cannot call BytesRange with nil")
}
startp := (*reflect.SliceHeader)(unsafe.Pointer(&start))
endp := (*reflect.SliceHeader)(unsafe.Pointer(&end))
if startp.Data > endp.Data {
panic(fmt.Errorf("start pointer address (%d) is after end pointer address (%d)", startp.Data, endp.Data))
}
l := startp.Len
endLen := int(endp.Data-startp.Data) + endp.Len
if endLen > l {
l = endLen
}
if l > startp.Cap {
panic(fmt.Errorf("range length is larger than capacity"))
}
return start[:l]
}
func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer {
// TODO: replace with unsafe.Add when Go 1.17 is released
// https://github.com/golang/go/issues/40481
return unsafe.Pointer(uintptr(ptr) + uintptr(int(size)*offset))
}

View file

@ -0,0 +1,23 @@
package danger
import (
"reflect"
"unsafe"
)
// typeID is used as key in encoder and decoder caches to enable using
// the optimize runtime.mapaccess2_fast64 function instead of the more
// expensive lookup if we were to use reflect.Type as map key.
//
// typeID holds the pointer to the reflect.Type value, which is unique
// in the program.
//
// https://github.com/segmentio/encoding/blob/master/json/codec.go#L59-L61
type TypeID unsafe.Pointer
func MakeTypeID(t reflect.Type) TypeID {
// reflect.Type has the fields:
// typ unsafe.Pointer
// ptr unsafe.Pointer
return TypeID((*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1])
}

View file

@ -0,0 +1,48 @@
package tracker
import "github.com/pelletier/go-toml/v2/unstable"
// KeyTracker is a tracker that keeps track of the current Key as the AST is
// walked.
type KeyTracker struct {
k []string
}
// UpdateTable sets the state of the tracker with the AST table node.
func (t *KeyTracker) UpdateTable(node *unstable.Node) {
t.reset()
t.Push(node)
}
// UpdateArrayTable sets the state of the tracker with the AST array table node.
func (t *KeyTracker) UpdateArrayTable(node *unstable.Node) {
t.reset()
t.Push(node)
}
// Push the given key on the stack.
func (t *KeyTracker) Push(node *unstable.Node) {
it := node.Key()
for it.Next() {
t.k = append(t.k, string(it.Node().Data))
}
}
// Pop key from stack.
func (t *KeyTracker) Pop(node *unstable.Node) {
it := node.Key()
for it.Next() {
t.k = t.k[:len(t.k)-1]
}
}
// Key returns the current key
func (t *KeyTracker) Key() []string {
k := make([]string, len(t.k))
copy(k, t.k)
return k
}
func (t *KeyTracker) reset() {
t.k = t.k[:0]
}

View file

@ -0,0 +1,356 @@
package tracker
import (
"bytes"
"fmt"
"sync"
"github.com/pelletier/go-toml/v2/unstable"
)
type keyKind uint8
const (
invalidKind keyKind = iota
valueKind
tableKind
arrayTableKind
)
func (k keyKind) String() string {
switch k {
case invalidKind:
return "invalid"
case valueKind:
return "value"
case tableKind:
return "table"
case arrayTableKind:
return "array table"
}
panic("missing keyKind string mapping")
}
// SeenTracker tracks which keys have been seen with which TOML type to flag
// duplicates and mismatches according to the spec.
//
// Each node in the visited tree is represented by an entry. Each entry has an
// identifier, which is provided by a counter. Entries are stored in the array
// entries. As new nodes are discovered (referenced for the first time in the
// TOML document), entries are created and appended to the array. An entry
// points to its parent using its id.
//
// To find whether a given key (sequence of []byte) has already been visited,
// the entries are linearly searched, looking for one with the right name and
// parent id.
//
// Given that all keys appear in the document after their parent, it is
// guaranteed that all descendants of a node are stored after the node, this
// speeds up the search process.
//
// When encountering [[array tables]], the descendants of that node are removed
// to allow that branch of the tree to be "rediscovered". To maintain the
// invariant above, the deletion process needs to keep the order of entries.
// This results in more copies in that case.
type SeenTracker struct {
entries []entry
currentIdx int
}
var pool sync.Pool
func (s *SeenTracker) reset() {
// Always contains a root element at index 0.
s.currentIdx = 0
if len(s.entries) == 0 {
s.entries = make([]entry, 1, 2)
} else {
s.entries = s.entries[:1]
}
s.entries[0].child = -1
s.entries[0].next = -1
}
type entry struct {
// Use -1 to indicate no child or no sibling.
child int
next int
name []byte
kind keyKind
explicit bool
kv bool
}
// Find the index of the child of parentIdx with key k. Returns -1 if
// it does not exist.
func (s *SeenTracker) find(parentIdx int, k []byte) int {
for i := s.entries[parentIdx].child; i >= 0; i = s.entries[i].next {
if bytes.Equal(s.entries[i].name, k) {
return i
}
}
return -1
}
// Remove all descendants of node at position idx.
func (s *SeenTracker) clear(idx int) {
if idx >= len(s.entries) {
return
}
for i := s.entries[idx].child; i >= 0; {
next := s.entries[i].next
n := s.entries[0].next
s.entries[0].next = i
s.entries[i].next = n
s.entries[i].name = nil
s.clear(i)
i = next
}
s.entries[idx].child = -1
}
func (s *SeenTracker) create(parentIdx int, name []byte, kind keyKind, explicit bool, kv bool) int {
e := entry{
child: -1,
next: s.entries[parentIdx].child,
name: name,
kind: kind,
explicit: explicit,
kv: kv,
}
var idx int
if s.entries[0].next >= 0 {
idx = s.entries[0].next
s.entries[0].next = s.entries[idx].next
s.entries[idx] = e
} else {
idx = len(s.entries)
s.entries = append(s.entries, e)
}
s.entries[parentIdx].child = idx
return idx
}
func (s *SeenTracker) setExplicitFlag(parentIdx int) {
for i := s.entries[parentIdx].child; i >= 0; i = s.entries[i].next {
if s.entries[i].kv {
s.entries[i].explicit = true
s.entries[i].kv = false
}
s.setExplicitFlag(i)
}
}
// CheckExpression takes a top-level node and checks that it does not contain
// keys that have been seen in previous calls, and validates that types are
// consistent.
func (s *SeenTracker) CheckExpression(node *unstable.Node) error {
if s.entries == nil {
s.reset()
}
switch node.Kind {
case unstable.KeyValue:
return s.checkKeyValue(node)
case unstable.Table:
return s.checkTable(node)
case unstable.ArrayTable:
return s.checkArrayTable(node)
default:
panic(fmt.Errorf("this should not be a top level node type: %s", node.Kind))
}
}
func (s *SeenTracker) checkTable(node *unstable.Node) error {
if s.currentIdx >= 0 {
s.setExplicitFlag(s.currentIdx)
}
it := node.Key()
parentIdx := 0
// This code is duplicated in checkArrayTable. This is because factoring
// it in a function requires to copy the iterator, or allocate it to the
// heap, which is not cheap.
for it.Next() {
if it.IsLast() {
break
}
k := it.Node().Data
idx := s.find(parentIdx, k)
if idx < 0 {
idx = s.create(parentIdx, k, tableKind, false, false)
} else {
entry := s.entries[idx]
if entry.kind == valueKind {
return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind)
}
}
parentIdx = idx
}
k := it.Node().Data
idx := s.find(parentIdx, k)
if idx >= 0 {
kind := s.entries[idx].kind
if kind != tableKind {
return fmt.Errorf("toml: key %s should be a table, not a %s", string(k), kind)
}
if s.entries[idx].explicit {
return fmt.Errorf("toml: table %s already exists", string(k))
}
s.entries[idx].explicit = true
} else {
idx = s.create(parentIdx, k, tableKind, true, false)
}
s.currentIdx = idx
return nil
}
func (s *SeenTracker) checkArrayTable(node *unstable.Node) error {
if s.currentIdx >= 0 {
s.setExplicitFlag(s.currentIdx)
}
it := node.Key()
parentIdx := 0
for it.Next() {
if it.IsLast() {
break
}
k := it.Node().Data
idx := s.find(parentIdx, k)
if idx < 0 {
idx = s.create(parentIdx, k, tableKind, false, false)
} else {
entry := s.entries[idx]
if entry.kind == valueKind {
return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind)
}
}
parentIdx = idx
}
k := it.Node().Data
idx := s.find(parentIdx, k)
if idx >= 0 {
kind := s.entries[idx].kind
if kind != arrayTableKind {
return fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", kind, string(k))
}
s.clear(idx)
} else {
idx = s.create(parentIdx, k, arrayTableKind, true, false)
}
s.currentIdx = idx
return nil
}
func (s *SeenTracker) checkKeyValue(node *unstable.Node) error {
parentIdx := s.currentIdx
it := node.Key()
for it.Next() {
k := it.Node().Data
idx := s.find(parentIdx, k)
if idx < 0 {
idx = s.create(parentIdx, k, tableKind, false, true)
} else {
entry := s.entries[idx]
if it.IsLast() {
return fmt.Errorf("toml: key %s is already defined", string(k))
} else if entry.kind != tableKind {
return fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind)
} else if entry.explicit {
return fmt.Errorf("toml: cannot redefine table %s that has already been explicitly defined", string(k))
}
}
parentIdx = idx
}
s.entries[parentIdx].kind = valueKind
value := node.Value()
switch value.Kind {
case unstable.InlineTable:
return s.checkInlineTable(value)
case unstable.Array:
return s.checkArray(value)
}
return nil
}
func (s *SeenTracker) checkArray(node *unstable.Node) error {
it := node.Children()
for it.Next() {
n := it.Node()
switch n.Kind {
case unstable.InlineTable:
err := s.checkInlineTable(n)
if err != nil {
return err
}
case unstable.Array:
err := s.checkArray(n)
if err != nil {
return err
}
}
}
return nil
}
func (s *SeenTracker) checkInlineTable(node *unstable.Node) error {
if pool.New == nil {
pool.New = func() interface{} {
return &SeenTracker{}
}
}
s = pool.Get().(*SeenTracker)
s.reset()
it := node.Children()
for it.Next() {
n := it.Node()
err := s.checkKeyValue(n)
if err != nil {
return err
}
}
// As inline tables are self-contained, the tracker does not
// need to retain the details of what they contain. The
// keyValue element that creates the inline table is kept to
// mark the presence of the inline table and prevent
// redefinition of its keys: check* functions cannot walk into
// a value.
pool.Put(s)
return nil
}

View file

@ -0,0 +1 @@
package tracker