[hexxy] DEV branch - greatly speed up hex encoding

This commit is contained in:
mizi 2024-02-01 12:24:06 -09:00
parent 6d4d21c621
commit fee5350430
2 changed files with 569 additions and 215 deletions

176
encode.go Normal file
View file

@ -0,0 +1,176 @@
package main
import ()
// binaryDecode parses src as a run of ASCII '0'/'1' digits, most significant
// bit first, and stores the accumulated byte in dst[0].
//
// It returns -1 on success. Otherwise it returns the index of the byte that
// stopped the parse and leaves dst untouched:
//   - a space byte (as defined by isSpace) returns its index, except that a
//     space at index 0 is reported as 1;
//   - any other byte that is not '0' or '1' returns its index (this catches
//     input such as "000000: ").
func binaryDecode(dst, src []byte) int {
	var acc byte
	for idx, ch := range src {
		// Make room for the next bit before inspecting the digit.
		acc <<= 1
		switch {
		case isSpace(ch):
			// A space means we are sitting between groups.
			if idx == 0 {
				return 1
			}
			return idx
		case ch == '1':
			acc |= 1
		case ch != '0':
			return idx
		}
	}
	dst[0] = acc
	return -1
}
// cfmtEncode writes src[0] into dst[0:4] as a C-style hex literal: the
// characters '0' and 'x' followed by two digits looked up in hextable
// (high nibble first).
//
// dst must hold at least four bytes and src at least one.
func cfmtEncode(dst, src []byte, hextable string) {
	octet := src[0]
	lowNibble, highNibble := octet&0x0f, octet>>4
	// Highest index first, so a too-short dst panics before anything is written.
	dst[3] = hextable[lowNibble]
	dst[2] = hextable[highNibble]
	dst[1], dst[0] = 'x', '0'
}
// hexEncode writes the two hex digits of src[0] into dst[0:2], high nibble
// first, using hextable as the digit alphabet.
//
// Adapted from the encoding/hex package; the table is a parameter so that
// callers can choose uppercase digits. dst must hold at least two bytes and
// src at least one.
func hexEncode(dst, src []byte, hextable string) {
	octet := src[0]
	lowNibble, highNibble := octet&0x0f, octet>>4
	dst[1] = hextable[lowNibble]
	dst[0] = hextable[highNibble]
}
// hexDecode decodes hex digit pairs from src into dst[0]. Adapted from the
// encoding/hex package.
//
// src must hold at least three bytes and dst at least one. An optional
// leading "0x"/"0X" prefix is skipped. A trailing unpaired byte is ignored.
//
// Return values:
//
//	 0  success
//	-1  src starts with a single space, or a non-hex byte was found
//	-2  src starts with two consecutive spaces
//
// NOTE(review): every decoded pair is stored in dst[0], so when src contains
// more than one pair only the last one survives (and dst[0] may already have
// been overwritten when a later pair fails). Presumably callers pass a single
// pair at a time; confirm against the caller.
func hexDecode(dst, src []byte) int {
	// Bounds hint, as in encoding/hex: fail early if the slices are too short.
	_, _ = src[2], dst[0]

	switch {
	case isSpace(src[0]) && isSpace(src[1]):
		return -2
	case isSpace(src[0]):
		return -1
	}

	if isPrefix(src[:2]) {
		src = src[2:]
	}

	for pos := 0; pos+1 < len(src); pos += 2 {
		hi, ok := fromHexChar(src[pos])
		if !ok {
			return -1
		}
		lo, ok := fromHexChar(src[pos+1])
		if !ok {
			return -1
		}
		dst[0] = hi<<4 | lo
	}
	return 0
}
// fromHexChar converts a single ASCII hex digit (0-9, a-f, A-F) to its
// numeric value. The boolean is false, and the value 0, when c is not a hex
// digit. Adapted from the encoding/hex package.
func fromHexChar(c byte) (byte, bool) {
	if c >= '0' && c <= '9' {
		return c - '0', true
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a'), true
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A'), true
	}
	return 0, false
}
// empty reports whether every byte in *b is zero (NUL in C terms). A slice
// of length zero is considered empty.
func empty(b *[]byte) bool {
	for _, v := range *b {
		if v != 0 {
			return false
		}
	}
	return true
}
// parseSpecifier maps a size suffix to a numeric multiplier. Only the first
// two bytes of b are examined.
//
//   - ""                         -> 0
//   - "k"/"m"/"g" (any case), alone, followed by 'B', or followed by '0'
//     -> 1024, 1048576, 1073741824 (kilo/mega/giga-bytes are assumed)
//   - "k"/"m"/"g" (any case) followed by lowercase 'b' (bits)
//     -> 0.0078125, 7.62939453125e-06, 7.45058059692383e-09
//   - anything else              -> 1 (bytes as the fallback)
//
// NOTE(review): the bit multipliers are reciprocals (1/128, ...) rather than
// bytes-per-unit values; verify against the caller that this is intended.
func parseSpecifier(b string) float64 {
	if len(b) == 0 {
		return 0
	}

	unit := b[0]
	// A lone unit letter behaves as if it were followed by '0'.
	kind := byte('0')
	if len(b) >= 2 {
		kind = b[1]
	}

	switch kind {
	case 'b': // bits, so convert bytes to bits for os.Seek()
		switch unit {
		case 'k', 'K':
			return 0.0078125
		case 'm', 'M':
			return 7.62939453125e-06
		case 'g', 'G':
			return 7.45058059692383e-09
		}
	case 'B', '0': // bytes
		switch unit {
		case 'k', 'K':
			return 1024
		case 'm', 'M':
			return 1048576
		case 'g', 'G':
			return 1073741824
		}
	}
	return 1
}
// isSpace reports whether b is a whitespace byte: space, horizontal tab,
// or newline.
//
// Form feed ('\f', 12) is also accepted: earlier versions listed 12 where
// newline (10) was intended, and it is kept so existing input still parses.
func isSpace(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\f':
		return true
	default:
		return false
	}
}
// isPrefix reports whether the first two bytes of b form a hex prefix,
// i.e. "0x" or "0X". b[1] is only inspected when b[0] is '0'.
func isPrefix(b []byte) bool {
	if b[0] != '0' {
		return false
	}
	switch b[1] {
	case 'x', 'X':
		return true
	}
	return false
}

598
hexxy.go
View file

@ -2,31 +2,80 @@ package main
import ( import (
"bufio" "bufio"
"encoding/hex"
"errors" "errors"
"fmt" "fmt"
"io" "io"
"log" "log"
"os" "os"
"strconv" "strconv"
"strings"
"github.com/jessevdk/go-flags" "github.com/jessevdk/go-flags"
) )
var opts struct { var opts struct {
NoColor bool `short:"N" long:"no-color" description:"do not print output with color"`
OffsetFormat string `short:"t" long:"radix" default:"x" choice:"d" choice:"o" choice:"x" description:"Print offset in [d|o|x] format"` OffsetFormat string `short:"t" long:"radix" default:"x" choice:"d" choice:"o" choice:"x" description:"Print offset in [d|o|x] format"`
Binary bool `short:"b" long:"binary" description:"output in binary format (01010101) incompatible with plain, reverse and include"`
Reverse bool `short:"r" long:"reverse" description:"re-assemble hexdump output back into binary"` Reverse bool `short:"r" long:"reverse" description:"re-assemble hexdump output back into binary"`
Autoskip bool `short:"a" long:"autoskip" description:"toggle autoskip (replaces blank lines with a *)"`
Bars bool `short:"B" long:"bars" description:"delimiter bars in ascii table"`
Seek int64 `short:"s" long:"seek" description:"start at <seek> bytes"`
Len int64 `short:"l" long:"len" description:"stop after <len> octets"`
Columns int `short:"c" long:"columns" description:"column count"`
GroupSize int `short:"g" long:"groups" description:"group count"`
Plain bool `short:"p" long:"plain" description:"plain output without ascii table and offset row [often used with hexxy -r]"` Plain bool `short:"p" long:"plain" description:"plain output without ascii table and offset row [often used with hexxy -r]"`
Upper bool `short:"u" long:"upper" description:"output hex in UPPERCASE format"`
CInclude bool `short:"i" long:"include" description:"output in C include format"`
OutputFile string `short:"o" long:"output" description:"automatically output to file instead of STDOUT"`
Separator string `long:"separator" default:"|" description:"separator character for the ascii character table"`
ForceColor bool `short:"F" long:"force-color" description:"color is automatically disabled if output is a pipe, this option forces color output"` ForceColor bool `short:"F" long:"force-color" description:"color is automatically disabled if output is a pipe, this option forces color output"`
Separator string `short:"s" long:"separator" default:"|" description:"separator character for the ascii character table"` NoColor bool `short:"N" long:"no-color" description:"do not print output with color"`
Verbose bool `short:"v" long:"verbose" description:"print debugging information and verbose output"` Verbose bool `short:"v" long:"verbose" description:"print debugging information and verbose output"`
} }
var Debug = func(string, ...interface{}) {} var Debug = func(string, ...interface{}) {}
var OffsetFormat string
var Separator string const (
dumpHex = iota
dumpBinary
dumpCformat
dumpPlain
)
const (
ldigits = "0123456789abcdef"
udigits = "0123456789ABCDEF"
)
var (
dumpType int
space = []byte(" ")
doubleSpace = []byte(" ")
dot = []byte(".")
newLine = []byte("\n")
zeroHeader = []byte("0000000: ")
unsignedChar = []byte("unsigned char ")
unsignedInt = []byte("};\nunsigned int ")
lenEquals = []byte("_len = ")
brackets = []byte("[] = {")
asterisk = []byte("*")
commaSpace = []byte(", ")
comma = []byte(",")
semiColonNl = []byte(";\n")
bar = []byte("|")
)
// binaryEncode writes the eight bits of src[0] into dst[0:8] as the ASCII
// characters '0' and '1', most significant bit first (0xA5 -> "10100101").
//
// dst must hold at least eight bytes and src at least one.
func binaryEncode(dst, src []byte) {
	// Bounds hint: fail up front if either slice is too short.
	_, _ = src[0], dst[7]
	bits := src[0]
	// Fill from the right: the lowest bit lands in dst[7].
	for i := 7; i >= 0; i-- {
		// Emit printable digits, not the raw byte values 0 and 1.
		dst[i] = '0' + (bits & 1)
		bits >>= 1
	}
}
const GREY = "\x1b[38;2;111;111;111m" const GREY = "\x1b[38;2;111;111;111m"
const CLR = "\x1b[0m" const CLR = "\x1b[0m"
@ -59,49 +108,19 @@ func (c *Color) Colorize(s string, clr byte) string {
return c.values[clr] + s + NOCOLOR return c.values[clr] + s + NOCOLOR
} }
func stdinOpen() bool { func inputIsPipe() bool {
stat, _ := os.Stdin.Stat() stat, _ := os.Stdin.Stat()
if stat.Mode()&os.ModeCharDevice == os.ModeCharDevice { return stat.Mode()&os.ModeCharDevice != os.ModeCharDevice
return false
} else {
return true
}
} }
func asciiRow(ascii []byte, clr *Color, stdout io.Writer) { func outputIsPipe() bool {
var s string stat, _ := os.Stdout.Stat()
for _, b := range ascii { return stat.Mode()&os.ModeCharDevice != os.ModeCharDevice
if b >= 33 && b <= 126 {
s = clr.Colorize(string(b), b)
} else {
s = clr.Colorize(".", b)
} }
fmt.Fprint(stdout, s) func HexdumpPlain(file *os.File) error {
} var i uint64
} reader := bufio.NewReaderSize(file, 10*1024)
func printOffset(offset uint64) string {
return fmt.Sprintf(OffsetFormat, offset)
}
func printSeparator(writer io.Writer, newline bool) {
// WHY???
if newline {
fmt.Fprintln(writer, Separator)
} else {
fmt.Fprint(writer, Separator)
}
}
func Hexdump(file *os.File, color *Color) error {
stdout := bufio.NewWriter(os.Stdout)
stderr := os.Stderr
ascii := [16]byte{}
defer stdout.Flush()
var i uint64 = 0
reader := bufio.NewReaderSize(file, 10*1024*1024)
for { for {
b, err := reader.ReadByte() b, err := reader.ReadByte()
@ -109,154 +128,285 @@ func Hexdump(file *os.File, color *Color) error {
break break
} }
if err != nil { if err != nil {
fmt.Fprintf(stderr, "Failed to read %v: %v\n", file.Name(), err) return fmt.Errorf("Failed to read %v: %w\n", file.Name(), err)
return err
} }
ascii[i%16] = b if i%30 == 0 {
println()
// offset
if i%16 == 0 {
// fmt.Fprintf(stdout, "%08x ", i)
offy := printOffset(i)
fmt.Fprint(stdout, offy)
}
// byte
fmt.Fprintf(stdout, color.Colorize("%02x", b)+" ", b)
// extra space every 4 bytes
if (i+1)%4 == 0 {
fmt.Fprint(stdout, " ")
}
// print ascii row and newline │ | ┆
if (i+1)%16 == 0 {
// fmt.Fprint(stdout, "│")
printSeparator(stdout, false)
asciiRow(ascii[:i%16], color, stdout)
// fmt.Fprintln(stdout, "│")
printSeparator(stdout, true)
ascii = [16]byte{} // reset
} }
fmt.Printf("%02x", b)
i++ i++
} }
if i%16 != 0 {
left := int(16 - i%16)
spaces := 3*left + (left-1)/4 + 1
fmt.Fprint(stdout, strings.Repeat(" ", spaces))
printSeparator(stdout, false)
asciiRow(ascii[:i%16], color, stdout)
printSeparator(stdout, true)
offy := printOffset(i)
fmt.Fprintln(stdout, offy)
// fmt.Fprintf(stdout, "%08x\n", i)
}
return nil return nil
} }
func HexdumpPlain(file *os.File) error { // func plain2Binary(file *os.File) error {
// stdout := bufio.NewWriter(os.Stdout) // return reverse(os.Stdout, os.Stdin)
// stderr := os.Stderr
// defer stdout.Flush()
src, err := io.ReadAll(file)
if err != nil {
return err
}
dst := make([]byte, hex.EncodedLen(len(src)))
hex.Encode(dst, src)
fmt.Printf("%s\n", dst)
// reader := bufio.NewReaderSize(file, 10*1024*1024)
// for {
// b, err := reader.ReadByte()
// if errors.Is(err, io.EOF) {
// break
// } // }
// if err != nil {
// fmt.Fprintf(stderr, "Failed to read %v: %v\n", file.Name(), err) // func getOffsetFormat() error {
// return err // var prefix string
// var suffix string
// var format string
// switch opts.OffsetFormat {
// case "d":
// format = prefix + "%08d " + suffix
// case "o":
// format = prefix + "%08o " + suffix
// case "x":
// format = prefix + "%08x " + suffix
// default:
// return fmt.Errorf("Offset format must be [d|o|x]")
// } // }
// stdout.WriteString(fmt.Sprintf("%02x", string(b)))
// }
return nil
}
func plain2Binary(file *os.File) error {
return reverse(os.Stdout, os.Stdin)
// contents, err := io.ReadAll(file)
// if err != nil {
// return err
// }
// fmt.Println(len(contents))
// fmt.Printf("Binary byte representation: %08b\n", contents)
// _, err = hex.Decode(contents, dst)
// if err != nil {
// return err
// }
// os.Stdout.Write(dst)
// dest := make([]byte, hex.EncodedLen(len(contents)))
// hex.Decode(dest, contents)
// fmt.Printf("%s\n", dest)
// return nil // return nil
}
func getOffsetFormat() error {
var prefix string
var suffix string
var sep string
// turn off color if output is a pipe
// idk if I like this though since I often
// use hexxy asdf | head -n 10 but I also want to work on --reverse option
// stat, _ := os.Stdout.Stat()
// if stat.Mode()&os.ModeCharDevice == 0 && !opts.ForceColor {
// opts.NoColor = true
// } // }
if !opts.NoColor { func XXD(r io.Reader, w io.Writer, filename string) error {
prefix = GREY var (
suffix = CLR lineOffset int64
sep = "│" hexOffset = make([]byte, 6)
groupSize int
cols int
octs int
caps = ldigits
doCheader = true
doCEnd bool
varDeclChar = make([]byte, 14+len(filename)+6) // for "unsigned char NAME_FORMAT[] = {"
varDeclInt = make([]byte, 16+len(filename)+7) // enough room for "unsigned int NAME_FORMAT = "
nulLine int64
totalOcts int64
)
if dumpType == dumpCformat {
_ = copy(varDeclChar[0:14], unsignedChar[:])
_ = copy(varDeclInt[0:14], lenEquals[:])
for i := 0; i < len(filename); i++ {
if filename[i] != '.' {
varDeclChar[14+i] = filename[i]
varDeclInt[16+i] = filename[i]
} else { } else {
prefix = "" varDeclChar[14+i] = '_'
suffix = "" varDeclInt[16+i] = '_'
sep = "|" }
}
// copy "[] = {" and "_len = "
_ = copy(varDeclChar[14+len(filename):], brackets[:])
_ = copy(varDeclInt[16+len(filename):], lenEquals[:])
} }
if opts.Separator != "" { if opts.Upper {
sep = opts.Separator caps = udigits
} }
Separator = prefix + sep + suffix if opts.Columns == -1 {
switch dumpType {
switch opts.OffsetFormat { case dumpPlain:
case "d": cols = 30
OffsetFormat = prefix + "%08d " + suffix case dumpCformat:
case "o": cols = 12
OffsetFormat = prefix + "%08o " + suffix case dumpBinary:
case "x": cols = 6
OffsetFormat = prefix + "%08x " + suffix
default: default:
return fmt.Errorf("Offset format must be [d|o|x]") cols = 16
}
} else {
cols = opts.Columns
}
switch dumpType {
case dumpBinary:
octs = 8
groupSize = 1
case dumpPlain:
octs = 0
case dumpCformat:
octs = 4
default:
octs = 2
groupSize = 2
}
if opts.GroupSize != -1 {
groupSize = opts.GroupSize
}
if opts.Len != -1 {
if opts.Len < int64(cols) {
cols = int(opts.Len)
}
}
if octs < 1 {
octs = cols
}
// allocate their size based on the users specs, hence why its declared here
var (
line = make([]byte, cols)
char = make([]byte, octs)
)
c := int64(0)
nl := int64(0)
r = bufio.NewReader(r)
var (
n int
err error
)
for {
n, err = io.ReadFull(r, line)
if err != nil && errors.Is(err, io.EOF) && errors.Is(err, io.ErrUnexpectedEOF) {
return fmt.Errorf("hexxy: %v", err)
}
if dumpType == dumpPlain && n != 0 {
for i := 0; i < n; i++ {
hexEncode(char, line[i:i+1], caps)
w.Write(char)
c++
}
continue
}
if n == 0 {
if dumpType == dumpPlain {
w.Write(newLine)
}
if dumpType == dumpCformat {
doCEnd = true
} else {
return nil
}
}
if opts.Len != -1 {
if totalOcts == opts.Len {
break
}
totalOcts += opts.Len
}
if opts.Autoskip && empty(&line) {
if nulLine == 1 {
w.Write(asterisk)
w.Write(newLine)
}
nulLine++
if nulLine > 1 {
lineOffset++
continue
}
}
// hex or binary formats only
if dumpType <= dumpBinary {
// line offset
hexOffset = strconv.AppendInt(hexOffset[0:0], lineOffset, 16)
w.Write(zeroHeader[0:(6 - len(hexOffset))])
w.Write(hexOffset)
w.Write(zeroHeader[6:])
lineOffset++
} else if doCheader {
w.Write(varDeclChar)
w.Write(newLine)
doCheader = false
}
if dumpType == dumpBinary {
// binary values
for i, k := 0, octs; i < n; i, k = i+1, k+octs {
binaryEncode(char, line[i:i+1])
w.Write(char)
c++
if k == octs*groupSize {
k = 0
w.Write(space)
}
}
} else if dumpType == dumpCformat {
if !doCEnd {
w.Write(doubleSpace)
}
for i := 0; i < n; i++ {
cfmtEncode(char, line[i:i+1], caps)
w.Write(char)
c++
// no space at EOL
if i != n-1 {
w.Write(commaSpace)
} else if n == cols {
w.Write(comma)
}
}
} else {
// hex values -- default
for i, k := 0, octs; i < n; i, k = i+1, k+octs {
hexEncode(char, line[i:i+1], caps)
w.Write(char)
c++
if k == octs*groupSize {
k = 0
w.Write(space)
}
}
}
if doCEnd {
w.Write(varDeclInt)
w.Write([]byte(strconv.FormatInt(c, 10)))
w.Write(semiColonNl)
return nil
}
if n < len(line) && dumpType <= dumpBinary {
for i := n * octs; i < len(line)*octs; i++ {
w.Write(space)
if i%octs == 1 {
w.Write(space)
}
}
}
if dumpType != dumpCformat {
w.Write(space)
}
if dumpType <= dumpBinary {
// character values
b := line[:n]
// |hello,.world!|
if opts.Bars {
w.Write(bar)
}
var v byte
for i := 0; i < len(b); i++ {
v = b[i]
if v > 0x1f && v < 0x7f {
w.Write(line[i : i+1])
} else {
w.Write(dot)
}
}
if opts.Bars {
w.Write(bar)
}
}
w.Write(newLine)
nl++
} }
return nil return nil
} }
@ -264,10 +414,6 @@ func getOffsetFormat() error {
func Hexxy(args []string) error { func Hexxy(args []string) error {
color := &Color{} color := &Color{}
if opts.Reverse {
return plain2Binary(os.Stdin)
}
if opts.NoColor { if opts.NoColor {
color.disable = true color.disable = true
} }
@ -276,37 +422,83 @@ func Hexxy(args []string) error {
color.Compute() color.Compute()
} }
if len(args) < 1 && stdinOpen() { var infile, outfile *os.File
if opts.Plain { var err error
return HexdumpPlain(os.Stdin)
} else {
return Hexdump(os.Stdin, color)
}
}
for _, f := range args { if len(args) < 1 && inputIsPipe() {
file, err := os.Open(f) infile = os.Stdin
} else {
infile, err = os.Open(args[0])
if err != nil { if err != nil {
return err return fmt.Errorf("hexxy: %v", err.Error())
} }
defer file.Close() }
defer infile.Close()
if opts.Plain { if opts.Seek != -1 {
if err := HexdumpPlain(file); err != nil { _, err = infile.Seek(opts.Seek, io.SeekStart)
return err if err != nil {
return fmt.Errorf("hexxy: %v", err.Error())
}
}
if opts.OutputFile != "" {
outfile, err = os.Open(opts.OutputFile)
if err != nil {
return fmt.Errorf("hexxy: %v", err.Error())
} }
} else { } else {
if err := Hexdump(file, color); err != nil { outfile = os.Stdout
return err
} }
defer outfile.Close()
switch {
case opts.Binary:
dumpType = dumpBinary
case opts.CInclude:
dumpType = dumpCformat
case opts.Plain:
dumpType = dumpPlain
default:
dumpType = dumpHex
} }
out := bufio.NewWriter(outfile)
defer out.Flush()
if opts.Reverse {
}
if err := XXD(infile, out, infile.Name()); err != nil {
return fmt.Errorf("hexxy: %v", err.Error())
} }
return nil return nil
} }
// usage_msg is the extra help text emitted by usage.
const usage_msg = `
hexxy is a command line hex dumping tool
Examples:
hexxy [OPTIONS] input-file
`

// usage writes the extra usage examples to standard error. Write errors are
// ignored.
func usage() {
	os.Stderr.WriteString(usage_msg)
}
// init seeds the numeric options with -1 before the flags are parsed. The
// rest of the program treats -1 as "not set by the user" for Seek, Columns,
// GroupSize and Len.
func init() {
	opts.Seek, opts.Len = -1, -1
	opts.Columns, opts.GroupSize = -1, -1
}
func main() { func main() {
args, err := flags.Parse(&opts) parser := flags.NewParser(&opts, flags.Default)
args, err := parser.Parse()
if flags.WroteHelp(err) { if flags.WroteHelp(err) {
os.Exit(0) os.Exit(0)
} }
@ -314,28 +506,14 @@ func main() {
log.Fatal(err) log.Fatal(err)
} }
if opts.Verbose { if !inputIsPipe() && len(args) == 0 {
Debug = log.Printf parser.WriteHelp(os.Stderr)
}
if opts.Reverse {
// f, err := os.Open(args[0])
// if err != nil {
// panic(err)
// }
// defer f.Close()
err = plain2Binary(os.Stdin)
if err != nil {
log.Fatal(err)
}
os.Exit(0) os.Exit(0)
} }
err = getOffsetFormat() if opts.Verbose {
if err != nil { Debug = log.Printf
log.Fatal(err)
} }
if err := Hexxy(args); err != nil { if err := Hexxy(args); err != nil {
log.Fatal(err) log.Fatal(err)
} }