From fee5350430bd40334693fa42eea6d8acd5658e14 Mon Sep 17 00:00:00 2001 From: mizi Date: Thu, 1 Feb 2024 12:24:06 -0900 Subject: [PATCH] [hexxy] DEV branch - extremely speed up hex encoding --- encode.go | 176 ++++++++++++++++ hexxy.go | 608 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 569 insertions(+), 215 deletions(-) create mode 100644 encode.go diff --git a/encode.go b/encode.go new file mode 100644 index 0000000..20fa5ac --- /dev/null +++ b/encode.go @@ -0,0 +1,176 @@ +package main + +import () + +// returns -1 on success +// returns k > -1 if space found where k is index of space byte +func binaryDecode(dst, src []byte) int { + var v, d byte + + for i := 0; i < len(src); i++ { + v, d = src[i], d<<1 + if isSpace(v) { // found a space, so between groups + if i == 0 { + return 1 + } + return i + } + if v == '1' { + d ^= 1 + } else if v != '0' { + return i // will catch issues like "000000: " + } + } + + dst[0] = d + return -1 +} + +func cfmtEncode(dst, src []byte, hextable string) { + b := src[0] + dst[3] = hextable[b&0x0f] + dst[2] = hextable[b>>4] + dst[1] = 'x' + dst[0] = '0' +} + +// copied from encoding/hex package in order to add support for uppercase hex +func hexEncode(dst, src []byte, hextable string) { + b := src[0] + dst[1] = hextable[b&0x0f] + dst[0] = hextable[b>>4] +} + +// copied from encoding/hex package +// returns -1 on bad byte or space (\t \s \n) +// returns -2 on two consecutive spaces +// returns 0 on success +func hexDecode(dst, src []byte) int { + _, _ = src[2], dst[0] + + if isSpace(src[0]) { + if isSpace(src[1]) { + return -2 + } + return -1 + } + + if isPrefix(src[0:2]) { + src = src[2:] + } + + for i := 0; i < len(src)/2; i++ { + a, ok := fromHexChar(src[i*2]) + if !ok { + return -1 + } + b, ok := fromHexChar(src[i*2+1]) + if !ok { + return -1 + } + + dst[0] = (a << 4) | b + } + return 0 +} + +// copied from encoding/hex package +func fromHexChar(c byte) (byte, bool) { + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + + return 0, false +} + +// check if entire line is full of empty []byte{0} bytes (nul in C) +func empty(b *[]byte) bool { + for i := 0; i < len(*b); i++ { + if (*b)[i] != 0 { + return false + } + } + return true +} + +// quick binary tree check +// probably horribly written idk it's late at night +func parseSpecifier(b string) float64 { + lb := len(b) + if lb == 0 { + return 0 + } + + var b0, b1 byte + if lb < 2 { + b0 = b[0] + b1 = '0' + } else { + b1 = b[1] + b0 = b[0] + } + + if b1 != '0' { + if b1 == 'b' { // bits, so convert bytes to bits for os.Seek() + if b0 == 'k' || b0 == 'K' { + return 0.0078125 + } + + if b0 == 'm' || b0 == 'M' { + return 7.62939453125e-06 + } + + if b0 == 'g' || b0 == 'G' { + return 7.45058059692383e-09 + } + } + + if b1 == 'B' { // kilo/mega/giga- bytes are assumed + if b0 == 'k' || b0 == 'K' { + return 1024 + } + + if b0 == 'm' || b0 == 'M' { + return 1048576 + } + + if b0 == 'g' || b0 == 'G' { + return 1073741824 + } + } + } else { // kilo/mega/giga- bytes are assumed for single b, k, m, g + if b0 == 'k' || b0 == 'K' { + return 1024 + } + + if b0 == 'm' || b0 == 'M' { + return 1048576 + } + + if b0 == 'g' || b0 == 'G' { + return 1073741824 + } + } + + return 1 // assumes bytes as fallback +} + +// is byte a space? (\t, \n, \s) +func isSpace(b byte) bool { + switch b { + case 32, 12, 9: + return true + default: + return false + } +} + +// are the two bytes hex prefixes? (0x or 0X) +func isPrefix(b []byte) bool { + return b[0] == '0' && (b[1] == 'x' || b[1] == 'X') +} diff --git a/hexxy.go b/hexxy.go index 28b63c0..06e3bab 100644 --- a/hexxy.go +++ b/hexxy.go @@ -2,31 +2,80 @@ package main import ( "bufio" - "encoding/hex" "errors" "fmt" "io" "log" "os" "strconv" - "strings" "github.com/jessevdk/go-flags" ) var opts struct { - NoColor bool `short:"N" long:"no-color" description:"do not print output with color"` OffsetFormat string `short:"t" long:"radix" default:"x" choice:"d" choice:"o" choice:"x" description:"Print offset in [d|o|x] format"` + Binary bool `short:"b" long:"binary" description:"output in binary format (01010101) incompatible with plain, reverse and include"` Reverse bool `short:"r" long:"reverse" description:"re-assemble hexdump output back into binary"` + Autoskip bool `short:"a" long:"autoskip" description:"toggle autoskip (replaces blank lines with a *)"` + Bars bool `short:"B" long:"bars" description:"delimiter bars in ascii table"` + Seek int64 `short:"s" long:"seek" description:"start at bytes"` + Len int64 `short:"l" long:"len" description:"stop after octets"` + Columns int `short:"c" long:"columns" description:"column count"` + GroupSize int `short:"g" long:"groups" description:"group count"` Plain bool `short:"p" long:"plain" description:"plain output without ascii table and offset row [often used with hexxy -r]"` + Upper bool `short:"u" long:"upper" description:"output hex in UPPERCASE format"` + CInclude bool `short:"i" long:"include" description:"output in C include format"` + OutputFile string `short:"o" long:"output" description:"automatically output to file instead of STDOUT"` + Separator string `long:"separator" default:"|" description:"separator character for the ascii character table"` ForceColor bool `short:"F" long:"force-color" description:"color is automatically disabled if output is a pipe, this option forces color output"` - Separator string `short:"s" long:"separator" default:"|" description:"separator character for the ascii character table"` + NoColor bool `short:"N" long:"no-color" description:"do not print output with color"` Verbose bool `short:"v" long:"verbose" description:"print debugging information and verbose output"` } var Debug = func(string, ...interface{}) {} -var OffsetFormat string -var Separator string + +const ( + dumpHex = iota + dumpBinary + dumpCformat + dumpPlain +) + +const ( + ldigits = "0123456789abcdef" + udigits = "0123456789ABCDEF" +) + +var ( + dumpType int + space = []byte(" ") + doubleSpace = []byte(" ") + dot = []byte(".") + newLine = []byte("\n") + zeroHeader = []byte("0000000: ") + unsignedChar = []byte("unsigned char ") + unsignedInt = []byte("};\nunsigned int ") + lenEquals = []byte("_len = ") + brackets = []byte("[] = {") + asterisk = []byte("*") + commaSpace = []byte(", ") + comma = []byte(",") + semiColonNl = []byte(";\n") + bar = []byte("|") +) + +func binaryEncode(dst, src []byte) { + d := uint(0) + _, _ = src[0], dst[7] + for i := 7; i >= 0; i-- { + if src[0]&(1<= 33 && b <= 126 { - s = clr.Colorize(string(b), b) - } else { - s = clr.Colorize(".", b) - } - - fmt.Fprint(stdout, s) - } +func outputIsPipe() bool { + stat, _ := os.Stdout.Stat() + return stat.Mode()&os.ModeCharDevice != os.ModeCharDevice } -func printOffset(offset uint64) string { - return fmt.Sprintf(OffsetFormat, offset) -} - -func printSeparator(writer io.Writer, newline bool) { - // WHY??? - if newline { - fmt.Fprintln(writer, Separator) - } else { - fmt.Fprint(writer, Separator) - } -} - -func Hexdump(file *os.File, color *Color) error { - stdout := bufio.NewWriter(os.Stdout) - stderr := os.Stderr - ascii := [16]byte{} - defer stdout.Flush() - - var i uint64 = 0 - reader := bufio.NewReaderSize(file, 10*1024*1024) +func HexdumpPlain(file *os.File) error { + var i uint64 + reader := bufio.NewReaderSize(file, 10*1024) for { b, err := reader.ReadByte() @@ -109,154 +128,285 @@ func Hexdump(file *os.File, color *Color) error { break } if err != nil { - fmt.Fprintf(stderr, "Failed to read %v: %v\n", file.Name(), err) - return err + return fmt.Errorf("Failed to read %v: %w\n", file.Name(), err) } - ascii[i%16] = b - - // offset - if i%16 == 0 { - // fmt.Fprintf(stdout, "%08x ", i) - offy := printOffset(i) - fmt.Fprint(stdout, offy) - } - - // byte - fmt.Fprintf(stdout, color.Colorize("%02x", b)+" ", b) - - // extra space every 4 bytes - if (i+1)%4 == 0 { - fmt.Fprint(stdout, " ") - } - - // print ascii row and newline │ | ┆ - if (i+1)%16 == 0 { - // fmt.Fprint(stdout, "│") - printSeparator(stdout, false) - - asciiRow(ascii[:i%16], color, stdout) - - // fmt.Fprintln(stdout, "│") - printSeparator(stdout, true) - - ascii = [16]byte{} // reset + if i%30 == 0 { + println() } + fmt.Printf("%02x", b) i++ } - if i%16 != 0 { - left := int(16 - i%16) - spaces := 3*left + (left-1)/4 + 1 - - fmt.Fprint(stdout, strings.Repeat(" ", spaces)) - printSeparator(stdout, false) - - asciiRow(ascii[:i%16], color, stdout) - printSeparator(stdout, true) - - offy := printOffset(i) - fmt.Fprintln(stdout, offy) - // fmt.Fprintf(stdout, "%08x\n", i) - } - return nil } -func HexdumpPlain(file *os.File) error { - // stdout := bufio.NewWriter(os.Stdout) - // stderr := os.Stderr - // defer stdout.Flush() +// func plain2Binary(file *os.File) error { +// return reverse(os.Stdout, os.Stdin) +// } - src, err := io.ReadAll(file) - if err != nil { - return err +// func getOffsetFormat() error { +// var prefix string +// var suffix string +// var format string + +// switch opts.OffsetFormat { +// case "d": +// format = prefix + "%08d " + suffix +// case "o": +// format = prefix + "%08o " + suffix +// case "x": +// format = prefix + "%08x " + suffix +// default: +// return fmt.Errorf("Offset format must be [d|o|x]") +// } +// return nil +// } + +func XXD(r io.Reader, w io.Writer, filename string) error { + var ( + lineOffset int64 + hexOffset = make([]byte, 6) + groupSize int + cols int + octs int + caps = ldigits + doCheader = true + doCEnd bool + varDeclChar = make([]byte, 14+len(filename)+6) // for "unsigned char NAME_FORMAT[] = {" + varDeclInt = make([]byte, 16+len(filename)+7) // enough room for "unsigned int NAME_FORMAT = " + nulLine int64 + totalOcts int64 + ) + + if dumpType == dumpCformat { + _ = copy(varDeclChar[0:14], unsignedChar[:]) + _ = copy(varDeclInt[0:14], lenEquals[:]) + + for i := 0; i < len(filename); i++ { + if filename[i] != '.' { + varDeclChar[14+i] = filename[i] + varDeclInt[16+i] = filename[i] + } else { + varDeclChar[14+i] = '_' + varDeclInt[16+i] = '_' + } + } + // copy "[] = {" and "_len = " + _ = copy(varDeclChar[14+len(filename):], brackets[:]) + _ = copy(varDeclInt[16+len(filename):], lenEquals[:]) } - dst := make([]byte, hex.EncodedLen(len(src))) - hex.Encode(dst, src) - fmt.Printf("%s\n", dst) + if opts.Upper { + caps = udigits + } - // reader := bufio.NewReaderSize(file, 10*1024*1024) - - // for { - // b, err := reader.ReadByte() - // if errors.Is(err, io.EOF) { - // break - // } - // if err != nil { - // fmt.Fprintf(stderr, "Failed to read %v: %v\n", file.Name(), err) - // return err - // } - // stdout.WriteString(fmt.Sprintf("%02x", string(b))) - // } - return nil -} - -func plain2Binary(file *os.File) error { - - return reverse(os.Stdout, os.Stdin) - // contents, err := io.ReadAll(file) - // if err != nil { - // return err - // } - // fmt.Println(len(contents)) - // fmt.Printf("Binary byte representation: %08b\n", contents) - - // _, err = hex.Decode(contents, dst) - // if err != nil { - // return err - // } - // os.Stdout.Write(dst) - - // dest := make([]byte, hex.EncodedLen(len(contents))) - // hex.Decode(dest, contents) - - // fmt.Printf("%s\n", dest) - - // return nil -} - -func getOffsetFormat() error { - var prefix string - var suffix string - var sep string - - // turn off color if output is a pipe - // idk if I like this though since I often - // use hexxy asdf | head -n 10 but I also want to work on --reverse option - - // stat, _ := os.Stdout.Stat() - // if stat.Mode()&os.ModeCharDevice == 0 && !opts.ForceColor { - // opts.NoColor = true - // } - - if !opts.NoColor { - prefix = GREY - suffix = CLR - sep = "│" + if opts.Columns == -1 { + switch dumpType { + case dumpPlain: + cols = 30 + case dumpCformat: + cols = 12 + case dumpBinary: + cols = 6 + default: + cols = 16 + } } else { - prefix = "" - suffix = "" - sep = "|" + cols = opts.Columns } - if opts.Separator != "" { - sep = opts.Separator - } - - Separator = prefix + sep + suffix - - switch opts.OffsetFormat { - case "d": - OffsetFormat = prefix + "%08d " + suffix - case "o": - OffsetFormat = prefix + "%08o " + suffix - case "x": - OffsetFormat = prefix + "%08x " + suffix + switch dumpType { + case dumpBinary: + octs = 8 + groupSize = 1 + case dumpPlain: + octs = 0 + case dumpCformat: + octs = 4 default: - return fmt.Errorf("Offset format must be [d|o|x]") + octs = 2 + groupSize = 2 + } + + if opts.GroupSize != -1 { + groupSize = opts.GroupSize + } + + if opts.Len != -1 { + if opts.Len < int64(cols) { + cols = int(opts.Len) + } + } + + if octs < 1 { + octs = cols + } + + // allocate their size based on the users specs, hence why its declared here + var ( + line = make([]byte, cols) + char = make([]byte, octs) + ) + + c := int64(0) + nl := int64(0) + r = bufio.NewReader(r) + + var ( + n int + err error + ) + + for { + n, err = io.ReadFull(r, line) + if err != nil && errors.Is(err, io.EOF) && errors.Is(err, io.ErrUnexpectedEOF) { + return fmt.Errorf("hexxy: %v", err) + } + + if dumpType == dumpPlain && n != 0 { + for i := 0; i < n; i++ { + hexEncode(char, line[i:i+1], caps) + w.Write(char) + c++ + } + continue + } + + if n == 0 { + if dumpType == dumpPlain { + w.Write(newLine) + } + + if dumpType == dumpCformat { + doCEnd = true + } else { + return nil + } + } + + if opts.Len != -1 { + if totalOcts == opts.Len { + break + } + totalOcts += opts.Len + } + + if opts.Autoskip && empty(&line) { + if nulLine == 1 { + w.Write(asterisk) + w.Write(newLine) + } + + nulLine++ + + if nulLine > 1 { + lineOffset++ + continue + } + } + + // hex or binary formats only + if dumpType <= dumpBinary { + // line offset + hexOffset = strconv.AppendInt(hexOffset[0:0], lineOffset, 16) + w.Write(zeroHeader[0:(6 - len(hexOffset))]) + w.Write(hexOffset) + w.Write(zeroHeader[6:]) + lineOffset++ + } else if doCheader { + w.Write(varDeclChar) + w.Write(newLine) + doCheader = false + } + + if dumpType == dumpBinary { + // binary values + for i, k := 0, octs; i < n; i, k = i+1, k+octs { + binaryEncode(char, line[i:i+1]) + w.Write(char) + c++ + + if k == octs*groupSize { + k = 0 + w.Write(space) + } + } + } else if dumpType == dumpCformat { + if !doCEnd { + w.Write(doubleSpace) + } + for i := 0; i < n; i++ { + cfmtEncode(char, line[i:i+1], caps) + w.Write(char) + c++ + // no space at EOL + if i != n-1 { + w.Write(commaSpace) + } else if n == cols { + w.Write(comma) + } + } + } else { + // hex values -- default + for i, k := 0, octs; i < n; i, k = i+1, k+octs { + hexEncode(char, line[i:i+1], caps) + w.Write(char) + c++ + + if k == octs*groupSize { + k = 0 + w.Write(space) + } + } + } + + if doCEnd { + w.Write(varDeclInt) + w.Write([]byte(strconv.FormatInt(c, 10))) + w.Write(semiColonNl) + return nil + } + + if n < len(line) && dumpType <= dumpBinary { + for i := n * octs; i < len(line)*octs; i++ { + w.Write(space) + + if i%octs == 1 { + w.Write(space) + } + } + } + + if dumpType != dumpCformat { + w.Write(space) + } + + if dumpType <= dumpBinary { + // character values + b := line[:n] + // |hello,.world!| + if opts.Bars { + w.Write(bar) + } + + var v byte + for i := 0; i < len(b); i++ { + v = b[i] + if v > 0x1f && v < 0x7f { + w.Write(line[i : i+1]) + } else { + w.Write(dot) + } + } + + if opts.Bars { + w.Write(bar) + } + } + w.Write(newLine) + nl++ } return nil } @@ -264,10 +414,6 @@ func getOffsetFormat() error { func Hexxy(args []string) error { color := &Color{} - if opts.Reverse { - return plain2Binary(os.Stdin) - } - if opts.NoColor { color.disable = true } @@ -276,37 +422,83 @@ func Hexxy(args []string) error { color.Compute() } - if len(args) < 1 && stdinOpen() { - if opts.Plain { - return HexdumpPlain(os.Stdin) - } else { - return Hexdump(os.Stdin, color) + var infile, outfile *os.File + var err error + + if len(args) < 1 && inputIsPipe() { + infile = os.Stdin + } else { + infile, err = os.Open(args[0]) + if err != nil { + return fmt.Errorf("hexxy: %v", err.Error()) + } + } + defer infile.Close() + + if opts.Seek != -1 { + _, err = infile.Seek(opts.Seek, io.SeekStart) + if err != nil { + return fmt.Errorf("hexxy: %v", err.Error()) } } - for _, f := range args { - file, err := os.Open(f) + if opts.OutputFile != "" { + outfile, err = os.Open(opts.OutputFile) if err != nil { - return err + return fmt.Errorf("hexxy: %v", err.Error()) } - defer file.Close() + } else { + outfile = os.Stdout + } + defer outfile.Close() - if opts.Plain { - if err := HexdumpPlain(file); err != nil { - return err - } - } else { - if err := Hexdump(file, color); err != nil { - return err - } - } + switch { + case opts.Binary: + dumpType = dumpBinary + case opts.CInclude: + dumpType = dumpCformat + case opts.Plain: + dumpType = dumpPlain + default: + dumpType = dumpHex + } + + out := bufio.NewWriter(outfile) + defer out.Flush() + + if opts.Reverse { + + } + + if err := XXD(infile, out, infile.Name()); err != nil { + return fmt.Errorf("hexxy: %v", err.Error()) } return nil } +const usage_msg = ` +hexxy is a command line hex dumping tool + +Examples: + hexxy [OPTIONS] input-file +` + +// extra usage examples +func usage() { + fmt.Fprint(os.Stderr, usage_msg) +} + +func init() { + opts.Seek = -1 // default no-op value + opts.Columns = -1 + opts.GroupSize = -1 + opts.Len = -1 +} + func main() { - args, err := flags.Parse(&opts) + parser := flags.NewParser(&opts, flags.Default) + args, err := parser.Parse() if flags.WroteHelp(err) { os.Exit(0) } @@ -314,28 +506,14 @@ func main() { log.Fatal(err) } - if opts.Verbose { - Debug = log.Printf - } - - if opts.Reverse { - // f, err := os.Open(args[0]) - // if err != nil { - // panic(err) - // } - // defer f.Close() - err = plain2Binary(os.Stdin) - if err != nil { - log.Fatal(err) - } + if !inputIsPipe() && len(args) == 0 { + parser.WriteHelp(os.Stderr) os.Exit(0) } - err = getOffsetFormat() - if err != nil { - log.Fatal(err) + if opts.Verbose { + Debug = log.Printf } - if err := Hexxy(args); err != nil { log.Fatal(err) }