commit 3bc4b2b4a11d83c4d7506c2ac51908b99e6476f1 Author: raf Date: Tue May 21 09:54:42 2024 +0000 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3cef7be --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +zig-cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e776377 --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +BSD 3-Clause License + +Copyright (c) 2024, raf + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..4d20203 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# Zid + +Zid is a Content Identifier (CID) Trie implementation in Zig. + +## Features + +- Trie structure for fast access to identifiers based on their byte values. +- Properly manages memory allocation and deallocation. +- Custom error types for better error reporting and handling. + +## Building + +1. Get the Zig compiler appropriate to your distribution (generally, if you are + sane, `nix-shell -p zig` will do the trick). +2. Clone the project +3. `zig build` or `zig build-exe src/main.zig` (the former is recommended, but + either works) + +## Usage + + + +> [!CAUTION] +> Zid, for the time being, should be considered highly unstable and must be +> avoided in actual projects, unless you know what you are doing. + + + +### Adding Identifiers + +To add an identifier to the trie, use the `add` method. Pass the identifier as a +byte slice (i.e. `[]u8`): + +```zig +try trie.add("your_identifier_here"); +``` + +### Searching for Identifiers + +To search for an identifier in the trie, use the `lookup` method. Again, pass the +identifier as a byte slice: + +```zig +const result = try trie.lookup("identifier_to_search"); +``` + +## FAQ + +[IPFS docs]: https://docs.ipfs.tech/concepts/content-addressing/#what-is-a-cid + +### What the hell is a CID? + +See [IPFS docs]. My interpretation of a CID Trie is a tree-like data structure +used for _efficiently_ storing and retrieving identifiers, with efficiency being +the primary goal. + +### Why doesn't this build? + +It's my first time building a Zig project. Call it a minor friction.
+ +## TODO + +- Support for Multiple Tries +- Batch Operations +- Dynamic `maxCIDLen` (it's currently a compile-time constant) +- Persistence Layer +- Concurrency diff --git a/build.zig b/build.zig new file mode 100644 index 0000000..0442569 --- /dev/null +++ b/build.zig @@ -0,0 +1,91 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const lib = b.addStaticLibrary(.{ + .name = "zid", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the library to be installed into the standard + // location when the user invokes the "install" step (the default step when + // running `zig build`). + b.installArtifact(lib); + + const exe = b.addExecutable(.{ + .name = "zid", + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`).
+ b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const lib_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/root.zig"), + .target = target, + .optimize = optimize, + }); + + const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests); + + const exe_unit_tests = b.addTest(.{ + .root_source_file = b.path("src/main.zig"), + .target = target, + .optimize = optimize, + }); + + const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. 
const std = @import("std");

// src/main.zig: a prefix-free trie of content identifiers (CIDs) plus a
// small demo `main`.

/// Returned by `CIDTrie.add` when the new identifier equals, extends, or is a
/// prefix of an identifier that is already stored.
pub const errCollision = error.Collision;

/// Returned by `CIDTrie.lookup` when no stored identifier matches the input.
pub const errNotFound = error.NotFound;

/// Maximum number of bytes accepted for an identifier.
pub const maxCIDLen = 20;

/// Errors `CIDTrie.add` can return.
pub const AddError = error{ EmptyCID, CIDTooLong, Collision, OutOfMemory };

/// Errors `CIDTrie.lookup` can return.
pub const LookupError = error{ CIDTooLong, NotFound };

/// Allocator used for every child node; `add` creates with it and `deinit`
/// destroys with it, so the two always agree.
const node_allocator = std.heap.page_allocator;

/// A 256-way trie node. The root is owned by the caller (stack or heap);
/// every child node is owned by the trie and released by `deinit`.
///
/// Stored identifiers form a prefix-free set: no stored identifier is a
/// prefix of another one.
pub const CIDTrie = struct {
    /// One optional child per possible byte value.
    leaves: [256]?*CIDTrie = [_]?*CIDTrie{null} ** 256,
    /// True when the path from the root to this node spells a stored identifier.
    end: bool = false,

    /// Frees every child node reachable from `self` and resets `self` to an
    /// empty trie. Does not free `self` itself.
    pub fn deinit(self: *CIDTrie) void {
        for (&self.leaves) |*slot| {
            if (slot.*) |child| {
                child.deinit();
                node_allocator.destroy(child);
                slot.* = null;
            }
        }
        self.end = false;
    }

    /// Stores `cid` in the trie.
    ///
    /// Errors:
    /// - `error.EmptyCID` if `cid` has no bytes.
    /// - `error.CIDTooLong` if `cid` is longer than `maxCIDLen`.
    /// - `error.Collision` if `cid` is already stored, or if it extends or is
    ///   a prefix of a stored identifier.
    /// - `error.OutOfMemory` if a node cannot be allocated; nodes created by
    ///   the failed call are released again.
    pub fn add(self: *CIDTrie, cid: []const u8) AddError!void {
        if (cid.len == 0) return error.EmptyCID;
        if (cid.len > maxCIDLen) return error.CIDTooLong;

        // Slot that received the first node created by this call. Everything
        // else this call creates hangs below it, so clearing this one slot
        // undoes a partial insert.
        var first_created: ?*?*CIDTrie = null;
        errdefer {
            if (first_created) |slot| {
                if (slot.*) |orphan| {
                    orphan.deinit();
                    node_allocator.destroy(orphan);
                }
                slot.* = null;
            }
        }

        var node = self;
        for (cid, 0..) |byte, i| {
            const slot = &node.leaves[byte];
            const child = slot.* orelse blk: {
                const fresh = try node_allocator.create(CIDTrie);
                fresh.* = .{};
                slot.* = fresh;
                if (first_created == null) first_created = slot;
                break :blk fresh;
            };

            // A stored identifier ends here, so `cid` would equal or extend it.
            if (child.end) return error.Collision;

            if (i == cid.len - 1) {
                // `cid` would be a strict prefix of a stored identifier.
                if (child.hasChildren()) return error.Collision;
                child.end = true;
            }
            node = child;
        }
    }

    /// Returns the stored identifier that is a prefix of (or equal to) `data`,
    /// as a sub-slice of `data`.
    ///
    /// Errors:
    /// - `error.CIDTooLong` if `data` is longer than `maxCIDLen`.
    /// - `error.NotFound` if no stored identifier matches, including when
    ///   `data` is empty.
    pub fn lookup(self: *const CIDTrie, data: []const u8) LookupError![]const u8 {
        if (data.len > maxCIDLen) return error.CIDTooLong;

        var node = self;
        for (data, 0..) |byte, i| {
            node = node.leaves[byte] orelse return error.NotFound;
            // Include the byte that led to the terminal node.
            if (node.end) return data[0 .. i + 1];
        }
        return error.NotFound;
    }

    /// Reports whether any child slot of this node is occupied.
    fn hasChildren(self: *const CIDTrie) bool {
        for (self.leaves) |leaf| {
            if (leaf != null) return true;
        }
        return false;
    }
};

/// Demo: stores "hello", then looks up one present and two absent identifiers.
pub fn main() void {
    var trie: CIDTrie = .{};
    defer trie.deinit();

    // add entries
    trie.add("hello") catch |err| {
        std.debug.print("Failed to add 'hello': {s}\n", .{@errorName(err)});
        return;
    };
    std.debug.print("Added 'hello'\n", .{});

    // lookup entries
    if (trie.lookup("hello")) |found| {
        std.debug.print("Found 'hello': {s}\n", .{found});
    } else |err| {
        std.debug.print("Failed to lookup 'hello': {s}\n", .{@errorName(err)});
        return;
    }

    // "world" was never added, so a miss here is the expected outcome and
    // must not abort the remaining checks.
    if (trie.lookup("world")) |found| {
        std.debug.print("Found 'world': {s}\n", .{found});
    } else |err| {
        std.debug.print("Failed to lookup 'world': {s}\n", .{@errorName(err)});
    }

    // attempt to lookup a nonexistent entry
    if (trie.lookup("nonexistent")) |found| {
        std.debug.print("Unexpectedly found 'nonexistent': {s}\n", .{found});
    } else |err| {
        std.debug.print("Expected error for 'nonexistent': {s}\n", .{@errorName(err)});
    }
}