zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob e3a3e929 (12463B) - Raw


      1 //! To get started, run this tool with no args and read the help message.
      2 //!
      3 //! The build system of Linux requires specifying a single target
      4 //! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
      5 //! every target. So the process to create libc headers that Zig ships is to use
      6 //! this tool.
      7 //!
      8 //! First, use the Linux build systems to create installations of all the
      9 //! targets in the `linux_targets` variable.
     10 //!
     11 //! Next, run this tool to create a new directory which puts .h files into
     12 //! `<arch>-linux-any` subdirectories, with `any-linux-any` holding the
     13 //! version of each file that is shared by the most architectures.
     14 //!
     15 //! You'll then have to manually update Zig source repo with these new files.
     16 
     17 const std = @import("std");
     18 const Arch = std.Target.Cpu.Arch;
     19 const Abi = std.Target.Abi;
     20 const assert = std.debug.assert;
     21 const Blake3 = std.crypto.hash.Blake3;
     22 
     23 const LibCTarget = struct {
     24     name: []const u8,
     25     arch: MultiArch,
     26 };
     27 
     28 const MultiArch = union(enum) {
     29     arm,
     30     arm64,
     31     loongarch,
     32     mips,
     33     powerpc,
     34     riscv,
     35     sparc,
     36     x86,
     37     specific: Arch,
     38 
     39     fn eql(a: MultiArch, b: MultiArch) bool {
     40         if (@intFromEnum(a) != @intFromEnum(b))
     41             return false;
     42         if (a != .specific)
     43             return true;
     44         return a.specific == b.specific;
     45     }
     46 };
     47 
     48 const linux_targets = [_]LibCTarget{
     49     LibCTarget{
     50         .name = "arc",
     51         .arch = MultiArch{ .specific = Arch.arc },
     52     },
     53     LibCTarget{
     54         .name = "arm",
     55         .arch = .arm,
     56     },
     57     LibCTarget{
     58         .name = "arm64",
     59         .arch = .{ .specific = .aarch64 },
     60     },
     61     LibCTarget{
     62         .name = "csky",
     63         .arch = .{ .specific = .csky },
     64     },
     65     LibCTarget{
     66         .name = "hexagon",
     67         .arch = .{ .specific = .hexagon },
     68     },
     69     LibCTarget{
     70         .name = "m68k",
     71         .arch = .{ .specific = .m68k },
     72     },
     73     LibCTarget{
     74         .name = "loongarch",
     75         .arch = .loongarch,
     76     },
     77     LibCTarget{
     78         .name = "mips",
     79         .arch = .mips,
     80     },
     81     LibCTarget{
     82         .name = "powerpc",
     83         .arch = .powerpc,
     84     },
     85     LibCTarget{
     86         .name = "riscv",
     87         .arch = .riscv,
     88     },
     89     LibCTarget{
     90         .name = "s390",
     91         .arch = .{ .specific = .s390x },
     92     },
     93     LibCTarget{
     94         .name = "sparc",
     95         .arch = .{ .specific = .sparc },
     96     },
     97     LibCTarget{
     98         .name = "x86",
     99         .arch = .x86,
    100     },
    101     LibCTarget{
    102         .name = "xtensa",
    103         .arch = .{ .specific = .xtensa },
    104     },
    105 };
    106 
    107 const DestTarget = struct {
    108     arch: MultiArch,
    109 
    110     const HashContext = struct {
    111         pub fn hash(self: @This(), a: DestTarget) u32 {
    112             _ = self;
    113             var hasher = std.hash.Wyhash.init(0);
    114             std.hash.autoHash(&hasher, a.arch);
    115             return @truncate(u32, hasher.final());
    116         }
    117 
    118         pub fn eql(self: @This(), a: DestTarget, b: DestTarget, b_index: usize) bool {
    119             _ = self;
    120             _ = b_index;
    121             return a.arch.eql(b.arch);
    122         }
    123     };
    124 };
    125 
    126 const Contents = struct {
    127     bytes: []const u8,
    128     hit_count: usize,
    129     hash: []const u8,
    130     is_generic: bool,
    131 
    132     fn hitCountLessThan(context: void, lhs: *const Contents, rhs: *const Contents) bool {
    133         _ = context;
    134         return lhs.hit_count < rhs.hit_count;
    135     }
    136 };
    137 
    138 const HashToContents = std.StringHashMap(Contents);
    139 const TargetToHash = std.ArrayHashMap(DestTarget, []const u8, DestTarget.HashContext, true);
    140 const PathTable = std.StringHashMap(*TargetToHash);
    141 
    142 pub fn main() !void {
    143     var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    144     const arena = arena_state.allocator();
    145     const args = try std.process.argsAlloc(arena);
    146     var search_paths = std.ArrayList([]const u8).init(arena);
    147     var opt_out_dir: ?[]const u8 = null;
    148 
    149     var arg_i: usize = 1;
    150     while (arg_i < args.len) : (arg_i += 1) {
    151         if (std.mem.eql(u8, args[arg_i], "--help"))
    152             usageAndExit(args[0]);
    153         if (arg_i + 1 >= args.len) {
    154             std.debug.print("expected argument after '{s}'\n", .{args[arg_i]});
    155             usageAndExit(args[0]);
    156         }
    157 
    158         if (std.mem.eql(u8, args[arg_i], "--search-path")) {
    159             try search_paths.append(args[arg_i + 1]);
    160         } else if (std.mem.eql(u8, args[arg_i], "--out")) {
    161             assert(opt_out_dir == null);
    162             opt_out_dir = args[arg_i + 1];
    163         } else {
    164             std.debug.print("unrecognized argument: {s}\n", .{args[arg_i]});
    165             usageAndExit(args[0]);
    166         }
    167 
    168         arg_i += 1;
    169     }
    170 
    171     const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    172     const generic_name = "any-linux-any";
    173 
    174     var path_table = PathTable.init(arena);
    175     var hash_to_contents = HashToContents.init(arena);
    176     var max_bytes_saved: usize = 0;
    177     var total_bytes: usize = 0;
    178 
    179     var hasher = Blake3.init(.{});
    180 
    181     for (linux_targets) |linux_target| {
    182         const dest_target = DestTarget{
    183             .arch = linux_target.arch,
    184         };
    185         search: for (search_paths.items) |search_path| {
    186             const target_include_dir = try std.fs.path.join(arena, &.{
    187                 search_path, linux_target.name, "include",
    188             });
    189             var dir_stack = std.ArrayList([]const u8).init(arena);
    190             try dir_stack.append(target_include_dir);
    191 
    192             while (dir_stack.popOrNull()) |full_dir_name| {
    193                 var iterable_dir = std.fs.cwd().openIterableDir(full_dir_name, .{}) catch |err| switch (err) {
    194                     error.FileNotFound => continue :search,
    195                     error.AccessDenied => continue :search,
    196                     else => return err,
    197                 };
    198                 defer iterable_dir.close();
    199 
    200                 var dir_it = iterable_dir.iterate();
    201 
    202                 while (try dir_it.next()) |entry| {
    203                     const full_path = try std.fs.path.join(arena, &[_][]const u8{ full_dir_name, entry.name });
    204                     switch (entry.kind) {
    205                         .directory => try dir_stack.append(full_path),
    206                         .file => {
    207                             const rel_path = try std.fs.path.relative(arena, target_include_dir, full_path);
    208                             const max_size = 2 * 1024 * 1024 * 1024;
    209                             const raw_bytes = try std.fs.cwd().readFileAlloc(arena, full_path, max_size);
    210                             const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
    211                             total_bytes += raw_bytes.len;
    212                             const hash = try arena.alloc(u8, 32);
    213                             hasher = Blake3.init(.{});
    214                             hasher.update(rel_path);
    215                             hasher.update(trimmed);
    216                             hasher.final(hash);
    217                             const gop = try hash_to_contents.getOrPut(hash);
    218                             if (gop.found_existing) {
    219                                 max_bytes_saved += raw_bytes.len;
    220                                 gop.value_ptr.hit_count += 1;
    221                                 std.debug.print("duplicate: {s} {s} ({:2})\n", .{
    222                                     linux_target.name,
    223                                     rel_path,
    224                                     std.fmt.fmtIntSizeDec(raw_bytes.len),
    225                                 });
    226                             } else {
    227                                 gop.value_ptr.* = Contents{
    228                                     .bytes = trimmed,
    229                                     .hit_count = 1,
    230                                     .hash = hash,
    231                                     .is_generic = false,
    232                                 };
    233                             }
    234                             const path_gop = try path_table.getOrPut(rel_path);
    235                             const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
    236                                 const ptr = try arena.create(TargetToHash);
    237                                 ptr.* = TargetToHash.init(arena);
    238                                 path_gop.value_ptr.* = ptr;
    239                                 break :blk ptr;
    240                             };
    241                             try target_to_hash.putNoClobber(dest_target, hash);
    242                         },
    243                         else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
    244                     }
    245                 }
    246             }
    247             break;
    248         } else {
    249             std.debug.print("warning: libc target not found: {s}\n", .{linux_target.name});
    250         }
    251     }
    252     std.debug.print("summary: {:2} could be reduced to {:2}\n", .{
    253         std.fmt.fmtIntSizeDec(total_bytes),
    254         std.fmt.fmtIntSizeDec(total_bytes - max_bytes_saved),
    255     });
    256     try std.fs.cwd().makePath(out_dir);
    257 
    258     var missed_opportunity_bytes: usize = 0;
    259     // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    260     // the hash with the highest hit_count gets to be the "generic" one. everybody else
    261     // gets their header in a separate arch directory.
    262     var path_it = path_table.iterator();
    263     while (path_it.next()) |path_kv| {
    264         var contents_list = std.ArrayList(*Contents).init(arena);
    265         {
    266             var hash_it = path_kv.value_ptr.*.iterator();
    267             while (hash_it.next()) |hash_kv| {
    268                 const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
    269                 try contents_list.append(contents);
    270             }
    271         }
    272         std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
    273         const best_contents = contents_list.popOrNull().?;
    274         if (best_contents.hit_count > 1) {
    275             // worth it to make it generic
    276             const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
    277             try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
    278             try std.fs.cwd().writeFile(full_path, best_contents.bytes);
    279             best_contents.is_generic = true;
    280             while (contents_list.popOrNull()) |contender| {
    281                 if (contender.hit_count > 1) {
    282                     const this_missed_bytes = contender.hit_count * contender.bytes.len;
    283                     missed_opportunity_bytes += this_missed_bytes;
    284                     std.debug.print("Missed opportunity ({:2}): {s}\n", .{
    285                         std.fmt.fmtIntSizeDec(this_missed_bytes),
    286                         path_kv.key_ptr.*,
    287                     });
    288                 } else break;
    289             }
    290         }
    291         var hash_it = path_kv.value_ptr.*.iterator();
    292         while (hash_it.next()) |hash_kv| {
    293             const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
    294             if (contents.is_generic) continue;
    295 
    296             const dest_target = hash_kv.key_ptr.*;
    297             const arch_name = switch (dest_target.arch) {
    298                 .specific => |a| @tagName(a),
    299                 else => @tagName(dest_target.arch),
    300             };
    301             const out_subpath = try std.fmt.allocPrint(arena, "{s}-linux-any", .{arch_name});
    302             const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, out_subpath, path_kv.key_ptr.* });
    303             try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
    304             try std.fs.cwd().writeFile(full_path, contents.bytes);
    305         }
    306     }
    307 
    308     const bad_files = [_][]const u8{
    309         "any-linux-any/linux/netfilter/xt_CONNMARK.h",
    310         "any-linux-any/linux/netfilter/xt_DSCP.h",
    311         "any-linux-any/linux/netfilter/xt_MARK.h",
    312         "any-linux-any/linux/netfilter/xt_RATEEST.h",
    313         "any-linux-any/linux/netfilter/xt_TCPMSS.h",
    314         "any-linux-any/linux/netfilter_ipv4/ipt_ECN.h",
    315         "any-linux-any/linux/netfilter_ipv4/ipt_TTL.h",
    316         "any-linux-any/linux/netfilter_ipv6/ip6t_HL.h",
    317     };
    318     for (bad_files) |bad_file| {
    319         const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, bad_file });
    320         try std.fs.cwd().deleteFile(full_path);
    321     }
    322 }
    323 
    324 fn usageAndExit(arg0: []const u8) noreturn {
    325     std.debug.print("Usage: {s} [--search-path <dir>] --out <dir> --abi <name>\n", .{arg0});
    326     std.debug.print("--search-path can be used any number of times.\n", .{});
    327     std.debug.print("    subdirectories of search paths look like, e.g. x86_64-linux-gnu\n", .{});
    328     std.debug.print("--out is a dir that will be created, and populated with the results\n", .{});
    329     std.process.exit(1);
    330 }