blob e3a3e929 (12463B) - Raw
//! To get started, run this tool with no args and read the help message.
//!
//! The build system of Linux requires specifying a single target
//! architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
//! every target. So the process to create libc headers that Zig ships is to use
//! this tool.
//!
//! First, use the Linux build systems to create installations of all the
//! targets in the `linux_targets` variable.
//!
//! Next, run this tool to create a new directory which puts .h files into
//! <arch> subdirectories, with `any-linux-any` being files that apply to
//! all architectures.
//!
//! You'll then have to manually update the Zig source repo with these new files.

const std = @import("std");
const Arch = std.Target.Cpu.Arch;
const Abi = std.Target.Abi;
const assert = std.debug.assert;
const Blake3 = std.crypto.hash.Blake3;

/// One Linux header installation to look for under each search path.
const LibCTarget = struct {
    /// Directory name expected directly below a search path.
    name: []const u8,
    /// Architecture (family) the headers found there are attributed to.
    arch: MultiArch,
};

/// Either a named architecture family or one specific `std.Target.Cpu.Arch`.
const MultiArch = union(enum) {
    arm,
    arm64,
    loongarch,
    mips,
    powerpc,
    riscv,
    sparc,
    x86,
    specific: Arch,

    /// Returns true when both values carry the same tag and, for `.specific`,
    /// the same `Arch` payload.
    fn eql(a: MultiArch, b: MultiArch) bool {
        if (@intFromEnum(a) != @intFromEnum(b))
            return false;
        if (a != .specific)
            return true;
        return a.specific == b.specific;
    }
};

/// Every target whose `<search-path>/<name>/include` tree is scanned by `main`.
const linux_targets = [_]LibCTarget{
    .{ .name = "arc", .arch = .{ .specific = .arc } },
    .{ .name = "arm", .arch = .arm },
    .{ .name = "arm64", .arch = .{ .specific = .aarch64 } },
    .{ .name = "csky", .arch = .{ .specific = .csky } },
    .{ .name = "hexagon", .arch = .{ .specific = .hexagon } },
    .{ .name = "m68k", .arch = .{ .specific = .m68k } },
    .{ .name = "loongarch", .arch = .loongarch },
    .{ .name = "mips", .arch = .mips },
    .{ .name = "powerpc", .arch = .powerpc },
    .{ .name = "riscv", .arch = .riscv },
    .{ .name = "s390", .arch = .{ .specific = .s390x } },
    .{ .name = "sparc", .arch = .{ .specific = .sparc } },
    .{ .name = "x86", .arch = .x86 },
    .{ .name = "xtensa", .arch = .{ .specific = .xtensa } },
};

/// Key type of `TargetToHash`: the architecture an output header belongs to.
const DestTarget = struct {
    arch: MultiArch,

    /// Hash/equality context handed to `std.ArrayHashMap`.
    const HashContext = struct {
        /// Hashes the `arch` field with Wyhash and keeps the low 32 bits.
        pub fn hash(self: @This(), a: DestTarget) u32 {
            _ = self;
            var hasher = std.hash.Wyhash.init(0);
            std.hash.autoHash(&hasher, a.arch);
            // The destination type comes from the function's return type.
            return @truncate(hasher.final());
        }

        /// Two keys are equal when their architectures are equal; the index
        /// argument is ignored.
        pub fn eql(self: @This(), a: DestTarget, b: DestTarget, b_index: usize) bool {
            _ = self;
            _ = b_index;
            return a.arch.eql(b.arch);
        }
    };
};

/// One distinct (relative path, trimmed file contents) pair seen during the scan.
const Contents = struct {
    /// File contents with leading/trailing whitespace trimmed.
    bytes: []const u8,
    /// How many scanned files produced this same hash.
    hit_count: usize,
    /// Blake3 digest over the relative path followed by `bytes`.
    hash: []const u8,
    /// Set once these contents have been written to the generic directory.
    is_generic: bool,

    /// Ascending-by-`hit_count` ordering for `std.mem.sort`.
    fn hitCountLessThan(context: void, lhs: *const Contents, rhs: *const Contents) bool {
        _ = context;
        return lhs.hit_count < rhs.hit_count;
    }
};

/// digest -> contents
const HashToContents = std.StringHashMap(Contents);
/// destination target -> digest of the file that target has at some path
const TargetToHash = std.ArrayHashMap(DestTarget, []const u8, DestTarget.HashContext, true);
/// relative header path -> per-target digests
const PathTable = std.StringHashMap(*TargetToHash);

/// Scans every `<search-path>/<target>/include` tree, deduplicates headers that
/// are identical across targets, and writes the result below the `--out`
/// directory: the most common variant of each path goes to `any-linux-any`
/// (when at least two targets share it), every other variant goes to
/// `<arch>-linux-any`.
///
/// All allocations come from one arena that lives until the process exits.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const arena = arena_state.allocator();
    const args = try std.process.argsAlloc(arena);
    var search_paths = std.ArrayList([]const u8).init(arena);
    var opt_out_dir: ?[]const u8 = null;

    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        if (std.mem.eql(u8, args[arg_i], "--help"))
            usageAndExit(args[0]);
        // Every remaining option takes exactly one value.
        if (arg_i + 1 >= args.len) {
            std.debug.print("expected argument after '{s}'\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }

        if (std.mem.eql(u8, args[arg_i], "--search-path")) {
            try search_paths.append(args[arg_i + 1]);
        } else if (std.mem.eql(u8, args[arg_i], "--out")) {
            // This is user input, so report it instead of asserting.
            if (opt_out_dir != null) {
                std.debug.print("--out may only be specified once\n", .{});
                usageAndExit(args[0]);
            }
            opt_out_dir = args[arg_i + 1];
        } else {
            std.debug.print("unrecognized argument: {s}\n", .{args[arg_i]});
            usageAndExit(args[0]);
        }

        // Skip the value that was just consumed.
        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);
    const generic_name = "any-linux-any";

    var path_table = PathTable.init(arena);
    var hash_to_contents = HashToContents.init(arena);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    for (linux_targets) |linux_target| {
        const dest_target = DestTarget{
            .arch = linux_target.arch,
        };
        // The first search path that contains this target wins; the `else`
        // branch runs only when no search path had it.
        search: for (search_paths.items) |search_path| {
            const target_include_dir = try std.fs.path.join(arena, &.{
                search_path, linux_target.name, "include",
            });
            // Explicit stack instead of recursion for the directory walk.
            var dir_stack = std.ArrayList([]const u8).init(arena);
            try dir_stack.append(target_include_dir);

            while (dir_stack.popOrNull()) |full_dir_name| {
                var iterable_dir = std.fs.cwd().openIterableDir(full_dir_name, .{}) catch |err| switch (err) {
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer iterable_dir.close();

                var dir_it = iterable_dir.iterate();

                while (try dir_it.next()) |entry| {
                    const full_path = try std.fs.path.join(arena, &[_][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        .directory => try dir_stack.append(full_path),
                        .file => {
                            const rel_path = try std.fs.path.relative(arena, target_include_dir, full_path);
                            const max_size = 2 * 1024 * 1024 * 1024;
                            const raw_bytes = try std.fs.cwd().readFileAlloc(arena, full_path, max_size);
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;

                            // The digest covers the relative path as well, so
                            // identical contents at different paths never
                            // share an entry.
                            const hash = try arena.alloc(u8, 32);
                            var hasher = Blake3.init(.{});
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);

                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.value_ptr.hit_count += 1;
                                std.debug.print("duplicate: {s} {s} ({:2})\n", .{
                                    linux_target.name,
                                    rel_path,
                                    std.fmt.fmtIntSizeDec(raw_bytes.len),
                                });
                            } else {
                                gop.value_ptr.* = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }

                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.value_ptr.* else blk: {
                                const ptr = try arena.create(TargetToHash);
                                ptr.* = TargetToHash.init(arena);
                                path_gop.value_ptr.* = ptr;
                                break :blk ptr;
                            };
                            try target_to_hash.putNoClobber(dest_target, hash);
                        },
                        else => std.debug.print("warning: weird file: {s}\n", .{full_path}),
                    }
                }
            }
            break;
        } else {
            std.debug.print("warning: libc target not found: {s}\n", .{linux_target.name});
        }
    }
    std.debug.print("summary: {:2} could be reduced to {:2}\n", .{
        std.fmt.fmtIntSizeDec(total_bytes),
        std.fmt.fmtIntSizeDec(total_bytes - max_bytes_saved),
    });
    try std.fs.cwd().makePath(out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        var contents_list = std.ArrayList(*Contents).init(arena);
        {
            var hash_it = path_kv.value_ptr.*.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = hash_to_contents.getPtr(hash_kv.value_ptr.*).?;
                try contents_list.append(contents);
            }
        }
        // Ascending sort, so popping yields the highest hit_count first.
        std.mem.sort(*Contents, contents_list.items, {}, Contents.hitCountLessThan);
        const best_contents = contents_list.popOrNull().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, generic_name, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(full_path, best_contents.bytes);
            best_contents.is_generic = true;
            // Any other variant shared by several targets could also have been
            // deduplicated; report it.
            while (contents_list.popOrNull()) |contender| {
                if (contender.hit_count > 1) {
                    const this_missed_bytes = contender.hit_count * contender.bytes.len;
                    missed_opportunity_bytes += this_missed_bytes;
                    std.debug.print("Missed opportunity ({:2}): {s}\n", .{
                        std.fmt.fmtIntSizeDec(this_missed_bytes),
                        path_kv.key_ptr.*,
                    });
                } else break;
            }
        }
        var hash_it = path_kv.value_ptr.*.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = hash_to_contents.get(hash_kv.value_ptr.*).?;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key_ptr.*;
            const arch_name = switch (dest_target.arch) {
                .specific => |a| @tagName(a),
                else => @tagName(dest_target.arch),
            };
            const out_subpath = try std.fmt.allocPrint(arena, "{s}-linux-any", .{arch_name});
            const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, out_subpath, path_kv.key_ptr.* });
            try std.fs.cwd().makePath(std.fs.path.dirname(full_path).?);
            try std.fs.cwd().writeFile(full_path, contents.bytes);
        }
    }

    // Headers that are removed from the generated tree again.
    // NOTE(review): presumably these clash with lowercase-named siblings on
    // case-insensitive file systems; confirm before changing the list.
    const bad_files = [_][]const u8{
        "any-linux-any/linux/netfilter/xt_CONNMARK.h",
        "any-linux-any/linux/netfilter/xt_DSCP.h",
        "any-linux-any/linux/netfilter/xt_MARK.h",
        "any-linux-any/linux/netfilter/xt_RATEEST.h",
        "any-linux-any/linux/netfilter/xt_TCPMSS.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_ECN.h",
        "any-linux-any/linux/netfilter_ipv4/ipt_TTL.h",
        "any-linux-any/linux/netfilter_ipv6/ip6t_HL.h",
    };
    for (bad_files) |bad_file| {
        const full_path = try std.fs.path.join(arena, &[_][]const u8{ out_dir, bad_file });
        // A file that was never generated (e.g. only a subset of targets was
        // supplied) must not fail the run after all output has been written.
        std.fs.cwd().deleteFile(full_path) catch |err| switch (err) {
            error.FileNotFound => std.debug.print("warning: file to delete not found: {s}\n", .{full_path}),
            else => return err,
        };
    }
}

/// Prints the command-line help to stderr and exits with status 1.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.print("Usage: {s} [--search-path <dir>] --out <dir>\n", .{arg0});
    std.debug.print("--search-path can be used any number of times.\n", .{});
    std.debug.print("    each search path should contain <target>/include directories, e.g. x86/include\n", .{});
    std.debug.print("--out is a dir that will be created, and populated with the results\n", .{});
    std.process.exit(1);
}