arm: move cpu model table into system/arm.zig
Now we can reuse the table between the CPU model parsers on Linux and Windows, and use a similar parsing structure on Windows as we do on Linux. On Windows, we rely on two entries in the registry per CPU core: `CP 4000` and `Identifier`. Collating the data from the two allows us to recreate most of the `/proc/cpuinfo` data natively on Windows. Additionally, we still allow any CPU feature to be overridden based on the feature flags embedded in `SharedUserData`.
This commit is contained in:
@@ -2252,19 +2252,4 @@ pub const cpu = struct {
|
||||
.v8a,
|
||||
}),
|
||||
};
|
||||
|
||||
pub const microsoft_sq3 = CpuModel{
|
||||
.name = "microsoft_sq3",
|
||||
.llvm_name = "generic",
|
||||
.features = featureSet(&[_]Feature{
|
||||
.aes,
|
||||
.crc,
|
||||
.crypto,
|
||||
.dotprod,
|
||||
.fp_armv8,
|
||||
.lse,
|
||||
.neon,
|
||||
.sha2,
|
||||
}),
|
||||
};
|
||||
};
|
||||
|
||||
134
lib/std/zig/system/arm.zig
Normal file
134
lib/std/zig/system/arm.zig
Normal file
@@ -0,0 +1,134 @@
|
||||
const std = @import("std");

/// Decoded identification fields for one ARM core, in the layout of the
/// MIDR-style data reported by the OS (e.g. `/proc/cpuinfo` on Linux, the
/// per-core registry entries on Windows). All fields default to zero so a
/// partially-populated record can be built up incrementally by a parser.
pub const CoreInfo = struct {
    /// ARM architecture major version number (e.g. 8 for ARMv8).
    architecture: u8 = 0,
    /// Implementer code, e.g. 0x41 = ARM Ltd., 0x51 = Qualcomm.
    implementer: u8 = 0,
    /// Implementer-defined variant (major revision) number.
    variant: u8 = 0,
    /// Implementer-defined primary part number identifying the core.
    part: u16 = 0,
};
|
||||
|
||||
/// Lookup tables mapping (implementer, part, variant) triples — as read from
/// MIDR-style CPU identification data — to the CPU models known to the
/// Zig/LLVM target database. Shared between the Linux `/proc/cpuinfo` parser
/// and the Windows registry parser.
pub const cpu_models = struct {
    // Shorthands to simplify the tables below.
    const A32 = std.Target.arm.cpu;
    const A64 = std.Target.aarch64.cpu;

    // One table entry: a part number plus the 32-bit and/or 64-bit model it
    // maps to. A null model means the core has no model for that mode.
    const E = struct {
        part: u16,
        variant: ?u8 = null, // null if matches any variant
        m32: ?*const std.Target.Cpu.Model = null,
        m64: ?*const std.Target.Cpu.Model = null,
    };

    // implementer = 0x41
    const ARM = [_]E{
        E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null },
        E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null },
        E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null },
        E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null },
        E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null },
        E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null },
        E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null },
        E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null },
        E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null },
        E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null },
        E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null },
        E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null },
        E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null },
        E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null },
        E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null },
        E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null },
        E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null },
        E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null },
        E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null },
        E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null },
        E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null },
        E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null },
        E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null },
        E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
        E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
        E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
        E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
        E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
        E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
        E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
        E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
        E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
        E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
        E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null },
        E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null },
        E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
        E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
        E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
        // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html
        E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c },
        E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
        E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
        E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
        E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
    };
    // implementer = 0x42
    const Broadcom = [_]E{
        E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
    };
    // implementer = 0x43
    const Cavium = [_]E{
        E{ .part = 0x0a0, .m64 = &A64.thunderx },
        E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
        E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
        E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
        E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
    };
    // implementer = 0x46
    const Fujitsu = [_]E{
        E{ .part = 0x001, .m64 = &A64.a64fx },
    };
    // implementer = 0x48
    const HiSilicon = [_]E{
        E{ .part = 0xd01, .m64 = &A64.tsv110 },
    };
    // implementer = 0x4e
    const Nvidia = [_]E{
        E{ .part = 0x004, .m64 = &A64.carmel },
    };
    // implementer = 0x50
    const Ampere = [_]E{
        E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
        E{ .part = 0x000, .m64 = &A64.xgene1 },
    };
    // implementer = 0x51
    const Qualcomm = [_]E{
        E{ .part = 0x06f, .m32 = &A32.krait },
        E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo },
        E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
        E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
        E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
        E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
        E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
        E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
        E{ .part = 0xc00, .m64 = &A64.falkor },
        E{ .part = 0xc01, .m64 = &A64.saphira },
    };

    /// Returns the known CPU model for `core`, or null when the
    /// (implementer, part, variant) triple is not in the tables above.
    /// `is_64bit` selects between the AArch32 and AArch64 variants of the
    /// model; the result may still be null when a matched core has no model
    /// for the requested mode.
    pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model {
        const models: []const E = switch (core.implementer) {
            0x41 => &ARM,
            0x42 => &Broadcom,
            0x43 => &Cavium,
            0x46 => &Fujitsu,
            0x48 => &HiSilicon,
            // Fix: the Nvidia table (Carmel) was defined but the 0x4e arm was
            // missing from this switch, so it could never be matched.
            0x4e => &Nvidia,
            0x50 => &Ampere,
            0x51 => &Qualcomm,
            else => return null,
        };

        for (models) |model| {
            if (model.part == core.part and
                (model.variant == null or model.variant.? == core.variant))
                return if (is_64bit) model.m64 else model.m32;
        }

        return null;
    }
};
|
||||
@@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct {
|
||||
is_really_v6: bool = false,
|
||||
};
|
||||
|
||||
const cpu_models = struct {
|
||||
// Shorthands to simplify the tables below.
|
||||
const A32 = Target.arm.cpu;
|
||||
const A64 = Target.aarch64.cpu;
|
||||
|
||||
const E = struct {
|
||||
part: u16,
|
||||
variant: ?u8 = null, // null if matches any variant
|
||||
m32: ?*const Target.Cpu.Model = null,
|
||||
m64: ?*const Target.Cpu.Model = null,
|
||||
};
|
||||
|
||||
// implementer = 0x41
|
||||
const ARM = [_]E{
|
||||
E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null },
|
||||
E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null },
|
||||
E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null },
|
||||
E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null },
|
||||
E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null },
|
||||
E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null },
|
||||
E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null },
|
||||
E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null },
|
||||
E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null },
|
||||
E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null },
|
||||
E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null },
|
||||
E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null },
|
||||
E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null },
|
||||
E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null },
|
||||
E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null },
|
||||
E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null },
|
||||
E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null },
|
||||
E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null },
|
||||
E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null },
|
||||
E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null },
|
||||
E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null },
|
||||
E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null },
|
||||
E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null },
|
||||
E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 },
|
||||
E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 },
|
||||
E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 },
|
||||
E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 },
|
||||
E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 },
|
||||
E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 },
|
||||
E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 },
|
||||
E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 },
|
||||
E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 },
|
||||
E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 },
|
||||
E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null },
|
||||
E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null },
|
||||
E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null },
|
||||
E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 },
|
||||
E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c },
|
||||
E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 },
|
||||
E{ .part = 0xd02, .m64 = &A64.cortex_a34 },
|
||||
E{ .part = 0xd06, .m64 = &A64.cortex_a65 },
|
||||
E{ .part = 0xd43, .m64 = &A64.cortex_a65ae },
|
||||
};
|
||||
// implementer = 0x42
|
||||
const Broadcom = [_]E{
|
||||
E{ .part = 0x516, .m64 = &A64.thunderx2t99 },
|
||||
};
|
||||
// implementer = 0x43
|
||||
const Cavium = [_]E{
|
||||
E{ .part = 0x0a0, .m64 = &A64.thunderx },
|
||||
E{ .part = 0x0a2, .m64 = &A64.thunderxt81 },
|
||||
E{ .part = 0x0a3, .m64 = &A64.thunderxt83 },
|
||||
E{ .part = 0x0a1, .m64 = &A64.thunderxt88 },
|
||||
E{ .part = 0x0af, .m64 = &A64.thunderx2t99 },
|
||||
};
|
||||
// implementer = 0x46
|
||||
const Fujitsu = [_]E{
|
||||
E{ .part = 0x001, .m64 = &A64.a64fx },
|
||||
};
|
||||
// implementer = 0x48
|
||||
const HiSilicon = [_]E{
|
||||
E{ .part = 0xd01, .m64 = &A64.tsv110 },
|
||||
};
|
||||
// implementer = 0x4e
|
||||
const Nvidia = [_]E{
|
||||
E{ .part = 0x004, .m64 = &A64.carmel },
|
||||
};
|
||||
// implementer = 0x50
|
||||
const Ampere = [_]E{
|
||||
E{ .part = 0x000, .variant = 3, .m64 = &A64.emag },
|
||||
E{ .part = 0x000, .m64 = &A64.xgene1 },
|
||||
};
|
||||
// implementer = 0x51
|
||||
const Qualcomm = [_]E{
|
||||
E{ .part = 0x06f, .m32 = &A32.krait },
|
||||
E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo },
|
||||
E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo },
|
||||
E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo },
|
||||
E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
|
||||
E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 },
|
||||
E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
|
||||
E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 },
|
||||
E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
|
||||
E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 },
|
||||
E{ .part = 0xc00, .m64 = &A64.falkor },
|
||||
E{ .part = 0xc01, .m64 = &A64.saphira },
|
||||
};
|
||||
|
||||
fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model {
|
||||
const models = switch (core.implementer) {
|
||||
0x41 => &ARM,
|
||||
0x42 => &Broadcom,
|
||||
0x43 => &Cavium,
|
||||
0x46 => &Fujitsu,
|
||||
0x48 => &HiSilicon,
|
||||
0x50 => &Ampere,
|
||||
0x51 => &Qualcomm,
|
||||
else => return null,
|
||||
};
|
||||
|
||||
for (models) |model| {
|
||||
if (model.part == core.part and
|
||||
(model.variant == null or model.variant.? == core.variant))
|
||||
return if (is_64bit) model.m64 else model.m32;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
};
|
||||
const cpu_models = @import("arm.zig").cpu_models;
|
||||
|
||||
fn addOne(self: *ArmCpuinfoImpl) void {
|
||||
if (self.have_fields == 4 and self.core_no < self.cores.len) {
|
||||
@@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct {
|
||||
|
||||
var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined;
|
||||
for (self.cores[0..self.core_no]) |core, i| {
|
||||
known_models[i] = cpu_models.isKnown(core, is_64bit);
|
||||
known_models[i] = cpu_models.isKnown(.{
|
||||
.architecture = core.architecture,
|
||||
.implementer = core.implementer,
|
||||
.variant = core.variant,
|
||||
.part = core.part,
|
||||
}, is_64bit);
|
||||
}
|
||||
|
||||
// XXX We pick the first core on big.LITTLE systems, hopefully the
|
||||
|
||||
@@ -45,54 +45,6 @@ pub fn detectRuntimeVersion() WindowsVersion {
|
||||
return @intToEnum(WindowsVersion, version);
|
||||
}
|
||||
|
||||
const Armv8CpuInfoImpl = struct {
|
||||
cores: [8]*const Target.Cpu.Model = undefined,
|
||||
core_no: usize = 0,
|
||||
|
||||
const cpu_family_models = .{
|
||||
// Family, Model, Revision
|
||||
.{ 8, "D4C", 0, &Target.aarch64.cpu.microsoft_sq3 },
|
||||
};
|
||||
|
||||
fn parseOne(self: *Armv8CpuInfoImpl, identifier: []const u8) void {
|
||||
if (mem.indexOf(u8, identifier, "ARMv8") == null) return; // Sanity check
|
||||
|
||||
var family: ?usize = null;
|
||||
var model: ?[]const u8 = null;
|
||||
var revision: ?usize = null;
|
||||
|
||||
var tokens = mem.tokenize(u8, identifier, " ");
|
||||
while (tokens.next()) |token| {
|
||||
if (mem.eql(u8, token, "Family")) {
|
||||
const raw = tokens.next() orelse continue;
|
||||
family = std.fmt.parseInt(usize, raw, 10) catch null;
|
||||
}
|
||||
if (mem.eql(u8, token, "Model")) {
|
||||
model = tokens.next();
|
||||
}
|
||||
if (mem.eql(u8, token, "Revision")) {
|
||||
const raw = tokens.next() orelse continue;
|
||||
revision = std.fmt.parseInt(usize, raw, 10) catch null;
|
||||
}
|
||||
}
|
||||
|
||||
if (family == null or model == null or revision == null) return;
|
||||
|
||||
inline for (cpu_family_models) |set| {
|
||||
if (set[0] == family.? and mem.eql(u8, set[1], model.?) and set[2] == revision.?) {
|
||||
self.cores[self.core_no] = set[3];
|
||||
self.core_no += 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(self: Armv8CpuInfoImpl) ?*const Target.Cpu.Model {
|
||||
if (self.core_no != 8) return null; // Implies we have seen a core we don't know much about
|
||||
return self.cores[0];
|
||||
}
|
||||
};
|
||||
|
||||
// Technically, a registry value can be as long as 1MB. However, MS recommends storing
|
||||
// values larger than 2048 bytes in a file rather than directly in the registry, and since we
|
||||
// are only accessing a system hive \Registry\Machine, we stick to MS guidelines.
|
||||
@@ -169,44 +121,16 @@ fn getCpuInfoFromRegistry(
|
||||
else => unreachable,
|
||||
}
|
||||
};
|
||||
const default: struct { ptr: *anyopaque, len: u32 } = blk: {
|
||||
switch (pair.value) {
|
||||
REG.SZ,
|
||||
REG.EXPAND_SZ,
|
||||
REG.MULTI_SZ,
|
||||
=> {
|
||||
const def = std.unicode.utf8ToUtf16LeStringLiteral("Unknown");
|
||||
var buf: [def.len + 1]u16 = undefined;
|
||||
mem.copy(u16, &buf, def);
|
||||
buf[def.len] = 0;
|
||||
break :blk .{ .ptr = &buf, .len = @intCast(u32, (buf.len + 1) * 2) };
|
||||
},
|
||||
|
||||
REG.DWORD,
|
||||
REG.DWORD_BIG_ENDIAN,
|
||||
=> {
|
||||
var buf: [4]u8 = [_]u8{0} ** 4;
|
||||
break :blk .{ .ptr = &buf, .len = 4 };
|
||||
},
|
||||
|
||||
REG.QWORD => {
|
||||
var buf: [8]u8 = [_]u8{0} ** 8;
|
||||
break :blk .{ .ptr = &buf, .len = 8 };
|
||||
},
|
||||
|
||||
else => unreachable,
|
||||
}
|
||||
};
|
||||
const key_name = std.unicode.utf8ToUtf16LeStringLiteral(pair.key);
|
||||
const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key);
|
||||
|
||||
table[i + 1] = .{
|
||||
.QueryRoutine = null,
|
||||
.Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT,
|
||||
.Name = @intToPtr([*:0]u16, @ptrToInt(key_name)),
|
||||
.Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED,
|
||||
.Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)),
|
||||
.EntryContext = ctx,
|
||||
.DefaultType = pair.value,
|
||||
.DefaultData = default.ptr,
|
||||
.DefaultLength = default.len,
|
||||
.DefaultType = REG.NONE,
|
||||
.DefaultData = null,
|
||||
.DefaultLength = 0,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -261,102 +185,177 @@ fn getCpuInfoFromRegistry(
|
||||
else => unreachable,
|
||||
};
|
||||
},
|
||||
else => return std.os.windows.unexpectedStatus(res),
|
||||
else => return error.Unexpected,
|
||||
}
|
||||
}
|
||||
|
||||
fn detectCpuModelArm64() !*const Target.Cpu.Model {
|
||||
// Pull the CPU identifier from the registry.
|
||||
// Assume max number of cores to be at 8.
|
||||
const max_cpu_count = 8;
|
||||
const cpu_count = getCpuCount();
|
||||
|
||||
if (cpu_count > max_cpu_count) return error.TooManyCpus;
|
||||
|
||||
// Parse the models from strings
|
||||
var parser = Armv8CpuInfoImpl{};
|
||||
|
||||
var out_buf: [3][max_value_len]u8 = undefined;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < cpu_count) : (i += 1) {
|
||||
try getCpuInfoFromRegistry(i, 3, .{
|
||||
.{ .key = "CP 4000", .value = REG.QWORD },
|
||||
.{ .key = "Identifier", .value = REG.SZ },
|
||||
.{ .key = "VendorIdentifier", .value = REG.SZ },
|
||||
}, &out_buf);
|
||||
|
||||
const hex = out_buf[0][0..8];
|
||||
const identifier = mem.sliceTo(out_buf[1][0..], 0);
|
||||
const vendor_identifier = mem.sliceTo(out_buf[2][0..], 0);
|
||||
std.log.warn("{d} => {x}, {s}, {s}", .{ i, std.fmt.fmtSliceHexLower(hex), identifier, vendor_identifier });
|
||||
}
|
||||
|
||||
return parser.finalize() orelse Target.Cpu.Model.generic(.aarch64);
|
||||
}
|
||||
|
||||
fn detectNativeCpuAndFeaturesArm64() Target.Cpu {
|
||||
const Feature = Target.aarch64.Feature;
|
||||
|
||||
const model = detectCpuModelArm64() catch Target.Cpu.Model.generic(.aarch64);
|
||||
|
||||
var cpu = Target.Cpu{
|
||||
.arch = .aarch64,
|
||||
.model = model,
|
||||
.features = model.features,
|
||||
};
|
||||
|
||||
// Override any features that are either present or absent
|
||||
if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.neon));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.neon));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.crc));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.crc));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.crypto));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.crypto));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.lse));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.lse));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.dotprod));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.dotprod));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.jsconv));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.jsconv));
|
||||
}
|
||||
|
||||
return cpu;
|
||||
}
|
||||
|
||||
fn getCpuCount() usize {
|
||||
return std.os.windows.peb().NumberOfProcessors;
|
||||
}
|
||||
|
||||
pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
|
||||
switch (builtin.cpu.arch) {
|
||||
.aarch64 => return detectNativeCpuAndFeaturesArm64(),
|
||||
else => |arch| return .{
|
||||
const ArmCpuInfoImpl = struct {
|
||||
cores: [4]CoreInfo = undefined,
|
||||
core_no: usize = 0,
|
||||
have_fields: usize = 0,
|
||||
|
||||
const CoreInfo = @import("arm.zig").CoreInfo;
|
||||
const cpu_models = @import("arm.zig").cpu_models;
|
||||
|
||||
const Data = struct {
|
||||
cp_4000: []const u8,
|
||||
identifier: []const u8,
|
||||
};
|
||||
|
||||
fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void {
|
||||
const info = &self.cores[self.core_no];
|
||||
info.* = .{};
|
||||
|
||||
// CPU part
|
||||
info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4;
|
||||
self.have_fields += 1;
|
||||
|
||||
// CPU implementer
|
||||
info.implementer = data.cp_4000[3];
|
||||
self.have_fields += 1;
|
||||
|
||||
var tokens = mem.tokenize(u8, data.identifier, " ");
|
||||
while (tokens.next()) |token| {
|
||||
if (mem.eql(u8, "Family", token)) {
|
||||
// CPU architecture
|
||||
const family = tokens.next() orelse continue;
|
||||
info.architecture = try std.fmt.parseInt(u8, family, 10);
|
||||
self.have_fields += 1;
|
||||
break;
|
||||
}
|
||||
} else return;
|
||||
|
||||
self.addOne();
|
||||
}
|
||||
|
||||
fn addOne(self: *ArmCpuInfoImpl) void {
|
||||
if (self.have_fields == 3 and self.core_no < self.cores.len) {
|
||||
if (self.core_no > 0) {
|
||||
// Deduplicate the core info.
|
||||
for (self.cores[0..self.core_no]) |it| {
|
||||
if (std.meta.eql(it, self.cores[self.core_no]))
|
||||
return;
|
||||
}
|
||||
}
|
||||
self.core_no += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu {
|
||||
if (self.core_no == 0) return null;
|
||||
|
||||
const is_64bit = switch (arch) {
|
||||
.aarch64, .aarch64_be, .aarch64_32 => true,
|
||||
else => false,
|
||||
};
|
||||
|
||||
var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined;
|
||||
for (self.cores[0..self.core_no]) |core, i| {
|
||||
known_models[i] = cpu_models.isKnown(core, is_64bit);
|
||||
}
|
||||
|
||||
// XXX We pick the first core on big.LITTLE systems, hopefully the
|
||||
// LITTLE one.
|
||||
const model = known_models[0] orelse return null;
|
||||
return Target.Cpu{
|
||||
.arch = arch,
|
||||
.model = Target.Cpu.Model.generic(arch),
|
||||
.features = Target.Cpu.Feature.Set.empty,
|
||||
.model = model,
|
||||
.features = model.features,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl);
|
||||
|
||||
fn CpuInfoParser(comptime impl: anytype) type {
|
||||
return struct {
|
||||
fn parse(arch: Target.Cpu.Arch) !?Target.Cpu {
|
||||
var obj: impl = .{};
|
||||
var out_buf: [2][max_value_len]u8 = undefined;
|
||||
|
||||
var i: usize = 0;
|
||||
while (i < getCpuCount()) : (i += 1) {
|
||||
try getCpuInfoFromRegistry(i, 2, .{
|
||||
.{ .key = "CP 4000", .value = REG.QWORD },
|
||||
.{ .key = "Identifier", .value = REG.SZ },
|
||||
}, &out_buf);
|
||||
|
||||
const cp_4000 = out_buf[0][0..8];
|
||||
const identifier = mem.sliceTo(out_buf[1][0..], 0);
|
||||
|
||||
try obj.parseDataHook(.{
|
||||
.cp_4000 = cp_4000,
|
||||
.identifier = identifier,
|
||||
});
|
||||
}
|
||||
|
||||
return obj.finalize(arch);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu {
|
||||
return .{
|
||||
.arch = arch,
|
||||
.model = Target.Cpu.Model.generic(arch),
|
||||
.features = Target.Cpu.Feature.Set.empty,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
|
||||
const current_arch = builtin.cpu.arch;
|
||||
switch (current_arch) {
|
||||
.aarch64, .aarch64_be, .aarch64_32 => {
|
||||
var cpu = cpu: {
|
||||
var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu genericCpu(current_arch);
|
||||
break :cpu maybe_cpu orelse genericCpu(current_arch);
|
||||
};
|
||||
|
||||
const Feature = Target.aarch64.Feature;
|
||||
|
||||
// Override any features that are either present or absent
|
||||
if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.neon));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.neon));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.crc));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.crc));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.crypto));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.crypto));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.lse));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.lse));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.dotprod));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.dotprod));
|
||||
}
|
||||
|
||||
if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) {
|
||||
cpu.features.addFeature(@enumToInt(Feature.jsconv));
|
||||
} else {
|
||||
cpu.features.removeFeature(@enumToInt(Feature.jsconv));
|
||||
}
|
||||
|
||||
return cpu;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user