From 83ab57b7c42bbd7fb26a05ea11b0f21cee88894f Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 28 Jan 2026 00:56:52 -0500 Subject: [PATCH 1/9] Merge commit 'd9aae8cc544c5f524a12b5f3b2f3c3745c34dd74' into sync-from-portable-simd-2026-01-28 --- .github/workflows/ci.yml | 23 +- Cargo.lock | 273 ++++++++++++---- beginners-guide.md | 2 +- crates/core_simd/examples/dot_product.rs | 34 +- crates/core_simd/examples/matrix_inversion.rs | 2 +- crates/core_simd/src/fmt.rs | 3 +- crates/core_simd/src/iter.rs | 10 +- crates/core_simd/src/lane_count.rs | 40 --- crates/core_simd/src/lib.rs | 9 +- crates/core_simd/src/masks.rs | 180 ++++++----- crates/core_simd/src/masks/bitmask.rs | 228 -------------- crates/core_simd/src/masks/full_masks.rs | 296 ------------------ crates/core_simd/src/mod.rs | 3 +- crates/core_simd/src/ops.rs | 5 +- crates/core_simd/src/ops/assign.rs | 1 - crates/core_simd/src/ops/deref.rs | 3 - crates/core_simd/src/ops/shift_scalar.rs | 10 +- crates/core_simd/src/ops/unary.rs | 4 +- crates/core_simd/src/select.rs | 189 ++++++++--- crates/core_simd/src/simd/cmp/eq.rs | 24 +- crates/core_simd/src/simd/cmp/ord.rs | 60 ++-- crates/core_simd/src/simd/num/float.rs | 10 +- crates/core_simd/src/simd/num/int.rs | 14 +- crates/core_simd/src/simd/num/uint.rs | 10 +- crates/core_simd/src/simd/ptr/const_ptr.rs | 9 +- crates/core_simd/src/simd/ptr/mut_ptr.rs | 9 +- crates/core_simd/src/swizzle.rs | 62 ++-- crates/core_simd/src/swizzle_dyn.rs | 21 +- crates/core_simd/src/to_bytes.rs | 4 +- crates/core_simd/src/vector.rs | 105 ++----- crates/core_simd/src/vendor/loongarch64.rs | 45 ++- crates/core_simd/src/vendor/wasm32.rs | 14 - crates/core_simd/src/vendor/x86.rs | 22 -- crates/core_simd/tests/masks.rs | 4 +- crates/std_float/src/lib.rs | 110 +++---- crates/std_float/tests/float.rs | 29 +- crates/test_helpers/Cargo.toml | 1 + crates/test_helpers/src/approxeq.rs | 110 +++++++ crates/test_helpers/src/lib.rs | 80 ++++- rust-toolchain.toml | 2 +- 40 files changed, 873 insertions(+), 1187 deletions(-) delete mode 100644 crates/core_simd/src/lane_count.rs delete mode 100644 crates/core_simd/src/masks/bitmask.rs delete mode 100644 crates/core_simd/src/masks/full_masks.rs create mode 100644 crates/test_helpers/src/approxeq.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3984d8f0d8d99..de7efa3552836 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: strategy: fail-fast: false matrix: - target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu] + target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, x86_64-unknown-linux-gnu] # `default` means we use the default target config for the target, # `native` means we run with `-Ctarget-cpu=native`, and anything else is # an arg to `-Ctarget-feature` @@ -68,18 +68,12 @@ jobs: exclude: # -Ctarget-cpu=native sounds like bad-news if target != host - { target: i686-pc-windows-msvc, target_feature: native } - - { target: i586-pc-windows-msvc, target_feature: native } include: # Populate the `matrix.os` field - { target: x86_64-unknown-linux-gnu, os: ubuntu-latest } - { target: x86_64-pc-windows-msvc, os: windows-latest } - { target: i686-pc-windows-msvc, os: windows-latest } - - { target: i586-pc-windows-msvc, os: windows-latest } - - # These are globally available on all the other targets. - - { target: i586-pc-windows-msvc, target_feature: +sse, os: windows-latest } - - { target: i586-pc-windows-msvc, target_feature: +sse2, os: windows-latest } # Annoyingly, the x86_64-unknown-linux-gnu runner *almost* always has # avx512vl, but occasionally doesn't. Maybe one day we can enable it. @@ -129,7 +123,7 @@ jobs: run: cargo doc --verbose --target=${{ matrix.target }} env: RUSTDOCFLAGS: -Dwarnings - + macos-tests: name: ${{ matrix.target }} runs-on: macos-latest @@ -246,9 +240,18 @@ jobs: miri: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3, 4] env: PROPTEST_CASES: 16 steps: - uses: actions/checkout@v4 - - name: Test (Miri) - run: cargo miri test + + - name: Install cargo-nextest + uses: taiki-e/install-action@nextest + + - name: Test (Miri) (partition ${{ matrix.shard }}/4) + run: | + cargo miri nextest run --partition count:${{ matrix.shard }}/4 diff --git a/Cargo.lock b/Cargo.lock index 1584c704fb221..5a5f0d8907ae3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,12 +1,12 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "autocfg" -version = "1.1.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" @@ -16,31 +16,30 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "cfg-if" -version = "1.0.0" +name = "cc" +version = "1.2.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +dependencies = [ + "shlex", +] [[package]] -name = "console_error_panic_hook" -version = "0.1.7" +name = "cfg-if" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "core_simd" @@ -53,47 +52,70 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + [[package]] name = "js-sys" -version = "0.3.64" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "log" -version = "0.4.20" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "minicov" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +dependencies = [ + "cc", + "walkdir", +] [[package]] name = "num-traits" -version = "0.2.16" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" -version = "1.18.0" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -114,9 +136,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -167,10 +189,25 @@ dependencies = [ ] [[package]] -name = "scoped-tls" -version = "1.0.1" +name = "rustversion" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "std_float" @@ -184,9 +221,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.29" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -197,34 +234,46 @@ dependencies = [ name = "test_helpers" version = "0.1.0" dependencies = [ + "float-cmp", "proptest", ] [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "walkdir" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] [[package]] name = "wasm-bindgen" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -233,21 +282,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.37" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -255,9 +305,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -268,19 +318,21 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.87" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "wasm-bindgen-test" -version = "0.3.37" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671" +checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" dependencies = [ - "console_error_panic_hook", "js-sys", - "scoped-tls", + "minicov", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -288,20 +340,123 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.37" +version = "0.3.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575" +checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" dependencies = [ "proc-macro2", "quote", + "syn", ] [[package]] name = "web-sys" -version = "0.3.64" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", ] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/beginners-guide.md b/beginners-guide.md index dc08d847ced50..4250a18315a6e 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -25,7 +25,7 @@ SIMD has a few special vocabulary terms you should know: * **Scalar:** "Scalar" in mathematical contexts refers to values that can be represented as a single element, mostly numbers like 6, 3.14, or -2. It can also be used to describe "scalar operations" that use strictly scalar values, like addition. This term is mostly used to differentiate between vectorized operations that use SIMD instructions and scalar operations that don't. -* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops. +* **Lane:** A single element position within a vector is called a lane. If you have `N` lanes available then they're numbered from `0` to `N-1` when referring to them, again like an array. The biggest difference between an array element and a vector lane is that in general it is *relatively costly* to access an individual lane value. On most architectures, the vector has to be pushed out of the SIMD register onto the stack, then an individual lane is accessed while it's on the stack (and possibly the stack value is read back into a register). For this reason, when working with SIMD you should avoid reading or writing the value of an individual lane during hot loops. * **Bit Widths:** When talking about SIMD, the bit widths used are the bit size of the vectors involved, *not* the individual elements. So "128-bit SIMD" has 128-bit vectors, and that might be `f32x4`, `i32x4`, `i16x8`, or other variations. While 128-bit SIMD is the most common, there's also 64-bit, 256-bit, and even 512-bit on the newest CPUs. diff --git a/crates/core_simd/examples/dot_product.rs b/crates/core_simd/examples/dot_product.rs index 75d152ae7f0e3..4ef32bfa60b5e 100644 --- a/crates/core_simd/examples/dot_product.rs +++ b/crates/core_simd/examples/dot_product.rs @@ -1,8 +1,6 @@ //! Code taken from the `packed_simd` crate. //! Run this code with `cargo test --example dot_product`. -#![feature(array_chunks)] -#![feature(slice_as_chunks)] // Add these imports to use the stdsimd library #![feature(portable_simd)] use core_simd::simd::prelude::*; @@ -33,7 +31,7 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 { } // We now move on to the SIMD implementations: notice the following constructs: -// `array_chunks::<4>`: mapping this over the vector will let use construct SIMD vectors +// `as_chunks::<4>`: mapping this over the vector will let us construct SIMD vectors // `f32x4::from_array`: construct the SIMD vector from a slice // `(a * b).reduce_sum()`: Multiply both f32x4 vectors together, and then reduce them. // This approach essentially uses SIMD to produce a vector of length N/4 of all the products, @@ -42,9 +40,11 @@ pub fn dot_prod_scalar_1(a: &[f32], b: &[f32]) -> f32 { pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .map(|(a, b)| (a * b).reduce_sum()) .sum() } @@ -60,9 +60,11 @@ pub fn dot_prod_simd_0(a: &[f32], b: &[f32]) -> f32 { pub fn dot_prod_simd_1(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .fold(f32x4::splat(0.0), |acc, zipped| acc + zipped.0 * zipped.1) .reduce_sum() } @@ -74,9 +76,11 @@ pub fn dot_prod_simd_2(a: &[f32], b: &[f32]) -> f32 { assert_eq!(a.len(), b.len()); // TODO handle remainder when a.len() % 4 != 0 let mut res = f32x4::splat(0.0); - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .for_each(|(a, b)| { res = a.mul_add(b, res); }); @@ -113,9 +117,11 @@ pub fn dot_prod_simd_3(a: &[f32], b: &[f32]) -> f32 { // next example. pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { let mut sum = a - .array_chunks::<4>() + .as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .map(|(a, b)| a * b) .fold(f32x4::splat(0.0), std::ops::Add::add) .reduce_sum(); @@ -131,9 +137,11 @@ pub fn dot_prod_simd_4(a: &[f32], b: &[f32]) -> f32 { // This version allocates a single `XMM` register for accumulation, and the folds don't allocate on top of that. // Notice the use of `mul_add`, which can do a multiply and an add operation ber iteration. pub fn dot_prod_simd_5(a: &[f32], b: &[f32]) -> f32 { - a.array_chunks::<4>() + a.as_chunks::<4>() + .0 + .iter() .map(|&a| f32x4::from_array(a)) - .zip(b.array_chunks::<4>().map(|&b| f32x4::from_array(b))) + .zip(b.as_chunks::<4>().0.iter().map(|&b| f32x4::from_array(b))) .fold(f32x4::splat(0.), |acc, (a, b)| a.mul_add(b, acc)) .reduce_sum() } diff --git a/crates/core_simd/examples/matrix_inversion.rs b/crates/core_simd/examples/matrix_inversion.rs index bad86414401d7..ad2eea9153e08 100644 --- a/crates/core_simd/examples/matrix_inversion.rs +++ b/crates/core_simd/examples/matrix_inversion.rs @@ -1,7 +1,7 @@ //! 4x4 matrix inverse // Code ported from the `packed_simd` crate // Run this code with `cargo test --example matrix_inversion` -#![feature(array_chunks, portable_simd)] +#![feature(portable_simd)] use core_simd::simd::prelude::*; // Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^) diff --git a/crates/core_simd/src/fmt.rs b/crates/core_simd/src/fmt.rs index 3a540f5a04908..90c520e75bb39 100644 --- a/crates/core_simd/src/fmt.rs +++ b/crates/core_simd/src/fmt.rs @@ -1,9 +1,8 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Simd, SimdElement}; use core::fmt; impl fmt::Debug for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + fmt::Debug, { /// A `Simd` has a debug format like the one for `[T]`: diff --git a/crates/core_simd/src/iter.rs b/crates/core_simd/src/iter.rs index b3732fd74d5f6..fdc458efeda48 100644 --- a/crates/core_simd/src/iter.rs +++ b/crates/core_simd/src/iter.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; use core::{ iter::{Product, Sum}, ops::{Add, Mul}, @@ -7,8 +7,6 @@ use core::{ macro_rules! impl_traits { { $type:ty } => { impl Sum for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -17,8 +15,6 @@ macro_rules! impl_traits { } impl Product for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { @@ -27,8 +23,6 @@ macro_rules! impl_traits { } impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn sum>(iter: I) -> Self { @@ -37,8 +31,6 @@ macro_rules! impl_traits { } impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn product>(iter: I) -> Self { diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs deleted file mode 100644 index bbdfd5f5f3ed3..0000000000000 --- a/crates/core_simd/src/lane_count.rs +++ /dev/null @@ -1,40 +0,0 @@ -mod sealed { - pub trait Sealed {} -} -use sealed::Sealed; - -/// Specifies the number of lanes in a SIMD vector as a type. -pub struct LaneCount; - -impl LaneCount { - /// The number of bytes in a bitmask with this many lanes. - pub const BITMASK_LEN: usize = N.div_ceil(8); -} - -/// Statically guarantees that a lane count is marked as supported. -/// -/// This trait is *sealed*: the list of implementors below is total. -/// Users do not have the ability to mark additional `LaneCount` values as supported. -/// Only SIMD vectors with supported lane counts are constructable. -pub trait SupportedLaneCount: Sealed { - #[doc(hidden)] - type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>; -} - -impl Sealed for LaneCount {} - -macro_rules! supported_lane_count { - ($($lanes:literal),+) => { - $( - impl SupportedLaneCount for LaneCount<$lanes> { - type BitMask = [u8; ($lanes + 7) / 8]; - } - )+ - }; -} - -supported_lane_count!( - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64 -); diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index 717b882b64ba1..fe26d99b9194c 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -9,7 +9,8 @@ simd_ffi, staged_api, prelude_import, - ptr_metadata + ptr_metadata, + rustc_attrs )] #![cfg_attr( all( @@ -30,10 +31,6 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) )] -#![cfg_attr( - all(target_arch = "x86_64", target_feature = "avx512f"), - feature(stdarch_x86_avx512) -)] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny( unsafe_op_in_unsafe_fn, @@ -41,7 +38,7 @@ clippy::undocumented_unsafe_blocks )] #![doc(test(attr(deny(warnings))))] -#![allow(internal_features)] +#![allow(internal_features, clippy::repr_packed_without_abi)] #![unstable(feature = "portable_simd", issue = "86656")] //! Portable SIMD module. diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 19d45f4d3b31a..3e2209556b66b 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -2,20 +2,33 @@ //! Types representing #![allow(non_camel_case_types)] -#[cfg_attr( - not(all(target_arch = "x86_64", target_feature = "avx512f")), - path = "masks/full_masks.rs" -)] -#[cfg_attr( - all(target_arch = "x86_64", target_feature = "avx512f"), - path = "masks/bitmask.rs" -)] -mod mask_impl; - -use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount}; +use crate::simd::{Select, Simd, SimdCast, SimdElement}; use core::cmp::Ordering; use core::{fmt, mem}; +pub(crate) trait FixEndianness { + fn fix_endianness(self) -> Self; +} + +macro_rules! impl_fix_endianness { + { $($int:ty),* } => { + $( + impl FixEndianness for $int { + #[inline(always)] + fn fix_endianness(self) -> Self { + if cfg!(target_endian = "big") { + <$int>::reverse_bits(self) + } else { + self + } + } + } + )* + } +} + +impl_fix_endianness! { u8, u16, u32, u64 } + mod sealed { use super::*; @@ -28,7 +41,6 @@ mod sealed { pub trait Sealed { fn valid(values: Simd) -> bool where - LaneCount: SupportedLaneCount, Self: SimdElement; fn eq(self, other: Self) -> bool; @@ -56,8 +68,6 @@ macro_rules! impl_element { impl Sealed for $ty { #[inline] fn valid(value: Simd) -> bool - where - LaneCount: SupportedLaneCount, { // We can't use `Simd` directly, because `Simd`'s functions call this function and // we will end up with an infinite loop. @@ -108,23 +118,19 @@ impl_element! { isize, usize } /// The layout of this type is unspecified, and may change between platforms /// and/or Rust versions, and code should not assume that it is equivalent to /// `[T; N]`. +/// +/// `N` cannot be 0 and may be at most 64. This limit may be increased in +/// the future. #[repr(transparent)] -pub struct Mask(mask_impl::Mask) +pub struct Mask(Simd) where - T: MaskElement, - LaneCount: SupportedLaneCount; + T: MaskElement; -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} +impl Copy for Mask where T: MaskElement {} impl Clone for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn clone(&self) -> Self { @@ -135,12 +141,12 @@ where impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { /// Constructs a mask by setting all elements to the given value. #[inline] - pub fn splat(value: bool) -> Self { - Self(mask_impl::Mask::splat(value)) + #[rustc_const_unstable(feature = "portable_simd", issue = "86656")] + pub const fn splat(value: bool) -> Self { + Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) } /// Converts an array of bools to a SIMD mask. @@ -156,7 +162,7 @@ where let bytes: [u8; N] = mem::transmute_copy(&array); let bools: Simd = core::intrinsics::simd::simd_ne(Simd::from_array(bytes), Simd::splat(0u8)); - Mask::from_int_unchecked(core::intrinsics::simd::simd_cast(bools)) + Mask::from_simd_unchecked(core::intrinsics::simd::simd_cast(bools)) } } @@ -174,7 +180,7 @@ where // This would be hypothetically valid as an "in-place" transmute, // but these are "dependently-sized" types, so copy elision it is! unsafe { - let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_int()); + let mut bytes: Simd = core::intrinsics::simd::simd_cast(self.to_simd()); bytes &= Simd::splat(1i8); mem::transmute_copy(&bytes) } @@ -187,12 +193,12 @@ where /// All elements must be either 0 or -1. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] - pub unsafe fn from_int_unchecked(value: Simd) -> Self { + pub unsafe fn from_simd_unchecked(value: Simd) -> Self { // Safety: the caller must confirm this invariant unsafe { core::intrinsics::assume(::valid(value)); - Self(mask_impl::Mask::from_int_unchecked(value)) } + Self(value) } /// Converts a vector of integers to a mask, where 0 represents `false` and -1 @@ -203,25 +209,26 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] #[track_caller] - pub fn from_int(value: Simd) -> Self { + pub fn from_simd(value: Simd) -> Self { assert!(T::valid(value), "all values must be either 0 or -1",); // Safety: the validity has been checked - unsafe { Self::from_int_unchecked(value) } + unsafe { Self::from_simd_unchecked(value) } } /// Converts the mask to a vector of integers, where 0 represents `false` and -1 /// represents `true`. #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - pub fn to_int(self) -> Simd { - self.0.to_int() + pub fn to_simd(self) -> Simd { + self.0 } /// Converts the mask to a mask of any other element size. #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn cast(self) -> Mask { - Mask(self.0.convert()) + // Safety: mask elements are integers + unsafe { Mask(core::intrinsics::simd::simd_as(self.0)) } } /// Tests the value of the specified element. @@ -232,7 +239,7 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] pub unsafe fn test_unchecked(&self, index: usize) -> bool { // Safety: the caller must confirm this invariant - unsafe { self.0.test_unchecked(index) } + unsafe { T::eq(*self.0.as_array().get_unchecked(index), T::TRUE) } } /// Tests the value of the specified element. @@ -243,9 +250,7 @@ where #[must_use = "method returns a new bool and does not mutate the original value"] #[track_caller] pub fn test(&self, index: usize) -> bool { - assert!(index < N, "element index out of range"); - // Safety: the element index has been checked - unsafe { self.test_unchecked(index) } + T::eq(self.0[index], T::TRUE) } /// Sets the value of the specified element. @@ -256,7 +261,7 @@ where pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) { // Safety: the caller must confirm this invariant unsafe { - self.0.set_unchecked(index, value); + *self.0.as_mut_array().get_unchecked_mut(index) = if value { T::TRUE } else { T::FALSE } } } @@ -267,35 +272,65 @@ where #[inline] #[track_caller] pub fn set(&mut self, index: usize, value: bool) { - assert!(index < N, "element index out of range"); - // Safety: the element index has been checked - unsafe { - self.set_unchecked(index, value); - } + self.0[index] = if value { T::TRUE } else { T::FALSE } } /// Returns true if any element is set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn any(self) -> bool { - self.0.any() + // Safety: `self` is a mask vector + unsafe { core::intrinsics::simd::simd_reduce_any(self.0) } } /// Returns true if all elements are set, or false otherwise. #[inline] #[must_use = "method returns a new bool and does not mutate the original value"] pub fn all(self) -> bool { - self.0.all() + // Safety: `self` is a mask vector + unsafe { core::intrinsics::simd::simd_reduce_all(self.0) } } /// Creates a bitmask from a mask. /// /// Each bit is set if the corresponding element in the mask is `true`. - /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64. #[inline] #[must_use = "method returns a new integer and does not mutate the original value"] pub fn to_bitmask(self) -> u64 { - self.0.to_bitmask_integer() + const { + assert!(N <= 64, "number of elements can't be greater than 64"); + } + + #[inline] + unsafe fn to_bitmask_impl( + mask: Mask, + ) -> U + where + T: MaskElement, + { + let resized = mask.resize::(false); + + // Safety: `resized` is an integer vector with length M, which must match T + let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized.0) }; + + // LLVM assumes bit order should match endianness + bitmask.fix_endianness() + } + + // TODO modify simd_bitmask to zero-extend output, making this unnecessary + if N <= 8 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else if N <= 16 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else if N <= 32 { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) as u64 } + } else { + // Safety: bitmask matches length + unsafe { to_bitmask_impl::(self) } + } } /// Creates a mask from a bitmask. @@ -305,7 +340,7 @@ where #[inline] #[must_use = "method returns a new mask and does not mutate the original value"] pub fn from_bitmask(bitmask: u64) -> Self { - Self(mask_impl::Mask::from_bitmask_integer(bitmask)) + Self(bitmask.select(Simd::splat(T::TRUE), Simd::splat(T::FALSE))) } /// Finds the index of the first set element. @@ -351,7 +386,7 @@ where // Safety: the input and output are integer vectors let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; - let masked_index = self.select(index, Self::splat(true).to_int()); + let masked_index = self.select(index, Self::splat(true).to_simd()); // Safety: the input and output are integer vectors let masked_index: Simd = @@ -376,7 +411,6 @@ where impl From<[bool; N]> for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn from(array: [bool; N]) -> Self { @@ -387,7 +421,6 @@ where impl From> for [bool; N] where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn from(vector: Mask) -> Self { @@ -398,7 +431,6 @@ where impl Default for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn default() -> Self { @@ -409,7 +441,6 @@ where impl PartialEq for Mask where T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, { #[inline] fn eq(&self, other: &Self) -> bool { @@ -420,7 +451,6 @@ where impl PartialOrd for Mask where T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, { #[inline] fn partial_cmp(&self, other: &Self) -> Option { @@ -431,7 +461,6 @@ where impl fmt::Debug for Mask where T: MaskElement + fmt::Debug, - LaneCount: SupportedLaneCount, { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -444,19 +473,18 @@ where impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitand(self, rhs: Self) -> Self { - Self(self.0 & rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } } } impl core::ops::BitAnd for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -468,7 +496,6 @@ where impl core::ops::BitAnd> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -480,19 +507,18 @@ where impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitor(self, rhs: Self) -> Self { - Self(self.0 | rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } } } impl core::ops::BitOr for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -504,7 +530,6 @@ where impl core::ops::BitOr> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -516,19 +541,18 @@ where impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] fn bitxor(self, rhs: Self) -> Self::Output { - Self(self.0 ^ rhs.0) + // Safety: `self` is an integer vector + unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } } } impl core::ops::BitXor for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -540,7 +564,6 @@ where impl core::ops::BitXor> for bool where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] @@ -552,30 +575,27 @@ where impl core::ops::Not for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { type Output = Mask; #[inline] fn not(self) -> Self::Output { - Self(!self.0) + Self::splat(true) ^ self } } impl core::ops::BitAndAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: Self) { - self.0 = self.0 & rhs.0; + *self = *self & rhs; } } impl core::ops::BitAndAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitand_assign(&mut self, rhs: bool) { @@ -586,18 +606,16 @@ where impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: Self) { - self.0 = self.0 | rhs.0; + *self = *self | rhs; } } impl core::ops::BitOrAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitor_assign(&mut self, rhs: bool) { @@ -608,18 +626,16 @@ where impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: Self) { - self.0 = self.0 ^ rhs.0; + *self = *self ^ rhs; } } impl core::ops::BitXorAssign for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { #[inline] fn bitxor_assign(&mut self, rhs: bool) { @@ -631,8 +647,6 @@ macro_rules! impl_from { { $from:ty => $($to:ty),* } => { $( impl From> for Mask<$to, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn from(value: Mask<$from, N>) -> Self { diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs deleted file mode 100644 index 32d37b5533926..0000000000000 --- a/crates/core_simd/src/masks/bitmask.rs +++ /dev/null @@ -1,228 +0,0 @@ -#![allow(unused_imports)] -use super::MaskElement; -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; -use core::marker::PhantomData; - -/// A mask where each lane is represented by a single bit. -#[repr(transparent)] -pub(crate) struct Mask( - as SupportedLaneCount>::BitMask, - PhantomData, -) -where - T: MaskElement, - LaneCount: SupportedLaneCount; - -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Clone for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn clone(&self) -> Self { - *self - } -} - -impl PartialEq for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn eq(&self, other: &Self) -> bool { - self.0.as_ref() == other.0.as_ref() - } -} - -impl PartialOrd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.0.as_ref().partial_cmp(other.0.as_ref()) - } -} - -impl Eq for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Ord for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.as_ref().cmp(other.0.as_ref()) - } -} - -impl Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn splat(value: bool) -> Self { - let mut mask = as SupportedLaneCount>::BitMask::default(); - if value { - mask.as_mut().fill(u8::MAX) - } else { - mask.as_mut().fill(u8::MIN) - } - if N % 8 > 0 { - *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); - } - Self(mask, PhantomData) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool { - (self.0.as_ref()[lane / 8] >> (lane % 8)) & 0x1 > 0 - } - - #[inline] - pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { - unsafe { - self.0.as_mut()[lane / 8] ^= ((value ^ self.test_unchecked(lane)) as u8) << (lane % 8) - } - } - - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub(crate) fn to_int(self) -> Simd { - unsafe { - core::intrinsics::simd::simd_select_bitmask( - self.0, - Simd::splat(T::TRUE), - Simd::splat(T::FALSE), - ) - } - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) unsafe fn from_int_unchecked(value: Simd) -> Self { - unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) } - } - - #[inline] - pub(crate) fn to_bitmask_integer(self) -> u64 { - let mut bitmask = [0u8; 8]; - bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref()); - u64::from_ne_bytes(bitmask) - } - - #[inline] - pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { - let mut bytes = as SupportedLaneCount>::BitMask::default(); - let len = bytes.as_mut().len(); - bytes - .as_mut() - .copy_from_slice(&bitmask.to_ne_bytes()[..len]); - Self(bytes, PhantomData) - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn convert(self) -> Mask - where - U: MaskElement, - { - // Safety: bitmask layout does not depend on the element width - unsafe { core::mem::transmute_copy(&self) } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn any(self) -> bool { - self != Self::splat(false) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn all(self) -> bool { - self == Self::splat(true) - } -} - -impl core::ops::BitAnd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, -{ - type Output = Self; - #[inline] - fn bitand(mut self, rhs: Self) -> Self { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l &= r; - } - self - } -} - -impl core::ops::BitOr for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, - as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>, -{ - type Output = Self; - #[inline] - fn bitor(mut self, rhs: Self) -> Self { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l |= r; - } - self - } -} - -impl core::ops::BitXor for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitxor(mut self, rhs: Self) -> Self::Output { - for (l, r) in self.0.as_mut().iter_mut().zip(rhs.0.as_ref().iter()) { - *l ^= r; - } - self - } -} - -impl core::ops::Not for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn not(mut self) -> Self::Output { - for x in self.0.as_mut() { - *x = !*x; - } - if N % 8 > 0 { - *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8); - } - self - } -} diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs deleted file mode 100644 index 4e98db4070a9d..0000000000000 --- a/crates/core_simd/src/masks/full_masks.rs +++ /dev/null @@ -1,296 +0,0 @@ -//! Masks that take up full SIMD vector registers. - -use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount}; - -#[repr(transparent)] -pub(crate) struct Mask(Simd) -where - T: MaskElement, - LaneCount: SupportedLaneCount; - -impl Copy for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ -} - -impl Clone for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn clone(&self) -> Self { - *self - } -} - -impl PartialEq for Mask -where - T: MaskElement + PartialEq, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn eq(&self, other: &Self) -> bool { - self.0.eq(&other.0) - } -} - -impl PartialOrd for Mask -where - T: MaskElement + PartialOrd, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - self.0.partial_cmp(&other.0) - } -} - -impl Eq for Mask -where - T: MaskElement + Eq, - LaneCount: SupportedLaneCount, -{ -} - -impl Ord for Mask -where - T: MaskElement + Ord, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.cmp(&other.0) - } -} - -// Used for bitmask bit order workaround -pub(crate) trait ReverseBits { - // Reverse the least significant `n` bits of `self`. - // (Remaining bits must be 0.) - fn reverse_bits(self, n: usize) -> Self; -} - -macro_rules! impl_reverse_bits { - { $($int:ty),* } => { - $( - impl ReverseBits for $int { - #[inline(always)] - fn reverse_bits(self, n: usize) -> Self { - let rev = <$int>::reverse_bits(self); - let bitsize = size_of::<$int>() * 8; - if n < bitsize { - // Shift things back to the right - rev >> (bitsize - n) - } else { - rev - } - } - } - )* - } -} - -impl_reverse_bits! { u8, u16, u32, u64 } - -impl Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn splat(value: bool) -> Self { - Self(Simd::splat(if value { T::TRUE } else { T::FALSE })) - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) unsafe fn test_unchecked(&self, lane: usize) -> bool { - T::eq(self.0[lane], T::TRUE) - } - - #[inline] - pub(crate) unsafe fn set_unchecked(&mut self, lane: usize, value: bool) { - self.0[lane] = if value { T::TRUE } else { T::FALSE } - } - - #[inline] - #[must_use = "method returns a new vector and does not mutate the original value"] - pub(crate) fn to_int(self) -> Simd { - self.0 - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) unsafe fn from_int_unchecked(value: Simd) -> Self { - Self(value) - } - - #[inline] - #[must_use = "method returns a new mask and does not mutate the original value"] - pub(crate) fn convert(self) -> Mask - where - U: MaskElement, - { - // Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type. - unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) } - } - - #[inline] - unsafe fn to_bitmask_impl(self) -> U - where - LaneCount: SupportedLaneCount, - { - let resized = self.to_int().resize::(T::FALSE); - - // Safety: `resized` is an integer vector with length M, which must match T - let bitmask: U = unsafe { core::intrinsics::simd::simd_bitmask(resized) }; - - // LLVM assumes bit order should match endianness - if cfg!(target_endian = "big") { - bitmask.reverse_bits(M) - } else { - bitmask - } - } - - #[inline] - unsafe fn from_bitmask_impl(bitmask: U) -> Self - where - LaneCount: SupportedLaneCount, - { - // LLVM assumes bit order should match endianness - let bitmask = if cfg!(target_endian = "big") { - bitmask.reverse_bits(M) - } else { - bitmask - }; - - // SAFETY: `mask` is the correct bitmask type for a u64 bitmask - let mask: Simd = unsafe { - core::intrinsics::simd::simd_select_bitmask( - bitmask, - Simd::::splat(T::TRUE), - Simd::::splat(T::FALSE), - ) - }; - - // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE` - unsafe { Self::from_int_unchecked(mask.resize::(T::FALSE)) } - } - - #[inline] - pub(crate) fn to_bitmask_integer(self) -> u64 { - // TODO modify simd_bitmask to zero-extend output, making this unnecessary - if N <= 8 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else if N <= 16 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else if N <= 32 { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() as u64 } - } else { - // Safety: bitmask matches length - unsafe { self.to_bitmask_impl::() } - } - } - - #[inline] - pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self { - // TODO modify simd_bitmask_select to truncate input, making this unnecessary - if N <= 8 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u8) } - } else if N <= 16 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u16) } - } else if N <= 32 { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask as u32) } - } else { - // Safety: bitmask matches length - unsafe { Self::from_bitmask_impl::(bitmask) } - } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn any(self) -> bool { - // Safety: use `self` as an integer vector - unsafe { core::intrinsics::simd::simd_reduce_any(self.to_int()) } - } - - #[inline] - #[must_use = "method returns a new bool and does not mutate the original value"] - pub(crate) fn all(self) -> bool { - // Safety: use `self` as an integer vector - unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) } - } -} - -impl From> for Simd -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - #[inline] - fn from(value: Mask) -> Self { - value.0 - } -} - -impl core::ops::BitAnd for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitand(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_and(self.0, rhs.0)) } - } -} - -impl core::ops::BitOr for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitor(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_or(self.0, rhs.0)) } - } -} - -impl core::ops::BitXor for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn bitxor(self, rhs: Self) -> Self { - // Safety: `self` is an integer vector - unsafe { Self(core::intrinsics::simd::simd_xor(self.0, rhs.0)) } - } -} - -impl core::ops::Not for Mask -where - T: MaskElement, - LaneCount: SupportedLaneCount, -{ - type Output = Self; - #[inline] - fn not(self) -> Self::Output { - Self::splat(true) ^ self - } -} diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs index 45b1a0f975141..5f635d80a178f 100644 --- a/crates/core_simd/src/mod.rs +++ b/crates/core_simd/src/mod.rs @@ -5,7 +5,6 @@ mod alias; mod cast; mod fmt; mod iter; -mod lane_count; mod masks; mod ops; mod select; @@ -27,8 +26,8 @@ pub mod simd { pub use crate::core_simd::alias::*; pub use crate::core_simd::cast::*; - pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount}; pub use crate::core_simd::masks::*; + pub use crate::core_simd::select::*; pub use crate::core_simd::swizzle::*; pub use crate::core_simd::to_bytes::ToBytes; pub use crate::core_simd::vector::*; diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index f36e8d01a73bb..eb6601f734831 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq}; +use crate::simd::{Select, Simd, SimdElement, cmp::SimdPartialEq}; use core::ops::{Add, Mul}; use core::ops::{BitAnd, BitOr, BitXor}; use core::ops::{Div, Rem, Sub}; @@ -12,7 +12,6 @@ mod unary; impl core::ops::Index for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { type Output = I::Output; @@ -25,7 +24,6 @@ where impl core::ops::IndexMut for Simd where T: SimdElement, - LaneCount: SupportedLaneCount, I: core::slice::SliceIndex<[T]>, { #[inline] @@ -130,7 +128,6 @@ macro_rules! for_base_types { impl $op for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = $out; diff --git a/crates/core_simd/src/ops/assign.rs b/crates/core_simd/src/ops/assign.rs index d21d867de26d6..c1830c35df778 100644 --- a/crates/core_simd/src/ops/assign.rs +++ b/crates/core_simd/src/ops/assign.rs @@ -21,7 +21,6 @@ macro_rules! assign_ops { where Self: $trait, T: SimdElement, - LaneCount: SupportedLaneCount, { #[inline] fn $assign_call(&mut self, rhs: U) { diff --git a/crates/core_simd/src/ops/deref.rs b/crates/core_simd/src/ops/deref.rs index 913cbbe977c46..360b83c403468 100644 --- a/crates/core_simd/src/ops/deref.rs +++ b/crates/core_simd/src/ops/deref.rs @@ -13,7 +13,6 @@ macro_rules! deref_lhs { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = Simd; @@ -33,7 +32,6 @@ macro_rules! deref_rhs { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = Simd; @@ -64,7 +62,6 @@ macro_rules! deref_ops { where T: SimdElement, $simd: $trait<$simd, Output = $simd>, - LaneCount: SupportedLaneCount, { type Output = $simd; diff --git a/crates/core_simd/src/ops/shift_scalar.rs b/crates/core_simd/src/ops/shift_scalar.rs index f5115a5a5e935..7ca83dc40f617 100644 --- a/crates/core_simd/src/ops/shift_scalar.rs +++ b/crates/core_simd/src/ops/shift_scalar.rs @@ -1,13 +1,11 @@ // Shift operations uniquely typically only have a scalar on the right-hand side. // Here, we implement shifts for scalar RHS arguments. -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; macro_rules! impl_splatted_shifts { { impl $trait:ident :: $trait_fn:ident for $ty:ty } => { impl core::ops::$trait<$ty> for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -17,8 +15,6 @@ macro_rules! impl_splatted_shifts { } impl core::ops::$trait<&$ty> for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Self; #[inline] @@ -28,8 +24,6 @@ macro_rules! impl_splatted_shifts { } impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; #[inline] @@ -39,8 +33,6 @@ macro_rules! impl_splatted_shifts { } impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Output = Simd<$ty, N>; #[inline] diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index 412a5b801171b..e1c06167f9790 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Simd, SimdElement}; use core::ops::{Neg, Not}; // unary ops macro_rules! neg { @@ -6,7 +6,6 @@ macro_rules! neg { $(impl Neg for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = Self; @@ -40,7 +39,6 @@ macro_rules! not { $(impl Not for Simd<$scalar, N> where $scalar: SimdElement, - LaneCount: SupportedLaneCount, { type Output = Self; diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs index f33aa261a928f..404f54d8f3827 100644 --- a/crates/core_simd/src/select.rs +++ b/crates/core_simd/src/select.rs @@ -1,54 +1,155 @@ -use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{FixEndianness, Mask, MaskElement, Simd, SimdElement}; -impl Mask +/// Choose elements from two vectors using a mask. +/// +/// For each element in the mask, choose the corresponding element from `true_values` if +/// that element mask is true, and `false_values` if that element mask is false. +/// +/// If the mask is `u64`, it's treated as a bitmask with the least significant bit +/// corresponding to the first element. +/// +/// # Examples +/// +/// ## Selecting values from `Simd` +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Simd, Mask, Select}; +/// let a = Simd::from_array([0, 1, 2, 3]); +/// let b = Simd::from_array([4, 5, 6, 7]); +/// let mask = Mask::::from_array([true, false, false, true]); +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [0, 5, 6, 3]); +/// ``` +/// +/// ## Selecting values from `Mask` +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Mask, Select}; +/// let a = Mask::::from_array([true, true, false, false]); +/// let b = Mask::::from_array([false, false, true, true]); +/// let mask = Mask::::from_array([true, false, false, true]); +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [true, false, true, false]); +/// ``` +/// +/// ## Selecting with a bitmask +/// ``` +/// # #![feature(portable_simd)] +/// # #[cfg(feature = "as_crate")] use core_simd::simd; +/// # #[cfg(not(feature = "as_crate"))] use core::simd; +/// # use simd::{Mask, Select}; +/// let a = Mask::::from_array([true, true, false, false]); +/// let b = Mask::::from_array([false, false, true, true]); +/// let mask = 0b1001; +/// let c = mask.select(a, b); +/// assert_eq!(c.to_array(), [true, false, true, false]); +/// ``` +pub trait Select { + /// Choose elements + fn select(self, true_values: T, false_values: T) -> T; +} + +impl Select> for Mask +where + T: SimdElement, + U: MaskElement, +{ + #[inline] + fn select(self, true_values: Simd, false_values: Simd) -> Simd { + // Safety: + // simd_as between masks is always safe (they're vectors of ints). + // simd_select uses a mask that matches the width and number of elements + unsafe { + let mask: Simd = core::intrinsics::simd::simd_as(self.to_simd()); + core::intrinsics::simd::simd_select(mask, true_values, false_values) + } + } +} + +impl Select> for u64 +where + T: SimdElement, +{ + #[inline] + fn select(self, true_values: Simd, false_values: Simd) -> Simd { + const { + assert!(N <= 64, "number of elements can't be greater than 64"); + } + + #[inline] + unsafe fn select_impl( + bitmask: U, + true_values: Simd, + false_values: Simd, + ) -> Simd + where + T: SimdElement, + { + let default = true_values[0]; + let true_values = true_values.resize::(default); + let false_values = false_values.resize::(default); + + // LLVM assumes bit order should match endianness + let bitmask = bitmask.fix_endianness(); + + // Safety: the caller guarantees that the size of U matches M + let selected = unsafe { + core::intrinsics::simd::simd_select_bitmask(bitmask, true_values, false_values) + }; + + selected.resize::(default) + } + + // TODO modify simd_bitmask_select to truncate input, making this unnecessary + if N <= 8 { + let bitmask = self as u8; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else if N <= 16 { + let bitmask = self as u16; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else if N <= 32 { + let bitmask = self as u32; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } else { + let bitmask = self; + // Safety: bitmask matches length + unsafe { select_impl::(bitmask, true_values, false_values) } + } + } +} + +impl Select> for Mask where T: MaskElement, - LaneCount: SupportedLaneCount, + U: MaskElement, { - /// Choose elements from two vectors. - /// - /// For each element in the mask, choose the corresponding element from `true_values` if - /// that element mask is true, and `false_values` if that element mask is false. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::{Simd, Mask}; - /// let a = Simd::from_array([0, 1, 2, 3]); - /// let b = Simd::from_array([4, 5, 6, 7]); - /// let mask = Mask::from_array([true, false, false, true]); - /// let c = mask.select(a, b); - /// assert_eq!(c.to_array(), [0, 5, 6, 3]); - /// ``` #[inline] - #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn select(self, true_values: Simd, false_values: Simd) -> Simd - where - U: SimdElement, - { - // Safety: The mask has been cast to a vector of integers, - // and the operands to select between are vectors of the same type and length. - unsafe { core::intrinsics::simd::simd_select(self.to_int(), true_values, false_values) } + fn select(self, true_values: Mask, false_values: Mask) -> Mask { + let selected: Simd = + Select::select(self, true_values.to_simd(), false_values.to_simd()); + + // Safety: all values come from masks + unsafe { Mask::from_simd_unchecked(selected) } } +} - /// Choose elements from two masks. - /// - /// For each element in the mask, choose the corresponding element from `true_values` if - /// that element mask is true, and `false_values` if that element mask is false. - /// - /// # Examples - /// ``` - /// # #![feature(portable_simd)] - /// # use core::simd::Mask; - /// let a = Mask::::from_array([true, true, false, false]); - /// let b = Mask::::from_array([false, false, true, true]); - /// let mask = Mask::::from_array([true, false, false, true]); - /// let c = mask.select_mask(a, b); - /// assert_eq!(c.to_array(), [true, false, true, false]); - /// ``` +impl Select> for u64 +where + T: MaskElement, +{ #[inline] - #[must_use = "method returns a new mask and does not mutate the original inputs"] - pub fn select_mask(self, true_values: Self, false_values: Self) -> Self { - self & true_values | !self & false_values + fn select(self, true_values: Mask, false_values: Mask) -> Mask { + let selected: Simd = + Select::select(self, true_values.to_simd(), false_values.to_simd()); + + // Safety: all values come from masks + unsafe { Mask::from_simd_unchecked(selected) } } } diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index 2312ba401fa78..d553d6c040c91 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -1,5 +1,5 @@ use crate::simd::{ - LaneCount, Mask, Simd, SimdElement, SupportedLaneCount, + Mask, Simd, SimdElement, ptr::{SimdConstPtr, SimdMutPtr}, }; @@ -21,8 +21,6 @@ macro_rules! impl_number { { $($number:ty),* } => { $( impl SimdPartialEq for Simd<$number, N> - where - LaneCount: SupportedLaneCount, { type Mask = Mask<<$number as SimdElement>::Mask, N>; @@ -30,14 +28,14 @@ macro_rules! impl_number { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_eq(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_eq(self, other)) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ne(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ne(self, other)) } } } )* @@ -50,8 +48,6 @@ macro_rules! impl_mask { { $($integer:ty),* } => { $( impl SimdPartialEq for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { type Mask = Self; @@ -59,14 +55,14 @@ macro_rules! impl_mask { fn simd_eq(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_eq(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_eq(self.to_simd(), other.to_simd())) } } #[inline] fn simd_ne(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ne(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ne(self.to_simd(), other.to_simd())) } } } )* @@ -75,10 +71,7 @@ macro_rules! impl_mask { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialEq for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialEq for Simd<*const T, N> { type Mask = Mask; #[inline] @@ -92,10 +85,7 @@ where } } -impl SimdPartialEq for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialEq for Simd<*mut T, N> { type Mask = Mask; #[inline] diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index e813e7613032c..5672fbbf54caa 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -1,5 +1,5 @@ use crate::simd::{ - LaneCount, Mask, Simd, SupportedLaneCount, + Mask, Select, Simd, cmp::SimdPartialEq, ptr::{SimdConstPtr, SimdMutPtr}, }; @@ -49,41 +49,37 @@ macro_rules! impl_integer { { $($integer:ty),* } => { $( impl SimdPartialOrd for Simd<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } impl SimdOrd for Simd<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { @@ -115,35 +111,33 @@ macro_rules! impl_float { { $($float:ty),* } => { $( impl SimdPartialOrd for Simd<$float, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_lt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_lt(self, other)) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_le(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_le(self, other)) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_gt(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_gt(self, other)) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Mask::from_int_unchecked(core::intrinsics::simd::simd_ge(self, other)) } + unsafe { Mask::from_simd_unchecked(core::intrinsics::simd::simd_ge(self, other)) } } } )* @@ -156,50 +150,46 @@ macro_rules! impl_mask { { $($integer:ty),* } => { $( impl SimdPartialOrd for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_lt(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_lt(self.to_simd(), other.to_simd())) } } #[inline] fn simd_le(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_le(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_le(self.to_simd(), other.to_simd())) } } #[inline] fn simd_gt(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_gt(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_gt(self.to_simd(), other.to_simd())) } } #[inline] fn simd_ge(self, other: Self) -> Self::Mask { // Safety: `self` is a vector, and the result of the comparison // is always a valid mask. - unsafe { Self::from_int_unchecked(core::intrinsics::simd::simd_ge(self.to_int(), other.to_int())) } + unsafe { Self::from_simd_unchecked(core::intrinsics::simd::simd_ge(self.to_simd(), other.to_simd())) } } } impl SimdOrd for Mask<$integer, N> - where - LaneCount: SupportedLaneCount, { #[inline] fn simd_max(self, other: Self) -> Self { - self.simd_gt(other).select_mask(other, self) + self.simd_gt(other).select(other, self) } #[inline] fn simd_min(self, other: Self) -> Self { - self.simd_lt(other).select_mask(other, self) + self.simd_lt(other).select(other, self) } #[inline] @@ -218,10 +208,7 @@ macro_rules! impl_mask { impl_mask! { i8, i16, i32, i64, isize } -impl SimdPartialOrd for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialOrd for Simd<*const T, N> { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { self.addr().simd_lt(other.addr()) @@ -243,10 +230,7 @@ where } } -impl SimdOrd for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdOrd for Simd<*const T, N> { #[inline] fn simd_max(self, other: Self) -> Self { self.simd_lt(other).select(other, self) @@ -268,10 +252,7 @@ where } } -impl SimdPartialOrd for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdPartialOrd for Simd<*mut T, N> { #[inline] fn simd_lt(self, other: Self) -> Self::Mask { self.addr().simd_lt(other.addr()) @@ -293,10 +274,7 @@ where } } -impl SimdOrd for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdOrd for Simd<*mut T, N> { #[inline] fn simd_max(self, other: Self) -> Self { self.simd_lt(other).select(other, self) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index b5972c47373bb..efd7c2469512b 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -1,6 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, + Mask, Select, Simd, SimdCast, SimdElement, cmp::{SimdPartialEq, SimdPartialOrd}, }; @@ -240,15 +240,9 @@ pub trait SimdFloat: Copy + Sealed { macro_rules! impl_trait { { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} impl SimdFloat for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>; type Scalar = $ty; diff --git a/crates/core_simd/src/simd/num/int.rs b/crates/core_simd/src/simd/num/int.rs index e7253313f036c..eee54d3968808 100644 --- a/crates/core_simd/src/simd/num/int.rs +++ b/crates/core_simd/src/simd/num/int.rs @@ -1,7 +1,6 @@ use super::sealed::Sealed; use crate::simd::{ - LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd, - cmp::SimdPartialOrd, num::SimdUint, + Mask, Select, Simd, SimdCast, SimdElement, cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, }; /// Operations on SIMD vectors of signed integers. @@ -242,16 +241,9 @@ pub trait SimdInt: Copy + Sealed { macro_rules! impl_trait { { $($ty:ident ($unsigned:ident)),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} - impl SimdInt for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { + impl SimdInt for Simd<$ty, N> { type Mask = Mask<<$ty as SimdElement>::Mask, N>; type Scalar = $ty; type Unsigned = Simd<$unsigned, N>; diff --git a/crates/core_simd/src/simd/num/uint.rs b/crates/core_simd/src/simd/num/uint.rs index e3ba8658bd803..606107a1f06f8 100644 --- a/crates/core_simd/src/simd/num/uint.rs +++ b/crates/core_simd/src/simd/num/uint.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount, cmp::SimdOrd}; +use crate::simd::{Simd, SimdCast, SimdElement, cmp::SimdOrd}; /// Operations on SIMD vectors of unsigned integers. pub trait SimdUint: Copy + Sealed { @@ -124,15 +124,9 @@ pub trait SimdUint: Copy + Sealed { macro_rules! impl_trait { { $($ty:ident ($signed:ident)),* } => { $( - impl Sealed for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, - { - } + impl Sealed for Simd<$ty, N> {} impl SimdUint for Simd<$ty, N> - where - LaneCount: SupportedLaneCount, { type Scalar = $ty; type Cast = Simd; diff --git a/crates/core_simd/src/simd/ptr/const_ptr.rs b/crates/core_simd/src/simd/ptr/const_ptr.rs index 36452e7ae920d..7ef9dc21373ec 100644 --- a/crates/core_simd/src/simd/ptr/const_ptr.rs +++ b/crates/core_simd/src/simd/ptr/const_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint}; +use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint}; /// Operations on SIMD vectors of constant pointers. pub trait SimdConstPtr: Copy + Sealed { @@ -88,12 +88,9 @@ pub trait SimdConstPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*const T, N> where LaneCount: SupportedLaneCount {} +impl Sealed for Simd<*const T, N> {} -impl SimdConstPtr for Simd<*const T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdConstPtr for Simd<*const T, N> { type Usize = Simd; type Isize = Simd; type CastPtr = Simd<*const U, N>; diff --git a/crates/core_simd/src/simd/ptr/mut_ptr.rs b/crates/core_simd/src/simd/ptr/mut_ptr.rs index c644f390c20a5..3b9b75ddf5660 100644 --- a/crates/core_simd/src/simd/ptr/mut_ptr.rs +++ b/crates/core_simd/src/simd/ptr/mut_ptr.rs @@ -1,5 +1,5 @@ use super::sealed::Sealed; -use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount, cmp::SimdPartialEq, num::SimdUint}; +use crate::simd::{Mask, Simd, cmp::SimdPartialEq, num::SimdUint}; /// Operations on SIMD vectors of mutable pointers. pub trait SimdMutPtr: Copy + Sealed { @@ -85,12 +85,9 @@ pub trait SimdMutPtr: Copy + Sealed { fn wrapping_sub(self, count: Self::Usize) -> Self; } -impl Sealed for Simd<*mut T, N> where LaneCount: SupportedLaneCount {} +impl Sealed for Simd<*mut T, N> {} -impl SimdMutPtr for Simd<*mut T, N> -where - LaneCount: SupportedLaneCount, -{ +impl SimdMutPtr for Simd<*mut T, N> { type Usize = Simd; type Isize = Simd; type CastPtr = Simd<*mut U, N>; diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs index dbdd6ef40eba7..02dcd71356dd6 100644 --- a/crates/core_simd/src/swizzle.rs +++ b/crates/core_simd/src/swizzle.rs @@ -1,4 +1,4 @@ -use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount}; +use crate::simd::{Mask, MaskElement, Simd, SimdElement}; /// Constructs a new SIMD vector by copying elements from selected elements in other vectors. /// @@ -82,8 +82,6 @@ pub trait Swizzle { fn swizzle(vector: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // Safety: `vector` is a vector, and the index is a const vector of u32. unsafe { @@ -122,8 +120,6 @@ pub trait Swizzle { fn concat_swizzle(first: Simd, second: Simd) -> Simd where T: SimdElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // Safety: `first` and `second` are vectors, and the index is a const vector of u32. unsafe { @@ -161,11 +157,9 @@ pub trait Swizzle { fn swizzle_mask(mask: Mask) -> Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // SAFETY: all elements of this mask come from another mask - unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) } + unsafe { Mask::from_simd_unchecked(Self::swizzle(mask.to_simd())) } } /// Creates a new mask from the elements of `first` and `second`. @@ -177,18 +171,17 @@ pub trait Swizzle { fn concat_swizzle_mask(first: Mask, second: Mask) -> Mask where T: MaskElement, - LaneCount: SupportedLaneCount, - LaneCount: SupportedLaneCount, { // SAFETY: all elements of this mask come from another mask - unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) } + unsafe { + Mask::from_simd_unchecked(Self::concat_swizzle(first.to_simd(), second.to_simd())) + } } } impl Simd where T: SimdElement, - LaneCount: SupportedLaneCount, { /// Reverse the order of the elements in the vector. #[inline] @@ -462,10 +455,7 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn resize(self, value: T) -> Simd - where - LaneCount: SupportedLaneCount, - { + pub fn resize(self, value: T) -> Simd { struct Resize; impl Swizzle for Resize { const INDEX: [usize; M] = const { @@ -493,10 +483,7 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn extract(self) -> Simd - where - LaneCount: SupportedLaneCount, - { + pub fn extract(self) -> Simd { struct Extract; impl Swizzle for Extract { const INDEX: [usize; LEN] = const { @@ -517,14 +504,13 @@ where impl Mask where T: MaskElement, - LaneCount: SupportedLaneCount, { /// Reverse the order of the elements in the mask. #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn reverse(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().reverse()) } + unsafe { Self::from_simd_unchecked(self.to_simd().reverse()) } } /// Rotates the mask such that the first `OFFSET` elements of the slice move to the end @@ -534,7 +520,7 @@ where #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_elements_left(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::()) } + unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_left::()) } } /// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to @@ -544,7 +530,7 @@ where #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn rotate_elements_right(self) -> Self { // Safety: swizzles are safe for masks - unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::()) } + unsafe { Self::from_simd_unchecked(self.to_simd().rotate_elements_right::()) } } /// Shifts the mask elements to the left by `OFFSET`, filling in with @@ -554,7 +540,7 @@ where pub fn shift_elements_left(self, padding: bool) -> Self { // Safety: swizzles are safe for masks unsafe { - Self::from_int_unchecked(self.to_int().shift_elements_left::(if padding { + Self::from_simd_unchecked(self.to_simd().shift_elements_left::(if padding { T::TRUE } else { T::FALSE @@ -569,7 +555,7 @@ where pub fn shift_elements_right(self, padding: bool) -> Self { // Safety: swizzles are safe for masks unsafe { - Self::from_int_unchecked(self.to_int().shift_elements_right::(if padding { + Self::from_simd_unchecked(self.to_simd().shift_elements_right::(if padding { T::TRUE } else { T::FALSE @@ -598,9 +584,9 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn interleave(self, other: Self) -> (Self, Self) { - let (lo, hi) = self.to_int().interleave(other.to_int()); + let (lo, hi) = self.to_simd().interleave(other.to_simd()); // Safety: swizzles are safe for masks - unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) } + unsafe { (Self::from_simd_unchecked(lo), Self::from_simd_unchecked(hi)) } } /// Deinterleave two masks. @@ -627,12 +613,12 @@ where #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] pub fn deinterleave(self, other: Self) -> (Self, Self) { - let (even, odd) = self.to_int().deinterleave(other.to_int()); + let (even, odd) = self.to_simd().deinterleave(other.to_simd()); // Safety: swizzles are safe for masks unsafe { ( - Self::from_int_unchecked(even), - Self::from_int_unchecked(odd), + Self::from_simd_unchecked(even), + Self::from_simd_unchecked(odd), ) } } @@ -653,13 +639,10 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn resize(self, value: bool) -> Mask - where - LaneCount: SupportedLaneCount, - { + pub fn resize(self, value: bool) -> Mask { // Safety: swizzles are safe for masks unsafe { - Mask::::from_int_unchecked(self.to_int().resize::(if value { + Mask::::from_simd_unchecked(self.to_simd().resize::(if value { T::TRUE } else { T::FALSE @@ -679,11 +662,8 @@ where /// ``` #[inline] #[must_use = "method returns a new vector and does not mutate the original inputs"] - pub fn extract(self) -> Mask - where - LaneCount: SupportedLaneCount, - { + pub fn extract(self) -> Mask { // Safety: swizzles are safe for masks - unsafe { Mask::::from_int_unchecked(self.to_int().extract::()) } + unsafe { Mask::::from_simd_unchecked(self.to_simd().extract::()) } } } diff --git a/crates/core_simd/src/swizzle_dyn.rs b/crates/core_simd/src/swizzle_dyn.rs index 773bd028bae09..ae0b174973da7 100644 --- a/crates/core_simd/src/swizzle_dyn.rs +++ b/crates/core_simd/src/swizzle_dyn.rs @@ -1,10 +1,7 @@ -use crate::simd::{LaneCount, Simd, SupportedLaneCount}; +use crate::simd::Simd; use core::mem; -impl Simd -where - LaneCount: SupportedLaneCount, -{ +impl Simd { /// Swizzle a vector of bytes according to the index vector. /// Indices within range select the appropriate byte. /// Indices "out of bounds" instead select 0. @@ -139,7 +136,7 @@ unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd, idxs: Simd) -> S #[inline] #[allow(clippy::let_and_return)] unsafe fn avx2_pshufb(bytes: Simd, idxs: Simd) -> Simd { - use crate::simd::cmp::SimdPartialOrd; + use crate::simd::{Select, cmp::SimdPartialOrd}; #[cfg(target_arch = "x86")] use core::arch::x86; #[cfg(target_arch = "x86_64")] @@ -184,10 +181,7 @@ unsafe fn transize( f: unsafe fn(T, T) -> T, a: Simd, b: Simd, -) -> Simd -where - LaneCount: SupportedLaneCount, -{ +) -> Simd { // SAFETY: Same obligation to use this function as to use mem::transmute_copy. unsafe { mem::transmute_copy(&f(mem::transmute_copy(&a), mem::transmute_copy(&b))) } } @@ -196,11 +190,8 @@ where #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[allow(unused)] #[inline(always)] -fn zeroing_idxs(idxs: Simd) -> Simd -where - LaneCount: SupportedLaneCount, -{ - use crate::simd::cmp::SimdPartialOrd; +fn zeroing_idxs(idxs: Simd) -> Simd { + use crate::simd::{Select, cmp::SimdPartialOrd}; idxs.simd_lt(Simd::splat(N as u8)) .select(idxs, Simd::splat(u8::MAX)) } diff --git a/crates/core_simd/src/to_bytes.rs b/crates/core_simd/src/to_bytes.rs index fee2cc06c5b09..1fd285e457db8 100644 --- a/crates/core_simd/src/to_bytes.rs +++ b/crates/core_simd/src/to_bytes.rs @@ -1,12 +1,12 @@ use crate::simd::{ - LaneCount, Simd, SimdElement, SupportedLaneCount, + Simd, SimdElement, num::{SimdFloat, SimdInt, SimdUint}, }; mod sealed { use super::*; pub trait Sealed {} - impl Sealed for Simd where LaneCount: SupportedLaneCount {} + impl Sealed for Simd {} } use sealed::Sealed; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index f40031f8c4da7..5b3a689f3611b 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,5 +1,7 @@ +use core::intrinsics::simd::SimdAlign; + use crate::simd::{ - LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, + Mask, MaskElement, cmp::SimdPartialOrd, num::SimdUint, ptr::{SimdConstPtr, SimdMutPtr}, @@ -51,6 +53,8 @@ use crate::simd::{ /// Thus it is sound to [`transmute`] `Simd` to `[T; N]` and should optimize to "zero cost", /// but the reverse transmutation may require a copy the compiler cannot simply elide. /// +/// `N` cannot be 0 and may be at most 64. This limit may be increased in the future. +/// /// # ABI "Features" /// Due to Rust's safety guarantees, `Simd` is currently passed and returned via memory, /// not SIMD registers, except as an optimization. Using `#[inline]` on functions that accept @@ -100,14 +104,13 @@ use crate::simd::{ // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also // causes rustc to emit illegal LLVM IR in some cases. #[repr(simd, packed)] +#[rustc_simd_monomorphize_lane_limit = "64"] pub struct Simd([T; N]) where - LaneCount: SupportedLaneCount, T: SimdElement; impl Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { /// Number of elements in this vector. @@ -146,30 +149,8 @@ where #[inline] #[rustc_const_unstable(feature = "portable_simd", issue = "86656")] pub const fn splat(value: T) -> Self { - const fn splat_const(value: T) -> Simd - where - T: SimdElement, - LaneCount: SupportedLaneCount, - { - Simd::from_array([value; N]) - } - - fn splat_rt(value: T) -> Simd - where - T: SimdElement, - LaneCount: SupportedLaneCount, - { - // This is preferred over `[value; N]`, since it's explicitly a splat: - // https://github.com/rust-lang/rust/issues/97804 - struct Splat; - impl Swizzle for Splat { - const INDEX: [usize; N] = [0; N]; - } - - Splat::swizzle::(Simd::::from([value])) - } - - core::intrinsics::const_eval_select((value,), splat_const, splat_rt) + // SAFETY: T is a SimdElement, and the item type of Self. + unsafe { core::intrinsics::simd::simd_splat(value) } } /// Returns an array reference containing the entire SIMD vector. @@ -195,7 +176,7 @@ where /// Returns a mutable array reference containing the entire SIMD vector. #[inline] - pub fn as_mut_array(&mut self) -> &mut [T; N] { + pub const fn as_mut_array(&mut self) -> &mut [T; N] { // SAFETY: `Simd` is just an overaligned `[T; N]` with // potential padding at the end, so pointer casting to a // `&mut [T; N]` is safe. @@ -324,7 +305,7 @@ where /// ``` #[inline] #[track_caller] - pub fn copy_to_slice(self, slice: &mut [T]) { + pub const fn copy_to_slice(self, slice: &mut [T]) { assert!( slice.len() >= Self::LEN, "slice length must be at least the number of elements" @@ -465,7 +446,7 @@ where /// value from `or` is passed through. /// /// # Safety - /// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`. + /// Enabled `ptr` elements must be safe to read as if by `core::ptr::read`. #[must_use] #[inline] pub unsafe fn load_select_ptr( @@ -475,12 +456,11 @@ where ) -> Self { // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. unsafe { - core::intrinsics::simd::simd_masked_load::< - _, - _, - _, - { core::intrinsics::simd::SimdAlign::Element }, - >(enable.to_int(), ptr, or) + core::intrinsics::simd::simd_masked_load::<_, _, _, { SimdAlign::Element }>( + enable.to_simd(), + ptr, + or, + ) } } @@ -659,7 +639,7 @@ where or: Self, ) -> Self { // Safety: The caller is responsible for upholding all invariants - unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_simd()) } } /// Conditionally write contiguous elements to `slice`. The `enable` mask controls @@ -731,12 +711,11 @@ where pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) { // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. unsafe { - core::intrinsics::simd::simd_masked_store::< - _, - _, - _, - { core::intrinsics::simd::SimdAlign::Element }, - >(enable.to_int(), ptr, self) + core::intrinsics::simd::simd_masked_store::<_, _, _, { SimdAlign::Element }>( + enable.to_simd(), + ptr, + self, + ) } } @@ -896,20 +875,14 @@ where #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces pub unsafe fn scatter_select_ptr(self, dest: Simd<*mut T, N>, enable: Mask) { // Safety: The caller is responsible for upholding all invariants - unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_int()) } + unsafe { core::intrinsics::simd::simd_scatter(self, dest, enable.to_simd()) } } } -impl Copy for Simd -where - LaneCount: SupportedLaneCount, - T: SimdElement, -{ -} +impl Copy for Simd where T: SimdElement {} impl Clone for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -920,7 +893,6 @@ where impl Default for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + Default, { #[inline] @@ -931,7 +903,6 @@ where impl PartialEq for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + PartialEq, { #[inline] @@ -940,7 +911,7 @@ where let mask = unsafe { let tfvec: Simd<::Mask, N> = core::intrinsics::simd::simd_eq(*self, *other); - Mask::from_int_unchecked(tfvec) + Mask::from_simd_unchecked(tfvec) }; // Two vectors are equal if all elements are equal when compared elementwise @@ -954,7 +925,7 @@ where let mask = unsafe { let tfvec: Simd<::Mask, N> = core::intrinsics::simd::simd_ne(*self, *other); - Mask::from_int_unchecked(tfvec) + Mask::from_simd_unchecked(tfvec) }; // Two vectors are non-equal if any elements are non-equal when compared elementwise @@ -965,7 +936,6 @@ where /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl PartialOrd for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + PartialOrd, { #[inline] @@ -975,17 +945,11 @@ where } } -impl Eq for Simd -where - LaneCount: SupportedLaneCount, - T: SimdElement + Eq, -{ -} +impl Eq for Simd where T: SimdElement + Eq {} /// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead. impl Ord for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + Ord, { #[inline] @@ -997,7 +961,6 @@ where impl core::hash::Hash for Simd where - LaneCount: SupportedLaneCount, T: SimdElement + core::hash::Hash, { #[inline] @@ -1012,7 +975,6 @@ where // array references impl AsRef<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1023,7 +985,6 @@ where impl AsMut<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1035,7 +996,6 @@ where // slice references impl AsRef<[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1046,7 +1006,6 @@ where impl AsMut<[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1058,7 +1017,6 @@ where // vector/array conversion impl From<[T; N]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1069,7 +1027,6 @@ where impl From> for [T; N] where - LaneCount: SupportedLaneCount, T: SimdElement, { #[inline] @@ -1080,7 +1037,6 @@ where impl TryFrom<&[T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; @@ -1093,7 +1049,6 @@ where impl TryFrom<&mut [T]> for Simd where - LaneCount: SupportedLaneCount, T: SimdElement, { type Error = core::array::TryFromSliceError; @@ -1231,10 +1186,7 @@ where } #[inline] -fn lane_indices() -> Simd -where - LaneCount: SupportedLaneCount, -{ +fn lane_indices() -> Simd { #![allow(clippy::needless_range_loop)] let mut index = [0; N]; for i in 0..N { @@ -1246,7 +1198,6 @@ where #[inline] fn mask_up_to(len: usize) -> Mask where - LaneCount: SupportedLaneCount, M: MaskElement, { let index = lane_indices::(); diff --git a/crates/core_simd/src/vendor/loongarch64.rs b/crates/core_simd/src/vendor/loongarch64.rs index 1290bc166b2b8..1f84cdb971ecb 100644 --- a/crates/core_simd/src/vendor/loongarch64.rs +++ b/crates/core_simd/src/vendor/loongarch64.rs @@ -1,31 +1,26 @@ use crate::simd::*; use core::arch::loongarch64::*; -from_transmute! { unsafe u8x16 => v16u8 } -from_transmute! { unsafe u8x32 => v32u8 } -from_transmute! { unsafe i8x16 => v16i8 } -from_transmute! { unsafe i8x32 => v32i8 } +from_transmute! { unsafe u8x16 => m128i } +from_transmute! { unsafe u8x32 => m256i } +from_transmute! { unsafe i8x16 => m128i } +from_transmute! { unsafe i8x32 => m256i } -from_transmute! { unsafe u16x8 => v8u16 } -from_transmute! { unsafe u16x16 => v16u16 } -from_transmute! { unsafe i16x8 => v8i16 } -from_transmute! { unsafe i16x16 => v16i16 } +from_transmute! { unsafe u16x8 => m128i } +from_transmute! { unsafe u16x16 => m256i } +from_transmute! { unsafe i16x8 => m128i } +from_transmute! { unsafe i16x16 => m256i } -from_transmute! { unsafe u32x4 => v4u32 } -from_transmute! { unsafe u32x8 => v8u32 } -from_transmute! { unsafe i32x4 => v4i32 } -from_transmute! { unsafe i32x8 => v8i32 } -from_transmute! { unsafe f32x4 => v4f32 } -from_transmute! { unsafe f32x8 => v8f32 } +from_transmute! { unsafe u32x4 => m128i } +from_transmute! { unsafe u32x8 => m256i } +from_transmute! { unsafe i32x4 => m128i } +from_transmute! { unsafe i32x8 => m256i } +from_transmute! { unsafe f32x4 => m128 } +from_transmute! { unsafe f32x8 => m256 } -from_transmute! { unsafe u64x2 => v2u64 } -from_transmute! { unsafe u64x4 => v4u64 } -from_transmute! { unsafe i64x2 => v2i64 } -from_transmute! { unsafe i64x4 => v4i64 } -from_transmute! { unsafe f64x2 => v2f64 } -from_transmute! { unsafe f64x4 => v4f64 } - -from_transmute! { unsafe usizex2 => v2u64 } -from_transmute! { unsafe usizex4 => v4u64 } -from_transmute! { unsafe isizex2 => v2i64 } -from_transmute! { unsafe isizex4 => v4i64 } +from_transmute! { unsafe u64x2 => m128i } +from_transmute! { unsafe u64x4 => m256i } +from_transmute! { unsafe i64x2 => m128i } +from_transmute! { unsafe i64x4 => m256i } +from_transmute! { unsafe f64x2 => m128d } +from_transmute! { unsafe f64x4 => m256d } diff --git a/crates/core_simd/src/vendor/wasm32.rs b/crates/core_simd/src/vendor/wasm32.rs index ef3baf885b0fb..1fdb2bc86d346 100644 --- a/crates/core_simd/src/vendor/wasm32.rs +++ b/crates/core_simd/src/vendor/wasm32.rs @@ -14,17 +14,3 @@ from_transmute! { unsafe f32x4 => v128 } from_transmute! { unsafe u64x2 => v128 } from_transmute! { unsafe i64x2 => v128 } from_transmute! { unsafe f64x2 => v128 } - -#[cfg(target_pointer_width = "32")] -mod p32 { - use super::*; - from_transmute! { unsafe usizex4 => v128 } - from_transmute! { unsafe isizex4 => v128 } -} - -#[cfg(target_pointer_width = "64")] -mod p64 { - use super::*; - from_transmute! { unsafe usizex2 => v128 } - from_transmute! { unsafe isizex2 => v128 } -} diff --git a/crates/core_simd/src/vendor/x86.rs b/crates/core_simd/src/vendor/x86.rs index 66aaf90eef597..eae42e6fd0d0a 100644 --- a/crates/core_simd/src/vendor/x86.rs +++ b/crates/core_simd/src/vendor/x86.rs @@ -39,25 +39,3 @@ from_transmute! { unsafe i64x8 => __m512i } from_transmute! { unsafe f64x2 => __m128d } from_transmute! { unsafe f64x4 => __m256d } from_transmute! { unsafe f64x8 => __m512d } - -#[cfg(target_pointer_width = "32")] -mod p32 { - use super::*; - from_transmute! { unsafe usizex4 => __m128i } - from_transmute! { unsafe usizex8 => __m256i } - from_transmute! { unsafe Simd => __m512i } - from_transmute! { unsafe isizex4 => __m128i } - from_transmute! { unsafe isizex8 => __m256i } - from_transmute! { unsafe Simd => __m512i } -} - -#[cfg(target_pointer_width = "64")] -mod p64 { - use super::*; - from_transmute! { unsafe usizex2 => __m128i } - from_transmute! { unsafe usizex4 => __m256i } - from_transmute! { unsafe usizex8 => __m512i } - from_transmute! { unsafe isizex2 => __m128i } - from_transmute! { unsafe isizex4 => __m256i } - from_transmute! { unsafe isizex8 => __m512i } -} diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 48786d02440b3..53fb2367b6055 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -65,9 +65,9 @@ macro_rules! test_mask_api { fn roundtrip_int_conversion() { let values = [true, false, false, true, false, false, true, false]; let mask = Mask::<$type, 8>::from_array(values); - let int = mask.to_int(); + let int = mask.to_simd(); assert_eq!(int.to_array(), [-1, 0, 0, -1, 0, 0, -1, 0]); - assert_eq!(Mask::<$type, 8>::from_int(int), mask); + assert_eq!(Mask::<$type, 8>::from_simd(int), mask); } #[test] diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index 148aa5f9f1771..b269efc9b1d76 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -11,7 +11,7 @@ use core_simd::simd; use core::intrinsics::simd as intrinsics; -use simd::{LaneCount, Simd, SupportedLaneCount}; +use simd::Simd; #[cfg(feature = "as_crate")] mod experimental { @@ -66,28 +66,43 @@ pub trait StdFloat: Sealed + Sized { /// Produces a vector where every element has the sine of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn sin(self) -> Self; + fn sin(self) -> Self { + unsafe { intrinsics::simd_fsin(self) } + } /// Produces a vector where every element has the cosine of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn cos(self) -> Self; + fn cos(self) -> Self { + unsafe { intrinsics::simd_fcos(self) } + } /// Produces a vector where every element has the exponential (base e) of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp(self) -> Self; + fn exp(self) -> Self { + unsafe { intrinsics::simd_fexp(self) } + } /// Produces a vector where every element has the exponential (base 2) of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn exp2(self) -> Self; + fn exp2(self) -> Self { + unsafe { intrinsics::simd_fexp2(self) } + } /// Produces a vector where every element has the natural logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn ln(self) -> Self; + fn ln(self) -> Self { + unsafe { intrinsics::simd_flog(self) } + } /// Produces a vector where every element has the logarithm with respect to an arbitrary /// in the equivalently-indexed elements in `self` and `base`. @@ -99,13 +114,19 @@ pub trait StdFloat: Sealed + Sized { /// Produces a vector where every element has the base-2 logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log2(self) -> Self; + fn log2(self) -> Self { + unsafe { intrinsics::simd_flog2(self) } + } /// Produces a vector where every element has the base-10 logarithm of the value /// in the equivalently-indexed element in `self`. + #[inline] #[must_use = "method returns a new vector and does not mutate the original value"] - fn log10(self) -> Self; + fn log10(self) -> Self { + unsafe { intrinsics::simd_flog10(self) } + } /// Returns the smallest integer greater than or equal to each element. #[must_use = "method returns a new vector and does not mutate the original value"] @@ -140,68 +161,19 @@ pub trait StdFloat: Sealed + Sized { fn fract(self) -> Self; } -impl Sealed for Simd where LaneCount: SupportedLaneCount {} -impl Sealed for Simd where LaneCount: SupportedLaneCount {} - -macro_rules! impl_float { - { - $($fn:ident: $intrinsic:ident,)* - } => { - impl StdFloat for Simd - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - $( - #[inline] - fn $fn(self) -> Self { - unsafe { intrinsics::$intrinsic(self) } - } - )* - } - - impl StdFloat for Simd - where - LaneCount: SupportedLaneCount, - { - #[inline] - fn fract(self) -> Self { - self - self.trunc() - } - - $( - #[inline] - fn $fn(self) -> Self { - // https://github.com/llvm/llvm-project/issues/83729 - #[cfg(target_arch = "aarch64")] - { - let mut ln = Self::splat(0f64); - for i in 0..N { - ln[i] = self[i].$fn() - } - ln - } - - #[cfg(not(target_arch = "aarch64"))] - { - unsafe { intrinsics::$intrinsic(self) } - } - } - )* - } +impl Sealed for Simd {} +impl Sealed for Simd {} + +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() } } -impl_float! { - sin: simd_fsin, - cos: simd_fcos, - exp: simd_fexp, - exp2: simd_fexp2, - ln: simd_flog, - log2: simd_flog2, - log10: simd_flog10, +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } } diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index c66c968f8c667..c608ba49564e0 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -16,15 +16,33 @@ macro_rules! unary_test { } } -macro_rules! binary_test { +macro_rules! unary_approx_test { { $scalar:tt, $($func:tt),+ } => { test_helpers::test_lanes! { $( fn $func() { - test_helpers::test_binary_elementwise( + test_helpers::test_unary_elementwise_approx( + &core_simd::simd::Simd::<$scalar, LANES>::$func, + &$scalar::$func, + &|_| true, + 8, + ) + } + )* + } + } +} + +macro_rules! binary_approx_test { + { $scalar:tt, $($func:tt),+ } => { + test_helpers::test_lanes! { + $( + fn $func() { + test_helpers::test_binary_elementwise_approx( &core_simd::simd::Simd::<$scalar, LANES>::$func, &$scalar::$func, &|_, _| true, + 16, ) } )* @@ -53,10 +71,13 @@ macro_rules! impl_tests { mod $scalar { use std_float::StdFloat; - unary_test! { $scalar, sqrt, sin, cos, exp, exp2, ln, log2, log10, ceil, floor, round, trunc } - binary_test! { $scalar, log } + unary_test! { $scalar, sqrt, ceil, floor, round, trunc } ternary_test! { $scalar, mul_add } + // https://github.com/rust-lang/miri/issues/3555 + unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 } + binary_approx_test! { $scalar, log } + test_helpers::test_lanes! { fn fract() { test_helpers::test_unary_elementwise_flush_subnormals( diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index a5359b9abc84d..408bb04c7aa40 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -6,3 +6,4 @@ publish = false [dependencies] proptest = { version = "0.10", default-features = false, features = ["alloc"] } +float-cmp = "0.10" diff --git a/crates/test_helpers/src/approxeq.rs b/crates/test_helpers/src/approxeq.rs new file mode 100644 index 0000000000000..57b43a16bc6fe --- /dev/null +++ b/crates/test_helpers/src/approxeq.rs @@ -0,0 +1,110 @@ +//! Compare numeric types approximately. + +use float_cmp::Ulps; + +pub trait ApproxEq { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool; + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result; +} + +impl ApproxEq for bool { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} + +macro_rules! impl_integer_approxeq { + { $($type:ty),* } => { + $( + impl ApproxEq for $type { + fn approxeq(&self, other: &Self, _ulps: i64) -> bool { + self == other + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?} ({:x})", self, self) + } + } + )* + }; +} + +impl_integer_approxeq! { u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize } + +macro_rules! impl_float_approxeq { + { $($type:ty),* } => { + $( + impl ApproxEq for $type { + fn approxeq(&self, other: &Self, ulps: i64) -> bool { + if self.is_nan() && other.is_nan() { + true + } else { + (self.ulps(other) as i64).abs() <= ulps + } + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "{:?} ({:x})", self, self.to_bits()) + } + } + )* + }; +} + +impl_float_approxeq! { f32, f64 } + +impl ApproxEq for [T; N] { + fn approxeq(&self, other: &Self, ulps: i64) -> bool { + self.iter() + .zip(other.iter()) + .fold(true, |value, (left, right)| { + value && left.approxeq(right, ulps) + }) + } + + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + #[repr(transparent)] + struct Wrapper<'a, T: ApproxEq>(&'a T); + + impl core::fmt::Debug for Wrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + self.0.fmt(f) + } + } + + f.debug_list() + .entries(self.iter().map(|x| Wrapper(x))) + .finish() + } +} + +#[doc(hidden)] +pub struct ApproxEqWrapper<'a, T>(pub &'a T, pub i64); + +impl PartialEq for ApproxEqWrapper<'_, T> { + fn eq(&self, other: &T) -> bool { + self.0.approxeq(other, self.1) + } +} + +impl core::fmt::Debug for ApproxEqWrapper<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + self.0.fmt(f) + } +} + +#[macro_export] +macro_rules! prop_assert_approxeq { + { $a:expr, $b:expr, $ulps:expr $(,)? } => { + { + use $crate::approxeq::ApproxEqWrapper; + let a = $a; + let b = $b; + proptest::prop_assert_eq!(ApproxEqWrapper(&a, $ulps), b); + } + }; +} diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 197c920e11eac..eb3d3f68bc2ea 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -12,6 +12,9 @@ pub mod wasm; #[macro_use] pub mod biteq; +#[macro_use] +pub mod approxeq; + pub mod subnormals; use subnormals::FlushSubnormals; @@ -185,6 +188,41 @@ pub fn test_unary_elementwise( + fv: &dyn Fn(Vector) -> VectorResult, + fs: &dyn Fn(Scalar) -> ScalarResult, + check: &dyn Fn([Scalar; LANES]) -> bool, + ulps: i64, +) where + Scalar: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy, + Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_1(&|x: [Scalar; LANES]| { + proptest::prop_assume!(check(x)); + let result_1: [ScalarResult; LANES] = fv(x.into()).into(); + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .map(fs) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_approxeq!(result_1, result_2, ulps); + Ok(()) + }); +} + /// Test a unary vector function against a unary scalar function, applied elementwise. /// /// Where subnormals are flushed, use approximate equality. @@ -290,6 +328,44 @@ pub fn test_binary_elementwise< }); } +/// Test a binary vector function against a binary scalar function, applied elementwise. +pub fn test_binary_elementwise_approx< + Scalar1, + Scalar2, + ScalarResult, + Vector1, + Vector2, + VectorResult, + const LANES: usize, +>( + fv: &dyn Fn(Vector1, Vector2) -> VectorResult, + fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult, + check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool, + ulps: i64, +) where + Scalar1: Copy + core::fmt::Debug + DefaultStrategy, + Scalar2: Copy + core::fmt::Debug + DefaultStrategy, + ScalarResult: Copy + approxeq::ApproxEq + core::fmt::Debug + DefaultStrategy, + Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy, + Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy, + VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy, +{ + test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| { + proptest::prop_assume!(check(x, y)); + let result_1: [ScalarResult; LANES] = fv(x.into(), y.into()).into(); + let result_2: [ScalarResult; LANES] = x + .iter() + .copied() + .zip(y.iter().copied()) + .map(|(x, y)| fs(x, y)) + .collect::>() + .try_into() + .unwrap(); + crate::prop_assert_approxeq!(result_1, result_2, ulps); + Ok(()) + }); +} + /// Test a binary vector function against a binary scalar function, applied elementwise. /// /// Where subnormals are flushed, use approximate equality. @@ -528,8 +604,6 @@ macro_rules! test_lanes { use super::*; fn implementation() - where - core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body #[cfg(target_arch = "wasm32")] @@ -628,8 +702,6 @@ macro_rules! test_lanes_panic { use super::*; fn implementation() - where - core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount, $body // test some odd and even non-power-of-2 lengths on miri diff --git a/rust-toolchain.toml b/rust-toolchain.toml index d17c6d2e88946..639d07df73374 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2025-01-16" +channel = "nightly-2026-01-26" components = ["rustfmt", "clippy", "miri", "rust-src"] From e33d70223e372bd93d76934242617e413a4cdcbb Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 18 Mar 2026 15:04:47 +0100 Subject: [PATCH 2/9] simd_fmin/fmax: make semantics and name consistent with scalar intrinsics --- crates/core_simd/src/simd/num/float.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index efd7c2469512b..175cbce4f58be 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -385,13 +385,13 @@ macro_rules! impl_trait { #[inline] fn simd_min(self, other: Self) -> Self { // Safety: `self` and `other` are float vectors - unsafe { core::intrinsics::simd::simd_fmin(self, other) } + unsafe { core::intrinsics::simd::simd_minimum_number_nsz(self, other) } } #[inline] fn simd_max(self, other: Self) -> Self { // Safety: `self` and `other` are floating point vectors - unsafe { core::intrinsics::simd::simd_fmax(self, other) } + unsafe { core::intrinsics::simd::simd_maximum_number_nsz(self, other) } } #[inline] From a52ff55f3888ea0bb15155b318a9ff99b3d436c5 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 11 Apr 2026 20:28:28 +0200 Subject: [PATCH 3/9] Merge commit '0557e3478104037c76c2e5be7ea21e56ebbaff6e' into sync-from-portable-simd-2026-04-11 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- Cargo.lock | 418 ++++++++++++++++--------- Cargo.toml | 5 + beginners-guide.md | 2 +- crates/core_simd/Cargo.toml | 8 +- crates/core_simd/src/alias.rs | 10 + crates/core_simd/src/cast.rs | 3 + crates/core_simd/src/lib.rs | 6 +- crates/core_simd/src/masks.rs | 26 +- crates/core_simd/src/ops.rs | 2 +- crates/core_simd/src/ops/unary.rs | 2 + crates/core_simd/src/simd/cmp/eq.rs | 2 +- crates/core_simd/src/simd/cmp/ord.rs | 2 +- crates/core_simd/src/simd/num/float.rs | 2 +- crates/core_simd/src/simd/prelude.rs | 2 +- crates/core_simd/src/vector.rs | 18 +- crates/core_simd/src/vendor.rs | 3 + crates/core_simd/src/vendor/hexagon.rs | 40 +++ crates/core_simd/tests/f16_ops.rs | 10 + crates/core_simd/tests/masks.rs | 13 + crates/core_simd/tests/round.rs | 8 + crates/std_float/src/lib.rs | 17 + crates/std_float/tests/float.rs | 4 +- crates/test_helpers/Cargo.toml | 2 +- crates/test_helpers/src/biteq.rs | 2 +- crates/test_helpers/src/lib.rs | 5 +- crates/test_helpers/src/subnormals.rs | 2 +- rust-toolchain.toml | 2 +- triagebot.toml | 44 +++ 29 files changed, 469 insertions(+), 193 deletions(-) create mode 100644 crates/core_simd/src/vendor/hexagon.rs create mode 100644 crates/core_simd/tests/f16_ops.rs create mode 100644 triagebot.toml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 31422b7934501..5d354305e56de 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,7 +10,7 @@ For a given vector math operation on TxN, please add tests for interactions with - [ ] 0 -For a given vector math operation on TxN where T is a float, please add tests for test interactions with: +For a given vector math operation on TxN where T is a float, please add tests for interactions with: - [ ] a really large number, larger than the mantissa - [ ] a really small "subnormal" number - [ ] NaN diff --git a/Cargo.lock b/Cargo.lock index 5a5f0d8907ae3..c3b950bd5069c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -10,41 +21,43 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" -version = "1.3.2" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] -name = "byteorder" -version = "1.5.0" +name = "cast" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.33" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ + "find-msvc-tools", "shlex", ] [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "core_simd" version = "0.1.0" dependencies = [ + "getrandom", "proptest", "std_float", "test_helpers", @@ -52,6 +65,12 @@ dependencies = [ "wasm-bindgen-test", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "float-cmp" version = "0.10.0" @@ -61,32 +80,99 @@ dependencies = [ "num-traits", ] +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] [[package]] -name = "log" -version = "0.4.27" +name = "libc" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "minicov" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +checksum = "4869b6a491569605d66d3952bcdf03df789e5b536e5f0cf7758a7f08a55ae24d" dependencies = [ "cc", "walkdir", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -94,13 +180,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + +[[package]] +name = "pin-project-lite" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "ppv-lite86" @@ -113,52 +212,58 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "0.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bitflags", - "byteorder", "num-traits", "rand", "rand_chacha", "rand_xorshift", + "regex-syntax", + "unarray", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rand" -version = "0.7.3" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", - "rand_hc", ] [[package]] name = "rand_chacha" -version = "0.2.2" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", @@ -166,28 +271,28 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.5.1" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ - "rand_core", + "getrandom", ] [[package]] name = "rand_xorshift" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ "rand_core", ] +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "rustversion" version = "1.0.22" @@ -203,12 +308,61 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "std_float" version = "0.1.0" @@ -221,9 +375,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -238,11 +392,17 @@ dependencies = [ "proptest", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "walkdir" @@ -255,49 +415,42 @@ dependencies = [ ] [[package]] -name = "wasm-bindgen" -version = "0.2.100" +name = "wasip2" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", + "wit-bindgen", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" +name = "wasm-bindgen" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -305,44 +458,53 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.50" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" +checksum = "1138411301a026d6662dc44e7076a74dbaa76a369312275eea5dee4d7dc68c7c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", + "wasm-bindgen-test-shared", ] [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.50" +version = "0.3.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" +checksum = "186ddfe8383ba7ae7927bae3bb7343fd1f03ba2dbaf1474410f0d831131c269b" dependencies = [ "proc-macro2", "quote", @@ -350,113 +512,63 @@ dependencies = [ ] [[package]] -name = "web-sys" -version = "0.3.77" +name = "wasm-bindgen-test-shared" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] +checksum = "f032e076ceb8d36d5921c6cef5bf447f2ca2bbd5439ce1683d68d1c99cc2be16" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ "windows-sys", ] [[package]] -name = "windows-sys" -version = "0.59.0" +name = "windows-link" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] -name = "windows-targets" -version = "0.52.6" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows-link", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", "syn", ] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index 21d4584a9f4d9..883140bae3f62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,8 @@ opt-level = 2 [profile.test.package.test_helpers] opt-level = 2 + +[workspace.dependencies.proptest] +version = "1.11" +default-features = false +features = ["alloc", "f16"] diff --git a/beginners-guide.md b/beginners-guide.md index 4250a18315a6e..c56873ea4b8d7 100644 --- a/beginners-guide.md +++ b/beginners-guide.md @@ -56,7 +56,7 @@ The list notes the bit widths available at each feature level, though the operat ### Selecting Additional Target Features -If you want to enable support for a target feature within your build, generally you should use a [target-feature](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-feature) setting within you `RUSTFLAGS` setting. +If you want to enable support for a target feature within your build, generally you should use a [target-feature](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-feature) setting within your `RUSTFLAGS` setting. If you know that you're targeting a specific CPU you can instead use the [target-cpu](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html#target-cpu) flag and the compiler will enable the correct set of features for that CPU. diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml index 537ce459c07cd..6e576084ecfba 100644 --- a/crates/core_simd/Cargo.toml +++ b/crates/core_simd/Cargo.toml @@ -18,9 +18,13 @@ wasm-bindgen = "0.2" wasm-bindgen-test = "0.3" [dev-dependencies.proptest] -version = "0.10" +workspace = true + +# Enable the `wasm_js` feature so that getrandom works on wasm32-unknown-unknown. +[dev-dependencies.getrandom] +version = "0.3.4" default-features = false -features = ["alloc"] +features = ["wasm_js"] [dev-dependencies.test_helpers] path = "../test_helpers" diff --git a/crates/core_simd/src/alias.rs b/crates/core_simd/src/alias.rs index 23f121c46197c..6dcfcb660c26a 100644 --- a/crates/core_simd/src/alias.rs +++ b/crates/core_simd/src/alias.rs @@ -153,6 +153,16 @@ alias! { usizex64 64 } + f16 = { + f16x1 1 + f16x2 2 + f16x4 4 + f16x8 8 + f16x16 16 + f16x32 32 + f16x64 64 + } + f32 = { f32x1 1 f32x2 2 diff --git a/crates/core_simd/src/cast.rs b/crates/core_simd/src/cast.rs index 1c3592f807578..69dc7ba50d58d 100644 --- a/crates/core_simd/src/cast.rs +++ b/crates/core_simd/src/cast.rs @@ -44,6 +44,9 @@ impl SimdCast for u64 {} unsafe impl Sealed for usize {} impl SimdCast for usize {} // Safety: primitive number types can be cast to other primitive number types +unsafe impl Sealed for f16 {} +impl SimdCast for f16 {} +// Safety: primitive number types can be cast to other primitive number types unsafe impl Sealed for f32 {} impl SimdCast for f32 {} // Safety: primitive number types can be cast to other primitive number types diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs index fe26d99b9194c..413a886f6c5b8 100644 --- a/crates/core_simd/src/lib.rs +++ b/crates/core_simd/src/lib.rs @@ -1,17 +1,16 @@ #![no_std] #![feature( - const_eval_select, convert_float_to_int, + f16, core_intrinsics, decl_macro, - intra_doc_pointers, repr_simd, - simd_ffi, staged_api, prelude_import, ptr_metadata, rustc_attrs )] +#![cfg_attr(doc, feature(intra_doc_pointers))] #![cfg_attr( all( any(target_arch = "aarch64", target_arch = "arm64ec", target_arch = "arm",), @@ -31,6 +30,7 @@ any(target_arch = "powerpc", target_arch = "powerpc64"), feature(stdarch_powerpc) )] +#![cfg_attr(target_arch = "hexagon", feature(stdarch_hexagon))] #![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really #![deny( unsafe_op_in_unsafe_fn, diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index 3e2209556b66b..a5334afbe5f8b 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -371,22 +371,20 @@ where // * perform _unsigned_ reduce-min // * check if the result is -1 or an index - let index = Simd::from_array( - const { - let mut index = [0; N]; - let mut i = 0; - while i < N { - index[i] = i; - i += 1; - } - index - }, - ); + let index: Simd = const { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = i; + i += 1; + } + // Safety: the input and output are integer vectors + unsafe { core::intrinsics::simd::simd_cast(Simd::from_array(index)) } + }; // Safety: the input and output are integer vectors - let index: Simd = unsafe { core::intrinsics::simd::simd_cast(index) }; - - let masked_index = self.select(index, Self::splat(true).to_simd()); + let masked_index: Simd = + unsafe { core::intrinsics::simd::simd_or((!self).to_simd(), index) }; // Safety: the input and output are integer vectors let masked_index: Simd = diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs index eb6601f734831..c0a06ed465129 100644 --- a/crates/core_simd/src/ops.rs +++ b/crates/core_simd/src/ops.rs @@ -245,7 +245,7 @@ for_base_ops! { // We don't need any special precautions here: // Floats always accept arithmetic ops, but may become NaN. for_base_ops! { - T = (f32, f64); + T = (f16, f32, f64); type Lhs = Simd; type Rhs = Simd; type Output = Self; diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs index e1c06167f9790..af7aa8a823d99 100644 --- a/crates/core_simd/src/ops/unary.rs +++ b/crates/core_simd/src/ops/unary.rs @@ -19,6 +19,8 @@ macro_rules! neg { } neg! { + impl Neg for Simd + impl Neg for Simd impl Neg for Simd diff --git a/crates/core_simd/src/simd/cmp/eq.rs b/crates/core_simd/src/simd/cmp/eq.rs index d553d6c040c91..76836404cbc4e 100644 --- a/crates/core_simd/src/simd/cmp/eq.rs +++ b/crates/core_simd/src/simd/cmp/eq.rs @@ -42,7 +42,7 @@ macro_rules! impl_number { } } -impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } +impl_number! { f16, f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize } macro_rules! impl_mask { { $($integer:ty),* } => { diff --git a/crates/core_simd/src/simd/cmp/ord.rs b/crates/core_simd/src/simd/cmp/ord.rs index 5672fbbf54caa..5a4e74c753b5a 100644 --- a/crates/core_simd/src/simd/cmp/ord.rs +++ b/crates/core_simd/src/simd/cmp/ord.rs @@ -144,7 +144,7 @@ macro_rules! impl_float { } } -impl_float! { f32, f64 } +impl_float! { f16, f32, f64 } macro_rules! impl_mask { { $($integer:ty),* } => { diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index 175cbce4f58be..510f4c9eea393 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -444,4 +444,4 @@ macro_rules! impl_trait { } } -impl_trait! { f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } +impl_trait! { f16 { bits: u16, mask: i16 }, f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } } diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index e5d7a2aeb73df..6e93f16e10b1c 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -7,7 +7,7 @@ #[doc(no_inline)] pub use super::{ - Mask, Simd, + Mask, Select, Simd, ToBytes, cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd}, num::{SimdFloat, SimdInt, SimdUint}, ptr::{SimdConstPtr, SimdMutPtr}, diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 5b3a689f3611b..fbef69f267aa5 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -363,7 +363,7 @@ where /// corresponding element in `enable` is `true`. /// /// When the element is disabled or out of bounds for the slice, that memory location - /// is not accessed and the corresponding value from `or` is passed through. + /// is not accessed and the default value for the element type is returned. /// /// # Examples /// ``` @@ -371,12 +371,11 @@ where /// # #[cfg(feature = "as_crate")] use core_simd::simd; /// # #[cfg(not(feature = "as_crate"))] use core::simd; /// # use simd::{Simd, Mask}; - /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; - /// let enable = Mask::from_array([true, true, false, true]); - /// let or = Simd::from_array([-5, -4, -3, -2]); + /// let vec: Vec = vec![10, 11, 12]; + /// let enable = Mask::from_array([false, true, true, true]); /// - /// let result = Simd::load_select(&vec, enable, or); - /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// let result = Simd::load_select_or_default(&vec, enable); + /// assert_eq!(result, Simd::from_array([0, 11, 12, 0])); /// ``` #[must_use] #[inline] @@ -1147,6 +1146,13 @@ unsafe impl SimdElement for isize { type Mask = isize; } +impl Sealed for f16 {} + +// Safety: f16 is a valid SIMD element type, and is supported by this API +unsafe impl SimdElement for f16 { + type Mask = i16; +} + impl Sealed for f32 {} // Safety: f32 is a valid SIMD element type, and is supported by this API diff --git a/crates/core_simd/src/vendor.rs b/crates/core_simd/src/vendor.rs index 57536e4fc77dc..6b3c640c2f7c5 100644 --- a/crates/core_simd/src/vendor.rs +++ b/crates/core_simd/src/vendor.rs @@ -32,3 +32,6 @@ mod powerpc; #[cfg(target_arch = "loongarch64")] mod loongarch64; + +#[cfg(target_arch = "hexagon")] +mod hexagon; diff --git a/crates/core_simd/src/vendor/hexagon.rs b/crates/core_simd/src/vendor/hexagon.rs new file mode 100644 index 0000000000000..2b8ea55fde659 --- /dev/null +++ b/crates/core_simd/src/vendor/hexagon.rs @@ -0,0 +1,40 @@ +//! Conversions to Hexagon HVX SIMD types. + +use crate::simd::*; + +// HVX 128-byte mode (1024-bit vectors) +// Enable with: -C target-feature=+hvx-length128b +#[cfg(target_feature = "hvx-length128b")] +mod hvx_128b { + use super::*; + use core::arch::hexagon::v128::HvxVector; + + // Full vectors (1024-bit) map to HvxVector + from_transmute! { unsafe u16x64 => HvxVector } + from_transmute! { unsafe i16x64 => HvxVector } + from_transmute! { unsafe u32x32 => HvxVector } + from_transmute! { unsafe i32x32 => HvxVector } + from_transmute! { unsafe u64x16 => HvxVector } + from_transmute! { unsafe i64x16 => HvxVector } + + // FIXME: u8x128/i8x128 don't exist in portable-simd (max lane count is 64) + // u8x64/i8x64 are only 512-bit (half of HvxVector in 128B mode) +} + +// HVX 64-byte mode (512-bit vectors) +// Default when hvx-length128b is not specified +#[cfg(not(target_feature = "hvx-length128b"))] +mod hvx_64b { + use super::*; + use core::arch::hexagon::v64::HvxVector; + + // Full vectors (512-bit) map to HvxVector + from_transmute! { unsafe u8x64 => HvxVector } + from_transmute! { unsafe i8x64 => HvxVector } + from_transmute! { unsafe u16x32 => HvxVector } + from_transmute! { unsafe i16x32 => HvxVector } + from_transmute! { unsafe u32x16 => HvxVector } + from_transmute! { unsafe i32x16 => HvxVector } + from_transmute! { unsafe u64x8 => HvxVector } + from_transmute! { unsafe i64x8 => HvxVector } +} diff --git a/crates/core_simd/tests/f16_ops.rs b/crates/core_simd/tests/f16_ops.rs new file mode 100644 index 0000000000000..f89bdf4738f8b --- /dev/null +++ b/crates/core_simd/tests/f16_ops.rs @@ -0,0 +1,10 @@ +#![feature(portable_simd)] +#![feature(f16)] + +#[macro_use] +mod ops_macros; + +// FIXME: some f16 operations cause rustc to hang on wasm simd +// https://github.com/llvm/llvm-project/issues/189251 +#[cfg(not(all(target_arch = "wasm32", target_feature = "simd128")))] +impl_float_tests! { f16, i16 } diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs index 53fb2367b6055..98a74be8e3955 100644 --- a/crates/core_simd/tests/masks.rs +++ b/crates/core_simd/tests/masks.rs @@ -133,6 +133,19 @@ macro_rules! test_mask_api { cast_impl::(); cast_impl::(); } + + #[test] + fn first_set() { + for bitmask in 0..=u8::MAX { + let mask = Mask::<$type, 8>::from_bitmask(bitmask as u64); + let expected = if bitmask == 0 { + None + } else { + Some(bitmask.trailing_zeros() as usize) + }; + assert_eq!(mask.first_set(), expected); + } + } } } } diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs index 4c1ac3c36f894..95b17f415822b 100644 --- a/crates/core_simd/tests/round.rs +++ b/crates/core_simd/tests/round.rs @@ -42,6 +42,14 @@ macro_rules! float_rounding_test { ) } + fn round_ties_even() { + test_helpers::test_unary_elementwise( + &Vector::::round_ties_even, + &Scalar::round_ties_even, + &|_| true, + ) + } + fn fract() { test_helpers::test_unary_elementwise_flush_subnormals( &Vector::::fract, diff --git a/crates/std_float/src/lib.rs b/crates/std_float/src/lib.rs index b269efc9b1d76..ff3525452231a 100644 --- a/crates/std_float/src/lib.rs +++ b/crates/std_float/src/lib.rs @@ -2,6 +2,7 @@ feature = "as_crate", feature(core_intrinsics), feature(portable_simd), + feature(f16), allow(internal_features) )] #[cfg(not(feature = "as_crate"))] @@ -156,14 +157,30 @@ pub trait StdFloat: Sealed + Sized { unsafe { intrinsics::simd_trunc(self) } } + /// Rounds each element to the nearest integer-valued float. + /// Ties are resolved by rounding to the number with an even least significant digit. + #[must_use = "method returns a new vector and does not mutate the original value"] + #[inline] + fn round_ties_even(self) -> Self { + unsafe { intrinsics::simd_round_ties_even(self) } + } + /// Returns the floating point's fractional value, with its integer part removed. #[must_use = "method returns a new vector and does not mutate the original value"] fn fract(self) -> Self; } +impl Sealed for Simd {} impl Sealed for Simd {} impl Sealed for Simd {} +impl StdFloat for Simd { + #[inline] + fn fract(self) -> Self { + self - self.trunc() + } +} + impl StdFloat for Simd { #[inline] fn fract(self) -> Self { diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index c608ba49564e0..0fa5da3dca506 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -25,7 +25,7 @@ macro_rules! unary_approx_test { &core_simd::simd::Simd::<$scalar, LANES>::$func, &$scalar::$func, &|_| true, - 8, + 16, ) } )* @@ -71,7 +71,7 @@ macro_rules! impl_tests { mod $scalar { use std_float::StdFloat; - unary_test! { $scalar, sqrt, ceil, floor, round, trunc } + unary_test! { $scalar, sqrt, ceil, floor, round, trunc, round_ties_even } ternary_test! { $scalar, mul_add } // https://github.com/rust-lang/miri/issues/3555 diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml index 408bb04c7aa40..da7ef7bd9945c 100644 --- a/crates/test_helpers/Cargo.toml +++ b/crates/test_helpers/Cargo.toml @@ -5,5 +5,5 @@ edition = "2021" publish = false [dependencies] -proptest = { version = "0.10", default-features = false, features = ["alloc"] } +proptest = { workspace = true, features = ["alloc", "std"] } float-cmp = "0.10" diff --git a/crates/test_helpers/src/biteq.rs b/crates/test_helpers/src/biteq.rs index cbc20cda0d626..36761e37dea76 100644 --- a/crates/test_helpers/src/biteq.rs +++ b/crates/test_helpers/src/biteq.rs @@ -53,7 +53,7 @@ macro_rules! impl_float_biteq { }; } -impl_float_biteq! { f32, f64 } +impl_float_biteq! { f16, f32, f64 } impl BitEq for *const T { fn biteq(&self, other: &Self) -> bool { diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index eb3d3f68bc2ea..82adb06d8a9d4 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -1,7 +1,7 @@ -#![feature(powerpc_target_feature)] +#![feature(f16)] #![cfg_attr( any(target_arch = "powerpc", target_arch = "powerpc64"), - feature(stdarch_powerpc) + feature(powerpc_target_feature, stdarch_powerpc) )] pub mod array; @@ -47,6 +47,7 @@ impl_num! { u16 } impl_num! { u32 } impl_num! { u64 } impl_num! { usize } +impl_num! { f16 } impl_num! { f32 } impl_num! { f64 } diff --git a/crates/test_helpers/src/subnormals.rs b/crates/test_helpers/src/subnormals.rs index b5f19ba47b819..44dfbb3d6c955 100644 --- a/crates/test_helpers/src/subnormals.rs +++ b/crates/test_helpers/src/subnormals.rs @@ -39,7 +39,7 @@ macro_rules! impl_else { } } -impl_float! { f32, f64 } +impl_float! { f16, f32, f64 } impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize } /// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs. diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 639d07df73374..6a58e59fb93ef 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2026-01-26" +channel = "nightly-2026-03-18" components = ["rustfmt", "clippy", "miri", "rust-src"] diff --git a/triagebot.toml b/triagebot.toml new file mode 100644 index 0000000000000..43048b5e4514c --- /dev/null +++ b/triagebot.toml @@ -0,0 +1,44 @@ +## See for documentation +## of these features. + +# Allow users to use labels commands. +# Documentation at: https://forge.rust-lang.org/triagebot/labeling.html +[relabel] +allow-unauthenticated = [ + "A-*", + "C-*", + "E-*", + "F-*", + "I-*", + "ISA-*", + "O-*", +] + +# Allow users to assign 'r?` someone to an issue or PR. +# Documentation at: https://forge.rust-lang.org/triagebot/issue-assignment.html +[assign] +warn_non_default_branch = true + +# Warns when a PR contains merge commits +# Documentation at: https://forge.rust-lang.org/triagebot/no-merge.html +[no-merges] +exclude_titles = ["Sync from"] + +# Canonicalize issue numbers to avoid closing the wrong issue +# when commits are included in upstream sync, as well as warning links in commits. +# Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html +[issue-links] +check-commits = "uncanonicalized" + +# Enable issue transfers within the org +# Documentation at: https://forge.rust-lang.org/triagebot/transfer.html +[transfer] + +# Enable comments linking to triagebot range-diff when a PR is rebased +# onto a different base commit +# Documentation at: https://forge.rust-lang.org/triagebot/range-diff.html +[range-diff] + +# Add link to the review body to review changes since posting it. +# Documentation at: https://forge.rust-lang.org/triagebot/review-changes-since.html +[review-changes-since] From 81e7d7d99ed1f2890f8e2a32291a27d7088e094e Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 12 Apr 2026 01:43:23 +0200 Subject: [PATCH 4/9] improve how `proptest` errors are displayed (#517) --- crates/test_helpers/src/lib.rs | 39 +++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs index 82adb06d8a9d4..ce3680ac2c306 100644 --- a/crates/test_helpers/src/lib.rs +++ b/crates/test_helpers/src/lib.rs @@ -122,12 +122,23 @@ pub fn make_runner() -> proptest::test_runner::TestRunner { proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4)) } +#[track_caller] +fn unwrap_test_error( + x: Result>, +) -> T { + // Using the `Display` instance of the error is much more readable. + match x { + Ok(v) => v, + Err(e) => panic!("{e}"), + } +} + /// Test a function that takes a single value. pub fn test_1( f: &dyn Fn(A) -> proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner.run(&A::default_strategy(), f).unwrap(); + unwrap_test_error(runner.run(&A::default_strategy(), f)) } /// Test a function that takes two values. @@ -135,11 +146,11 @@ pub fn test_2 proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner - .run(&(A::default_strategy(), B::default_strategy()), |(a, b)| { + unwrap_test_error( + runner.run(&(A::default_strategy(), B::default_strategy()), |(a, b)| { f(a, b) - }) - .unwrap(); + }), + ) } /// Test a function that takes two values. @@ -151,16 +162,14 @@ pub fn test_3< f: &dyn Fn(A, B, C) -> proptest::test_runner::TestCaseResult, ) { let mut runner = make_runner(); - runner - .run( - &( - A::default_strategy(), - B::default_strategy(), - C::default_strategy(), - ), - |(a, b, c)| f(a, b, c), - ) - .unwrap(); + unwrap_test_error(runner.run( + &( + A::default_strategy(), + B::default_strategy(), + C::default_strategy(), + ), + |(a, b, c)| f(a, b, c), + )); } /// Test a unary vector function against a unary scalar function, applied elementwise. From 3bd3fffa7fdcab25692afb45776a1d5fbad620df Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Thu, 16 Apr 2026 17:47:51 +0200 Subject: [PATCH 5/9] float reduce_max/min: fix SNaN treatment (#515) --- crates/core_simd/src/simd/num/float.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs index 9f27e527f00fc..1fa7991920fa7 100644 --- a/crates/core_simd/src/simd/num/float.rs +++ b/crates/core_simd/src/simd/num/float.rs @@ -430,14 +430,14 @@ macro_rules! impl_trait { #[inline] fn reduce_max(self) -> Self::Scalar { - // Safety: `self` is a float vector - unsafe { core::intrinsics::simd::simd_reduce_max(self) } + // LLVM has no intrinsic we can use here + // (https://github.com/llvm/llvm-project/issues/185827). + self.as_array().iter().copied().fold(Self::Scalar::NAN, Self::Scalar::max) } #[inline] fn reduce_min(self) -> Self::Scalar { - // Safety: `self` is a float vector - unsafe { core::intrinsics::simd::simd_reduce_min(self) } + self.as_array().iter().copied().fold(Self::Scalar::NAN, Self::Scalar::min) } } )* From f7893ba153232b3fbf52e2b03e1c1f4407d000ee Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 20 Apr 2026 17:00:51 +0200 Subject: [PATCH 6/9] add `f16` types to the prelude (#521) --- crates/core_simd/src/simd/prelude.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/core_simd/src/simd/prelude.rs b/crates/core_simd/src/simd/prelude.rs index 6e93f16e10b1c..51b8def3d6eed 100644 --- a/crates/core_simd/src/simd/prelude.rs +++ b/crates/core_simd/src/simd/prelude.rs @@ -14,6 +14,10 @@ pub use super::{ simd_swizzle, }; +#[rustfmt::skip] +#[doc(no_inline)] +pub use super::{f16x1, f16x2, f16x4, f16x8, f16x16, f16x32, f16x64}; + #[rustfmt::skip] #[doc(no_inline)] pub use super::{f32x1, f32x2, f32x4, f32x8, f32x16, f32x32, f32x64}; From 9447da6f88b2f6cf22cbf7d01b3cc19978967001 Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Thu, 23 Apr 2026 15:04:48 -0400 Subject: [PATCH 7/9] Add bound check hint to mask first_set (#523) --- crates/core_simd/src/masks.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index a5334afbe5f8b..cb5d54020f7fc 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -400,7 +400,15 @@ where if min_index.eq(T::TRUE) { None } else { - Some(min_index.to_usize()) + let min_index = min_index.to_usize(); + + // Allow eliminating bounds checks when using the index + // Safety: the index can't exceed the number of elements in the vector + unsafe { + core::hint::assert_unchecked(min_index < N); + } + + Some(min_index) } } } From 82ee1e04c63f893668962e5abc2696ea9b2ce00a Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 12 Apr 2026 23:47:43 +0200 Subject: [PATCH 8/9] use larger ulps for `log` because it uses 2 inexact operations --- crates/std_float/tests/float.rs | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/crates/std_float/tests/float.rs b/crates/std_float/tests/float.rs index 0fa5da3dca506..f97e1123c8527 100644 --- a/crates/std_float/tests/float.rs +++ b/crates/std_float/tests/float.rs @@ -33,23 +33,6 @@ macro_rules! unary_approx_test { } } -macro_rules! binary_approx_test { - { $scalar:tt, $($func:tt),+ } => { - test_helpers::test_lanes! { - $( - fn $func() { - test_helpers::test_binary_elementwise_approx( - &core_simd::simd::Simd::<$scalar, LANES>::$func, - &$scalar::$func, - &|_, _| true, - 16, - ) - } - )* - } - } -} - macro_rules! ternary_test { { $scalar:tt, $($func:tt),+ } => { test_helpers::test_lanes! { @@ -76,7 +59,19 @@ macro_rules! impl_tests { // https://github.com/rust-lang/miri/issues/3555 unary_approx_test! { $scalar, sin, cos, exp, exp2, ln, log2, log10 } - binary_approx_test! { $scalar, log } + + // The implementation of log is a.ln() / b.ln(), so there are 2 inexact operations, + // hence a larger ulps is needed. + test_helpers::test_lanes! { + fn log() { + test_helpers::test_binary_elementwise_approx( + &core_simd::simd::Simd::<$scalar, LANES>::log, + &$scalar::log, + &|_, _| true, + 32, + ) + } + } test_helpers::test_lanes! { fn fract() { From b1042604758be230eb352551451695ce923d3251 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 11 Apr 2026 20:33:22 +0200 Subject: [PATCH 9/9] bump toolchain to `nightly-2026-04-28` --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6a58e59fb93ef..27d2dd6efbbb8 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2026-03-18" +channel = "nightly-2026-04-28" components = ["rustfmt", "clippy", "miri", "rust-src"]