mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
Currently, full test validation is rather heavy, but running only the
local tests often misses issues that are caught only by the tests that
probe the full stack. This PR adds a smoke-test path that runs a
meaningful subset of the tests across the workspace and covers most
errors. It runs in about 1/8 of the time of `cargo test`, so it is
useful for speeding up AI model iteration.
In addition, a few intermittent failures were fixed.
There should be no runtime functionality change.
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Low Risk**
> Low risk since changes are limited to Cargo configuration and test
gating; no production code paths are modified. Main risk is accidentally
skipping too much coverage or misconfiguring feature flags in CI/local
workflows.
>
> **Overview**
> Adds a new `cargo smoke-test` workflow by introducing a `smoke-test`
Cargo profile and a `cargo` alias that runs `test` with per-crate
`smoke-test` features enabled.
>
> Defines `smoke-test` features across multiple crates and uses
`#[cfg_attr(feature = "smoke-test", ignore)]` / `#[cfg(... not(feature =
"smoke-test"))]` to skip long-running, concurrency-heavy, or full-stack
integration tests during smoke runs.
>
> Tightens test robustness by making `SafeFileCreator` permission
assertions umask-tolerant (require owner read/write rather than an exact
`0o644`).
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
5d53009652. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: Hoyt Koepke <hoytak@xethub.com>
89 lines
2.5 KiB
TOML
89 lines
2.5 KiB
TOML
# Keys written as `<key>.workspace = true` are inherited from the workspace
# root manifest rather than being set in this crate.
[package]
name = "xet-core-structures"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "Core data structures including MerkleHash, metadata shards, and xorb objects"

# Library target: crate name and entry point are spelled out explicitly.
[lib]
name = "xet_core_structures"
path = "src/lib.rs"

# `harness = false` turns off the built-in libtest bench harness, so this
# benchmark target has to supply its own `main`.
[[bench]]
name = "compression_bench"
harness = false
bench = true

# `harness = false` turns off the built-in libtest bench harness, so this
# benchmark target has to supply its own `main`.
[[bench]]
name = "bg_split_regroup_bench"
harness = false
bench = true

# Dependencies compiled for every target. The sibling workspace crate is
# listed first; the remaining entries are kept in alphabetical order and
# take their version from the workspace root (`workspace = true`).
[dependencies]
xet-runtime = { version = "1.4.0", path = "../xet_runtime" }

async-trait = { workspace = true }
base64 = { workspace = true }
blake3 = { workspace = true }
bytes = { workspace = true }
clap = { workspace = true }
countio = { workspace = true }
csv = { workspace = true }
futures = { workspace = true }
futures-util = { workspace = true }
half = { workspace = true }
heapify = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
lz4_flex = { workspace = true }
more-asserts = { workspace = true }
rand = { workspace = true, features = ["small_rng"] }
regex = { workspace = true }
safe-transmute = { workspace = true }
serde = { workspace = true }
static_assertions = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

# Dependencies that are only pulled in when NOT building for a wasm target.
# NOTE(review): tokio's `test-util` feature is enabled here as a regular
# (non-dev) dependency — confirm that library code, not just tests, needs it.
[target.'cfg(not(target_family = "wasm"))'.dependencies]
bincode = { workspace = true }
bytemuck = { workspace = true }
heed = { workspace = true }
tokio = { workspace = true, features = ["time", "rt", "macros", "sync", "test-util", "io-util", "rt-multi-thread"] }
tokio-util = { workspace = true, features = ["io"] }
uuid = { workspace = true, features = ["v4"] }

# Dependencies that are only pulled in when building for a wasm target.
# tokio is requested with a smaller feature set than on non-wasm targets
# (no `rt-multi-thread`, no `test-util`).
[target.'cfg(target_family = "wasm")'.dependencies]
getrandom = { workspace = true, features = ["wasm_js"] }
tokio = { workspace = true, features = ["sync", "macros", "io-util", "rt", "time"] }
uuid = { workspace = true, features = ["v4", "js"] }
web-time = { workspace = true }

# Extra dependencies for tests, examples, and benches on every target.
# NOTE(review): tokio's `rt-multi-thread` is requested unconditionally here,
# whereas the wasm-only dependency table in this manifest leaves it out —
# confirm the test targets are never built for wasm.
[dev-dependencies]
bincode = { workspace = true }
futures-util = { workspace = true }
rand = { workspace = true }
serial_test = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }

# Test-only dependencies for non-wasm targets.
# NOTE(review): `tempfile` is also listed unconditionally under
# [dependencies] in this manifest, so this entry looks redundant — confirm
# before removing either one.
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
tempfile = { workspace = true }

# Binary target whose source lives inside the library tree, so the path is
# given explicitly.
[[bin]]
name = "shard_benchmark"
path = "src/metadata_shard/shard_benchmark.rs"

# Binary target whose source lives inside the library tree, so the path is
# given explicitly. `name` is listed before `path`, matching the
# `shard_benchmark` entry.
[[bin]]
name = "collect_compression_stats"
path = "src/xorb_object/byte_grouping/compression_stats/collect_compression_stats.rs"

# Binary target whose source lives inside the library tree, so the path is
# given explicitly. `name` is listed before `path`, matching the
# `shard_benchmark` entry.
[[bin]]
name = "bg4_prediction_benchmark"
path = "src/xorb_object/byte_grouping/bg4_prediction_benchmark.rs"

# Both features are plain flags: they enable no optional dependencies and no
# other features.
[features]
strict = []
# Enabled by the `cargo smoke-test` workflow; tests gated on this feature
# (long-running, concurrency-heavy, or full-stack) are skipped or ignored.
smoke-test = []