mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
Currently, full test validation is rather heavy, but running only local
tests often misses issues that are caught only by the tests that probe the
full stack. This PR adds a smoke-test path that runs a meaningful subset
of the tests across the workspace and covers most errors. It runs in
about 1/8 of the time of `cargo test`, so it is useful for speeding
up AI model iteration.
In addition, a few intermittent failures were also fixed.
There should be no runtime functionality change.
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> **Low Risk**
> Low risk since changes are limited to Cargo configuration and test
gating; no production code paths are modified. Main risk is accidentally
skipping too much coverage or misconfiguring feature flags in CI/local
workflows.
>
> **Overview**
> Adds a new `cargo smoke-test` workflow by introducing a `smoke-test`
Cargo profile and a `cargo` alias that runs `test` with per-crate
`smoke-test` features enabled.
>
> Defines `smoke-test` features across multiple crates and uses
`#[cfg_attr(feature = "smoke-test", ignore)]` / `#[cfg(... not(feature =
"smoke-test"))]` to skip long-running, concurrency-heavy, or full-stack
integration tests during smoke runs.
>
> Tightens test robustness by making `SafeFileCreator` permission
assertions umask-tolerant (require owner read/write rather than an exact
`0o644`).
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
5d53009652. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
---------
Co-authored-by: Hoyt Koepke <hoytak@xethub.com>
88 lines
2.3 KiB
TOML
88 lines
2.3 KiB
TOML
# Manifest for the `xet-data` crate. Version, edition, license, and repository
# are inherited from the workspace root rather than being set here.
[package]
name = "xet-data"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools"

# Library target. The library is named `xet_data` (underscore), while the
# package itself is named `xet-data` (hyphen).
[lib]
name = "xet_data"
path = "src/lib.rs"
# Documentation examples are not tested for this library.
doctest = false

[dependencies]
# Sibling crates, referenced by relative path and pinned to version 1.4.0.
xet-runtime = { version = "1.4.0", path = "../xet_runtime" }
xet-core-structures = { version = "1.4.0", path = "../xet_core_structures" }
xet-client = { version = "1.4.0", path = "../xet_client" }

# Third-party crates whose versions are inherited from the workspace.
anyhow = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true }
gearhash = { workspace = true }
http = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
more-asserts = { workspace = true }
prometheus = { workspace = true }
rand = { workspace = true }
regex = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
ulid = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
# Optional: only pulled in when the `python` feature is enabled.
pyo3 = { version = "0.26", features = ["abi3-py37"], optional = true }

# `tokio` is configured per target family. The features listed here are added
# to whatever the workspace-level `tokio` entry already enables.
# wasm targets: no `rt-multi-thread`; `sync`, `macros`, and `io-util` are
# requested explicitly.
[target.'cfg(target_family = "wasm")'.dependencies]
tokio = { workspace = true, features = ["sync", "macros", "io-util", "rt", "time"] }

# All non-wasm targets additionally get the multi-threaded runtime.
[target.'cfg(not(target_family = "wasm"))'.dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "rt", "time"] }

# `sha2` enables its `asm` feature everywhere except Windows.
# NOTE(review): presumably the `asm` backend does not build with Windows
# toolchains; confirm before unifying these two tables.
[target.'cfg(not(target_os = "windows"))'.dependencies]
sha2 = { workspace = true, features = ["asm"] }

[target.'cfg(target_os = "windows")'.dependencies]
sha2 = { workspace = true }

# Binary targets built from sources under src/processing/bin/.
[[bin]]
name = "x"
path = "src/processing/bin/example.rs"

[[bin]]
name = "xtool"
path = "src/processing/bin/xtool.rs"

# Example targets; each lives in its own directory under examples/ with a
# `main.rs` entry point, so the paths are spelled out explicitly.
[[example]]
name = "chunk"
path = "examples/chunk/main.rs"

[[example]]
name = "hash"
path = "examples/hash/main.rs"

[[example]]
name = "xorb-check"
path = "examples/xorb-check/main.rs"

# Dependencies used only when building tests, examples, and benchmarks.
# `rand` and `tempfile` are also listed under [dependencies].
[dev-dependencies]
ctor = { workspace = true }
dirs = { workspace = true }
rand = { workspace = true }
serial_test = { workspace = true }
tempfile = { workspace = true }
tracing-test = { workspace = true }

[features]
# Pure flags: these enable no dependencies or other features by themselves.
# NOTE(review): the meaning of `strict` and `expensive_tests` is not visible
# from this manifest; confirm against the code that checks them.
strict = []
# Checked by test attributes to skip long-running, concurrency-heavy, or
# full-stack tests during smoke runs.
smoke-test = []
expensive_tests = []
# Enables the optional `pyo3` dependency.
python = ["dep:pyo3"]