Files
xet-core/xet_data/Cargo.toml
Hoyt Koepke 602d7679f6 Add cargo smoke-test for rapid full-workspace testing. (#741)
Currently, the full test validation is rather heavy, but running only
local tests often misses issues that are caught by the tests that probe
the full stack. This PR adds a smoke-test path that runs a meaningful
subset of the tests across the workspace and covers most errors. It runs
in about 1/8 of the time of `cargo test`, so it is useful for speeding
up AI model iteration.

In addition, a few intermittent failures were fixed.

There should be no runtime functionality change.

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> **Low Risk**
> Low risk since changes are limited to Cargo configuration and test
gating; no production code paths are modified. Main risk is accidentally
skipping too much coverage or misconfiguring feature flags in CI/local
workflows.
> 
> **Overview**
> Adds a new `cargo smoke-test` workflow by introducing a `smoke-test`
Cargo profile and a `cargo` alias that runs `test` with per-crate
`smoke-test` features enabled.
> 
> Defines `smoke-test` features across multiple crates and uses
`#[cfg_attr(feature = "smoke-test", ignore)]` / `#[cfg(... not(feature =
"smoke-test"))]` to skip long-running, concurrency-heavy, or full-stack
integration tests during smoke runs.
> 
> Tightens test robustness by making `SafeFileCreator` permission
assertions umask-tolerant (require owner read/write rather than an exact
`0o644`).
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
5d53009652. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

---------

Co-authored-by: Hoyt Koepke <hoytak@xethub.com>
2026-03-20 13:32:38 -07:00

88 lines
2.3 KiB
TOML

# Package metadata for the xet-data crate.
[package]
name = "xet-data"
# The next four fields are inherited from the workspace manifest rather than
# being set per crate.
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools"
# Library target of this package.
[lib]
name = "xet_data"
path = "src/lib.rs"
# Documentation tests are not run for this library target.
doctest = false
[dependencies]
# Sibling crates from this repository, located by relative path.
xet-client = { version = "1.4.0", path = "../xet_client" }
xet-core-structures = { version = "1.4.0", path = "../xet_core_structures" }
xet-runtime = { version = "1.4.0", path = "../xet_runtime" }

# Third-party crates; the dependency specification is inherited from the
# workspace manifest.
anyhow.workspace = true
async-trait.workspace = true
bytes.workspace = true
chrono.workspace = true
clap.workspace = true
gearhash.workspace = true
http.workspace = true
itertools.workspace = true
lazy_static.workspace = true
more-asserts.workspace = true
prometheus.workspace = true
rand.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
tempfile.workspace = true
thiserror.workspace = true
tokio-util.workspace = true
tracing.workspace = true
ulid.workspace = true
url.workspace = true
walkdir.workspace = true

# Optional: only compiled in when the `python` feature is enabled.
pyo3 = { version = "0.26", features = ["abi3-py37"], optional = true }
# tokio features are selected per target family: wasm builds request "rt"
# without "rt-multi-thread", while every other target requests
# "rt-multi-thread" as well.
[target.'cfg(target_family = "wasm")'.dependencies]
tokio = { workspace = true, features = ["sync", "macros", "io-util", "rt", "time"] }
[target.'cfg(not(target_family = "wasm"))'.dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "rt", "time"] }
# sha2 is built with its "asm" feature on every target except Windows, where
# the workspace-default feature set is used instead.
[target.'cfg(not(target_os = "windows"))'.dependencies]
sha2 = { workspace = true, features = ["asm"] }
[target.'cfg(target_os = "windows")'.dependencies]
sha2 = { workspace = true }
# Binary targets; each is built from the source file named in `path`.
[[bin]]
name = "x"
path = "src/processing/bin/example.rs"
[[bin]]
name = "xtool"
path = "src/processing/bin/xtool.rs"
# Example programs; run one with `cargo run --example <name>`.
[[example]]
name = "chunk"
path = "examples/chunk/main.rs"
[[example]]
name = "hash"
path = "examples/hash/main.rs"
[[example]]
name = "xorb-check"
path = "examples/xorb-check/main.rs"
[dev-dependencies]
# Used only when building tests, examples, and benchmarks. `rand` and
# `tempfile` are also listed as regular dependencies of this crate.
ctor.workspace = true
dirs.workspace = true
rand.workspace = true
serial_test.workspace = true
tempfile.workspace = true
tracing-test.workspace = true
[features]
# Flag-only feature (enables no dependencies).
# NOTE(review): presumably gates costly tests via cfg checks — confirm in the sources.
expensive_tests = []
# Pulls in the optional pyo3 dependency.
python = ["dep:pyo3"]
# Flag-only feature turned on by the `cargo smoke-test` alias; tests gated on
# it are skipped to keep smoke runs fast.
smoke-test = []
# Flag-only feature (enables no dependencies).
# NOTE(review): effect is not visible from this manifest — confirm in the sources.
strict = []