# Mirror of https://github.com/huggingface/xet-core.git (synced 2026-06-04 13:30:29 +08:00).
# This PR adds crates.io-facing metadata (homepage, readme, keywords, categories) for the publishable crates, along with crate README files and concise crate-level docs so crates.io and docs.rs pages have better context.
# File: 104 lines, 2.7 KiB, TOML.
# Manifest for the `xet-data` crate. Version, edition, homepage, license and
# repository are inherited from the workspace; the remaining fields are
# crate-specific metadata.
[package]
name = "xet-data"
version = { workspace = true }
categories = ["artificial-intelligence", "data-structures", "filesystem"]
edition = { workspace = true }
homepage = { workspace = true }
keywords = ["huggingface"]
license = { workspace = true }
readme = "README.md"
repository = { workspace = true }
description = "Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools. Intended to be used through the API in the hf-xet package."

# Library target: exposed as `xet_data`, rooted at src/lib.rs, with doctests
# turned off.
[lib]
name = "xet_data"
doctest = false
path = "src/lib.rs"

[dependencies]
# Local crates referenced by relative path; the explicit version is what Cargo
# uses once the crate is published.
xet-client = { version = "1.4.0", path = "../xet_client" }
xet-core-structures = { version = "1.4.0", path = "../xet_core_structures" }
xet-runtime = { version = "1.4.0", path = "../xet_runtime" }

# Third-party crates whose versions are pinned at the workspace level.
anyhow.workspace = true
async-trait.workspace = true
bytes.workspace = true
chrono.workspace = true
clap.workspace = true
gearhash.workspace = true
http.workspace = true
itertools.workspace = true
lazy_static.workspace = true
more-asserts.workspace = true
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
tempfile.workspace = true
thiserror.workspace = true
tokio-util.workspace = true
tracing.workspace = true
ulid.workspace = true
url.workspace = true
walkdir.workspace = true

# Optional; only pulled in when the `python` feature is enabled.
pyo3 = { version = "0.26", optional = true, features = ["abi3-py37"] }

# tokio feature set used when compiling for the wasm target family.
[target.'cfg(target_family = "wasm")'.dependencies]
tokio.workspace = true
tokio.features = ["sync", "macros", "io-util", "rt", "time"]

# tokio feature set for every non-wasm target; adds the multi-threaded runtime.
[target.'cfg(not(target_family = "wasm"))'.dependencies]
tokio.workspace = true
tokio.features = ["rt-multi-thread", "rt", "time"]

# Everywhere except Windows, sha2 is built with its `asm` feature.
[target.'cfg(not(target_os = "windows"))'.dependencies]
sha2.workspace = true
sha2.features = ["asm"]

# On Windows, sha2 is used without any extra features.
[target.'cfg(target_os = "windows")'.dependencies]
sha2.workspace = true

# Binary `x`, built from the example entry point under src/processing/bin.
[[bin]]
path = "src/processing/bin/example.rs"
name = "x"

# Binary `xtool`.
[[bin]]
path = "src/processing/bin/xtool.rs"
name = "xtool"

# Example `chunk`.
[[example]]
path = "examples/chunk/main.rs"
name = "chunk"

# Example `hash`.
[[example]]
path = "examples/hash/main.rs"
name = "hash"

# Example `xorb-check`.
[[example]]
path = "examples/xorb-check/main.rs"
name = "xorb-check"

# Crates needed only for tests, examples and benchmarks.
[dev-dependencies]
# Pinned locally rather than through the workspace.
criterion = { version = "0.4", features = ["async_tokio"] }

ctor.workspace = true
dirs.workspace = true
regex.workspace = true
serial_test.workspace = true
tracing-test.workspace = true

# NOTE(review): `rand` and `tempfile` are also listed under [dependencies]
# with the same workspace spec, so these two entries look redundant — confirm
# before removing.
rand.workspace = true
tempfile.workspace = true

# Benchmark target. `harness = false` disables the built-in libtest harness,
# so the benchmark must supply its own `main` (presumably via Criterion, which
# is listed in dev-dependencies — confirm against the bench source).
[[bench]]
harness = false
name = "reconstruction_bench"

[features]
# Flags that enable no dependencies or downstream features by themselves.
expensive_tests = []
smoke-test = []
strict = []

# Forwarded to the same-named feature of the listed local crates.
fd-track = ["xet-runtime/fd-track", "xet-client/fd-track"]
simulation = ["xet-client/simulation", "xet-core-structures/simulation"]

# Turns on the optional `pyo3` dependency.
python = ["dep:pyo3"]