mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
This PR is a massive rearrangement of the code base into 5 packages intended for release on cargo. The directories and corresponding packages are: 1. xet_runtime/ — compiles into the xet-runtime package. Contains the runtime, config, and logging management. 2. xet_core_structures/ — compiles into the xet-core-structures package. Contains core data structures for hashing, shards, and xorbs as well as internal data structures that depend on these. 3. xet_client/ — compiles into the xet-client package, contains client code for remotely connecting to the Hugging Face servers. 4. xet_data/ — compiles into the xet-data package, contains the data processing pipeline: chunking/deduplication, file reconstruction, clean/smudge operations, and progress tracking. 5. xet_pkg/ — compiles into the hf-xet package, provides the top-level session-based API for file upload and download with user-facing error categorization. This is the primary package downstream dependencies would use. This also contains a single summary error type, XetError, that translates cleanly into Python error types. In addition, the other tools are: - git_xet/ — the git_xet CLI binary crate (location preserved). - hf_xet/ — the hf_xet Python package (location preserved). - simulation/ — the simulation crate for upload scenario benchmarking. - wasm/ — the wasm objects. The full description — and information for an AI agent to use to update downstream dependencies — is at api_changes/update_260309_package_restructure.md. Summary of moves: - xet_runtime: became xet_runtime::core inside xet_runtime/. - utils: became xet_runtime::utils inside xet_runtime/. - xet_config: became xet_runtime::config inside xet_runtime/. - xet_logging: became xet_runtime::logging inside xet_runtime/. - error_printer: became xet_runtime::error_printer inside xet_runtime/. - file_utils: became xet_runtime::file_utils inside xet_runtime/. - merklehash: became xet_core_structures::merklehash inside xet_core_structures/.
- mdb_shard: became xet_core_structures::metadata_shard inside xet_core_structures/. - xorb_object: became xet_core_structures::xorb_object inside xet_core_structures/. - cas_client: became xet_client::cas_client inside xet_client/. - hub_client: became xet_client::hub_client inside xet_client/. - cas_types: became xet_client::cas_types inside xet_client/. - chunk_cache: became xet_client::chunk_cache inside xet_client/. - data: became xet_data::processing inside xet_data/. - deduplication: became xet_data::deduplication inside xet_data/. - file_reconstruction: became xet_data::file_reconstruction inside xet_data/. - progress_tracking: became xet_data::progress_tracking inside xet_data/. - xet_session: became xet::xet_session inside xet_pkg/. - Wasm packages (hf_xet_wasm, hf_xet_thin_wasm): moved from top-level into wasm/; internal imports updated, public APIs unchanged.
85 lines, 2.2 KiB, TOML
# Manifest for the `xet-data` crate.
# version, edition, license and repository are inherited from the workspace
# root manifest rather than being pinned here.
[package]
name = "xet-data"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
description = "Data processing pipeline for chunking, deduplication, and file reconstruction; used in the Hugging Face Xet client tools"

# Library target. The package name uses a hyphen (`xet-data`); the library is
# named `xet_data` explicitly.
[lib]
name = "xet_data"
path = "src/lib.rs"
# Documentation examples are not compiled or run as tests for this library.
doctest = false

[dependencies]
# Sibling crates from this repository. Each entry carries both a local `path`
# and a `version` requirement.
xet-runtime = { version = "1.4.0", path = "../xet_runtime" }
xet-core-structures = { version = "1.4.0", path = "../xet_core_structures" }
xet-client = { version = "1.4.0", path = "../xet_client" }

# Third-party crates; versions and base features come from the workspace
# manifest. Kept in alphabetical order.
anyhow = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true }
gearhash = { workspace = true }
http = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
more-asserts = { workspace = true }
prometheus = { workspace = true }
rand = { workspace = true }
regex = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
ulid = { workspace = true }
walkdir = { workspace = true }

# tokio is declared per target family so each gets a different feature set.
# The features listed here are added on top of the workspace declaration.

# wasm targets: `rt` without the multi-threaded runtime, plus sync, macros,
# io-util and time.
[target.'cfg(target_family = "wasm")'.dependencies]
tokio = { workspace = true, features = ["sync", "macros", "io-util", "rt", "time"] }

# All other targets: additionally enable the multi-threaded runtime.
[target.'cfg(not(target_family = "wasm"))'.dependencies]
tokio = { workspace = true, features = ["rt-multi-thread", "rt", "time"] }

# sha2 is declared per operating system: the `asm` feature is requested on
# every target except Windows.
# NOTE(review): presumably the assembly backend does not build with the
# Windows toolchain; confirm before changing this split.
[target.'cfg(not(target_os = "windows"))'.dependencies]
sha2 = { workspace = true, features = ["asm"] }

[target.'cfg(target_os = "windows")'.dependencies]
sha2 = { workspace = true }

# Binary targets. Both sources live under src/processing/bin/.

# Installed as `x`; built from example.rs.
[[bin]]
name = "x"
path = "src/processing/bin/example.rs"

[[bin]]
name = "xtool"
path = "src/processing/bin/xtool.rs"

# Example programs. Each one is a directory under examples/ with its own
# main.rs, so the paths are spelled out explicitly.
[[example]]
name = "chunk"
path = "examples/chunk/main.rs"

[[example]]
name = "hash"
path = "examples/hash/main.rs"

[[example]]
name = "xorb-check"
path = "examples/xorb-check/main.rs"

# Crates needed only for tests, examples and benchmarks.
# NOTE(review): rand, tempfile and ulid are also listed under [dependencies]
# with the same `workspace = true` spec, so these three entries look
# redundant; confirm before removing them.
[dev-dependencies]
ctor = { workspace = true }
dirs = { workspace = true }
rand = { workspace = true }
serial_test = { workspace = true }
tempfile = { workspace = true }
tracing-test = { workspace = true }
ulid = { workspace = true }

# Both features are empty: they enable no other features and no optional
# dependencies, so they act only as `cfg(feature = "...")` switches. What each
# one gates is decided in the crate's source code.
[features]
strict = []
expensive_tests = []