mirror of
https://github.com/huggingface/xet-core.git
synced 2026-06-04 13:30:29 +08:00
add cas_types (#8)
This commit is contained in:
54
Cargo.lock
generated
54
Cargo.lock
generated
@@ -403,7 +403,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"lazy_static",
|
||||
"lru",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"mockall",
|
||||
"prometheus",
|
||||
"rand 0.8.5",
|
||||
@@ -443,7 +443,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"lz4",
|
||||
"merkledb",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"opentelemetry",
|
||||
"opentelemetry-http",
|
||||
"opentelemetry-jaeger",
|
||||
@@ -478,13 +478,23 @@ dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"http 1.1.0",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"rand 0.8.5",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
"xet_error",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cas_types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"merklehash 0.14.5 (git+https://github.com/xetdata/xet-core)",
|
||||
"serde",
|
||||
"serde_repr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
@@ -892,7 +902,7 @@ dependencies = [
|
||||
"lz4",
|
||||
"mdb_shard",
|
||||
"merkledb",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"mockall",
|
||||
"mockall_double",
|
||||
"mockstream",
|
||||
@@ -2032,7 +2042,7 @@ dependencies = [
|
||||
"binary-heap-plus",
|
||||
"clap 3.2.25",
|
||||
"lazy_static",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"more-asserts",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -2066,7 +2076,7 @@ dependencies = [
|
||||
"gearhash",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"parutils",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha",
|
||||
@@ -2099,6 +2109,23 @@ dependencies = [
|
||||
"structopt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "merklehash"
|
||||
version = "0.14.5"
|
||||
source = "git+https://github.com/xetdata/xet-core#469028007186fd2cbcce3cbda4b78ea8b99c5818"
|
||||
dependencies = [
|
||||
"blake3",
|
||||
"generic-array",
|
||||
"heed",
|
||||
"rand 0.8.5",
|
||||
"rand_chacha",
|
||||
"rand_core 0.6.4",
|
||||
"safe-transmute",
|
||||
"serde",
|
||||
"sha3",
|
||||
"structopt",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
@@ -3462,6 +3489,17 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_repr"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.77",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.7"
|
||||
@@ -3559,7 +3597,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"mdb_shard",
|
||||
"merkledb",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"opentelemetry",
|
||||
"opentelemetry-http",
|
||||
"opentelemetry-jaeger",
|
||||
@@ -4554,7 +4592,7 @@ dependencies = [
|
||||
"http 0.2.12",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"merklehash",
|
||||
"merklehash 0.14.5",
|
||||
"parking_lot 0.11.2",
|
||||
"pin-project",
|
||||
"prost",
|
||||
|
||||
@@ -16,7 +16,9 @@ members = [
|
||||
"retry_strategy",
|
||||
"shard_client",
|
||||
"utils",
|
||||
"xet_error", "cas_object",
|
||||
"xet_error",
|
||||
"cas_object",
|
||||
"cas_types",
|
||||
]
|
||||
|
||||
[profile.release]
|
||||
|
||||
10
cas_types/Cargo.toml
Normal file
10
cas_types/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "cas_types"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.86"
|
||||
serde = { version = "1.0.208", features = ["derive"] }
|
||||
merklehash = { git = "https://github.com/xetdata/xet-core" }
|
||||
serde_repr = "0.1.19"
|
||||
93
cas_types/src/compression_scheme.rs
Normal file
93
cas_types/src/compression_scheme.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
use anyhow::anyhow;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum CompressionScheme {
|
||||
None,
|
||||
LZ4,
|
||||
}
|
||||
|
||||
impl From<&CompressionScheme> for &'static str {
|
||||
fn from(value: &CompressionScheme) -> Self {
|
||||
match value {
|
||||
CompressionScheme::None => "none",
|
||||
CompressionScheme::LZ4 => "lz4",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CompressionScheme> for &'static str {
|
||||
fn from(value: CompressionScheme) -> Self {
|
||||
From::from(&value)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for CompressionScheme {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s.to_lowercase().as_str() {
|
||||
"" | "none" => Ok(CompressionScheme::None),
|
||||
"lz4" => Ok(CompressionScheme::LZ4),
|
||||
_ => Err(anyhow!("invalid value for compression scheme: {s}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// in the header value, we will consider
|
||||
pub fn multiple_accepted_encoding_header_value(list: Vec<CompressionScheme>) -> String {
|
||||
let as_strs: Vec<&str> = list.iter().map(Into::into).collect();
|
||||
as_strs.join(";").to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{multiple_accepted_encoding_header_value, CompressionScheme};
|
||||
use std::str::FromStr;
|
||||
|
||||
#[test]
|
||||
fn test_from_str() {
|
||||
assert_eq!(
|
||||
CompressionScheme::from_str("LZ4").unwrap(),
|
||||
CompressionScheme::LZ4
|
||||
);
|
||||
assert_eq!(
|
||||
CompressionScheme::from_str("NONE").unwrap(),
|
||||
CompressionScheme::None
|
||||
);
|
||||
assert_eq!(
|
||||
CompressionScheme::from_str("NoNE").unwrap(),
|
||||
CompressionScheme::None
|
||||
);
|
||||
assert_eq!(
|
||||
CompressionScheme::from_str("none").unwrap(),
|
||||
CompressionScheme::None
|
||||
);
|
||||
assert_eq!(
|
||||
CompressionScheme::from_str("").unwrap(),
|
||||
CompressionScheme::None
|
||||
);
|
||||
assert!(CompressionScheme::from_str("not-scheme").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_str() {
|
||||
assert_eq!(Into::<&str>::into(CompressionScheme::LZ4), "lz4");
|
||||
assert_eq!(Into::<&str>::into(CompressionScheme::None), "none");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_accepted_encoding_header_value() {
|
||||
let multi = vec![CompressionScheme::LZ4, CompressionScheme::None];
|
||||
assert_eq!(
|
||||
multiple_accepted_encoding_header_value(multi),
|
||||
"lz4;none".to_string()
|
||||
);
|
||||
|
||||
let singular = vec![CompressionScheme::LZ4];
|
||||
assert_eq!(
|
||||
multiple_accepted_encoding_header_value(singular),
|
||||
"lz4".to_string()
|
||||
);
|
||||
}
|
||||
}
|
||||
93
cas_types/src/key.rs
Normal file
93
cas_types/src/key.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use merklehash::MerkleHash;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A Key indicates a prefixed merkle hash for some data stored in the CAS DB.
|
||||
#[derive(Debug, PartialEq, Default, Serialize, Deserialize, Ord, PartialOrd, Eq, Hash, Clone)]
|
||||
pub struct Key {
|
||||
pub prefix: String,
|
||||
pub hash: MerkleHash,
|
||||
}
|
||||
|
||||
impl Display for Key {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}/{:x}", self.prefix, self.hash)
|
||||
}
|
||||
}
|
||||
|
||||
mod hex {
|
||||
pub mod serde {
|
||||
use merklehash::MerkleHash;
|
||||
use serde::de::{self, Visitor};
|
||||
use serde::{Deserializer, Serializer};
|
||||
use std::fmt;
|
||||
|
||||
pub fn serialize<S>(value: &MerkleHash, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
let hex = value.hex();
|
||||
serializer.serialize_str(&hex)
|
||||
}
|
||||
|
||||
pub fn deserialize<'de, D>(deserializer: D) -> Result<MerkleHash, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_str(HexVisitor)
|
||||
}
|
||||
|
||||
// Visitor for deserialization
|
||||
struct HexVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for HexVisitor {
|
||||
type Value = MerkleHash;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("a merklehash")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
MerkleHash::from_hex(v).map_err(|e| serde::de::Error::custom(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct HexMerkleHash(#[serde(with = "hex::serde")] pub MerkleHash);
|
||||
|
||||
impl From<MerkleHash> for HexMerkleHash {
|
||||
fn from(value: MerkleHash) -> Self {
|
||||
HexMerkleHash(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<HexMerkleHash> for MerkleHash {
|
||||
fn from(value: HexMerkleHash) -> Self {
|
||||
value.0
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct HexKey {
|
||||
pub prefix: String,
|
||||
#[serde(with = "hex::serde")]
|
||||
pub hash: MerkleHash,
|
||||
}
|
||||
|
||||
impl From<HexKey> for Key {
|
||||
fn from(HexKey { prefix, hash }: HexKey) -> Self {
|
||||
Key { prefix, hash }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Key> for HexKey {
|
||||
fn from(Key { prefix, hash }: Key) -> Self {
|
||||
HexKey { prefix, hash }
|
||||
}
|
||||
}
|
||||
51
cas_types/src/lib.rs
Normal file
51
cas_types/src/lib.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
use serde_repr::{Deserialize_repr, Serialize_repr};
|
||||
|
||||
use merklehash::MerkleHash;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod compression_scheme;
|
||||
mod key;
|
||||
pub use key::*;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct UploadXorbResponse {
|
||||
pub was_inserted: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct Range {
|
||||
pub start: u64,
|
||||
pub end: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct CASReconstructionTerm {
|
||||
pub hash: HexMerkleHash,
|
||||
pub unpacked_length: u64,
|
||||
pub range: Range,
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct QueryReconstructionResponse {
|
||||
pub reconstruction: Vec<CASReconstructionTerm>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize_repr, Deserialize_repr, Clone, Copy)]
|
||||
#[repr(u8)]
|
||||
pub enum UploadShardResponseType {
|
||||
Exists = 0,
|
||||
SyncPerformed = 1,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct UploadShardResponse {
|
||||
pub result: UploadShardResponseType,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct QueryChunkResponse {
|
||||
pub shard: MerkleHash,
|
||||
}
|
||||
|
||||
pub type Salt = [u8; 32];
|
||||
Reference in New Issue
Block a user