diff --git a/Cargo.lock b/Cargo.lock index f7c45f1c..b3c04d75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -403,7 +403,7 @@ dependencies = [ "chrono", "lazy_static", "lru", - "merklehash", + "merklehash 0.14.5", "mockall", "prometheus", "rand 0.8.5", @@ -443,7 +443,7 @@ dependencies = [ "lazy_static", "lz4", "merkledb", - "merklehash", + "merklehash 0.14.5", "opentelemetry", "opentelemetry-http", "opentelemetry-jaeger", @@ -478,13 +478,23 @@ dependencies = [ "anyhow", "bincode", "http 1.1.0", - "merklehash", + "merklehash 0.14.5", "rand 0.8.5", "tempfile", "tracing", "xet_error", ] +[[package]] +name = "cas_types" +version = "0.1.0" +dependencies = [ + "anyhow", + "merklehash 0.14.5 (git+https://github.com/xetdata/xet-core)", + "serde", + "serde_repr", +] + [[package]] name = "cast" version = "0.3.0" @@ -892,7 +902,7 @@ dependencies = [ "lz4", "mdb_shard", "merkledb", - "merklehash", + "merklehash 0.14.5", "mockall", "mockall_double", "mockstream", @@ -2032,7 +2042,7 @@ dependencies = [ "binary-heap-plus", "clap 3.2.25", "lazy_static", - "merklehash", + "merklehash 0.14.5", "more-asserts", "rand 0.8.5", "regex", @@ -2066,7 +2076,7 @@ dependencies = [ "gearhash", "itertools 0.10.5", "lazy_static", - "merklehash", + "merklehash 0.14.5", "parutils", "rand 0.8.5", "rand_chacha", @@ -2099,6 +2109,23 @@ dependencies = [ "structopt", ] +[[package]] +name = "merklehash" +version = "0.14.5" +source = "git+https://github.com/xetdata/xet-core#469028007186fd2cbcce3cbda4b78ea8b99c5818" +dependencies = [ + "blake3", + "generic-array", + "heed", + "rand 0.8.5", + "rand_chacha", + "rand_core 0.6.4", + "safe-transmute", + "serde", + "sha3", + "structopt", +] + [[package]] name = "mime" version = "0.3.17" @@ -3462,6 +3489,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "serde_spanned" version = "0.6.7" @@ -3559,7 +3597,7 @@ dependencies = [ "lazy_static", "mdb_shard", "merkledb", - "merklehash", + "merklehash 0.14.5", "opentelemetry", "opentelemetry-http", "opentelemetry-jaeger", @@ -4554,7 +4592,7 @@ dependencies = [ "http 0.2.12", "itertools 0.10.5", "lazy_static", - "merklehash", + "merklehash 0.14.5", "parking_lot 0.11.2", "pin-project", "prost", diff --git a/Cargo.toml b/Cargo.toml index d0c9b664..cc1e11b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,9 @@ members = [ "retry_strategy", "shard_client", "utils", - "xet_error", "cas_object", + "xet_error", + "cas_object", + "cas_types", ] [profile.release] diff --git a/cas_types/Cargo.toml b/cas_types/Cargo.toml new file mode 100644 index 00000000..84ad3d6d --- /dev/null +++ b/cas_types/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "cas_types" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.86" +serde = { version = "1.0.208", features = ["derive"] } +merklehash = { git = "https://github.com/xetdata/xet-core" } +serde_repr = "0.1.19" diff --git a/cas_types/src/compression_scheme.rs b/cas_types/src/compression_scheme.rs new file mode 100644 index 00000000..5bbdd1b5 --- /dev/null +++ b/cas_types/src/compression_scheme.rs @@ -0,0 +1,93 @@ +use anyhow::anyhow; +use std::str::FromStr; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum CompressionScheme { + None, + LZ4, +} + +impl From<&CompressionScheme> for &'static str { + fn from(value: &CompressionScheme) -> Self { + match value { + CompressionScheme::None => "none", + CompressionScheme::LZ4 => "lz4", + } + } +} + +impl From for &'static str { + fn from(value: CompressionScheme) -> Self { + From::from(&value) + } +} + +impl FromStr for CompressionScheme { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "" | "none" => Ok(CompressionScheme::None), + "lz4" => Ok(CompressionScheme::LZ4), + _ => Err(anyhow!("invalid value for compression scheme: {s}")), + } + } +} + +// in the header value, we will consider +pub fn multiple_accepted_encoding_header_value(list: Vec) -> String { + let as_strs: Vec<&str> = list.iter().map(Into::into).collect(); + as_strs.join(";").to_string() +} + +#[cfg(test)] +mod tests { + use super::{multiple_accepted_encoding_header_value, CompressionScheme}; + use std::str::FromStr; + + #[test] + fn test_from_str() { + assert_eq!( + CompressionScheme::from_str("LZ4").unwrap(), + CompressionScheme::LZ4 + ); + assert_eq!( + CompressionScheme::from_str("NONE").unwrap(), + CompressionScheme::None + ); + assert_eq!( + CompressionScheme::from_str("NoNE").unwrap(), + CompressionScheme::None + ); + assert_eq!( + CompressionScheme::from_str("none").unwrap(), + CompressionScheme::None + ); + assert_eq!( + CompressionScheme::from_str("").unwrap(), + CompressionScheme::None + ); + assert!(CompressionScheme::from_str("not-scheme").is_err()); + } + + #[test] + fn test_to_str() { + assert_eq!(Into::<&str>::into(CompressionScheme::LZ4), "lz4"); + assert_eq!(Into::<&str>::into(CompressionScheme::None), "none"); + } + + #[test] + fn test_multiple_accepted_encoding_header_value() { + let multi = vec![CompressionScheme::LZ4, CompressionScheme::None]; + assert_eq!( + multiple_accepted_encoding_header_value(multi), + "lz4;none".to_string() + ); + + let singular = vec![CompressionScheme::LZ4]; + assert_eq!( + multiple_accepted_encoding_header_value(singular), + "lz4".to_string() + ); + } +} diff --git a/cas_types/src/key.rs b/cas_types/src/key.rs new file mode 100644 index 00000000..e3e6741e --- /dev/null +++ b/cas_types/src/key.rs @@ -0,0 +1,93 @@ +use std::fmt::{Display, Formatter}; + +use merklehash::MerkleHash; +use serde::{Deserialize, Serialize}; + +/// A Key indicates a prefixed merkle hash for some data stored in the CAS DB. +#[derive(Debug, PartialEq, Default, Serialize, Deserialize, Ord, PartialOrd, Eq, Hash, Clone)] +pub struct Key { + pub prefix: String, + pub hash: MerkleHash, +} + +impl Display for Key { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}/{:x}", self.prefix, self.hash) + } +} + +mod hex { + pub mod serde { + use merklehash::MerkleHash; + use serde::de::{self, Visitor}; + use serde::{Deserializer, Serializer}; + use std::fmt; + + pub fn serialize(value: &MerkleHash, serializer: S) -> Result + where + S: Serializer, + { + let hex = value.hex(); + serializer.serialize_str(&hex) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_str(HexVisitor) + } + + // Visitor for deserialization + struct HexVisitor; + + impl<'de> Visitor<'de> for HexVisitor { + type Value = MerkleHash; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a merklehash") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + MerkleHash::from_hex(v).map_err(|e| serde::de::Error::custom(e)) + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct HexMerkleHash(#[serde(with = "hex::serde")] pub MerkleHash); + +impl From for HexMerkleHash { + fn from(value: MerkleHash) -> Self { + HexMerkleHash(value) + } +} + +impl From for MerkleHash { + fn from(value: HexMerkleHash) -> Self { + value.0 + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct HexKey { + pub prefix: String, + #[serde(with = "hex::serde")] + pub hash: MerkleHash, +} + +impl From for Key { + fn from(HexKey { prefix, hash }: HexKey) -> Self { + Key { prefix, hash } + } +} + +impl From for HexKey { + fn from(Key { prefix, hash }: Key) -> Self { + HexKey { prefix, hash } + } +} diff --git a/cas_types/src/lib.rs b/cas_types/src/lib.rs new file mode 100644 index 00000000..96b6d6dd --- /dev/null +++ b/cas_types/src/lib.rs @@ -0,0 +1,51 @@ +use serde_repr::{Deserialize_repr, Serialize_repr}; + +use merklehash::MerkleHash; +use serde::{Deserialize, Serialize}; + +pub mod compression_scheme; +mod key; +pub use key::*; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UploadXorbResponse { + pub was_inserted: bool, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Range { + pub start: u64, + pub end: u64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct CASReconstructionTerm { + pub hash: HexMerkleHash, + pub unpacked_length: u64, + pub range: Range, + pub url: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct QueryReconstructionResponse { + pub reconstruction: Vec, +} + +#[derive(Debug, Serialize_repr, Deserialize_repr, Clone, Copy)] +#[repr(u8)] +pub enum UploadShardResponseType { + Exists = 0, + SyncPerformed = 1, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct UploadShardResponse { + pub result: UploadShardResponseType, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct QueryChunkResponse { + pub shard: MerkleHash, +} + +pub type Salt = [u8; 32];